- 相关版本
spark 版本: 3.3
spark-doris-connector 版本: 3.3_2.12-1.3.2
- 问题描述
通过 sparksql create view 方式创建 sink 表,再
insert into view_table select * from hive.table
写入数据,doris 中该 label 相关日志仅有一条,且没有 ERROR 级别日志。另外,我使用 24.0.0 版本的 spark-doris-connector 写入 doris 非常慢,不清楚是什么原因。
2025-01-23 16:46:58,038 INFO (thrift-server-pool-204|977) [DatabaseTransactionMgr.abortTransaction():1652] abort transaction: TransactionState. transaction id: 13298901, label: spark_streamload_2025012
3_164656_aa5666eb078a4d7a99c57629fadc2ed7, db id: 36351, table id list: 72097280, callback id: -1, coordinator: BE: 10.0.4.17, transaction status: ABORTED, error replicas num: 0, replica ids: , prepare
time: 1737622016396, commit time: -1, finish time: 1737622018037, reason: [INTERNAL_ERROR]cancelled: closed successfully
spark 报错如下
Caused by: org.apache.doris.spark.exception.StreamLoadException: stream load error, load status:Fail, response:StreamLoadResponse(200,OK,{
"TxnId": 13298901,
"Label": "spark_streamload_20250123_164656_aa5666eb078a4d7a99c57629fadc2ed7",
"Comment": "",
"TwoPhaseCommit": "false",
"Status": "Fail",
"Message": "[INTERNAL_ERROR]cancelled: closed",
"NumberTotalRows": 0,
"NumberLoadedRows": 0,
"NumberFilteredRows": 0,
"NumberUnselectedRows": 0,
"LoadBytes": 46959464,
"LoadTimeMs": 1641,
"BeginTxnTimeMs": 0,
"StreamLoadPutTimeMs": 7,
"ReadDataTimeMs": 167,
"WriteDataTimeMs": 0,
"ReceiveDataTimeMs": 1338,
"CommitAndPublishTimeMs": 0
}
)
at org.apache.doris.spark.load.StreamLoader.handleStreamLoadResponse(StreamLoader.scala:482)
at org.apache.doris.spark.load.StreamLoader.$anonfun$load$1(StreamLoader.scala:102)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.doris.spark.load.StreamLoader.load(StreamLoader.scala:99)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$write$1(DorisWriter.scala:78)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$4(DorisWriter.scala:98)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.doris.spark.sql.Utils$.retry(Utils.scala:182)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$3(DorisWriter.scala:97)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$1(DorisWriter.scala:98)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$1$adapted(DorisWriter.scala:94)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:1011)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:1011)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2268)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:136)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)