Version 2.1.1
How can I get an error_url back in the stream_load response?
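For reference, here is a minimal sketch of running a stream load by hand against the same BE endpoint that appears in the log below (the credentials, label, and `data.csv` file are placeholders, not from the original job). As far as I know, Doris only adds an `ErrorURL` field to this response JSON when some rows are filtered (`NumberFilteredRows > 0`), so inspecting the raw response shows whether there is any error URL to return at all:

```scala
import java.net.URI
import java.net.http.{HttpClient, HttpRequest, HttpResponse}
import java.nio.file.Path
import java.util.Base64

object StreamLoadCheck {
  def main(args: Array[String]): Unit = {
    // Placeholder credentials ("root" with empty password) and input file.
    // Posting straight to the BE http port (8040) avoids the FE 307 redirect.
    val url  = "http://192.168.30.155:8040/api/imp_db/imp_base_target/_stream_load"
    val auth = Base64.getEncoder.encodeToString("root:".getBytes)

    val request = HttpRequest.newBuilder(URI.create(url))
      .header("Authorization", s"Basic $auth")
      .header("label", s"manual_check_${System.currentTimeMillis}")
      .header("column_separator", ",")
      .PUT(HttpRequest.BodyPublishers.ofFile(Path.of("data.csv")))
      .build()

    val response = HttpClient.newHttpClient()
      .send(request, HttpResponse.BodyHandlers.ofString())

    // When rows are filtered, the JSON body should contain an "ErrorURL"
    // field; fetching that URL returns the per-row error details from the BE.
    println(response.body)
  }
}
```

In the responses below, `NumberFilteredRows` is 0 and the loads were cancelled with `[INTERNAL_ERROR]`, which may be why no `ErrorURL` shows up in the exception message.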
Error log:
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 7.0 failed 4 times, most recent failure: Lost task 1.3 in stage 7.0 (TID 156, emr-worker-2, executor 1): java.io.IOException: Failed to load batch data on BE: http://192.168.30.155:8040/api/imp_db/imp_base_target/_stream_load? node and exceeded the max 1 retry times.
at org.apache.doris.spark.writer.DorisWriter.org$apache$doris$spark$writer$DorisWriter$$flush$1(DorisWriter.scala:73)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$write$2$$anonfun$apply$2.apply(DorisWriter.scala:60)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$write$2$$anonfun$apply$2.apply(DorisWriter.scala:60)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$write$2.apply(DorisWriter.scala:60)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$write$2.apply(DorisWriter.scala:57)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:980)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:980)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.doris.spark.exception.StreamLoadException: stream load error: status: 200, resp msg: OK, resp content: {
"TxnId": 4711720,
"Label": "spark_streamload_20240712_102948_f3b046dc23c044b4a528a6a7b2ea0ad5",
"Comment": "",
"TwoPhaseCommit": "false",
"Status": "Fail",
"Message": "[INTERNAL_ERROR]cancelled: closed",
"NumberTotalRows": 4064,
"NumberLoadedRows": 4064,
"NumberFilteredRows": 0,
"NumberUnselectedRows": 0,
"LoadBytes": 12647960,
"LoadTimeMs": 5348,
"BeginTxnTimeMs": 0,
"StreamLoadPutTimeMs": 7,
"ReadDataTimeMs": 160,
"WriteDataTimeMs": 3276,
"CommitAndPublishTimeMs": 0
}
at org.apache.doris.spark.load.DorisStreamLoad.load(DorisStreamLoad.java:216)
at org.apache.doris.spark.load.DorisStreamLoad.loadV2(DorisStreamLoad.java:174)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$1.apply$mcV$sp(DorisWriter.scala:69)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$1.apply(DorisWriter.scala:69)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$1.apply(DorisWriter.scala:69)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.doris.spark.sql.Utils$.retry(Utils.scala:170)
at org.apache.doris.spark.writer.DorisWriter.org$apache$doris$spark$writer$DorisWriter$$flush$1(DorisWriter.scala:68)
... 18 more
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 7.0 failed 4 times, most recent failure: Lost task 2.3 in stage 7.0 (TID 160, emr-worker-2, executor 2): java.io.IOException: Failed to load batch data on BE: http://192.168.30.254:8040/api/imp_db/imp_base_target/_stream_load? node and exceeded the max 1 retry times.
at org.apache.doris.spark.writer.DorisWriter.org$apache$doris$spark$writer$DorisWriter$$flush$1(DorisWriter.scala:73)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$write$2$$anonfun$apply$2.apply(DorisWriter.scala:60)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$write$2$$anonfun$apply$2.apply(DorisWriter.scala:60)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$write$2.apply(DorisWriter.scala:60)
at org.apache.doris.spark.writer.DorisWriter$$anonfun$write$2.apply(DorisWriter.scala:57)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:980)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:980)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.doris.spark.exception.StreamLoadException: stream load error: status: 200, resp msg: OK, resp content: {
"TxnId": 4711584,
"Label": "spark_streamload_20240712_094136_5ce42aa44e4444d785344fd72707def3",
"Comment": "",
"TwoPhaseCommit": "false",
"Status": "Fail",
"Message": "[INTERNAL_ERROR]cancelled: [CANCELLED][END_OF_FILE]Encountered unqualified data, stop processing",
"NumberTotalRows": 4064,
"NumberLoadedRows": 4064,
"NumberFilteredRows": 0,
"NumberUnselectedRows": 0,
"LoadBytes": 12643936,
"LoadTimeMs": 11414,
"BeginTxnTimeMs": 0,
"StreamLoadPutTimeMs": 9,
"ReadDataTimeMs": 140,
"WriteDataTimeMs": 7576,
"CommitAndPublishTimeMs": 0
}