Code:

import sys

from pyspark.sql import SparkSession

if __name__ == "__main__":
    # Create the Spark session (Hive support is needed to read the Hive table)
    spark = (SparkSession.builder.appName("test")
             .enableHiveSupport().getOrCreate())

    # Partition date, passed as the first command-line argument
    DT = sys.argv[1]

    # Sanity check: count the rows in the partition
    exec_sql_num = '''
    select count(*) from hungry_studio.dwd_block_blast_ios_white_event_unique_data_hi where dt = '{DT}'
    '''.format(DT=DT)
    df = spark.sql(exec_sql_num)
    df.show(1)

    # Read the full partition
    exec_sql = '''
    select * from hungry_studio.dwd_block_blast_ios_white_event_unique_data_hi where dt = '{DT}'
    '''.format(DT=DT)
    df = spark.sql(exec_sql)
    df.show(10)

    # Write the DataFrame to Doris through the doris-spark-connector
    (df.write.format("doris")
     .option("doris.table.identifier", "test.dwd_block_blast_ios_white_event_unique_data_hi")
     .option("doris.fenodes", "172.31.39.89:8030")
     .option("doris.query.port", "8030")
     .option("user", "root")
     .option("password", "123456")
     .option("doris.database", "test")
     .mode("append")
     .save())

    # Stop the Spark session
    spark.stop()
Error message:
File "block_blast_ios_block_action_block_di_for_doris.py", line 40, in <module>
.mode("append")
File "/mnt4/yarn/usercache/hadoop/appcache/application_1730687298271_22556/container_1730687298271_22556_02_000001/pyspark.zip/pyspark/sql/readwriter.py", line 1396, in save
File "/mnt4/yarn/usercache/hadoop/appcache/application_1730687298271_22556/container_1730687298271_22556_02_000001/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1323, in __call__
File "/mnt4/yarn/usercache/hadoop/appcache/application_1730687298271_22556/container_1730687298271_22556_02_000001/pyspark.zip/pyspark/errors/exceptions/captured.py", line 169, in deco
File "/mnt4/yarn/usercache/hadoop/appcache/application_1730687298271_22556/container_1730687298271_22556_02_000001/py4j-0.10.9.7-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o101.save.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 108 in stage 5.0 failed 4 times, most recent failure: Lost task 108.3 in stage 5.0 (TID 23125) (ip-10-0-1-167.us-east-2.compute.internal executor 75): org.apache.doris.spark.exception.StreamLoadException: failed to load data on http://172.31.39.89:8030/api/test/dwd_block_blast_ios_white_event_unique_data_hi/_stream_load
at org.apache.doris.spark.load.StreamLoader.load(StreamLoader.scala:110)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$write$1(DorisWriter.scala:78)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$4(DorisWriter.scala:98)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.doris.spark.sql.Utils$.retry(Utils.scala:182)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$3(DorisWriter.scala:97)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$1(DorisWriter.scala:98)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$1$adapted(DorisWriter.scala:94)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:1009)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:1009)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2311)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:563)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1541)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:566)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at org.apache.doris.shaded.org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
at org.apache.doris.shaded.org.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:153)
at org.apache.doris.shaded.org.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:280)
at org.apache.doris.shaded.org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:138)
at org.apache.doris.shaded.org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:56)
at org.apache.doris.shaded.org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259)
at org.apache.doris.shaded.org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163)
at org.apache.doris.shaded.org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:157)
at org.apache.doris.shaded.org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273)
at org.apache.doris.shaded.org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
at org.apache.doris.shaded.org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272)
at org.apache.doris.shaded.org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
at org.apache.doris.shaded.org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89)
at org.apache.doris.shaded.org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.doris.shaded.org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
at org.apache.doris.shaded.org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at org.apache.doris.shaded.org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108)
at org.apache.doris.spark.load.StreamLoader.$anonfun$load$1(StreamLoader.scala:101)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.doris.spark.load.StreamLoader.load(StreamLoader.scala:99)
... 19 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2974)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2910)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2909)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2909)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1263)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1263)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1263)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3173)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3112)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3101)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1028)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2271)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2292)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2311)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2336)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$1(RDD.scala:1009)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:405)
at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:1007)
at org.apache.doris.spark.writer.DorisWriter.doWrite(DorisWriter.scala:94)
at org.apache.doris.spark.writer.DorisWriter.write(DorisWriter.scala:78)
at org.apache.doris.spark.sql.DorisSourceProvider.createRelation(DorisSourceProvider.scala:75)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:47)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:104)
at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:250)
at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:123)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$9(SQLExecution.scala:160)
at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:250)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$8(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:271)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:159)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:69)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:101)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:97)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:554)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:107)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:554)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:530)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:97)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:84)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:82)
at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:856)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:387)
at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:360)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:247)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.lang.Thread.run(Thread.java:750)
Caused by: org.apache.doris.spark.exception.StreamLoadException: failed to load data on http://172.31.39.89:8030/api/test/dwd_block_blast_ios_white_event_unique_data_hi/_stream_load
at org.apache.doris.spark.load.StreamLoader.load(StreamLoader.scala:110)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$write$1(DorisWriter.scala:78)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$4(DorisWriter.scala:98)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.doris.spark.sql.Utils$.retry(Utils.scala:182)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$3(DorisWriter.scala:97)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$1(DorisWriter.scala:98)
at org.apache.doris.spark.writer.DorisWriter.$anonfun$doWrite$1$adapted(DorisWriter.scala:94)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:1009)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:1009)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2311)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:563)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1541)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:566)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at org.apache.doris.shaded.org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
at org.apache.doris.shaded.org.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:153)
at org.apache.doris.shaded.org.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:280)
at org.apache.doris.shaded.org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:138)
at org.apache.doris.shaded.org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:56)
at org.apache.doris.shaded.org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259)
at org.apache.doris.shaded.org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163)
at org.apache.doris.shaded.org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:157)
at org.apache.doris.shaded.org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273)
at org.apache.doris.shaded.org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
at org.apache.doris.shaded.org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272)
at org.apache.doris.shaded.org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
at org.apache.doris.shaded.org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89)
at org.apache.doris.shaded.org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.doris.shaded.org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
at org.apache.doris.shaded.org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at org.apache.doris.shaded.org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108)
at org.apache.doris.spark.load.StreamLoader.$anonfun$load$1(StreamLoader.scala:101)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.doris.spark.load.StreamLoader.load(StreamLoader.scala:99)
... 19 more
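
Given the cause chain above — each write task issues its own stream-load HTTP request, and the failing task's request to the FE at 172.31.39.89:8030 got no response before the connector's read timeout expired — two knobs usually matter: how many tasks hit the FE concurrently, and how long each stream-load call is allowed to take. Below is a minimal sketch of a more defensive version of the same write, not a verified fix: it assumes the doris-spark-connector version in use supports the doris.request.*.timeout.ms and doris.sink.* tuning options (option names differ between connector versions, so verify against the docs for your version), and the repartition count of 20 is only an illustrative value.

# A minimal sketch under the assumptions above: lower the write concurrency
# and give each stream-load HTTP call more headroom before "Read timed out".
(df.repartition(20)  # fewer concurrent stream-load requests against the FE
 .write.format("doris")
 .option("doris.table.identifier", "test.dwd_block_blast_ios_white_event_unique_data_hi")
 .option("doris.fenodes", "172.31.39.89:8030")
 .option("user", "root")
 .option("password", "123456")
 # allow more time for the FE/BE to answer each stream-load request
 .option("doris.request.connect.timeout.ms", "60000")
 .option("doris.request.read.timeout.ms", "300000")
 # smaller batches per stream load finish faster on the Doris side
 .option("doris.sink.batch.size", "50000")
 # retry a failed batch a few times before failing the whole task
 .option("doris.sink.max-retries", "3")
 .mode("append")
 .save())

If the timeout persists even with small batches and low concurrency, the bottleneck is likely on the Doris side (the FE is reachable on 8030 but slow to answer, or the BEs are overloaded), which no connector option can fix.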