通过 Java 程序调用 Stream Load 导入数据，当单批次数据量较大时，可能会报错 Broken pipe

Viewed 35

版本:doris-3.0.1
2pc:开启

写入约 4000 万条数据时就会报错，报错内容如下：

2024-09-29 14:52:58,352 INFO  org.apache.http.impl.execchain.RetryExec - I/O exception (java.net.SocketException) caught when processing request to {}->http://10.xxx.xxx.xxx:8040: Broken pipe (Write failed)
2024-09-29 14:53:01,267 ERROR org.apache.seatunnel.connectors.doris.sink.writer.DorisStreamLoad - loadFailed
java.util.concurrent.ExecutionException: org.apache.http.client.ClientProtocolException
at java.util.concurrent.FutureTask.report(FutureTask.java:122) ~[?:?]
at java.util.concurrent.FutureTask.get(FutureTask.java:191) ~[?:?]
at org.apache.seatunnel.connectors.doris.sink.writer.DorisStreamLoad.getLoadFailedMsg(DorisStreamLoad.java:210) ~[?:?]
at org.apache.seatunnel.connectors.doris.sink.writer.DorisSinkWriter.checkDone(DorisSinkWriter.java:195) ~[?:?]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:305) ~[?:?]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:305) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: org.apache.http.client.ClientProtocolException
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:187) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.seatunnel.connectors.doris.sink.writer.DorisStreamLoad.lambda$startStreamLoad$0(DorisStreamLoad.java:282) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
... 3 more
Caused by: org.apache.http.client.NonRepeatableRequestException: Cannot retry request with a non-repeatable request entity
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:108) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.seatunnel.connectors.doris.sink.writer.DorisStreamLoad.lambda$startStreamLoad$0(DorisStreamLoad.java:282) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
... 3 more
Caused by: java.net.SocketException: Broken pipe (Write failed)
at java.net.SocketOutputStream.socketWrite0(Native Method) ~[?:?]
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:110) ~[?:?]
at java.net.SocketOutputStream.write(SocketOutputStream.java:150) ~[?:?]
at org.apache.http.impl.io.SessionOutputBufferImpl.streamWrite(SessionOutputBufferImpl.java:126) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.io.SessionOutputBufferImpl.flushBuffer(SessionOutputBufferImpl.java:138) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.io.SessionOutputBufferImpl.write(SessionOutputBufferImpl.java:169) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.io.ChunkedOutputStream.flushCacheWithAppend(ChunkedOutputStream.java:124) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.io.ChunkedOutputStream.write(ChunkedOutputStream.java:181) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.entity.InputStreamEntity.writeTo(InputStreamEntity.java:136) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.execchain.RequestEntityProxy.writeTo(RequestEntityProxy.java:121) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.DefaultBHttpClientConnection.sendRequestEntity(DefaultBHttpClientConnection.java:158) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.conn.CPoolProxy.sendRequestEntity(CPoolProxy.java:152) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.protocol.HttpRequestExecutor.doSendRequest(HttpRequestExecutor.java:237) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:122) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:108) ~[seatunnel-hadoop3-3.1.4-uber-2.3.4-geega1.0.0-java11-SNAPSHOT-optional.jar:2.3.4-geega1.0.0-java11-SNAPSHOT]
at org.apache.seatunnel.connectors.doris.sink.writer.DorisStreamLoad.lambda$startStreamLoad$0(DorisStreamLoad.java:282) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
0 Answers