【已记录】Doris 1.2.7.1 版本在欧拉系统上总是报 RPC 超时,如何解决?

Viewed 116

欧拉系统版本:

Operating System: openEuler 22.03 (LTS-SP1)
Kernel: Linux 5.10.0-136.12.0.86.oe2203sp1.x86_64
Architecture: x86-64

日志:

2024-04-23 10:58:12,304 WARN (mysql-nio-pool-15|312) [ConnectProcessor.processOnce():693] Null packet received from network. remote: 172.20.195.223:55229
2024-04-23 10:58:12,305 WARN (mysql-nio-pool-15|312) [ReadListener.lambda$handleEvent$0():60] Exception happened in one session(org.apache.doris.qe.ConnectContext@3d74cd4a).
java.io.IOException: Error happened when receiving packet.
at org.apache.doris.qe.ConnectProcessor.processOnce(ConnectProcessor.java:694) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.mysql.ReadListener.lambda$handleEvent$0(ReadListener.java:52) ~[doris-fe.jar:1.2-SNAPSHOT]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_352]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_352]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_352]
2024-04-23 10:58:17,486 WARN (mysql-nio-pool-17|399) [StmtExecutor.execute():593] execute Exception. stmt[302, 74221f5faca041d2-9effcb6940fe3ef5]
org.apache.doris.rpc.RpcException: timeout when waiting for send fragments RPC. Wait(sec): 5, host: 172.19.1.116
at org.apache.doris.qe.Coordinator.waitRpc(Coordinator.java:750) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.Coordinator.sendFragment(Coordinator.java:678) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.Coordinator.exec(Coordinator.java:553) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.sendResult(StmtExecutor.java:1151) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.handleQueryStmt(StmtExecutor.java:1131) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:522) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:409) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.ConnectProcessor.handleQuery(ConnectProcessor.java:333) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.ConnectProcessor.dispatch(ConnectProcessor.java:476) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.ConnectProcessor.processOnce(ConnectProcessor.java:703) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.mysql.ReadListener.lambda$handleEvent$0(ReadListener.java:52) ~[doris-fe.jar:1.2-SNAPSHOT]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_352]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_352]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_352]
Caused by: java.util.concurrent.TimeoutException: Waited 5000 milliseconds (plus 98175 nanoseconds delay) for io.grpc.stub.ClientCalls$GrpcFuture@4311e0d6[status=PENDING, info=[GrpcFuture{clientCall={delegate=ClientCallImpl{method=MethodDescriptor{fullMethodName=doris.PBackendService/exec_plan_fragment, type=UNARY, idempotent=false, safe=false, sampledToLocalTracing=true, requestMarshaller=io.grpc.protobuf.lite.ProtoLiteUtils$MessageMarshaller@4723dc3, responseMarshaller=io.grpc.protobuf.lite.ProtoLiteUtils$MessageMarshaller@70740fd6, schemaDescriptor=org.apache.doris.proto.PBackendServiceGrpc$PBackendServiceMethodDescriptorSupplier@5774233b}}}}]]
at com.google.common.util.concurrent.AbstractFuture.get(AbstractFuture.java:508) ~[guava-30.0-jre.jar:?]
at org.apache.doris.qe.Coordinator.waitRpc(Coordinator.java:717) ~[doris-fe.jar:1.2-SNAPSHOT]
... 13 more
2024-04-23 10:58:17,490 WARN (mysql-nio-pool-17|399) [ConnectProcessor.processOnce():693] Null packet received from network. remote: 172.20.195.223:55240
2024-04-23 10:58:17,490 WARN (mysql-nio-pool-17|399) [ReadListener.lambda$handleEvent$0():60] Exception happened in one session(org.apache.doris.qe.ConnectContext@48aaad49).
java.io.IOException: Error happened when receiving packet.
at org.apache.doris.qe.ConnectProcessor.processOnce(ConnectProcessor.java:694) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.mysql.ReadListener.lambda$handleEvent$0(ReadListener.java:52) ~[doris-fe.jar:1.2-SNAPSHOT]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_352]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_352]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_352]
2024-04-23 10:58:22,514 WARN (mysql-nio-pool-17|399) [StmtExecutor.execute():593] execute Exception. stmt[303, 8580cf2ea5a04e55-b66665301fd7f31f]
org.apache.doris.rpc.RpcException: timeout when waiting for send fragments RPC. Wait(sec): 5, host: 172.19.1.115
at org.apache.doris.qe.Coordinator.waitRpc(Coordinator.java:750) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.Coordinator.sendFragment(Coordinator.java:678) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.Coordinator.exec(Coordinator.java:553) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.sendResult(StmtExecutor.java:1151) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.handleQueryStmt(StmtExecutor.java:1131) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:522) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:409) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.ConnectProcessor.handleQuery(ConnectProcessor.java:333) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.ConnectProcessor.dispatch(ConnectProcessor.java:476) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.ConnectProcessor.processOnce(ConnectProcessor.java:703) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.mysql.ReadListener.lambda$handleEvent$0(ReadListener.java:52) ~[doris-fe.jar:1.2-SNAPSHOT]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_352]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_352]
at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_352]
Caused by: java.util.concurrent.TimeoutException: Waited 5000 milliseconds (plus 3 milliseconds, 312099 nanoseconds delay) for io.grpc.stub.ClientCalls$GrpcFuture@4161032d[status=PENDING, info=[GrpcFuture{clientCall={delegate=ClientCallImpl{method=MethodDescriptor{fullMethodName=doris.PBackendService/exec_plan_fragment, type=UNARY, idempotent=false, safe=false, sampledToLocalTracing=true, requestMarshaller=io.grpc.protobuf.lite.ProtoLiteUtils$MessageMarshaller@4723dc3, responseMarshaller=io.grpc.protobuf.lite.ProtoLiteUtils$MessageMarshaller@70740fd6, schemaDescriptor=org.apache.doris.proto.PBackendServiceGrpc$PBackendServiceMethodDescriptorSupplier@5774233b}}}}]]
at com.google.common.util.concurrent.AbstractFuture.get(AbstractFuture.java:508) ~[guava-30.0-jre.jar:?]
at org.apache.doris.qe.Coordinator.waitRpc(Coordinator.java:717) ~[doris-fe.jar:1.2-SNAPSHOT]
... 13 more

2 Answers

麻烦贴一下操作系统和欧拉的具体版本。我们之前遇到过一个欧拉系统上的问题,怀疑可能是 brpc 跟某个欧拉版本的适配有问题。

我也碰到了这个问题,插眼跟踪~