执行查询后,BE节点宕机

Viewed 50

Doris版本1.1.1-rc03

执行如下SQL后报错:

SELECT
    DATE_FORMAT(
        IF(DAYOFWEEK('2024-08-08') = 1,
           DATE_SUB('2024-08-08', INTERVAL 13 DAY),
           DATE_SUB('2024-08-08', INTERVAL (DAYOFWEEK('2024-08-08') + 5) DAY)
        ),
        '%Y-%m-%d'
    ) AS start_date,
    DATE_FORMAT(
        IF(DAYOFWEEK('2024-08-08') = 1,
           DATE_SUB('2024-08-08', INTERVAL 7 DAY),
           DATE_SUB('2024-08-08', INTERVAL (DAYOFWEEK('2024-08-08') - 1) DAY)
        ),
        '%Y-%m-%d'
    ) AS end_date;

报错信息

org.jkiss.dbeaver.model.sql.DBSQLException: SQL 错误 [1105] [HY000]: RpcException, msg: io.grpc.StatusRuntimeException: UNAVAILABLE: Network closed for unknown reason
	at org.jkiss.dbeaver.model.impl.jdbc.exec.JDBCStatementImpl.executeStatement(JDBCStatementImpl.java:134)
	at org.jkiss.dbeaver.ui.editors.sql.execute.SQLQueryJob.executeStatement(SQLQueryJob.java:475)
	at org.jkiss.dbeaver.ui.editors.sql.execute.SQLQueryJob.lambda$0(SQLQueryJob.java:414)
	at org.jkiss.dbeaver.model.exec.DBExecUtils.tryExecuteRecover(DBExecUtils.java:152)
	at org.jkiss.dbeaver.ui.editors.sql.execute.SQLQueryJob.executeSingleQuery(SQLQueryJob.java:406)
	at org.jkiss.dbeaver.ui.editors.sql.execute.SQLQueryJob.extractData(SQLQueryJob.java:765)
	at org.jkiss.dbeaver.ui.editors.sql.SQLEditor$QueryResultsContainer.readData(SQLEditor.java:2848)
	at org.jkiss.dbeaver.ui.controls.resultset.ResultSetJobDataRead.lambda$0(ResultSetJobDataRead.java:99)
	at org.jkiss.dbeaver.model.exec.DBExecUtils.tryExecuteRecover(DBExecUtils.java:152)
	at org.jkiss.dbeaver.ui.controls.resultset.ResultSetJobDataRead.run(ResultSetJobDataRead.java:97)
	at org.jkiss.dbeaver.ui.controls.resultset.ResultSetViewer$17.run(ResultSetViewer.java:3429)
	at org.jkiss.dbeaver.model.runtime.AbstractJob.run(AbstractJob.java:103)
	at org.eclipse.core.internal.jobs.Worker.run(Worker.java:63)
Caused by: java.sql.SQLException: RpcException, msg: io.grpc.StatusRuntimeException: UNAVAILABLE: Network closed for unknown reason
	at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:964)
	at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3973)
	at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3909)
	at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2527)
	at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2680)
	at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2483)
	at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2441)
	at com.mysql.jdbc.StatementImpl.executeInternal(StatementImpl.java:845)
	at com.mysql.jdbc.StatementImpl.execute(StatementImpl.java:745)
	at org.jkiss.dbeaver.model.impl.jdbc.exec.JDBCStatementImpl.execute(JDBCStatementImpl.java:338)
	at org.jkiss.dbeaver.model.impl.jdbc.exec.JDBCStatementImpl.executeStatement(JDBCStatementImpl.java:131)
	... 12 more

执行完这条查询后,会有一台BE节点宕机,且日志中无异常退出记录。查询是在15:32左右执行的,以下是该时间点前后的BE输出:

I0808 09:40:38.202935 31335 env.cpp:46] Env init successfully.
*** Aborted at 1723102313 (unix time) try "date -d @1723102313" if you are using GNU date ***
*** SIGSEGV address not mapped to object (@0x0) received by PID 31335 (TID 0x7f9252b93700) from PID 0; stack trace: ***
 0# 0x0000562161F26768 in /opt/doris/be/lib/doris_be
 1# 0x00007F928A11C400 in /lib64/libc.so.6
 2# doris::vectorized::is_column_const(doris::vectorized::IColumn const&) in /opt/doris/be/lib/doris_be
 3# doris::vectorized::PreparedFunctionImpl::default_implementation_for_constant_arguments(doris_udf::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long, unsigned long, bool, bool*) in /opt/doris/be/lib/doris_be
 4# doris::vectorized::PreparedFunctionImpl::execute(doris_udf::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long, unsigned long, bool) in /opt/doris/be/lib/doris_be
 5# doris::vectorized::IFunctionBase::execute(doris_udf::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long, unsigned long, bool) in /opt/doris/be/lib/doris_be
 6# doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) in /opt/doris/be/lib/doris_be
 7# doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) in /opt/doris/be/lib/doris_be
 8# doris::vectorized::VUnionNode::get_next_const(doris::RuntimeState*, doris::vectorized::Block*) in /opt/doris/be/lib/doris_be
 9# doris::vectorized::VUnionNode::get_next(doris::RuntimeState*, doris::vectorized::Block*, bool*) in /opt/doris/be/lib/doris_be
10# doris::PlanFragmentExecutor::get_vectorized_internal(doris::vectorized::Block**) in /opt/doris/be/lib/doris_be
11# doris::PlanFragmentExecutor::open_vectorized_internal() in /opt/doris/be/lib/doris_be
12# doris::PlanFragmentExecutor::open() in /opt/doris/be/lib/doris_be
13# doris::FragmentExecState::execute() in /opt/doris/be/lib/doris_be
14# doris::FragmentMgr::_exec_actual(std::shared_ptr<doris::FragmentExecState>, std::function<void (doris::PlanFragmentExecutor*)>) in /opt/doris/be/lib/doris_be
15# std::_Function_handler<void (), std::_Bind_result<void, void (doris::FragmentMgr::*(doris::FragmentMgr*, std::shared_ptr<doris::FragmentExecState>, std::function<void (doris::PlanFragmentExecutor*)>))(std::shared_ptr<doris::FragmentExecState>, std::function<void (doris::PlanFragmentExecutor*)>)> >::_M_invoke(std::_Any_data const&) in /opt/doris/be/lib/doris_be
16# doris::ThreadPool::dispatch_thread() in /opt/doris/be/lib/doris_be
17# doris::Thread::supervise_thread(void*) in /opt/doris/be/lib/doris_be
18# start_thread in /lib64/libpthread.so.0
19# __clone in /lib64/libc.so.6

start time: Thu Aug 8 15:34:08 CST 2024
WARNING: Logging before InitGoogleLogging() is written to STDERR
I0808 15:34:08.435313 32471 env.cpp:46] Env init successfully.

FE日志

2024-08-08 15:31:54,134 WARN (doris-mysql-nio-pool-82795|2731020) [ResultReceiver.getNext():125] fetch result execution exception, finstId=hi: 1936819584305744859
lo: -8758279266432188426

java.util.concurrent.ExecutionException: io.grpc.StatusRuntimeException: UNAVAILABLE: Network closed for unknown reason
	at com.google.common.util.concurrent.AbstractFuture.getDoneValue(AbstractFuture.java:564) ~[spark-dpp-1.0-SNAPSHOT.jar:1.0-SNAPSHOT]
	at com.google.common.util.concurrent.AbstractFuture.get(AbstractFuture.java:443) ~[spark-dpp-1.0-SNAPSHOT.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.ResultReceiver.getNext(ResultReceiver.java:79) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.Coordinator.getNext(Coordinator.java:844) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.StmtExecutor.sendResult(StmtExecutor.java:960) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.StmtExecutor.handleQueryStmt(StmtExecutor.java:938) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:392) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:306) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.ConnectProcessor.handleQuery(ConnectProcessor.java:212) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.ConnectProcessor.dispatch(ConnectProcessor.java:349) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.ConnectProcessor.processOnce(ConnectProcessor.java:538) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.mysql.nio.ReadListener.lambda$handleEvent$0(ReadListener.java:50) ~[doris-fe.jar:1.0-SNAPSHOT]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_111]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_111]
	at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_111]
Caused by: io.grpc.StatusRuntimeException: UNAVAILABLE: Network closed for unknown reason
	at io.grpc.Status.asRuntimeException(Status.java:533) ~[grpc-api-1.30.0.jar:1.30.0]
	at io.grpc.stub.ClientCalls$UnaryStreamToFuture.onClose(ClientCalls.java:515) ~[grpc-stub-1.30.0.jar:1.30.0]
	at io.grpc.internal.ClientCallImpl.closeObserver(ClientCallImpl.java:426) ~[grpc-core-1.30.0.jar:1.30.0]
	at io.grpc.internal.ClientCallImpl.access$500(ClientCallImpl.java:66) ~[grpc-core-1.30.0.jar:1.30.0]
	at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.close(ClientCallImpl.java:689) ~[grpc-core-1.30.0.jar:1.30.0]
	at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.access$900(ClientCallImpl.java:577) ~[grpc-core-1.30.0.jar:1.30.0]
	at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInternal(ClientCallImpl.java:751) ~[grpc-core-1.30.0.jar:1.30.0]
	at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInContext(ClientCallImpl.java:740) ~[grpc-core-1.30.0.jar:1.30.0]
	at io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37) ~[grpc-core-1.30.0.jar:1.30.0]
	at io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123) ~[grpc-core-1.30.0.jar:1.30.0]
	... 3 more
2024-08-08 15:31:54,136 WARN (doris-mysql-nio-pool-82795|2731020) [SimpleScheduler.addToBlacklist():196] add backend 10003 to black list. reason: io.grpc.StatusRuntimeException: UNAVAILABLE: Network closed for unknown reason
2024-08-08 15:31:54,136 WARN (doris-mysql-nio-pool-82795|2731020) [Coordinator.getNext():846] get next fail, need cancel. query id: 1ae0f72674904bdb-8674576918a3fff5
2024-08-08 15:31:54,136 WARN (doris-mysql-nio-pool-82795|2731020) [Coordinator.updateStatus():828] one instance report fail throw updateStatus(), need cancel. job id: -1, query id: 1ae0f72674904bdb-8674576918a3fff5, instance id: NaN
2024-08-08 15:31:54,136 WARN (doris-mysql-nio-pool-82795|2731020) [StmtExecutor.execute():464] execute Exception. stmt[2014712, 1ae0f72674904bdb-8674576918a3fff5]
org.apache.doris.rpc.RpcException: io.grpc.StatusRuntimeException: UNAVAILABLE: Network closed for unknown reason
	at org.apache.doris.qe.Coordinator.getNext(Coordinator.java:863) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.StmtExecutor.sendResult(StmtExecutor.java:960) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.StmtExecutor.handleQueryStmt(StmtExecutor.java:938) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:392) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:306) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.ConnectProcessor.handleQuery(ConnectProcessor.java:212) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.ConnectProcessor.dispatch(ConnectProcessor.java:349) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.qe.ConnectProcessor.processOnce(ConnectProcessor.java:538) ~[doris-fe.jar:1.0-SNAPSHOT]
	at org.apache.doris.mysql.nio.ReadListener.lambda$handleEvent$0(ReadListener.java:50) ~[doris-fe.jar:1.0-SNAPSHOT]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_111]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_111]
	at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_111]

这条查询之前是可以正常使用的,今天开始出现这个问题。

1 Answer

这个问题应该已经修复了。我看你的版本还是1.1,建议评估后先升级。这个版本已经相当老了,不少问题在后续版本中都已修复,而且该版本也已不再维护。