服务器断电后某一台FE启动失败

Viewed 42

Doris 集群共有 3 个 FE、3 个 BE。某次机房停电后集群重启,3 个 BE 运行正常,但其中一台 FE 启动后一直报错:
2024-10-22 16:23:01,638 WARN (InternalSchemaInitializer|184) [InternalSchemaInitializer.run():82] Statistics storage initiated failed, will try again later
org.apache.doris.common.DdlException: errCode = 2, detailMessage = System has no available disk capacity or no available BE nodes
at org.apache.doris.system.SystemInfoService.checkAvailableCapacity(SystemInfoService.java:820) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.datasource.InternalCatalog.createTable(InternalCatalog.java:1123) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.catalog.InternalSchemaInitializer.createTbl(InternalSchemaInitializer.java:160) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.catalog.InternalSchemaInitializer.run(InternalSchemaInitializer.java:80) ~[doris-fe.jar:1.2-SNAPSHOT]
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] 2024-10-22 16:23:05,520 ERROR server.TThreadPoolServer: Thrift Error occurred during processing of message.
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] org.apache.thrift.protocol.TProtocolException: Required field 'version' was not present! Struct: TFrontendPingFrontendResult(status:FAILED, msg:invalid cluster id: 704537512, queryPort:0, rpcPort:0, replayedJournalId:0, version:null)
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.doris.thrift.TFrontendPingFrontendResult.validate(TFrontendPingFrontendResult.java:1017)
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.doris.thrift.FrontendService$ping_result.validate(FrontendService.java:31917)
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.doris.thrift.FrontendService$ping_result$ping_resultStandardScheme.write(FrontendService.java:31976)
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.doris.thrift.FrontendService$ping_result$ping_resultStandardScheme.write(FrontendService.java:31943)
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.doris.thrift.FrontendService$ping_result.write(FrontendService.java:31894)
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:58)
2024-10-22 16:23:05,521 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:38)
2024-10-22 16:23:05,522 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:250)
2024-10-22 16:23:05,522 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
2024-10-22 16:23:05,522 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
2024-10-22 16:23:05,522 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.lang.Thread.run(Thread.java:745)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] 2024-10-22 16:23:05,522 WARN transport.TIOStreamTransport: Error closing output stream.
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] java.net.SocketException: Socket closed
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:116)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.net.SocketOutputStream.write(SocketOutputStream.java:153)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.io.BufferedOutputStream.flush(BufferedOutputStream.java:140)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.io.FilterOutputStream.close(FilterOutputStream.java:158)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.thrift.transport.TIOStreamTransport.close(TIOStreamTransport.java:157)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.thrift.transport.TSocket.close(TSocket.java:264)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:267)
2024-10-22 16:23:05,523 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
2024-10-22 16:23:05,524 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
2024-10-22 16:23:05,524 ERROR (thrift-server-pool-0|204) [StreamEncoder.writeBytes():221] at java.lang.Thread.run(Thread.java:745)
2024-10-22 16:23:05,851 INFO (colocate group clone checker|95) [ColocateTableCheckerAndBalancer.matchGroups():586] finished to check tablets. unhealth/total/added/in_sched/not_ready: 0/0/0/0/0, cost: 0 ms
2024-10-22 16:23:05,857 INFO (tablet checker|45) [TabletChecker.checkTablets():347] finished to check tablets. unhealth/total/added/in_sched/not_ready: 0/0/0/0/0, cost: 0 ms
2024-10-22 16:23:05,886 INFO (InsertOverwriteDropDirtyPartitions|66) [InsertOverwriteManager.runAfterCatalogReady():260] start clean insert overwrite temp partitions
2024-10-22 16:23:06,638 INFO (InternalSchemaInitializer|184) [InternalCatalog.createDb():425] create database[__internal_schema] which already exists
2024-10-22 16:23:06,639 WARN (InternalSchemaInitializer|184) [InternalSchemaInitializer.run():82] Statistics storage initiated failed, will try again later
org.apache.doris.common.DdlException: errCode = 2, detailMessage = System has no available disk capacity or no available BE nodes
at org.apache.doris.system.SystemInfoService.checkAvailableCapacity(SystemInfoService.java:820) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.datasource.InternalCatalog.createTable(InternalCatalog.java:1123) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.catalog.InternalSchemaInitializer.createTbl(InternalSchemaInitializer.java:160) ~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.catalog.InternalSchemaInitializer.run(InternalSchemaInitializer.java:80) ~[doris-fe.jar:1.2-SNAPSHOT]

使用 MySQL 客户端命令查看 frontends 和 backends 的状态如下:

1 Answer

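1. 检查集群资源:确认 BE 节点是否存活、磁盘容量是否充足。
2. 检查 FE 与 BE 之间的网络连通性。
从日志来看,该 FE 在初始化内部统计表时没有获取到可用的 BE 存储资源(报错 "System has no available disk capacity or no available BE nodes");另外,FE 之间的 ping 请求返回了 status:FAILED,msg 为 "invalid cluster id: 704537512",并且返回结果 TFrontendPingFrontendResult 中必填的 version 字段为空(version:null),因此 Thrift 在序列化响应时抛出了 TProtocolException。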