查询 Iceberg 导致 BE 挂掉

Viewed 11

看起来可能是查询 Iceberg 时 position delete 处理的问题,但用 Trino 查询都是正常的,很奇怪。而且也不是每次必现,大约 80% 的概率会出现,每次随机挂掉几个 BE。

集群规模:3 FE + 5 BE,Doris 版本:2.1.4
BE 资源限制(limit):CPU 32 核,内存 256G
部署方式:OpenShift(K8s)

SQL:

insert into ads.ads_mes_cn_fassy_screw_dc_i_h
select
   SFC       ,
   OPERATION ,
   RESRCE    ,
   MATERIAL  ,
   TRANSACTION_ID ,
   SITE           ,
   P_DAY            ,
   SE              ,
   START_TIME ,
   RUN_TIME ,
   AST_CONTROLLER ,
   STEP1_ANGLE_MEA_VAL ,
   STEP1_TORQUE_MEA_VAL ,
   STEP2_ANGLE_MEA_VAL ,
   STEP2_TORQUE_MEA_VAL ,
   STEP3_ANGLE_MEA_VAL ,
   STEP3_TORQUE_MEA_VAL ,
   SCREW_MATERIAL_NUMBER ,
   UNIQUE_ID ,
   TASK_STEP ,
   TIMESTAMP_TS,
   VARIANT ,
   AST_PROGRAM ,
   RESULT_ ,
   AST_LAST_STEP ,
   P_HOUR,
   now(),
   now()
from iceberg_s3.ads.ads_mes_cn_fassy_screw_dc_i_h
where  P_HOUR = '2025-01-09-11';

日志:

RuntimeLogger W20250116 14:49:05.761315 2364 runtime_state.cpp:547] registe global ins:Fragment ac67127bd7754538-9285de9f200adbec ,mgr: 0x7efa0de77480 ,filter id:1
RuntimeLogger W20250116 14:49:05.801230 2437 runtime_state.cpp:547] registe global ins:Fragment d6ec01ce40c74d7b-84b5e5a78f6a4219 ,mgr: 0x7efb9658d8c0 ,filter id:2
RuntimeLogger W20250116 14:49:05.801282 2437 runtime_state.cpp:547] registe global ins:Fragment d6ec01ce40c74d7b-84b5e5a78f6a4219 ,mgr: 0x7efb9658d8c0 ,filter id:3
RuntimeLogger W20250116 14:49:06.041474 2452 runtime_state.cpp:547] registe global ins:Fragment 4de9708bdebf4b40-8e8a9f9041ae5801 ,mgr: 0x7efa0d635700 ,filter id:2
RuntimeLogger W20250116 14:49:06.041523 2452 runtime_state.cpp:547] registe global ins:Fragment 4de9708bdebf4b40-8e8a9f9041ae5801 ,mgr: 0x7efa0d635700 ,filter id:3
RuntimeLogger W20250116 14:49:06.042340 2471 runtime_state.cpp:547] registe global ins:Fragment 94b74721388e401e-8535d3bf19b91a20 ,mgr: 0x7ef9f97bf6c0 ,filter id:2
RuntimeLogger W20250116 14:49:06.042392 2471 runtime_state.cpp:547] registe global ins:Fragment 94b74721388e401e-8535d3bf19b91a20 ,mgr: 0x7ef9f97bf6c0 ,filter id:3
RuntimeLogger W20250116 14:49:09.010582 2547 input_messenger.cpp:362] Fail to read from Socket{id=8589960819 fd=5090 addr=172.51.20.179:43082:8060} (0x7efb81e85780): Connection reset by peer [104]
RuntimeLogger W20250116 14:49:09.015839 2490 input_messenger.cpp:362] Fail to read from Socket{id=26444 fd=5102 addr=172.51.17.60:47510:8060} (0x7ef9d0df8540): Connection reset by peer [104]
*** Query id: f58ccb2e25a44a51-b59aa44277981554 ***
*** is nereids: 1 ***
*** tablet id: 0 ***
*** Aborted at 1737010152 (unix time) try "date -d @1737010152" if you are using GNU date ***
*** Current BE git commitID: e93678fd1e ***
*** SIGSEGV unknown detail explain (@0x0) received by PID 779 (TID 3411 OR 0x7efd341b3700) from PID 0; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_release/doris/be/src/common/signal_handler.h:421
1# os::Linux::chained_handler(int, siginfo*, void*) in /usr/local/openjdk-8/jre/lib/amd64/server/libjvm.so
2# JVM_handle_linux_signal in /usr/local/openjdk-8/jre/lib/amd64/server/libjvm.so
3# signalHandler(int, siginfo*, void*) in /usr/local/openjdk-8/jre/lib/amd64/server/libjvm.so
4# 0x00007F047E18CD60 in /lib/x86_64-linux-gnu/libc.so.6
5# memcpy at /home/zcp/repo_center/doris_release/doris/be/src/glibc-compatibility/memcpy/memcpy_x86_64.cpp:219
6# doris::vectorized::IcebergTableReader::_position_delete_base(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::vector<doris::TIcebergDeleteFileDesc, std::allocator<doris::TIcebergDeleteFileDesc> > const&) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/format/table/iceberg_reader.cpp:343
7# doris::vectorized::IcebergTableReader::init_row_filters(doris::TFileRangeDesc const&) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/format/table/iceberg_reader.cpp:183
8# doris::vectorized::IcebergParquetReader::init_reader(std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::unordered_map<int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<int>, std::equal_to<int>, std::allocator<std::pair<int const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::variant<doris::ColumnValueRange<(doris::PrimitiveType)3>, doris::ColumnValueRange<(doris::PrimitiveType)4>, doris::ColumnValueRange<(doris::PrimitiveType)5>, doris::ColumnValueRange<(doris::PrimitiveType)6>, doris::ColumnValueRange<(doris::PrimitiveType)7>, doris::ColumnValueRange<(doris::PrimitiveType)36>, doris::ColumnValueRange<(doris::PrimitiveType)37>, doris::ColumnValueRange<(dor...
9# doris::vectorized::VFileScanner::_get_next_reader() at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/vfile_scanner.cpp:854
10# doris::vectorized::VFileScanner::_get_block_wrapped(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/vfile_scanner.cpp:334
11# doris::vectorized::VFileScanner::_get_block_impl(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/vfile_scanner.cpp:300
12# doris::vectorized::VScanner::get_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) in /opt/apache-doris/be/lib/doris_be
13# doris::vectorized::VScanner::get_block_after_projects(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/vscanner.cpp:96
14# doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:258
15# std::_Function_handler<void (), doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::{lambda()#1}>::_M_invoke(std::_Any_data const&) at /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291
16# doris::ThreadPool::dispatch_thread() in /opt/apache-doris/be/lib/doris_be
17# doris::Thread::supervise_thread(void*) at /home/zcp/repo_center/doris_release/doris/be/src/util/thread.cpp:499
18# start_thread in /lib/x86_64-linux-gnu/libpthread.so.0
19# __clone in /lib/x86_64-linux-gnu/libc.so.6
0 Answers