be 节点down之后无法正常拉起

Viewed 28

写数据时所有 be 节点全部 down,之后 be 节点也无法正常拉起,并拿到了 coredump 文件。倒排索引部分的调用栈很深,顶端的栈帧都是重复的 quickSort 调用,因此只截取了底端部分,如下所示:

#65229 0x0000559331ae83b7 in lucene::index::SDocumentsWriter<char>::ThreadState::quickSort(lucene::index::SDocumentsWriter<char>::Posting**, int, int) ()
#65230 0x0000559331ae83b7 in lucene::index::SDocumentsWriter<char>::ThreadState::quickSort(lucene::index::SDocumentsWriter<char>::Posting**, int, int) ()
#65231 0x0000559331ae7e77 in lucene::index::SDocumentsWriter<char>::ThreadState::FieldData::sortPostings() ()
#65232 0x0000559331aee4ce in lucene::index::SDocumentsWriter<char>::appendPostings(lucene::util::ArrayBase<lucene::index::SDocumentsWriter<char>::ThreadState::FieldData*>*, lucene::index::STermInfosWriter<char>*, lucene::store::IndexOutput*, lucene::store::IndexOutput*) ()
#65233 0x0000559331aed7f6 in lucene::index::SDocumentsWriter<char>::writeSegment(std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >&) ()
#65234 0x0000559331aece5f in lucene::index::SDocumentsWriter<char>::flush(bool) ()
#65235 0x0000559331ade7fd in lucene::index::IndexWriter::doFlush(bool) ()
#65236 0x0000559331ad250b in lucene::index::IndexWriter::flush(bool, bool) ()
#65237 0x0000559331ad1da7 in lucene::index::IndexWriter::closeInternal(bool) ()
#65238 0x0000559331ad1afd in lucene::index::IndexWriter::close(bool) ()
#65239 0x00005593243b1a84 in doris::segment_v2::InvertedIndexColumnWriterImpl<(doris::FieldType)17>::finish() ()
#65240 0x0000559324381d2c in doris::segment_v2::ArrayColumnWriter::write_inverted_index() ()
#65241 0x00005593243762cd in doris::segment_v2::SegmentWriter::finalize_columns_index(unsigned long*) ()
#65242 0x00005593243ea953 in doris::VerticalBetaRowsetWriter<doris::BetaRowsetWriter>::_flush_columns(doris::segment_v2::SegmentWriter*, bool) ()
#65243 0x00005593243e9379 in doris::VerticalBetaRowsetWriter<doris::BetaRowsetWriter>::flush_columns(bool) ()
#65244 0x00005593241c617f in doris::Merger::vertical_compact_one_group(std::shared_ptr<doris::BaseTablet>, doris::ReaderType, doris::TabletSchema const&, bool, std::vector<unsigned int, std::allocator<unsigned int> > const&, doris::vectorized::RowSourcesBuffer*, std::vector<std::shared_ptr<doris::RowsetReader>, std::allocator<std::shared_ptr<doris::RowsetReader> > > const&, doris::RowsetWriter*, long, doris::Merger::Statistics*, std::vector<unsigned int, std::allocator<unsigned int> >, long, doris::CompactionSampleInfo*) ()
#65245 0x00005593241c7930 in doris::Merger::vertical_merge_rowsets(std::shared_ptr<doris::BaseTablet>, doris::ReaderType, doris::TabletSchema const&, std::vector<std::shared_ptr<doris::RowsetReader>, std::allocator<std::shared_ptr<doris::RowsetReader> > > const&, doris::RowsetWriter*, long, long, doris::Merger::Statistics*) ()
#65246 0x00005593241a91d9 in doris::Compaction::merge_input_rowsets() ()
#65247 0x00005593241aec90 in doris::CompactionMixin::execute_compact_impl(long) ()
#65248 0x00005593241ae27b in doris::CompactionMixin::execute_compact() ()
#65249 0x000055932449c9d1 in doris::CumulativeCompaction::execute_compact() ()
#65250 0x0000559324488ecf in doris::Tablet::execute_compaction(doris::CompactionMixin&) ()
#65251 0x000055932414a8d3 in std::_Function_handler<void (), doris::StorageEngine::_submit_compaction_task(std::shared_ptr<doris::Tablet>, doris::CompactionType, bool)::$_0>::_M_invoke(std::_Any_data const&) ()
#65252 0x0000559324953428 in doris::ThreadPool::dispatch_thread() ()
#65253 0x00005593249483a1 in doris::Thread::supervise_thread(void*) ()
#65254 0x00007fbf2ec87ea5 in start_thread () from /lib64/libpthread.so.0
#65255 0x00007fbf2f6b6b0d in clone () from /lib64/libc.so.6
2 Answers

在社区大佬的建议下,给 be 设置 inverted_index_compaction_enable = true,之后 be 可以正常拉起。

内部已经在跟进了,后续有结果会及时同步到帖子上。