同一个表,Routine Load 写入可达 50 万行/秒,而 Stream Load 只有 10 万行/秒,相差 5 倍。
建表SQL如下:
-- Doris duplicate-key log table: partitioned automatically per day on `timestamp`,
-- randomly bucketed, 3 replicas. Index build is deferred to compaction
-- ("skip_write_index_on_load") to lighten the write path.
CREATE TABLE `log_test` (
    `timestamp`      DATETIME(3)   NOT NULL,                          -- event time; also drives auto-partitioning
    `sourcetype`     VARCHAR(1024) NULL,
    `hostname`       VARCHAR(1024) NULL,
    `ip`             VARCHAR(1024) NULL,
    `message`        TEXT          NULL,                              -- raw log line; indexed twice below
    `path`           VARCHAR(1024) NULL,
    `rownumber`      BIGINT        NULL,
    `seq`            BIGINT        NULL,
    `filehashkey`    VARCHAR(1024) NULL,
    `collectiontime` DATETIME(3)   NULL,
    `storagetime`    DATETIME(3)   NULL DEFAULT CURRENT_TIMESTAMP,    -- set on ingest when absent
    `id`             VARCHAR(1024) NULL,
    `dataset`        VARCHAR(1024) NOT NULL,
    `field`          VARIANT       NULL,                              -- semi-structured extra fields
    -- Tokenized full-text index over `message` (unicode parser).
    INDEX idx_message_unicode (`message`) USING INVERTED PROPERTIES ("parser" = "unicode"),
    -- N-gram bloom filter over `message` to accelerate LIKE '%...%' filtering.
    INDEX idx_message_ngrambf (`message`) USING NGRAM_BF PROPERTIES ("bf_size" = "256", "gram_size" = "3")
) ENGINE = OLAP
DUPLICATE KEY (`timestamp`, `sourcetype`)
AUTO PARTITION BY RANGE (date_trunc(`timestamp`, 'day')) ()
DISTRIBUTED BY RANDOM BUCKETS AUTO
PROPERTIES (
    "replication_allocation"           = "tag.location.default: 3",
    "min_load_replica_num"             = "1",    -- a load may succeed with only one live replica
    "disable_auto_compaction"          = "false",
    "enable_single_replica_compaction" = "true", -- compact on one replica, fetch result on the others
    "skip_write_index_on_load"         = "true"  -- build inverted/ngram indexes at compaction, not at load time
);
Stream Load 任务:
# Stream Load: PUT one newline-delimited JSON file into demo.log_test via the FE.
# Fix: quote "$file" — the unquoted form breaks on paths containing spaces or glob characters.
curl --location-trusted \
     -u root: \
     -H "Expect:100-continue" \
     -H "format:json" \
     -H "read_json_by_line:true" \
     -H "fuzzy_parse:true" \
     -T "$file" \
     -XPUT http://192.168.101.94:58030/api/demo/log_test/_stream_load