采用 Stream Load 方式导入 39 万条记录(约 490 MB)耗时 41 秒,是快还是慢?

Viewed 35

集群环境:8 个 BE 节点,3 个 FE 节点;Doris 版本:2.1.6;磁盘类型:SSD。
建表语句:
-- Access-log table for Apache Doris: DUPLICATE KEY model, hourly dynamic RANGE
-- partitions on `date`, hash-distributed on id_key.
--
-- Review changes relative to the originally posted DDL:
--   * "dynamic_partition.buckets" was 256 while DISTRIBUTED BY declared 128.
--     No static partitions are declared (empty partition list), so every
--     partition is created by the dynamic-partition scheduler and the
--     dynamic value is the one that takes effect. Both are now 128 so the
--     two declarations agree.
--   * "dynamic_partition.retain_range_partition_count" was removed: it is not
--     one of the documented dynamic_partition.* properties, and its comment
--     ("keep 24 hours") contradicted its value (3). Retention is governed by
--     "dynamic_partition.start".
--   * `date` and `timestamp` are back-quoted because they are also type /
--     keyword names; the column names themselves are unchanged.
--
-- NOTE(review): with start = -72 and end = 24 there are roughly 97 hourly
-- partitions; at 128 buckets x 3 replicas that is about 37,000 tablet
-- replicas, and a single load is fanned out over every bucket of the target
-- partition. 128 is probably still far too many for hourly partitions --
-- size the bucket count from the real per-hour data volume and confirm.
CREATE TABLE IF NOT EXISTS access_log_test003 (
    `date`                 DATETIME,
    http_host              VARCHAR(65533),
    status                 INT,
    request_uri            VARCHAR(65533),
    url                    VARCHAR(65533),
    method                 VARCHAR(20),
    `timestamp`            BIGINT,
    ip                     VARCHAR(65533),
    OriginalTime           VARCHAR(100),
    ServerIp               VARCHAR(65533),
    clientRealIp           VARCHAR(65533),
    size                   BIGINT,
    responsetime           DOUBLE,
    upstreamhost           VARCHAR(65533),
    upstreamtime           DOUBLE,
    upstream_connect_time  DOUBLE,
    upstream_header_time   DOUBLE,
    referrer               VARCHAR(65533),
    user_agent             VARCHAR(65533),
    clusterName            VARCHAR(65533),
    route_id               VARCHAR(65533),
    route_name             VARCHAR(65533),
    service_id             VARCHAR(65533),
    service_name           VARCHAR(65533),
    collectTime            VARCHAR(100),
    id_key                 BIGINT
)
-- NOTE(review): the sort key includes two VARCHAR(65533) columns, so every
-- load sorts on long strings; consider a shorter key if queries do not rely
-- on this ordering -- confirm against the query patterns.
DUPLICATE KEY(`date`, http_host, status, request_uri)
-- Partition list is intentionally empty: partitions are created dynamically.
PARTITION BY RANGE (`date`) ()
DISTRIBUTED BY HASH(id_key) BUCKETS 128
PROPERTIES (
    "compaction_policy" = "time_series",                    -- time-series style data
    "replication_num" = "3",                                -- three replicas per tablet
    "in_memory" = "false",                                  -- NOTE(review): believed to be deprecated on recent Doris versions; confirm and drop
    "storage_format" = "V2",
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "HOUR",                 -- one partition per hour
    "dynamic_partition.create_history_partition" = "true",  -- also create the past partitions back to "start"
    "dynamic_partition.start" = "-72",                      -- partitions older than 72 hours are dropped
    "dynamic_partition.end" = "24",                         -- pre-create partitions 24 hours ahead
    "dynamic_partition.prefix" = "pt003",                   -- partition name prefix
    "dynamic_partition.buckets" = "128",                    -- kept equal to DISTRIBUTED BY ... BUCKETS
    "enable_single_replica_compaction" = "true",            -- use single-replica compaction
    "disable_auto_compaction" = "false"                     -- automatic compaction stays enabled
);

Stream Load 导入数据的返回结果:
{
"TxnId": 101472,
"Label": "doris_test_load_job_20241012_104105_30_536",
"Comment": "",
"TwoPhaseCommit": "true",
"Status": "Success",
"Message": "OK",
"NumberTotalRows": 390030,
"NumberLoadedRows": 390030,
"NumberFilteredRows": 0,
"NumberUnselectedRows": 0,
"LoadBytes": 490055750,
"LoadTimeMs": 41112,
"BeginTxnTimeMs": 0,
"StreamLoadPutTimeMs": 41,
"ReadDataTimeMs": 1047,
"WriteDataTimeMs": 41061,
"CommitAndPublishTimeMs": 0
}

请问这个导入速度是快还是慢?
有没有可以优化的空间?

1 Answer

数据量大概有多大?机器是什么配置?导入期间 CPU、内存、磁盘 IO 是否已经达到瓶颈?