doris 2.0.0 版本,有如下AGGREGATE表
- ddl如下:
CREATE TABLE `test_table` (
`k1` varchar(32) NOT NULL COMMENT '',
`k2` varchar(32) NOT NULL COMMENT '',
`k3` varchar(2) NOT NULL COMMENT '',
`k4` int(11) NULL COMMENT '数据日期',
`k5` varchar(8) NULL COMMENT '',
`k6` varchar(32) REPLACE NULL COMMENT '',
`v1` int(11) SUM NOT NULL COMMENT '',
`v2` double SUM NOT NULL COMMENT '',
`v3` int(11) SUM NULL COMMENT '',
`v4` tinyint(4) SUM NULL COMMENT '',
`v5` varchar(128) REPLACE NULL COMMENT '',
) ENGINE=OLAP
AGGREGATE KEY(`k1`, `k2`, `k3`, `k4`, `k5`)
COMMENT ''
PARTITION BY RANGE(`k4`)()
DISTRIBUTED BY HASH(`k1`) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"is_being_synced" = "false",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.time_zone" = "Asia/Shanghai",
"dynamic_partition.start" = "-740",
"dynamic_partition.end" = "3",
"dynamic_partition.prefix" = "p",
"dynamic_partition.replication_allocation" = "tag.location.default: 3",
"dynamic_partition.buckets" = "2",
"dynamic_partition.create_history_partition" = "true",
"dynamic_partition.history_partition_num" = "7",
"dynamic_partition.hot_partition_num" = "0",
"dynamic_partition.reserved_history_periods" = "NULL",
"dynamic_partition.storage_policy" = "",
"dynamic_partition.storage_medium" = "HDD",
"storage_format" = "V2",
"estimate_partition_size" = "10G",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);
- 查询sql如下
SELECT
k6 ,
k2 ,
k3,
k5 ,
sum(v1),
count(distinct(k4)) ,
sum(v2),
SUM(v4) ,
sum(v3)
from
table_test
WHERE
1 = 1
and k4 >= '20240201'
and k4 <= '20240229'
and v5 like CONCAT( '0-1-', '%')
group by
k6 ,
k2 ,
k3 ,
k5
- 数据分布情况
- 按照天分区,每天2个bucket,单天数据20w,一个月600w
- 问题: 多个字段聚合耗时长,总体耗时4s以上,有什么比较好的优化手段?