版本: 2.0.11
有个埋点数据的日志表, 基于明细表建立bitmap_union的物化视图后, 发现查询无法命中物化视图.
建表语句
CREATE TABLE jgp_event_log
(
create_time DATETIME not null COMMENT "时间",
project_id VARCHAR(256) COMMENT "",
data_source_id VARCHAR(256) COMMENT "",
user_id VARCHAR(256) COMMENT "登录用户ID",
session_id VARCHAR(256) COMMENT "访问会话ID"
)
DUPLICATE KEY(`create_time`)
PARTITION BY RANGE (create_time) ()
DISTRIBUTED BY HASH(create_time) BUCKETS 3
PROPERTIES(
"replication_allocation" = "tag.location.default: 2",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.time_zone" = "Asia/Shanghai",
"dynamic_partition.start" = "-365",
"dynamic_partition.end" = "3",
"dynamic_partition.prefix" = "p",
"dynamic_partition.buckets" = "3",
"dynamic_partition.create_history_partition" = "true"
);
创建物化视图
create materialized view mv_jgp_event_visit_day as
SELECT
to_date(create_time) create_date,
project_id,
data_source_id,
count(user_id),
bitmap_union(bitmap_hash64(session_id) ),
bitmap_union(bitmap_hash64(user_id) )
FROM
jfdb_growing.jgp_event_log
group by
to_date(create_time),
project_id,
data_source_id
查询语句无法命中视图, 使用bitmap_union_count(bitmap_hash64(session_id))也无法命中
select
to_date(create_time),
project_id,
data_source_id,
count(user_id),
count(distinct session_id) as sv,
count(distinct user_id) as uv
from
jfdb_growing.jgp_event_log
where
create_time BETWEEN '2024-01-01' and '2025-01-30'
group by
to_date(create_time),
project_id,
data_source_id
查询执行计划
PLAN FRAGMENT 0
OUTPUT EXPRS:
to_date(create_time)[#55]
project_id[#56]
data_source_id[#57]
count(user_id)[#58]
sv[#59]
uv[#60]
PARTITION: UNPARTITIONED
HAS_COLO_PLAN_NODE: false
VRESULT SINK
2836:VEXCHANGE
offset: 0
PLAN FRAGMENT 1
PARTITION: HASH_PARTITIONED: to_date(create_time)[#49], project_id[#50], data_source_id[#51]
HAS_COLO_PLAN_NODE: false
STREAM DATA SINK
EXCHANGE ID: 2836
UNPARTITIONED
2833:VAGGREGATE (merge finalize)
| output: count(partial_count(user_id)[#52])[#58], multi_distinct_count(DISTINCT partial_multi_distinct_count(DISTINCT session_id)[#53])[#59], multi_distinct_count(DISTINCT partial_multi_distinct_count(DISTINCT user_id)[#54])[#60]
| group by: to_date(create_time)[#49], project_id[#50], data_source_id[#51]
| cardinality=150,433
|
2830:VEXCHANGE
offset: 0
PLAN FRAGMENT 2
PARTITION: RANDOM
HAS_COLO_PLAN_NODE: false
STREAM DATA SINK
EXCHANGE ID: 2830
HASH_PARTITIONED: to_date(create_time)[#49], project_id[#50], data_source_id[#51]
2827:VAGGREGATE (update serialize)
| STREAMING
| output: partial_count(user_id[#47])[#52], partial_multi_distinct_count(DISTINCT session_id[#48])[#53], partial_multi_distinct_count(DISTINCT user_id[#47])[#54]
| group by: to_date(create_time)[#44], project_id[#45], data_source_id[#46]
| cardinality=150,433
|
2817:VOlapScanNode
TABLE: default_cluster:jfdb_growing.jgp_event_log(jgp_event_log), PREAGGREGATION: ON
PREDICATES: create_time[#0] <= '2025-01-30 00:00:00' AND create_time[#0] >= '2024-01-01 00:00:00'
partitions=18/369 (p20241220,p20241221,p20241222,p20241223,p20241224,p20241225,p20241226,p20241227,p20241228,p20241229,p20241230,p20241231,p20250101,p20250102,p20250103,p20250104,p20250105,p20250106), tablets=54/54, tabletList=169230875,169230878,169230881 ...
cardinality=503911, avgRowSize=0.0, numNodes=1
pushAggOp=NONE
projections: to_date(create_time[#0]), project_id[#1], data_source_id[#2], user_id[#3], session_id[#5]
project output tuple id: 1
但是我把查询改写为下面这样就可以命中
select * from (
SELECT
to_date(create_time) create_date,
project_id,
data_source_id,
count(user_id),
bitmap_union_count(bitmap_hash64(session_id) ),
bitmap_union_count(bitmap_hash64(user_id) )
FROM
jfdb_growing.jgp_event_log
group by to_date(create_time),project_id,data_source_id) t
where t.create_date BETWEEN '2025-01-01' and '2025-01-30'
改写后的执行计划
PLAN FRAGMENT 0
OUTPUT EXPRS:
create_date[#62]
project_id[#63]
data_source_id[#64]
count(user_id)[#65]
bitmap_union_count(bitmap_hash64(session_id))[#66]
bitmap_union_count(bitmap_hash64(user_id))[#67]
PARTITION: UNPARTITIONED
HAS_COLO_PLAN_NODE: false
VRESULT SINK
3780:VEXCHANGE
offset: 0
PLAN FRAGMENT 1
PARTITION: HASH_PARTITIONED: create_date[#50], mv_project_id[#51], mv_data_source_id[#52]
HAS_COLO_PLAN_NODE: false
STREAM DATA SINK
EXCHANGE ID: 3780
UNPARTITIONED
3774:VAGGREGATE (merge finalize)
| output: sum(partial_sum(mva_SUM__CASE WHEN user_id IS NULL THEN 0 ELSE 1 END)[#53])[#59], bitmap_union_count(partial_bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(session_id))[#54])[#60], bitmap_union_count(partial_bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(user_id))[#55])[#61]
| group by: create_date[#50], mv_project_id[#51], mv_data_source_id[#52]
| cardinality=31,498
| projections: create_date[#56], mv_project_id[#57], mv_data_source_id[#58], sum(mva_SUM__CASE WHEN user_id IS NULL THEN 0 ELSE 1 END)[#59], bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(session_id))[#60], bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(user_id))[#61]
| project output tuple id: 4
|
3771:VEXCHANGE
offset: 0
PLAN FRAGMENT 2
PARTITION: RANDOM
HAS_COLO_PLAN_NODE: false
STREAM DATA SINK
EXCHANGE ID: 3771
HASH_PARTITIONED: create_date[#50], mv_project_id[#51], mv_data_source_id[#52]
3768:VAGGREGATE (update serialize)
| STREAMING
| output: partial_sum(mva_SUM__CASE WHEN user_id IS NULL THEN 0 ELSE 1 END[#3])[#53], partial_bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(session_id)[#4])[#54], partial_bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(user_id)[#5])[#55]
| group by: mv_to_date(create_time)[#0], mv_project_id[#1], mv_data_source_id[#2]
| cardinality=62,997
|
3758:VOlapScanNode
TABLE: default_cluster:jfdb_growing.jgp_event_log(mv_jgp_event_visit_day), PREAGGREGATION: ON
PREDICATES: mv_to_date(create_time)[#0] <= '2025-01-30' AND mv_to_date(create_time)[#0] >= '2025-01-01'
partitions=18/369 (p20241220,p20241221,p20241222,p20241223,p20241224,p20241225,p20241226,p20241227,p20241228,p20241229,p20241230,p20241231,p20250101,p20250102,p20250103,p20250104,p20250105,p20250106), tablets=54/54, tabletList=171171406,171171409,171171412 ...
cardinality=503977, avgRowSize=0.0, numNodes=1
pushAggOp=NONE
辛苦大家帮忙看看是怎么回事