【已解决】2.0.11,物化视图无法命中

Viewed 38

版本: 2.0.11
有个埋点数据的日志表, 基于明细表建立bitmap_union的物化视图后, 发现查询无法命中物化视图.
建表语句

CREATE TABLE jgp_event_log
(
    create_time DATETIME not null COMMENT "时间",
    project_id VARCHAR(256) COMMENT "",
    data_source_id VARCHAR(256) COMMENT "",
    user_id VARCHAR(256) COMMENT "登录用户ID",
    session_id VARCHAR(256) COMMENT "访问会话ID"
)
DUPLICATE KEY(`create_time`)
PARTITION BY RANGE (create_time) ()
DISTRIBUTED BY HASH(create_time) BUCKETS 3
PROPERTIES(
    "replication_allocation" = "tag.location.default: 2",
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    "dynamic_partition.time_zone" = "Asia/Shanghai",
    "dynamic_partition.start" = "-365",
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "3",
    "dynamic_partition.create_history_partition" = "true"
 );

创建物化视图

create materialized view mv_jgp_event_visit_day as 
SELECT
	to_date(create_time) create_date,
	project_id,
	data_source_id,
	count(user_id),
	bitmap_union(bitmap_hash64(session_id) ),
	bitmap_union(bitmap_hash64(user_id) )
FROM
	jfdb_growing.jgp_event_log
group by
	to_date(create_time),
	project_id,
	data_source_id

查询语句无法命中视图, 使用bitmap_union_count(bitmap_hash64(session_id))也无法命中

select
	to_date(create_time),
	project_id,
	data_source_id,
	count(user_id),
	count(distinct session_id) as sv,
	count(distinct user_id) as uv
from
	jfdb_growing.jgp_event_log
where
	create_time BETWEEN '2024-01-01' and '2025-01-30'
group by
	to_date(create_time),
	project_id,
	data_source_id

查询执行计划

PLAN FRAGMENT 0
  OUTPUT EXPRS:
    to_date(create_time)[#55]
    project_id[#56]
    data_source_id[#57]
    count(user_id)[#58]
    sv[#59]
    uv[#60]
  PARTITION: UNPARTITIONED

  HAS_COLO_PLAN_NODE: false

  VRESULT SINK

  2836:VEXCHANGE
     offset: 0

PLAN FRAGMENT 1

  PARTITION: HASH_PARTITIONED: to_date(create_time)[#49], project_id[#50], data_source_id[#51]

  HAS_COLO_PLAN_NODE: false

  STREAM DATA SINK
    EXCHANGE ID: 2836
    UNPARTITIONED

  2833:VAGGREGATE (merge finalize)
  |  output: count(partial_count(user_id)[#52])[#58], multi_distinct_count(DISTINCT partial_multi_distinct_count(DISTINCT session_id)[#53])[#59], multi_distinct_count(DISTINCT partial_multi_distinct_count(DISTINCT user_id)[#54])[#60]
  |  group by: to_date(create_time)[#49], project_id[#50], data_source_id[#51]
  |  cardinality=150,433
  |  
  2830:VEXCHANGE
     offset: 0

PLAN FRAGMENT 2

  PARTITION: RANDOM

  HAS_COLO_PLAN_NODE: false

  STREAM DATA SINK
    EXCHANGE ID: 2830
    HASH_PARTITIONED: to_date(create_time)[#49], project_id[#50], data_source_id[#51]

  2827:VAGGREGATE (update serialize)
  |  STREAMING
  |  output: partial_count(user_id[#47])[#52], partial_multi_distinct_count(DISTINCT session_id[#48])[#53], partial_multi_distinct_count(DISTINCT user_id[#47])[#54]
  |  group by: to_date(create_time)[#44], project_id[#45], data_source_id[#46]
  |  cardinality=150,433
  |  
  2817:VOlapScanNode
     TABLE: default_cluster:jfdb_growing.jgp_event_log(jgp_event_log), PREAGGREGATION: ON
     PREDICATES: create_time[#0] <= '2025-01-30 00:00:00' AND create_time[#0] >= '2024-01-01 00:00:00'
     partitions=18/369 (p20241220,p20241221,p20241222,p20241223,p20241224,p20241225,p20241226,p20241227,p20241228,p20241229,p20241230,p20241231,p20250101,p20250102,p20250103,p20250104,p20250105,p20250106), tablets=54/54, tabletList=169230875,169230878,169230881 ...
     cardinality=503911, avgRowSize=0.0, numNodes=1
     pushAggOp=NONE
     projections: to_date(create_time[#0]), project_id[#1], data_source_id[#2], user_id[#3], session_id[#5]
     project output tuple id: 1

但是我把查询改写为下面这样就可以命中

select * from (
	SELECT
        to_date(create_time) create_date,
        project_id,
        data_source_id,
        count(user_id),
        bitmap_union_count(bitmap_hash64(session_id) ),
        bitmap_union_count(bitmap_hash64(user_id) )
    FROM
        jfdb_growing.jgp_event_log
    group by to_date(create_time),project_id,data_source_id) t 
where t.create_date BETWEEN '2025-01-01' and '2025-01-30'

改写后的执行计划

PLAN FRAGMENT 0
  OUTPUT EXPRS:
    create_date[#62]
    project_id[#63]
    data_source_id[#64]
    count(user_id)[#65]
    bitmap_union_count(bitmap_hash64(session_id))[#66]
    bitmap_union_count(bitmap_hash64(user_id))[#67]
  PARTITION: UNPARTITIONED

  HAS_COLO_PLAN_NODE: false

  VRESULT SINK

  3780:VEXCHANGE
     offset: 0

PLAN FRAGMENT 1

  PARTITION: HASH_PARTITIONED: create_date[#50], mv_project_id[#51], mv_data_source_id[#52]

  HAS_COLO_PLAN_NODE: false

  STREAM DATA SINK
    EXCHANGE ID: 3780
    UNPARTITIONED

  3774:VAGGREGATE (merge finalize)
  |  output: sum(partial_sum(mva_SUM__CASE WHEN user_id IS NULL THEN 0 ELSE 1 END)[#53])[#59], bitmap_union_count(partial_bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(session_id))[#54])[#60], bitmap_union_count(partial_bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(user_id))[#55])[#61]
  |  group by: create_date[#50], mv_project_id[#51], mv_data_source_id[#52]
  |  cardinality=31,498
  |  projections: create_date[#56], mv_project_id[#57], mv_data_source_id[#58], sum(mva_SUM__CASE WHEN user_id IS NULL THEN 0 ELSE 1 END)[#59], bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(session_id))[#60], bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(user_id))[#61]
  |  project output tuple id: 4
  |  
  3771:VEXCHANGE
     offset: 0

PLAN FRAGMENT 2

  PARTITION: RANDOM

  HAS_COLO_PLAN_NODE: false

  STREAM DATA SINK
    EXCHANGE ID: 3771
    HASH_PARTITIONED: create_date[#50], mv_project_id[#51], mv_data_source_id[#52]

  3768:VAGGREGATE (update serialize)
  |  STREAMING
  |  output: partial_sum(mva_SUM__CASE WHEN user_id IS NULL THEN 0 ELSE 1 END[#3])[#53], partial_bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(session_id)[#4])[#54], partial_bitmap_union_count(mva_BITMAP_UNION__bitmap_hash64(user_id)[#5])[#55]
  |  group by: mv_to_date(create_time)[#0], mv_project_id[#1], mv_data_source_id[#2]
  |  cardinality=62,997
  |  
  3758:VOlapScanNode
     TABLE: default_cluster:jfdb_growing.jgp_event_log(mv_jgp_event_visit_day), PREAGGREGATION: ON
     PREDICATES: mv_to_date(create_time)[#0] <= '2025-01-30' AND mv_to_date(create_time)[#0] >= '2025-01-01'
     partitions=18/369 (p20241220,p20241221,p20241222,p20241223,p20241224,p20241225,p20241226,p20241227,p20241228,p20241229,p20241230,p20241231,p20250101,p20250102,p20250103,p20250104,p20250105,p20250106), tablets=54/54, tabletList=171171406,171171409,171171412 ...
     cardinality=503977, avgRowSize=0.0, numNodes=1
     pushAggOp=NONE

辛苦大家帮忙看看是怎么回事

1 Answers

这个符合预期,不能命中的查询使用了 create_time between ...,这个条件需要补偿到物化视图上。create_time在物化视图的select后找不到,如果写成 create_date是可以在物化视图上找到的,就可以改写