带in或exists子查询的sql命中不了doris异步物化视图

Viewed 41

-- 基表
CREATE TABLE t_transaction_member (
operator_id int NOT NULL DEFAULT "0" COMMENT '交易发起人id',
type_id int NOT NULL DEFAULT "0" COMMENT '交易类型',
operator_type int NOT NULL DEFAULT "0" COMMENT '会员类型',
created_at datetime NOT NULL COMMENT '创建时间',
user_id int NOT NULL DEFAULT "0" COMMENT '交易接受人id'
) ENGINE=OLAP
UNIQUE KEY(operator_id, type_id, operator_type, created_at, user_id)
PARTITION BY RANGE(created_at) (
FROM
('2024-05-01') TO ('2025-06-30') INTERVAL 1 DAY
)
DISTRIBUTED BY HASH(operator_id) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);

-- 插入一些数据
insert into t_transaction_member values
(10000,28,2,"2024-12-20 10:10:00",20000),
(10000,28,2,"2024-12-20 10:15:00",20000),
(10000,28,2,"2024-12-20 11:20:00",20000),
(10001,27,2,"2024-12-20 12:30:00",20000),
(10000,28,2,"2024-12-20 13:40:00",20001),
(10000,28,2,"2024-12-21 14:50:00",20001);

-- 创建小时级物化视图
create MATERIALIZED view t_transaction_member_1h_mv refresh on commit
distributed by hash(operator_id) buckets 3
properties ("replication_allocation" = "tag.location.default: 1")
as select user_id, operator_id, date_trunc(created_at, 'hour') as created_at_60m, count(user_id) as num
from t_transaction_member
WHERE type_id = 28 AND operator_type = 2
group by user_id, operator_id, date_trunc(created_at, 'hour');

-- 执行不带in或exists子查询的查询可以命中mv
SELECT COUNT(user_id) AS num, user_id FROM (
SELECT COUNT(user_id) AS num, user_id, date_trunc(created_at, "hour") as created_at_60m
FROM t_transaction_member
WHERE operator_id = 10000 // 常量
AND type_id = 28
AND operator_type = 2
GROUP BY user_id, date_trunc(created_at, "hour")
) t WHERE created_at_60m >= "2024-12-20 10:00:00" AND created_at_60m < "2024-12-21 00:00:00"
GROUP BY user_id;

-- 执行带in或exists子查询的查询就命中不了mv
SELECT COUNT(user_id) AS num, user_id FROM (
SELECT COUNT(user_id) AS num, user_id, date_trunc(created_at, "hour") as created_at_60m
FROM t_transaction_member
WHERE operator_id in (select user_id from other_table where user_type = xxx) // 带有in子查询
AND type_id = 28
AND operator_type = 2
GROUP BY user_id, date_trunc(created_at, "hour")
) t WHERE created_at_60m >= "2024-12-20 10:00:00" AND created_at_60m < "2024-12-21 00:00:00"
GROUP BY user_id;

1 Answers

感谢反馈!
对于物化视图,当前in+子查询的改写机制,当前还未兼容,后续会规划。