期望
利用异步物化视图实现对数据的分钟级实时聚合。
但是现有的业务场景下满足不了。
实现
创建一个表
CREATE TABLE `oss_interface_report_06` (
`ge_time` DATETIME NOT NULL COMMENT '事件发生时间',
`app_id` VARCHAR(192) NOT NULL COMMENT '应用key',
`interface_code` VARCHAR(192) NOT NULL COMMENT '接口key',
`result_status` TINYINT NOT NULL COMMENT '查询结果通知, 0:失败; 1:成功',
`application_name` VARCHAR(192) NOT NULL COMMENT '应用名称'
) ENGINE=OLAP
DUPLICATE KEY(`ge_time`, `app_id`, `interface_code`)
COMMENT '查询、删除、移动接口调用明细表'
AUTO PARTITION BY RANGE date_trunc(ge_time, 'hour')()
DISTRIBUTED BY HASH(`ge_time`, `app_id`, `interface_code`) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
);
插入一些数据
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:15.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:03:22.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 01:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 01:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 04:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 02:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-23 00:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-23 00:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-23 00:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:16.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:03:35.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:11.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:03:10.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 03:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:09.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:09.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:10.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:10.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:10.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:12.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:03:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:03:08.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:03:09.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:13.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:13.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:13.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
INSERT INTO oss_interface_report_06 (ge_time, app_id, interface_code, result_status, application_name) VALUES('2024-01-22 00:02:13.0', 'img_and_app', '/file/del/single', 1, 'application_name_01');
根据业务需求创建物化视图
因为业务要求是要按分钟进行聚合。 因此: 物化视图逻辑按分钟聚合。
CREATE MATERIALIZED VIEW mv1
BUILD DEFERRED REFRESH AUTO ON MANUAL
partition by(ge_time)
DISTRIBUTED BY RANDOM BUCKETS 2
PROPERTIES ('replication_num' = '1')
AS
select
date_trunc(ge_time, 'minute') as ge_time_minute,
app_id,
interface_code,
count(*) as total,
sum(if(result_status = 1, 1, 0)) as success_total,
sum(if(result_status = 1, 0, 1)) as failure_total
from
oss_interface_report_06
group by date_trunc(ge_time, 'minute'), app_id,interface_code;
但创建语法都出现了报错,
报错如下:
根据要求: 异步物化视图的逻辑group by后必须要存在基表的分区字段(好像在哪见过这个要求)。 且只能是原始字段,不能通过函数转换。
根据物化视图要求创建新的物化视图
CREATE MATERIALIZED VIEW mv2
BUILD DEFERRED REFRESH AUTO ON MANUAL
partition by(ge_time)
DISTRIBUTED BY RANDOM BUCKETS 2
PROPERTIES ('replication_num' = '1')
AS
select
ge_time,
app_id,
interface_code,
count(*) as total,
sum(if(result_status = 1, 1, 0)) as success_total,
sum(if(result_status = 1, 0, 1)) as failure_total
from
oss_interface_report_06
group by ge_time, app_id,interface_code;
该物化视图符合物化视图的创建规则。 但不符合业务规则。 因此业务要求是按分钟聚合。这种物化视图的逻辑是按ge_time的原始粒度(秒)聚合。如果原始粒度太细, 聚合程度不够,可能满足不了加速查询的要求。
验证
REFRESH MATERIALIZED VIEW mv2 ;
select * from mv2;
经过验证, 物化视图逻辑确实是按ge_time原始粒度(秒)进行聚合的。 没办法符合按分钟聚合的业务要求。
总结
1: 因为异步物化视图的的刷新粒度是分区,为了减少刷新代价,所以基表采用按小时分区, 而不是常规的按天。
2: 业务粒度相对于分区粒度是下钻, 相对于原始粒度是上卷。 显然目前的物化视图规则不能满足要求。(或者是我用法不对)。
3: 物化视图语法中的partition by, 不像表分区粒度那样支持函数。不够灵活。
4: 将基表和物化视图的分区强绑定是为了同步生命周期吗? 各自分开分区逻辑, 分别设置生命周期也可以。