2.1.6中在row_number函数后使用with ** as关联查询后结果出现随机匹配

Viewed 64

测试表与数据

CREATE TABLE `test1` (
  `id` varchar(255) NOT NULL COMMENT 'id',
  `name` varchar(255) NULL COMMENT '名称',
  `storage_time` datetime REPLACE NULL DEFAULT CURRENT_TIMESTAMP COMMENT '插入时间'
) ENGINE=OLAP
AGGREGATE KEY(`id`,`name`)
COMMENT ''
DISTRIBUTED BY HASH(`id`,`name`) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_format" = "V2",
"inverted_index_storage_format" = "DEFAULT",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
);

CREATE TABLE test2 (
  `data_time` datetime NOT NULL COMMENT '时间',
  `id` varchar(255) NOT NULL COMMENT 'id',
  `data_id` varchar(255) REPLACE_IF_NOT_NULL NULL COMMENT 'id',
  `data_uuid` varchar(255) REPLACE_IF_NOT_NULL NULL COMMENT 'uuid',
  INDEX Time_index (`data_time`) USING BITMAP
) ENGINE=OLAP
AGGREGATE KEY(`data_time`, `id`)
COMMENT ''
PARTITION BY RANGE(`data_time`)
()
DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.time_zone" = "Asia/Shanghai",
"dynamic_partition.start" = "-10",
"dynamic_partition.end" = "3",
"dynamic_partition.prefix" = "p",
"dynamic_partition.replication_allocation" = "tag.location.default: 3",
"dynamic_partition.buckets" = "2",
"dynamic_partition.create_history_partition" = "false",
"dynamic_partition.history_partition_num" = "-1",
"dynamic_partition.hot_partition_num" = "0",
"dynamic_partition.reserved_history_periods" = "NULL",
"dynamic_partition.storage_policy" = "",
"dynamic_partition.storage_medium" = "HDD",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
);


insert into test1 values
('1', 't1', '2024-10-11 12:10:00'),('1', 't2', '2024-10-11 12:20:00'),('1', 't3', '2024-10-11 12:30:00'),
('2', 't1', '2024-10-11 12:10:00'),('2', 't2', '2024-10-11 12:20:00'),('2', 't3', '2024-10-11 12:30:00'),
('3', 't1', '2024-10-11 12:00:00'),('3', 't2', '2024-10-11 12:20:00'),('3', 't3', '2024-10-11 12:30:00'),
('4', 't1', '2024-10-11 12:00:00'),('4', 't2', '2024-10-11 12:20:00'),('4', 't3', '2024-10-11 12:30:00');

insert into test1 values
('5', 't1', '2024-10-11 12:10:00'),
('6', 't1', '2024-10-11 12:10:00'),
('7', 't1', '2024-10-11 12:20:00'),
('8', 't1', '2024-10-11 12:20:00');

insert into test2 values
('2024-10-11 12:10:00','1', 'dataId1','uuid1'),('2024-10-11 12:20:00','2', 'dataId2','uuid2'),('2024-10-11 12:30:00','3', 'dataId3','uuid3'),('2024-10-11 12:40:00','4', 'dataId4','uuid4'),('2024-10-11 12:50:00','5', 'dataId5','uuid5');

insert into test2 values
('2024-10-12 12:10:00','10', 'dataId1','uuid1');

执行的sql

with t1 as (
select * from (
select
t1.id,
t1.name,
row_number()
over (partition by t1.id order by storage_time) row_
from test1 t1 left join test2 t2 on t1.id=t2.id
) t where row_ = 1
),
t2 as (
select *
from test2 
)
select * from t1 
left join t2 on t1.id = t2.id

创建后执行正常,但过一段时间再执行或者表中insert多一些数据 关联出来的右表会有随机匹配的情况
15fdfc91e190b73a4d8cb74dc539647.png

4b26564919df2c24de41feafdfa1eea.png

fff39a47f60f7500e921bed990839aa.png

加上 order by tb1.id 后执行正常的情况
image.png

版本为官网下载的 doris-2.1.6-rc04-653e315ba5

1 Answers

先执行这个命令暂时规避下,预计下个版本修复:
set enable_local_shuffle=false;