问题1:(已知表中存在problem_name='xxx'的记录)
-- 不能查出数
select * from test.table_a where problem_name='xxx';
-- 可以正常查询
select * from test.table_a where trim(problem_name)='xxx';
问题2:
-- 以下查询每次执行结果都不一致
select problem_name,count(1)
from test.table_a a
join test.table_a b
on a.module_id = b.module_id
group by 1
-- 以下查询正常
select problem_name,count(1)
from test.table_a a
join test.table_a b
on trim(a.module_id) = trim(b.module_id)
group by 1
建表语句如下:
-- A表 50亿
CREATE TABLE test.table_a(
module_id char(30) not null,
data_id char(100) not null,
data_date DATE,
material_code CHAR(30),
problem_name CHAR(200),
consume_time VARCHAR(30)
) ENGINE=OLAP
unique KEY(module_id,data_id,data_date)
PARTITION BY RANGE(data_date)(
from ('2019-01-01') to ('2024-01-01') interval 1 month
)
DISTRIBUTED BY HASH(module_id) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"enable_unique_key_merge_on_write" = "true",
"bloom_filter_columns" = "material_code,problem_name",
"colocate_with" = "module_id_join_group"
);
CREATE INDEX idx_problem_name ON test.table_a(problem_name) USING INVERTED;
CREATE INDEX idx_material_code ON test.table_a(material_code) USING INVERTED;
-- B表 5亿
CREATE TABLE test.table_b(
module_id char(30) not null,
data_date DATE,
consume_time VARCHAR(30)
) ENGINE=OLAP
unique KEY(module_id)
PARTITION BY RANGE(data_date)(
from ('2019-01-01') to ('2024-01-01') interval 1 month
)
DISTRIBUTED BY HASH(module_id) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"enable_unique_key_merge_on_write" = "true",
"colocate_with" = "module_id_join_group"
);