【已解决】dors2.1.3版本insert select批量插入,插入数据丢失

Viewed 180

建表语句

CREATE TABLE `rt_patent_valid_rights_only_patentee_test` (
  `apply_num` VARCHAR(50) NULL COMMENT '申请号',
  `interval_sort` INT NULL COMMENT '排序',
  `sort` INT NULL COMMENT '排序',
  `legal_public_date` INT NULL COMMENT '法律状态公告日',
  `start_date` INT NULL COMMENT '权利起',
  `end_date` INT NULL COMMENT '权利止',
  `patentee_type` VARCHAR(20) NULL COMMENT '当前权利人类型',
  `patentee_id` VARCHAR(50) NULL COMMENT '当前权利人编号',
  `patentee_name` VARCHAR(1000) NULL COMMENT '当前权利人名称',
  `is_deleted` INT NULL DEFAULT "2" COMMENT '是否删除',
  `process_update_time` DATETIME(3) NULL,
  `patent_type_code` INT NULL COMMENT '专利类型代码',
  `rec_id` VARCHAR(50) NULL COMMENT '记录号',
  INDEX idx_rt_patent_valid_rights_only_patent_start_date (`start_date`) USING INVERTED COMMENT '''''',
  INDEX idx_rt_patent_valid_rights_only_patent_type_code (`patent_type_code`) USING INVERTED COMMENT '''''',
  INDEX idx_rt_patent_valid_rights_only_apply_num (`apply_num`) USING INVERTED COMMENT '''''',
  INDEX idx_rt_patent_valid_rights_only_process_update_time (`process_update_time`) USING INVERTED COMMENT ''''''
) ENGINE=OLAP
UNIQUE KEY(`apply_num`, `interval_sort`, `sort`)
COMMENT '专利-与权利区间表中的权利人关系表'
DISTRIBUTED BY HASH(`apply_num`, `interval_sort`, `sort`) BUCKETS 9
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"enable_unique_key_merge_on_write" = "true",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
);

执行sql

insert
	into
	ipo_dw.rt_patent_valid_rights_only_patentee_test
(rec_id,
	apply_num,
	interval_sort,
	sort,
	legal_public_date,
	start_date,
	end_date,
	patentee_type,
	patentee_id,
	patentee_name,
	is_deleted,
	process_update_time,
	patent_type_code)
select
	pp.rec_id as rec_id,
	apply_num as apply_num,
	interval_sort as interval_sort,
	sort as sort,
	legal_public_date as legal_public_date,
	pp.start_date as start_date,
	pp.end_date as end_date,
	uu.patent_unit_class_code as patentee_type,
	uu.unit_id as patentee_id,
	patentee_name as patentee_name,
	pp.is_deleted as is_deleted,
	now() as process_update_time,
	patent_type_code as patent_type_code
from
	ipo_dw.rt_patent_valid_rights_only_patentee111 pp
inner join ipo_dw.lt_bo_bu_unit_unknown_name uu
on
	pp.patentee_name = uu.unit_name
	and pp.start_date >= uu.start_date
	and pp.start_date < uu.end_date
	and uu.is_deleted = 2

--插入完毕后数据缺失
image.png
---指定查询时候能查到
image.png

3 Answers

我这边解决了,方法参考:

enable_nereids_dml_with_pipeline 这个参数被默认设置成了 false,改成 true之后就可以了。


我也遇到了类似的问题 insert的结果总数和select的结果总数不一致

你可以试试建一张临时表,使用duplicate key而不是unique key,跑一下insert into select语句,看看这个问题还有没有?

指定没插入数据单条数据insert select是可以,批量就存在该问题