Doris 2.1.7 冷备数据到远程 HDFS 时不能指定父目录,只能使用 /data

Viewed 60

使用 Doris 2.1.7,由于数据量比较大,需要将数据冷备到 HDFS。发现冷备文件的路径为 /data/{tablet_id}/{rowset_id}_{seg_num}.dat,其中父目录 /data 无法指定。因为该 HDFS 上还冷备了其他数据,直接使用 /data 不合适。查看源码后发现这里好像不可以配置。
image.png

2 Answers

可以在创建 HDFS resource 时设置 "s3.root.path" 参数来指定不同的根目录,示例如下:

-- Demo table used to exercise cold-data tiering to remote HDFS.
-- No key clause is given, so the engine's default table model applies.
-- Partitioning is automatic LIST partitioning on error_msg with an empty
-- initial partition list; rows are hash-distributed on type into one bucket.
CREATE TABLE IF NOT EXISTS example_tbl_by_default_t02 (
    `timestamp`  DATETIME      NOT NULL COMMENT "日志时间",
    `type`       INT           NOT NULL COMMENT "日志类型",
    `error_code` INT                    COMMENT "错误码",
    `error_msg`  VARCHAR(1024)          COMMENT "错误详细信息",
    `op_id`      BIGINT                 COMMENT "负责人id",
    `op_time`    DATETIME               COMMENT "处理时间"
)
AUTO PARTITION BY LIST (`error_msg`) ()
DISTRIBUTED BY HASH (`type`) BUCKETS 1
PROPERTIES (
    "replication_allocation" = "tag.location.default: 1"
);

-- HDFS resource (Kerberos principal + keytab) that serves as the remote store
-- for cooled-down data.
-- "s3.root.path" is the setting this example demonstrates: it places the cold
-- data under /data/doris_cold_data instead of directly under /data.
CREATE RESOURCE "remote_hdfs_kerberos_t02"
PROPERTIES (
    "type"                      = "hdfs",
    "fs.defaultFS"              = "qione01:9000",
    "hadoop.kerberos.principal" = "root/admin",
    "hadoop.kerberos.keytab"    = "/opt/keytab_store/root.keytab",
    "s3.root.path"              = "/data/doris_cold_data"
);

-- Storage policy that cools data down to the remote_hdfs_kerberos_t02 resource.
-- NOTE(review): cooldown_ttl "30" is presumably 30 seconds (a bare number with
-- no unit suffix) -- confirm against the Doris storage-policy documentation.
CREATE STORAGE POLICY test_policy_hdfs_kerberos02
PROPERTIES (
    "storage_resource" = "remote_hdfs_kerberos_t02",
    "cooldown_ttl"     = "30"
);

-- Attach the storage policy test_policy_hdfs_kerberos02 to the demo table.
ALTER TABLE example_tbl_by_default_t02
SET ("storage_policy" = "test_policy_hdfs_kerberos02");

-- Load five sample rows into the demo table.
-- The target columns are listed explicitly so the statement does not depend on
-- the table's column order and keeps working if columns are added later.
-- Every row uses error_msg = 'error_msg01'.
INSERT INTO example_tbl_by_default_t02
    (`timestamp`, `type`, `error_code`, `error_msg`, `op_id`, `op_time`)
VALUES
    ('2024-07-30 05:10:00', 1, 1, 'error_msg01', 1001, '2024-07-30 10:18:00'),
    ('2024-07-30 10:10:00', 1, 1, 'error_msg01', 1002, '2024-07-30 10:18:00'),
    ('2024-07-30 10:10:00', 1, 2, 'error_msg01', 1003, '2024-07-30 10:18:00'),
    ('2024-07-30 10:10:00', 2, 0, 'error_msg01', 1004, '2024-07-30 10:18:00'),
    ('2024-07-30 10:18:00', 1, 1, 'error_msg01', 1005, '2024-07-30 10:18:00');

查看tablet信息:
image.png

查看冷备到 HDFS 的数据:
image.png