环境
doris版本:2.0.3
spark:3.1.1
spark-doris-connector:master 分支
1 doris建表语句
-- Aggregate-key table: ten nullable VARCHAR(50) key columns plus a single
-- BITMAP value column (`user_id`) declared with the BITMAP_UNION aggregate.
CREATE TABLE test.`day_bitmap_user_id_flow_product_attribution_event_di` (
    `cur_date`            varchar(50) NULL,
    `event_name`          varchar(50) NULL,
    `city_name`           varchar(50) NULL,
    `city_code`           varchar(50) NULL,
    `store_id`            varchar(50) NULL,
    `tax_category_a_code` varchar(50) NULL,
    `tax_category_b_code` varchar(50) NULL,
    `tax_category_c_code` varchar(50) NULL,
    `tax_category_d_code` varchar(50) NULL,
    `product_id`          varchar(50) NULL,
    -- Only non-key column; it is the 11th column of the table.
    `user_id`             bitmap BITMAP_UNION NOT NULL COMMENT 'user_id bitmap'
) ENGINE=OLAP
-- Key list repeats the ten VARCHAR columns in declaration order.
AGGREGATE KEY (
    `cur_date`,
    `event_name`,
    `city_name`,
    `city_code`,
    `store_id`,
    `tax_category_a_code`,
    `tax_category_b_code`,
    `tax_category_c_code`,
    `tax_category_d_code`,
    `product_id`
)
COMMENT 'OLAP'
DISTRIBUTED BY HASH (`cur_date`, `event_name`) BUCKETS 3
PROPERTIES (
    "replication_allocation" = "tag.location.default: 3"
);
2 spark sql导数命令
# Launch the spark-sql CLI with fixed executor resources (dynamic allocation
# disabled), the Hudi read-optimized path filter as the MapReduce input path
# filter, and the Spark Doris connector jar added via --jars.
spark-sql \
    --driver-memory 3G \
    --num-executors 10 \
    --executor-memory 9g \
    --executor-cores 3 \
    --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter \
    --conf spark.dynamicAllocation.enabled=false \
    --jars s3://xxxxxxxx/spark-doris-connector-3.1_2.12-1.4.0-SNAPSHOT.jar \
    --name test_doris_write
-- Registers the temporary view `doris_tbl` through the `doris` data source; the
-- INSERT statement that follows writes into this view.
-- Write path settings visible here: CSV format with custom line/column
-- delimiters (sink.properties.*), two-phase commit enabled, repartitioning
-- before the sink with a partition size of 5, and 3 retries.
-- NOTE(review): "table.identifier" points at test.day_bitmap_data, whereas the
--   CREATE TABLE statement earlier in this report defines
--   test.day_bitmap_user_id_flow_product_attribution_event_di; confirm both
--   refer to the same table (may just be name masking).
-- NOTE(review): "doris.ignore-type"="bitmap" presumably makes the connector
--   omit BITMAP columns from the schema this view exposes to Spark. That would
--   leave 10 columns and match the "target table has 10 column(s)" message in
--   the error log; verify against the connector documentation.
-- NOTE(review): "doris.write.fields" lists `user_id` twice: once as a plain
--   field and once as the target of `user_id=bitmap_hash64(user_id)`.
create or replace temporary view doris_tbl USING doris
OPTIONS(
"table.identifier"="test.day_bitmap_data",
"fenodes"="xxx:8030",
"user"="xxx",
"password"="xxxx",
"doris.request.retries"="3",
"sink.max-retries"="3",
"sink.batch.size"="10000",
"doris.sink.auto-redirect"="false",
"doris.sink.batch.interval.ms"="100",
"doris.sink.task.use.repartition"="true",
"doris.sink.task.partition.size"="5",
"doris.sink.enable-2pc"="true",
"sink.properties.format"="csv",
"sink.properties.line_delimiter"="\x171615",
"sink.properties.column_separator"="\x001112",
"doris.ignore-type"="bitmap",
"doris.write.fields"="cur_date,event_name,city_name,city_code,store_id,tax_category_a_code,tax_category_b_code,tax_category_c_code,tax_category_d_code,product_id,user_id,user_id=bitmap_hash64(user_id)"
);
-- Loads one day of source rows into the Doris-backed view `doris_tbl`.
-- The projection yields 11 columns: a constant `cur_date`, nine dimension
-- columns and `user_id`. Rows with a NULL `user_id` or a placeholder
-- `place_id` are excluded.
INSERT INTO doris_tbl
SELECT
    '20240101' AS cur_date,
    event_name,
    city_name,
    city_code,
    store_id,
    tax_category_a_code,
    tax_category_b_code,
    tax_category_c_code,
    tax_category_d_code,
    product_id,
    user_id
FROM dwd_xxx.xxxx
WHERE cur_date = '20240101'
    AND user_id IS NOT NULL
    AND place_id NOT IN ('00000000-0000-0000-0000-000000000000', 'default');
3 报错日志
Error in query: unknown requires that the data to be inserted have the same number of columns as the target table: target table has 10 column(s) but the inserted data has 11 column(s), including 0 partition column(s) having constant value(s).
4 目前怀疑是自己的使用方式有问题,但上述配置都是按照官网文档编写的
官网链接:https://doris.apache.org/zh-CN/docs/ecosystem/spark-doris-connector