FE 与 BE 端口已配置 9095/9096,使用 golang driver 连接
import (
"database/sql"
_ "github.com/apache/arrow-adbc/go/adbc/sqldriver/flightsql"
)
db, err := sql.Open("flightsql", "uri=grpc://192.168.0.101:9095;username=root;password=")
建表 SQL
CREATE TABLE metadata
(
ID BIGINT NOT NULL AUTO_INCREMENT(1),
ctime DATETIME NOT NULL,
name VARCHAR(256) NOT NULL,
parent_path VARCHAR(256) NOT NULL,
mime_type VARCHAR(256) NOT NULL,
permissions VARCHAR(256) NOT NULL,
owner_id VARCHAR(256) NOT NULL,
group_id VARCHAR(256) NOT NULL,
mtime DATETIME NOT NULL,
size BIGINT NOT NULL,
atime DATETIME NOT NULL,
is_dir TINYINT NOT NULL,
INDEX idx_name_ngrambf (`name`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "3") COMMENT 'name ngram_bf index',
INDEX idx_parent_path_ngrambf (`parent_path`) USING NGRAM_BF PROPERTIES("bf_size" = "10240", "gram_size" = "5") COMMENT 'parent_path ngram_bf index',
INDEX idx_name_inverted (`name`) USING INVERTED,
INDEX idx_parent_path_inverted (`parent_path`) USING INVERTED,
INDEX idx_uid_inverted (`owner_id`) USING INVERTED COMMENT 'inverted_uid',
INDEX idx_gid_inverted (`group_id`) USING INVERTED COMMENT 'inverted_gid',
INDEX idx_size_inverted (`size`) USING INVERTED,
INDEX idx_is_dir_inverted (`is_dir`) USING INVERTED,
INDEX idx_ctime_inverted (`ctime`) USING INVERTED,
INDEX idx_atime_inverted (`atime`) USING INVERTED,
INDEX idx_mtime_inverted (`mtime`) USING INVERTED,
INDEX idx_pmt_inverted (`permissions`) USING INVERTED
) ENGINE = OLAP UNIQUE KEY (`id`, `ctime`)
AUTO PARTITION BY RANGE (date_trunc(`ctime`, 'month'))
()
DISTRIBUTED BY HASH (`ctime`) BUCKETS AUTO
PROPERTIES (
"replication_num"="1",
"min_load_replica_num" = "-1",
"bloom_filter_columns" = "size",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"compression" = "LZ4",
"estimate_partition_size" = "10G",
"enable_unique_key_merge_on_write" = "true",
"light_schema_change" = "true",
"store_row_column" = "true",
"row_store_page_size" = "16384",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728",
"enable_mow_light_delete" = "false"
);
表中已 mock 上亿数据,使用如下代码查询:
rows, err := db.Query("use testdb; SET enable_sql_cache=false; SET enable_partition_cache=false; select id, name, parent_path from metadata LIMIT 1000000;")
// ...
for rows.Next() {
// ...
count++
}
fmt.Println(count)
实际 count 的值始终小于 LIMIT 的 100w,如772416
。研究发现在 LIMIT > 40640 时便会出现此现象,并且实际 count 的值均为 4064 的倍数或 0。前往 Doris Profiling 中检查查询能发现每个 Fragment 产生的 InputRows 均为 4064,猜测是分片?但无法解释为何存在不符合最大值的情况。
另外用下列代码查询 SELECT COUNT(*) FROM metadata;
会直接报错:
row := db.QueryRow("SELECT COUNT(*) FROM metadata;");
var count int
if err = row.Scan(&count); err != nil //......
报错内容为 Invalid Argument: [FlightSQL] [RUNTIME_ERROR]ArrowFlightBatchLocalReader fetch arrow data failedTStatus: (InvalidArgument; DoGet: endpoint 0: [uri:"grpc+tcp://192.168.0.101:9096"])
顺便,通过 go/python driver 执行的查询在 QueryProfile 里的 Task State 永远都是 RUNNING
,这正常吗?