Slow SQL from internal system tables in the Doris audit log


Doris version: 2.1.5.
We are seeing quite a few slow SQL statements that appear to come from internal system tasks, along with a large number of warnings.

// warning
2024-08-09 02:07:03,115 WARN (Analysis Job Executor-0|199) [OlapAnalysisTask.doSample():103] Scale factor is infinite or Nan, will set scale factor to 1.
// slow sql
2024-08-09 01:57:01,049 [slow_query] |Client=|User=root|Ctl=internal|Db=__internal_schema|State=OK|ErrorCode=0|ErrorMessage=|Time(ms)=1345|ScanBytes=0|ScanRows=0|ReturnRows=21|StmtId=1|QueryId=9e3b715f8d634436-a0d1dc2df739f158|IsQuery=false|isNereids=true|feIp=10.0.27.89|Stmt=OriginStatement{originStmt='INSERT INTO internal.__internal_schema.column_statistics VALUES ('10545--1-platform_transaction_id',0,10539,10545,-1,'platform_transaction_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-amount',0,10539,10545,-1,'amount',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-id',0,10539,10545,-1,'id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-user_id',0,10539,10545,-1,'user_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-order_status',0,10539,10545,-1,'order_status',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-round_id',0,10539,10545,-1,'round_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-remark',0,10539,10545,-1,'remark',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-currency',0,10539,10545,-1,'currency',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-loss',0,10539,10545,-1,'win_loss',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-platform_api_id',0,10539,10545,-1,'platform_api_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-amount',0,10539,10545,-1,'amount',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-amount',0,10539,10545,-1,'amount',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-platform_factory_id',0,10539,10545,-1,'platform_factory_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-platform_category_id',0,10539,10545,-1,'platform_category_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-result_type',0,10539,10545,-1,'result_type',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-id',0,10539,10545,-1,'id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-amount',0,10539,10545,-1,'amount',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-rollback_transaction_id',0,10539,10545,-1,'rollback_transaction_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-platform_game_id',0,10539,10545,-1,'platform_game_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-transaction_id',0,10539,10545,-1,'transaction_id',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53'),('10545--1-date',0,10539,10545,-1,'date',null,0,0,0,NULL,NULL,0,'2024-08-09 01:56:53')', idx=0}|CpuTimeMS=0|ShuffleSendBytes=0|ShuffleSendRows=0|SqlHash=null|peakMemoryBytes=13335593|SqlDigest=|TraceId=|WorkloadGroup=normal|FuzzyVariables=
2024-08-09 02:02:05,065 [slow_query] |Client=|User=root|Ctl=internal|Db=__internal_schema|State=OK|ErrorCode=0|ErrorMessage=|Time(ms)=1037|ScanBytes=156553216|ScanRows=4198112|ReturnRows=0|StmtId=0|QueryId=14a8093693094b7a-b8aa0de8b84052ef|IsQuery=true|isNereids=true|feIp=10.0.27.89|Stmt=SELECT CONCAT('10545', '-', '-1', '-', 'id') AS `id`, 0 AS `catalog_id`, 10539 AS `db_id`, 10545 AS `tbl_id`, -1 AS `idx_id`, 'id' AS `col_id`, NULL AS `part_id`, 10470124 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 10470124) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 2.496272087097168 as `null_count`, SUBSTRING(CAST('1236961419530862669' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('pro-win-704487568743:cq9' AS STRING), 1, 1024) AS `max`, SUM(LENGTH(`column_key`) * count) * 2.496272087097168 AS `data_size`, NOW() FROM (     SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`     FROM     (SELECT SUBSTRING(CAST(`id` AS STRING), 1, 1024) AS `colValue`          FROM `internal`.`api`.`record`      TABLET(11651) limit 4194304) as `t0`     GROUP BY `t0`.`colValue` ) as `t1` |CpuTimeMS=1388|ShuffleSendBytes=40910386|ShuffleSendRows=4194305|SqlHash=null|peakMemoryBytes=457735872|SqlDigest=d41d8cd98f00b204e9800998ecf8427e|TraceId=|WorkloadGroup=normal|FuzzyVariables=
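
Both entries come from Doris's automatic statistics collection (auto analyze): the INSERT persists freshly collected column statistics into the internal table __internal_schema.column_statistics, and the SELECT is the sampling query that computes ndv, null_count, min and max for the id column of api.record. A minimal sketch for inspecting that activity, assuming the statement names from the Doris 2.1 documentation apply to your build:

-- List recent statistics-collection jobs and their state
SHOW ANALYZE;

-- Inspect the statistics that the internal INSERT wrote for table id 10545
SELECT * FROM internal.__internal_schema.column_statistics
WHERE tbl_id = 10545;

The table being sampled is defined as follows: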
CREATE TABLE IF NOT EXISTS api.record
(
    `date` datetimev2 COMMENT "time",
    `user_id` int COMMENT "user id",
    `platform_category_id` int COMMENT "category",
    `platform_factory_id` int COMMENT "vendor",
    `platform_game_id` int COMMENT "game",
    `platform_api_id` int COMMENT "api",
    `id` varchar(256) COMMENT "",
    `amount` DECIMAL(14,4) COMMENT "coin amount",
    `transaction_id` VARCHAR(256) COMMENT "transaction identifier",
    `platform_transaction_id` VARCHAR(256) COMMENT "platform-side transaction identifier",
    `result_type` VARCHAR(64) COMMENT "order result",
    `round_id` VARCHAR(256) COMMENT "game round",
    `remark` varchar(256) COMMENT "remark",
    `rollback_transaction_id` VARCHAR(256) COMMENT "rollback transaction identifier",
    `order_status` int COMMENT "order status"
)
DUPLICATE KEY(`date`, `user_id`, `platform_category_id`)
PARTITION BY RANGE(`date`)()
DISTRIBUTED BY HASH(`date`, `user_id`) BUCKETS AUTO
PROPERTIES (
    "estimate_partition_size" = "5G",
    "storage_policy" = "doris_policy",
    "replication_num" = "1",
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "WEEK",
    "dynamic_partition.end" = "1",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "32",
    "dynamic_partition.time_zone" = "Asia/Shanghai"
);
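
For reference, automatic collection can apparently be turned off or replaced with scheduled manual collection. A hedged sketch of the usual knobs, assuming the Doris 2.1 variable and statement names (verify them against your version):

-- Disable automatic statistics collection cluster-wide; manual ANALYZE still works
SET GLOBAL enable_auto_analyze = false;

-- Collect statistics manually with sampling during a quiet window
-- (4194304 rows matches the LIMIT used by the internal sampling query above)
ANALYZE TABLE api.record WITH SAMPLE ROWS 4194304;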

Do these slow SQL statements need to be dealt with? How do they come about?

1 Answer

When collecting statistics on a large table, IO and CPU usage basically stay below 10%. Collection tasks normally do not run very often; a table is only collected again after a certain amount of new data has been loaded, so on a long-running system the resource usage is generally low. The slow query threshold used to be 5 seconds and above; has it been changed to 1 second here?
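
For reference, the audit log slow query threshold is the FE configuration item qe_slow_log_ms, which defaults to 5000 ms in Doris 2.1. A sketch for checking and restoring it, assuming the standard ADMIN syntax:

-- Check the current threshold on this FE
ADMIN SHOW FRONTEND CONFIG LIKE "qe_slow_log_ms";

-- Restore the default 5-second threshold if it was lowered to 1 second
ADMIN SET FRONTEND CONFIG ("qe_slow_log_ms" = "5000");

Note that ADMIN SET FRONTEND CONFIG only takes effect on the current FE and does not survive a restart; put the value in fe.conf to make it permanent.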