一个查询占满了BE节点的内存

Viewed 66

目前每个BE节点是16G内存,共两个节点。查询是跨域4个月的数据统计SQL,执行后会提示:

数据源执行SQL失败:java.lang.RuntimeException: SQL execute error by datasource... java.sql.SQLException: errCode = 2, detailMessage = (...)[MEM_ALLOC_FAILED]Create Expr failed because [E11] Allocator sys memory check failed: Cannot alloc:64, consuming tracker:<Query#Id=4185385b46c94b58-b2359a29da33298b>, peak used 444672, current used 444672, exec node:<>, process memory used 12.03 GB exceed limit 13.96 GB or sys available memory 1.54 GB less than low water mark 1.55 GB. 0# doris:

在GreenPlum上查询不会爆内存,但速度确实是慢。

Doris的SQL如下,表数据行数可看每张表上的注释:

with base as (
select
					fac.code as fac_code,
					fac.`name` as fac_name,
					poi.code as point_code,
					poi.`name` as point_name,
					dict.dict_code as pt,
					dict.dict_name as pt_name,
					dev.code as dev_code,
					dev.`name` as dev_name
from 

-- 约75000行					
	bi_ads.dim_40101_device_5mmf dev
	
-- 约23000行					
LEFT join bi_ads.dim_40103_watch_point_5mmf poi   
							on
	dev.watchpoint_code = poi.code
	and dev.s_stat = '1'
	
-- 约10000行
left join bi_ads.dim_40102_facilities_5mmf fac
							on
	poi.facilities_code = fac.code
	and fac.s_stat = '1'
	
-- 约1000行
left join bi_ads.dim_401_dict dict
							on
	dict.dict_code = poi.process_section_type
	and dict.type_code = 'dc401_production_process_section'
	and dict.s_stat = '1'
order by
	dev_name
)


select 
	flow.fac_name,
	flow.fac_code,
	liq.liquid_level,
	flow.instant_flow,
	energy.input_power,
	pressure.pressure,
	wc.plum_comb,
	wc.frequency,
	flow.collection_time_str_2min
from

		(
			select 
				t.fac_name,
				t.fac_code,
				sum(t.instant_flow) as instant_flow,
				collection_time_str_2min from (
							select 
									base.fac_name,
									base.fac_code,
									base.dev_code,
									avg(flow.instant_flow) as instant_flow,
									collection_time_str_2min
							from
									-- 约40000000行
									bi_ads.dwd_40104_collect_flow_di flow 
									inner join base
										on flow.device_code = base.dev_code and base.pt in ('103100100','103100300')
									where flow.collection_time_dt >= '${start_time}'
									GROUP BY base.fac_name,base.fac_code,base.dev_code,collection_time_str_2min
							) t
							GROUP BY t.fac_name,t.fac_code,t.collection_time_str_2min
		) flow

LEFT JOIN	
		(
				select 
						base.fac_name,
						base.fac_code,
						max(pre.pressure) AS pressure,
						collection_time_str_2min
				from
					    -- 约1460000000行
						bi_ads.dwd_40104_collect_pressure_di pre 
						inner join base
							on pre.device_code = base.dev_code and base.pt in ('103100100','103100300')
						where pre.collection_time_dt >= '${start_time}'
						GROUP BY base.fac_name,base.fac_code,collection_time_str_2min
		) pressure 	
		ON flow.fac_code = pressure.fac_code and flow.collection_time_str_2min = pressure.collection_time_str_2min
-- 		

LEFT JOIN		
		(			
				select 
				
						base.fac_name,
						base.fac_code,
						
						sum(
							case 
								when base.pt = '103100200' then (case when energy.phase_ab_voltage < 300 then energy.phase_ab_voltage*1000 else energy.phase_ab_voltage end)*energy.phase_a_current*1.732*0.95/1000
								when base.pt = '103100400' then 380*energy.phase_a_current*1.732*0.95/1000
								else NULL
							END
						) AS input_power,
						

						collection_time_str_2min
				from
					(
						select 
							device_code,
							collection_time_str_2min,
							avg(phase_a_current) as phase_a_current,
							avg(phase_ab_voltage) as phase_ab_voltage
						from
							
					   		 -- 约31000000行
							bi_ads.dwd_40104_energy_consumption_di
						where device_code in (
									    '050301000020100041200',  
											'050301000020100041193',  
											'050301000020100041187',  
											'050301000020100041191', 


											'050302000020100041219',  
											'050302000020100041215',  
											'050302000020100041209'  
							)
						and collection_time_dt >= '2023-12-01'
						GROUP BY
							device_code,
							collection_time_str_2min
					) energy 
						inner join base
							on energy.device_code = base.dev_code and base.pt in ('103100200','103100400')
						GROUP BY base.fac_name,base.fac_code,collection_time_str_2min
		) energy	
		ON flow.fac_code = energy.fac_code and flow.collection_time_str_2min = energy.collection_time_str_2min

LEFT JOIN	
-- 启停状态	
		(
				select 
						base.fac_name,
						base.fac_code,
						GROUP_CONCAT(
								cast(
										(
											case 
												when wc.frequency !=0 and wc.frequency is not null then wc.frequency
												else NULL
											end 
										)
									 	as char
									)
								order by base.dev_name	
								,							
								',' 
						) as frequency,
						GROUP_CONCAT(
							case 
								when wc.startup_state = '02' then replace(base.dev_name,'#送水泵','#泵')
								else null
							end 
							order by base.dev_name
							,
							',' 
						) as plum_comb,
						collection_time_str_2min
				from
						(
						select
							collection_time_str_2min,
							device_code,
							avg(frequency) as frequency,
							max(startup_state) as startup_state
						from
							-- 约10000000行
							bi_ads.dwd_40104_working_condition_di wc
						where
							wc.collection_time_dt >= '2023-12-01'
						group by
							collection_time_str_2min,
							device_code ) wc
		inner join base
							on wc.device_code = base.dev_code and base.pt in ('103100200','103100400')
							GROUP BY base.fac_name,base.fac_code,collection_time_str_2min
		) wc		
		ON flow.fac_code = wc.fac_code and flow.collection_time_str_2min = wc.collection_time_str_2min


left join	
		(
select 
						avg(liq.liquid_level) as liquid_level,

	collection_time_str_2min
from
							-- 约33000000行
						bi_ads.dwd_40104_collect_liquid_di liq
inner join base
							on
	liq.device_code = base.dev_code
	and base.pt = '103090000'
	where liq.collection_time_dt >= '2023-12-01'
GROUP BY
	collection_time_str_2min
order by
	collection_time_str_2min
		) liq
		ON
flow.collection_time_str_2min = liq.collection_time_str_2min
1 Answers
  1. 可以先将这几张表进行analyze ,收集下统计信息,然后重试一下,具体可以参考这里:收集统计信息

  2. GP的节点数量和配置和Doris节点配置是否保持一致

  3. Doris BE 是否和其他服务进行混合部署