Doris2.1.7查询Hive Catalog表报错get file split failed for table , err: java.lang.NullPointerException

Viewed 33

查询hive catalog表失败,explain SQL也报错相同错误

get file split failed for table: huan_ad_monitor_impression_log_orc, err: java.lang.NullPointerException

SQL如下:

SELECT 
  dt,
  case when grouping(substring(logDate,12,2))=1 then '-' else substring(logDate,12,2) end as h,
  provincename,
  cityname,
  realip,
  count(1) as imp,
  count(distinct mac) as uv,
  count(1) over(partition by case when grouping(dt)=1 then '-' else dt end,
                             case when grouping(substring(logDate,12,2))=1 then '-' else substring(logDate,12,2) end,
                             provincename,cityname) as totalIpUv 
from hive.ad_monitor.huan_ad_monitor_impression_log_orc  
where dt = '2024-11-30' and hpid in ('8160','8161','8162','8163')
group by grouping sets((dt,provincename,cityname,realip,substring(logDate,12,2)),(dt,provincename,cityname,realip))

注意:

如果把SQL中的count(1) as imp,去掉就可以正常查询

查询SQL失败

image.png

explain也失败

image.png

去掉count(1) as imp,就可以正常查询

image.png

3 Answers

方便提供一下hive catalog的创建语句和对应的hive表结构吗。另外查询的具体报错去fe.log看看

建Catalog语句

CREATE CATALOG hive PROPERTIES (
"yarn.resourcemanager.principal" = "yarn/_HOST@HADOOP.COM",
"uri" = "thrift://ali-odp-master-01.huan.tv:9083,thrift://ali-odp-master-02.huan.tv:9083",
"type" = "hms",
"oss.secret_key" = "",
"oss.endpoint" = "",
"oss.access_key" = "",
"metadata_refresh_interval_sec" = "3600",
"ipc.client.fallback-to-simple-auth-allowed" = "true",
"hive.version" = "3.1.3",
"hive.metastore.uris" = "thrift://ali-odp-master-01.huan.tv:9083,thrift://ali-odp-master-02.huan.tv:9083",
"hive.metastore.sasl.enabled" = "true",
"hive.metastore.kerberos.principal" = "hive/_HOST@HADOOP.COM",
"hadoop.security.authentication" = "kerberos",
"hadoop.kerberos.principal" = "hive@HUAN.TV",
"hadoop.kerberos.keytab" = "/root/hive.keytab",
"dfs.nameservices" = "ha-nn",
"dfs.namenode.rpc-address.ha-nn.nn2" = "ali-odp-master-02.huan.tv:8020",
"dfs.namenode.rpc-address.ha-nn.nn1" = "ali-odp-master-01.huan.tv:8020",
"dfs.ha.namenodes.ha-nn" = "nn1,nn2",
"dfs.client.failover.proxy.provider.ha-nn" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"access_controller.properties.ranger.service.name" = "hive",
"access_controller.class" = "org.apache.doris.catalog.authorizer.ranger.hive.RangerHiveAccessControllerFactory"
);

建表语句

CREATE EXTERNAL TABLE ad_monitor.huan_ad_monitor_impression_log_orc(
motype string,
third string,
logdate string,
logtime string,
serialnum string,
hk string,
hm string,
hp string,
mac string,
wifimac string,
hdnum string,
hmac string,
realip string,
ip string,
time string,
nc string,
platform string,
sign string,
signver string,
signcorrect string,
hmzk string,
hmzp string,
hmzsalt string,
provincename string,
provincecode string,
cityname string,
citycode string,
countyname string,
countycode string,
ua string,
hpid string,
hpsid string,
tr string,
hupadid string,
hadid string,
hcreativeid string,
hgroupid string,
hplayid string,
hvideoid string,
source string,
hbd string,
ipipprovincename string,
ipipcityname string,
ipipcountyname string,
ipipcountycode string,
hchip string,
appversion string,
sdkversion string,
idfa string,
idfamd5 string,
imeimd5 string,
oaid string,
oaidmd5 string,
hprid string,
hadvid string,
os string,
hds string,
hprice string)
PARTITIONED BY (
dt string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'

不只是上面那个报错,以下SQL也报这个错
image.png