doris 排序查询数据异常

Viewed 71

版本 doris 3.0.2,执行下面语句存在问题
截屏2024-11-12 15.00.36.png

如果不要 where 语句是正常的
截屏2024-11-12 15.00.12.png 生产上都有 order by 数字类型 然后 rank 限制没有问题

with tmp as (
select
goods_sn, site_l1_cd, site_tp,
row_number () over (partition by site_l1_cd order by c7d_sale_cnt_no_device_cd desc ) as rank
FROM
(select t0.goods_sn as goods_sn, t0.site_l1_cd as site_l1_cd, t0.site_tp as site_tp, t0.tag_val as goods_sn_cd, t1.tag_val as brand_cd, t2.tag_val as c7d_sale_cnt_no_device_cd from (select * from dmp.tag_4391 where dt = '20241111') t0 full outer join (select * from dmp.tag_4972 where dt = '20241111') t1 on t1.goods_sn = t0.goods_sn and t1.site_l1_cd = t0.site_l1_cd and t1.site_tp = t0.site_tp full outer join (select * from dmp.tag_5386 where dt = '20241111') t2 on t2.goods_sn = t0.goods_sn and t2.site_l1_cd = t0.site_l1_cd and t2.site_tp = t0.site_tp ) a
where
1 = 1
and (brand_cd IN ('Acinaci')) and site_tp='shein'
)
select count(distinct s.goods_sn) as cnt
from (select * from tmp where rank <= 100) s;

2 Answers

版本是:Doris Version: doris-3.0.1-rc04-ada1a55576
修复PR:https://github.com/apache/doris/pull/40761 是3.0.2merge的。

截屏2024-11-12 15.38.39.png where rank >100
或者 having rank >100 可以,<不行 。截屏2024-11-12 15.40.36.png

rank > 100 explain

PLAN FRAGMENT 0
  OUTPUT EXPRS:
    goods_sn[#18]
    site_tp[#19]
    rank[#20]
  PARTITION: HASH_PARTITIONED: site_tp[#17]
""
  HAS_COLO_PLAN_NODE: false
""
  VRESULT SINK
     MYSQL_PROTOCAL
""
  7:VANALYTIC(564)
  |  functions: [row_number()]
  |  partition by: site_tp[#19]
  |  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  |  predicates: (rank[#20] > 100)
  |  distribute expr lists: site_tp[#19]
  |  
  6:VSORT(561)
  |  order by: site_tp[#19] ASC
  |  offset: 0
  |  distribute expr lists: site_tp[#17]
  |  
  5:VEXCHANGE
     offset: 0
"     distribute expr lists: goods_sn[#16], site_tp[#17]"
""
PLAN FRAGMENT 1
""
"  PARTITION: HASH_PARTITIONED: goods_sn[#10], site_tp[#11]"
""
  HAS_COLO_PLAN_NODE: false
""
  STREAM DATA SINK
    EXCHANGE ID: 05
    HASH_PARTITIONED: site_tp[#17]
""
  4:VHASH JOIN(552)
  |  join op: RIGHT OUTER JOIN(PARTITIONED)[]
  |  equal join conjunct: (goods_sn[#10] = goods_sn[#4])
  |  equal join conjunct: (site_tp[#11] = site_tp[#5])
"  |  runtime filters: RF000[min_max] <- goods_sn[#4](1/1/2048), RF001[bloom] <- goods_sn[#4](1/1/2048), RF002[min_max] <- site_tp[#5](1/1/2048), RF003[bloom] <- site_tp[#5](1/1/2048)"
  |  cardinality=1
  |  vec output tuple id: 5
  |  output tuple id: 5
  |  vIntermediate tuple ids: 4 
  |  hash output slot ids: 4 5 
"  |  final projections: goods_sn[#14], site_tp[#15]"
  |  final project output tuple id: 5
"  |  distribute expr lists: goods_sn[#10], site_tp[#11]"
"  |  distribute expr lists: goods_sn[#4], site_tp[#5]"
  |  
  |----1:VEXCHANGE
  |       offset: 0
"  |       distribute expr lists: goods_sn[#4], site_tp[#5]"
  |    
  3:VEXCHANGE
     offset: 0
"     distribute expr lists: goods_sn[#10], site_tp[#11]"
""
PLAN FRAGMENT 2
""
"  PARTITION: HASH_PARTITIONED: goods_sn[#6], site_tp[#7]"
""
  HAS_COLO_PLAN_NODE: false
""
  STREAM DATA SINK
    EXCHANGE ID: 03
"    HASH_PARTITIONED: goods_sn[#10], site_tp[#11]"
""
  2:VOlapScanNode(530)
"     TABLE: dmp.tag_5274(tag_5274), PREAGGREGATION: ON"
     PREDICATES: (site_tp[#7] = 'shein') AND (dt[#9] = '20241016')
"     runtime filters: RF000[min_max] -> goods_sn[#6], RF001[bloom] -> goods_sn[#6], RF002[min_max] -> site_tp[#7], RF003[bloom] -> site_tp[#7]"
     partitions=1/3 (p20241016)
"     tablets=100/100, tabletList=56601,56605,56609 ..."
"     cardinality=131818, avgRowSize=0.0, numNodes=1"
     pushAggOp=NONE
"     final projections: goods_sn[#6], site_tp[#7]"
     final project output tuple id: 3
""
PLAN FRAGMENT 3
""
"  PARTITION: HASH_PARTITIONED: goods_sn[#0], site_tp[#1]"
""
  HAS_COLO_PLAN_NODE: false
""
  STREAM DATA SINK
    EXCHANGE ID: 01
"    HASH_PARTITIONED: goods_sn[#4], site_tp[#5]"
""
  0:VOlapScanNode(540)
"     TABLE: dmp.tag_2003095(tag_2003095), PREAGGREGATION: ON"
     PREDICATES: (site_tp[#1] = 'shein') AND (dt[#3] = '20241105') AND tag_val[#2] IS NOT NULL
     partitions=1/1 (p20241105)
"     tablets=100/100, tabletList=64264,64268,64272 ..."
"     cardinality=-1, avgRowSize=0.0, numNodes=1"
     pushAggOp=NONE
"     final projections: goods_sn[#0], site_tp[#1]"
     final project output tuple id: 1

rank <100 explain

PLAN FRAGMENT 0
  OUTPUT EXPRS:
    goods_sn[#20]
    site_tp[#21]
    rank[#22]
  PARTITION: HASH_PARTITIONED: site_tp[#19]
""
  HAS_COLO_PLAN_NODE: false
""
  VRESULT SINK
     MYSQL_PROTOCAL
""
  8:VANALYTIC(583)
  |  functions: [row_number()]
  |  partition by: site_tp[#21]
  |  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  |  predicates: (rank[#22] < 100)
  |  distribute expr lists: site_tp[#21]
  |  
  7:VSORT(580)
  |  order by: site_tp[#21] ASC
  |  offset: 0
  |  distribute expr lists: site_tp[#19]
  |  
  6:VEXCHANGE
     offset: 0
"     distribute expr lists: goods_sn[#18], site_tp[#19]"
""
PLAN FRAGMENT 1
""
"  PARTITION: HASH_PARTITIONED: goods_sn[#10], site_tp[#11]"
""
  HAS_COLO_PLAN_NODE: false
""
  STREAM DATA SINK
    EXCHANGE ID: 06
    HASH_PARTITIONED: site_tp[#19]
""
  5:VPartitionTopN(574)
  |  functions: row_number
  |  partition by: site_tp[#17]
  |  order by: 
  |  has global limit: false
  |  partition limit: 99
  |  partition topn phase: TWO_PHASE_LOCAL_PTOPN
"  |  distribute expr lists: goods_sn[#16], site_tp[#17]"
  |  
  4:VHASH JOIN(568)
  |  join op: RIGHT OUTER JOIN(PARTITIONED)[]
  |  equal join conjunct: (goods_sn[#10] = goods_sn[#4])
  |  equal join conjunct: (site_tp[#11] = site_tp[#5])
"  |  runtime filters: RF000[min_max] <- goods_sn[#4](1/1/2048), RF001[bloom] <- goods_sn[#4](1/1/2048), RF002[min_max] <- site_tp[#5](1/1/2048), RF003[bloom] <- site_tp[#5](1/1/2048)"
  |  cardinality=1
  |  vec output tuple id: 5
  |  output tuple id: 5
  |  vIntermediate tuple ids: 4 
  |  hash output slot ids: 4 5 
"  |  final projections: goods_sn[#14], site_tp[#15]"
  |  final project output tuple id: 5
"  |  distribute expr lists: goods_sn[#10], site_tp[#11]"
"  |  distribute expr lists: goods_sn[#4], site_tp[#5]"
  |  
  |----1:VEXCHANGE
  |       offset: 0
"  |       distribute expr lists: goods_sn[#4], site_tp[#5]"
  |    
  3:VEXCHANGE
     offset: 0
"     distribute expr lists: goods_sn[#10], site_tp[#11]"
""
PLAN FRAGMENT 2
""
"  PARTITION: HASH_PARTITIONED: goods_sn[#6], site_tp[#7]"
""
  HAS_COLO_PLAN_NODE: false
""
  STREAM DATA SINK
    EXCHANGE ID: 03
"    HASH_PARTITIONED: goods_sn[#10], site_tp[#11]"
""
  2:VOlapScanNode(546)
"     TABLE: dmp.tag_5274(tag_5274), PREAGGREGATION: ON"
     PREDICATES: (site_tp[#7] = 'shein') AND (dt[#9] = '20241016')
"     runtime filters: RF000[min_max] -> goods_sn[#6], RF001[bloom] -> goods_sn[#6], RF002[min_max] -> site_tp[#7], RF003[bloom] -> site_tp[#7]"
     partitions=1/3 (p20241016)
"     tablets=100/100, tabletList=56601,56605,56609 ..."
"     cardinality=131818, avgRowSize=0.0, numNodes=1"
     pushAggOp=NONE
"     final projections: goods_sn[#6], site_tp[#7]"
     final project output tuple id: 3
""
PLAN FRAGMENT 3
""
"  PARTITION: HASH_PARTITIONED: goods_sn[#0], site_tp[#1]"
""
  HAS_COLO_PLAN_NODE: false
""
  STREAM DATA SINK
    EXCHANGE ID: 01
"    HASH_PARTITIONED: goods_sn[#4], site_tp[#5]"
""
  0:VOlapScanNode(556)
"     TABLE: dmp.tag_2003095(tag_2003095), PREAGGREGATION: ON"
     PREDICATES: (site_tp[#1] = 'shein') AND (dt[#3] = '20241105') AND tag_val[#2] IS NOT NULL
     partitions=1/1 (p20241105)
"     tablets=100/100, tabletList=64264,64268,64272 ..."
"     cardinality=-1, avgRowSize=0.0, numNodes=1"
     pushAggOp=NONE
"     final projections: goods_sn[#0], site_tp[#1]"
     final project output tuple id: 1