部署在k8s上,采用helm chart方式部署,doris 版本 2.1.1
老师们好
我们在测试db过程发现,在并行执行多个sql,执行作业全部卡住不动,同时客户端工具无法连接上Doris,猜测是资源不满足Doris的运行要求,或是某些配置不正确。
调度作业的执行方式是:
mysql -uroot -hdoris-db.bigdata.test.zrzkwlw.com -P9030 -e "insert into table select xxx from xxx "
测试过程发现在大约6-10个并行,结果是作业全部卡住
查看Doris资源消耗,并未看到拉满的情况。我的环境资源分配如下:
- be 3个实例,资源分别分配了4c 8g;
- fe 3个实例,资源也分别分配了4c 8g
卡住期间be资源消耗情况
fe资源消耗情况
客户端连接也卡住了
请问老师们,看看是否我的配置有问题?
be yaml
kind: StatefulSet
apiVersion: apps/v1
metadata:
name: doriscluster-helm-bigdata-test-be
namespace: bigdata-test
labels:
app.doris.ownerreference/name: doriscluster-helm-bigdata-test
app.kubernetes.io/component: be
annotations:
app.doris.components/hash: '3505525361'
spec:
replicas: 3
selector:
matchLabels:
app.doris.ownerreference/name: doriscluster-helm-bigdata-test-be
app.kubernetes.io/component: be
template:
metadata:
name: doriscluster-helm-bigdata-test-be
creationTimestamp: null
labels:
app.doris.cluster: doriscluster-helm-bigdata-test
app.doris.ownerreference/name: doriscluster-helm-bigdata-test-be
app.kubernetes.io/component: be
spec:
volumes:
- name: be-storage
persistentVolumeClaim:
claimName: be-storage
- name: be-log
persistentVolumeClaim:
claimName: be-log
- name: podinfo
downwardAPI:
items:
- path: labels
fieldRef:
apiVersion: v1
fieldPath: metadata.labels
- path: annotations
fieldRef:
apiVersion: v1
fieldPath: metadata.annotations
defaultMode: 420
- name: doriscluster-helm-be-configmap
configMap:
name: doriscluster-helm-be-configmap
defaultMode: 420
initContainers:
- name: default-init
image: 'selectdb/alpine:latest'
command:
- /bin/sh
args:
- '-c'
- sysctl -w vm.max_map_count=2000000 && swapoff -a
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
containers:
- name: be
image: 'selectdb/doris.be-ubuntu:2.1.1'
command:
- /opt/apache-doris/be_entrypoint.sh
args:
- $(ENV_FE_ADDR)
ports:
- name: be-port
containerPort: 9060
protocol: TCP
- name: webserver-port
containerPort: 8040
protocol: TCP
- name: heartbeat-port
containerPort: 9050
protocol: TCP
- name: brpc-port
containerPort: 8060
protocol: TCP
env:
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: HOST_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.hostIP
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: USER
value: root
- name: DORIS_ROOT
value: /opt/apache-doris
- name: CONFIGMAP_MOUNT_PATH
value: /etc/doris
- name: ENV_FE_ADDR
value: doriscluster-helm-bigdata-test-fe-service
- name: FE_QUERY_PORT
value: '9030'
resources:
limits:
cpu: '4'
memory: 8Gi
requests:
cpu: '4'
memory: 8Gi
volumeMounts:
- name: podinfo
mountPath: /etc/podinfo
- name: be-storage
mountPath: /opt/apache-doris/be/storage
- name: be-log
mountPath: /opt/apache-doris/be/log
- name: doriscluster-helm-be-configmap
mountPath: /etc/doris
livenessProbe:
tcpSocket:
port: 9050
initialDelaySeconds: 80
timeoutSeconds: 180
periodSeconds: 5
successThreshold: 1
failureThreshold: 3
readinessProbe:
httpGet:
path: /api/health
port: 8040
scheme: HTTP
timeoutSeconds: 1
periodSeconds: 5
successThreshold: 1
failureThreshold: 3
startupProbe:
tcpSocket:
port: 9050
timeoutSeconds: 1
periodSeconds: 5
successThreshold: 1
failureThreshold: 60
lifecycle:
preStop:
exec:
command:
- /opt/apache-doris/be_prestop.sh
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
imagePullPolicy: IfNotPresent
restartPolicy: Always
terminationGracePeriodSeconds: 30
dnsPolicy: ClusterFirst
securityContext: {}
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 20
podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/component
operator: In
values:
- be
topologyKey: kubernetes.io/hostname
- weight: 80
podAffinityTerm:
labelSelector:
matchLabels:
kubernetes.io/hostname: fe
topologyKey: kubernetes.io/hostname
schedulerName: default-scheduler
volumeClaimTemplates:
- kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: be-storage
creationTimestamp: null
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 30Gi
storageClassName: be-volume-bigdata-test
volumeMode: Filesystem
status:
phase: Pending
- kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: be-log
creationTimestamp: null
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 100Gi
storageClassName: be-log-volume-bigdata-test
volumeMode: Filesystem
status:
phase: Pending
serviceName: doriscluster-helm-bigdata-test-be-internal
podManagementPolicy: Parallel
updateStrategy:
type: RollingUpdate
rollingUpdate:
partition: 0
revisionHistoryLimit: 5
fe yaml
kind: StatefulSet
apiVersion: apps/v1
metadata:
name: doriscluster-helm-bigdata-test-fe
namespace: bigdata-test
labels:
app.doris.ownerreference/name: doriscluster-helm-bigdata-test
app.kubernetes.io/component: fe
annotations:
app.doris.components/hash: '3362463793'
spec:
replicas: 3
selector:
matchLabels:
app.doris.ownerreference/name: doriscluster-helm-bigdata-test-fe
app.kubernetes.io/component: fe
template:
metadata:
name: doriscluster-helm-bigdata-test-fe
creationTimestamp: null
labels:
app.doris.cluster: doriscluster-helm-bigdata-test
app.doris.ownerreference/name: doriscluster-helm-bigdata-test-fe
app.kubernetes.io/component: fe
spec:
volumes:
- name: fe-meta
persistentVolumeClaim:
claimName: fe-meta
- name: fe-log
persistentVolumeClaim:
claimName: fe-log
- name: podinfo
downwardAPI:
items:
- path: labels
fieldRef:
apiVersion: v1
fieldPath: metadata.labels
- path: annotations
fieldRef:
apiVersion: v1
fieldPath: metadata.annotations
defaultMode: 420
- name: doriscluster-helm-fe-configmap
configMap:
name: doriscluster-helm-fe-configmap
defaultMode: 420
containers:
- name: fe
image: 'selectdb/doris.fe-ubuntu:2.1.1'
command:
- /opt/apache-doris/fe_entrypoint.sh
args:
- $(ENV_FE_ADDR)
ports:
- name: http-port
containerPort: 8030
protocol: TCP
- name: rpc-port
containerPort: 9020
protocol: TCP
- name: query-port
containerPort: 9030
protocol: TCP
- name: edit-log-port
containerPort: 9010
protocol: TCP
env:
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: HOST_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.hostIP
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: USER
value: root
- name: DORIS_ROOT
value: /opt/apache-doris
- name: CONFIGMAP_MOUNT_PATH
value: /etc/doris
- name: ENV_FE_ADDR
value: doriscluster-helm-bigdata-test-fe-service
- name: FE_QUERY_PORT
value: '9030'
- name: ELECT_NUMBER
value: '3'
resources:
limits:
cpu: '4'
memory: 8Gi
requests:
cpu: '4'
memory: 8Gi
volumeMounts:
- name: podinfo
mountPath: /etc/podinfo
- name: fe-meta
mountPath: /opt/apache-doris/fe/doris-meta
- name: fe-log
mountPath: /opt/apache-doris/fe/log
- name: doriscluster-helm-fe-configmap
mountPath: /etc/doris
livenessProbe:
tcpSocket:
port: 9030
initialDelaySeconds: 80
timeoutSeconds: 180
periodSeconds: 5
successThreshold: 1
failureThreshold: 3
readinessProbe:
httpGet:
path: /api/health
port: 8030
scheme: HTTP
timeoutSeconds: 1
periodSeconds: 5
successThreshold: 1
failureThreshold: 3
startupProbe:
tcpSocket:
port: 9030
timeoutSeconds: 1
periodSeconds: 5
successThreshold: 1
failureThreshold: 60
lifecycle:
preStop:
exec:
command:
- /opt/apache-doris/fe_prestop.sh
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
imagePullPolicy: IfNotPresent
restartPolicy: Always
terminationGracePeriodSeconds: 30
dnsPolicy: ClusterFirst
securityContext: {}
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 20
podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/component
operator: In
values:
- fe
topologyKey: kubernetes.io/hostname
schedulerName: default-scheduler
volumeClaimTemplates:
- kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: fe-meta
creationTimestamp: null
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 30Gi
storageClassName: fe-volume-bigdata-test
volumeMode: Filesystem
status:
phase: Pending
- kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: fe-log
creationTimestamp: null
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 30Gi
storageClassName: fe-log-volume-bigdata-test
volumeMode: Filesystem
status:
phase: Pending
serviceName: doriscluster-helm-bigdata-test-fe-internal
podManagementPolicy: Parallel
updateStrategy:
type: RollingUpdate
rollingUpdate:
partition: 0
revisionHistoryLimit: 5
fe.conf
# Licensed to the Apache Software Foundation (ASF) under one
CUR_DATE=`date +%Y%m%d-%H%M%S`
# the output dir of stderr and stdout
LOG_DIR = ${DORIS_HOME}/log
JAVA_OPTS="-Djavax.security.auth.useSubjectCredsOnly=false -Xss4m -Xmx8192m -XX:+UseMembar -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=7 -XX:+PrintGCDateStamps -XX:+PrintGCDetails -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSClassUnloadingEnabled -XX:-CMSParallelRemarkEnabled -XX:CMSInitiatingOccupancyFraction=80 -XX:SoftRefLRUPolicyMSPerMB=0 -Xloggc:$DORIS_HOME/log/fe.gc.log.$CUR_DATE"
# For jdk 9+, this JAVA_OPTS will be used as default JVM options
JAVA_OPTS_FOR_JDK_9="-Djavax.security.auth.useSubjectCredsOnly=false -Xss4m -Xmx8192m -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=7 -XX:+CMSClassUnloadingEnabled -XX:-CMSParallelRemarkEnabled -XX:CMSInitiatingOccupancyFraction=80 -XX:SoftRefLRUPolicyMSPerMB=0 -Xlog:gc*:$DORIS_HOME/log/fe.gc.log.$CUR_DATE:time"
##
## the lowercase properties are read by main program.
##
# INFO, WARN, ERROR, FATAL
sys_log_level = INFO
# NORMAL, BRIEF, ASYNC
sys_log_mode = NORMAL
# store metadata, must be created before start FE.
# Default value is ${DORIS_HOME}/doris-meta
meta_dir = ${DORIS_HOME}/doris-meta
# Default dirs to put jdbc drivers,default value is ${DORIS_HOME}/jdbc_drivers
# jdbc_drivers_dir = ${DORIS_HOME}/jdbc_drivers
http_port = 8030
rpc_port = 9020
query_port = 9030
edit_log_port = 9010
# Choose one if there are more than one ip except loopback address.
# Note that there should at most one ip match this list.
# If no ip match this rule, will choose one randomly.
# use CIDR format, e.g. 10.10.10.0/24 or IP format, e.g. 10.10.10.1
# Default value is empty.
# priority_networks = 10.10.10.0/24;192.168.0.0/16
# Advanced configurations
# log_roll_size_mb = 1024
# sys_log_dir = ${DORIS_HOME}/log
# sys_log_roll_num = 10
# sys_log_verbose_modules = org.apache.doris
# audit_log_dir = ${DORIS_HOME}/log
# audit_log_modules = slow_query, query
# audit_log_roll_num = 10
# meta_delay_toleration_second = 10
# qe_max_connection = 1024
# qe_query_timeout_second = 300
# qe_slow_log_ms = 5000
enable_fqdn_mode = true
be.conf
CUR_DATE=`date +%Y%m%d-%H%M%S`
PPROF_TMPDIR="$DORIS_HOME/log/"
JAVA_OPTS="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xloggc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Djavax.security.auth.useSubjectCredsOnly=false -Dsun.java.command=DorisBE -XX:-CriticalJNINatives -DJDBC_MIN_POOL=1 -DJDBC_MAX_POOL=100 -DJDBC_MAX_IDLE_TIME=300000 -DJDBC_MAX_WAIT_TIME=5000"
# For jdk 9+, this JAVA_OPTS will be used as default JVM options
JAVA_OPTS_FOR_JDK_9="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Djavax.security.auth.useSubjectCredsOnly=false -Dsun.java.command=DorisBE -XX:-CriticalJNINatives -DJDBC_MIN_POOL=1 -DJDBC_MAX_POOL=100 -DJDBC_MAX_IDLE_TIME=300000 -DJDBC_MAX_WAIT_TIME=5000"
# since 1.2, the JAVA_HOME need to be set to run BE process.
# JAVA_HOME=/path/to/jdk/
# https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
# https://jemalloc.net/jemalloc.3.html
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,lg_tcache_max:20,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false"
JEMALLOC_PROF_PRFIX=""
# INFO, WARNING, ERROR, FATAL
sys_log_level = INFO
# ports for admin, web, heartbeat service
be_port = 9060
webserver_port = 8040
heartbeat_service_port = 9050
brpc_port = 8060
# HTTPS configures
enable_https = false
# path of certificate in PEM format.
ssl_certificate_path = "$DORIS_HOME/conf/cert.pem"
# path of private key in PEM format.
ssl_private_key_path = "$DORIS_HOME/conf/key.pem"
# enable auth check
enable_auth = false
# Choose one if there are more than one ip except loopback address.
# Note that there should at most one ip match this list.
# If no ip match this rule, will choose one randomly.
# use CIDR format, e.g. 10.10.10.0/24 or IP format, e.g. 10.10.10.1
# Default value is empty.
# priority_networks = 10.10.10.0/24;192.168.0.0/16
# data root path, separate by ';'
# you can specify the storage medium of each root path, HDD or SSD
# you can add capacity limit at the end of each root path, separate by ','
# eg:
# storage_root_path = /home/disk1/doris.HDD,50;/home/disk2/doris.SSD,1;/home/disk2/doris
# /home/disk1/doris.HDD, capacity limit is 50GB, HDD;
# /home/disk2/doris.SSD, capacity limit is 1GB, SSD;
# /home/disk2/doris, capacity limit is disk capacity, HDD(default)
#
# you also can specify the properties by setting '<property>:<value>', separate by ','
# property 'medium' has a higher priority than the extension of path
#
# Default value is ${DORIS_HOME}/storage, you should create it by hand.
storage_root_path = ${DORIS_HOME}/storage
# Default dirs to put jdbc drivers,default value is ${DORIS_HOME}/jdbc_drivers
# jdbc_drivers_dir = ${DORIS_HOME}/jdbc_drivers
# Advanced configurations
# sys_log_dir = ${DORIS_HOME}/log
# sys_log_roll_mode = SIZE-MB-1024
# sys_log_roll_num = 10
# sys_log_verbose_modules = *
# log_buffer_level = -1
# palo_cgroups