Doris混合部署(物理节点+K8S)模式探索

Viewed 95

目的

实现存算分离:BE混合节点部署在虚拟机或物理机上,计算节点部署在K8S上,以实现计算节点的弹性扩缩容。

网络方案

在FE和BE所在的物理节点上各添加一条路由,将访问容器网段的流量转发到K8S节点上:

ip route add 10.244.0.0/16 via 10.10.10.90 dev ens192

注:10.244.0.0/16为K8S容器网段(Pod CIDR),10.10.10.90为K8S master节点的IP,ens192为物理节点上的网卡名称。

构建Docker镜像

下载doris发行包及所需jdbc驱动

可自由选择开源版本doris和selectdb版本的doris,本文以selectdb-doris-2.1.4-bin-x64.tar.gz为例,下载地址:https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/enterprise-doris-release-output/selectdb-doris-2.1.4-bin-x64.tar.gz

如果需要连接外部catalog,如mysql、oracle等,需要下载相应驱动并添加到镜像里,本文假设将驱动放置到jdbc_drivers目录下。

修改entry_point.sh脚本

#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Exit on the first failing command and treat a failure in any stage of a
# pipeline as a failure of the whole pipeline. Several functions below switch
# errexit off again with `set +e`.
set -eo pipefail
# Globs that match nothing expand to an empty list instead of to themselves.
shopt -s nullglob

# Obtain necessary and basic information to complete initialization

# Logging helpers.
# usage: doris_[note|warn|error] $log_msg
#    ie: doris_warn "task may be risky!"
#   out: 2023-01-08T19:08:16+08:00 [Warn] [Entrypoint]: task may be risky!
#
# doris_log LEVEL [TEXT...]
#   Writes "<ISO-8601 timestamp> [LEVEL] [Entrypoint]: TEXT" to stdout.
#   When no TEXT argument is given the message is read from stdin.
doris_log() {
    local level="$1"
    shift
    local message
    if [ "$#" -gt 0 ]; then
        # All remaining arguments form the message, joined by single spaces.
        message="$*"
    else
        message="$(cat)"
    fi
    local stamp="$(date -Iseconds)"
    printf '%s [%s] [Entrypoint]: %s\n' "${stamp}" "${level}" "${message}"
}
# Log an informational message (level "Note") on stdout.
doris_note() { doris_log Note "$@"; }
# Log a warning (level "Warn"); the line goes to stderr.
doris_warn() { doris_log Warn "$@" 1>&2; }
# Log an error (level "ERROR") on stderr, then terminate the script with
# exit status 1.
doris_error() {
    { doris_log ERROR "$@"; } 1>&2
    exit 1
}

# Tell whether this file is being sourced rather than executed.
# Returns 0 only when the direct caller recorded in FUNCNAME is 'source'.
_is_sourced() {
    # Fewer than two frames means there is no caller to inspect.
    if [ "${#FUNCNAME[@]}" -lt 2 ]; then
        return 1
    fi
    [[ "${FUNCNAME[0]}" == '_is_sourced' && "${FUNCNAME[1]}" == 'source' ]]
}

# Detect whether this BE has already been initialised.
# Globals:
#   DORIS_HOME (read)               - Doris install root
#   DATABASE_ALREADY_EXISTS (write) - set to 'true' when the BE data
#                                     directory exists, left untouched otherwise
docker_setup_env() {
    declare -g DATABASE_ALREADY_EXISTS
    # Nothing in this script assigns DORIS_HOME; without a fallback an unset
    # value makes the check probe "/be/storage/data". The default matches the
    # install root hard-coded in init_be.sh.
    local doris_home="${DORIS_HOME:-/opt/selectdb-doris}"
    if [ -d "${doris_home}/be/storage/data" ]; then
        DATABASE_ALREADY_EXISTS='true'
    fi
}
# Derive the connection settings used by the rest of this script.
# Globals read:
#   FE_SERVERS  - comma separated <name>:<ip>:<edit_log_port> entries; <name>
#                 is expected to look like fe<N>, N becomes the array index
#   FE_PASSWORD - FE root password (never written to the log)
#   QUERY_PORT  - FE MySQL-protocol port
# Globals written:
#   feIpArray         - FE IPs indexed by N
#   MASTER_FE_IP      - IP of the entry with N == 1
#   BE_HOST_IP        - first non-loopback IPv4 address reported by ifconfig
#   BE_HEARTBEAT_PORT - fixed to 9050
get_doris_args() {
    local entry fe_name fe_id fe_ip
    local -a fe_entries

    IFS=',' read -r -a fe_entries <<<"${FE_SERVERS}"
    for entry in "${fe_entries[@]}"; do
        entry="${entry//[[:space:]]/}"
        [ -n "${entry}" ] || continue
        IFS=':' read -r fe_name fe_ip _ <<<"${entry}"
        fe_id="${fe_name/fe/}"
        if [ -z "${fe_id}" ]; then
            # An empty index would make the array assignment below fail.
            doris_warn "ignoring FE entry without a numeric id: ${entry}"
            continue
        fi
        feIpArray[$fe_id]="${fe_ip}"
    done

    declare -g MASTER_FE_IP BE_HOST_IP BE_HEARTBEAT_PORT FE_PASSWORD QUERY_PORT
    MASTER_FE_IP="${feIpArray[1]}"
    doris_note "masterFe = ${MASTER_FE_IP}"

    # FE_PASSWORD and QUERY_PORT are used as provided by the environment.
    # Only report whether a password is present; never print the secret.
    if [ -n "${FE_PASSWORD}" ]; then
        doris_note "fePassword = ******"
    else
        doris_note "fePassword = <empty>"
    fi
    doris_note "queryPort = ${QUERY_PORT}"

    #BE_HOST_IP=$(echo "${BE_ADDR}" | awk -F ':' '{ sub(/ /, ""); print$1}')
    # Register with the container's own address. awk reads the whole input
    # (no early exit, so no SIGPIPE under pipefail) and prints only the first
    # non-loopback IPv4 address, so a second interface cannot turn the value
    # into a multi-line string. Both "inet 1.2.3.4" and "inet addr:1.2.3.4"
    # output formats are handled.
    BE_HOST_IP="$(ifconfig -a | awk '
        $1 == "inet" {
            ip = $2
            sub(/^addr:/, "", ip)
            if (ip !~ /^127\./ && !found++) print ip
        }')"
    if [ -z "${BE_HOST_IP}" ]; then
        doris_error "no non-loopback IPv4 address found for this container"
    fi
    doris_note "container BE_HOST_IP = ${BE_HOST_IP}"

    #BE_HEARTBEAT_PORT=$(echo "${BE_ADDR}" | awk -F ':' '{ sub(/ /, ""); print$2}')
    BE_HEARTBEAT_PORT=9050
    doris_note "be_addr = ${BE_HOST_IP}:${BE_HEARTBEAT_PORT}"
}
# Execute sql script, passed via stdin, against the master FE as root.
# usage: docker_process_sql [mysql-cli-args]
#    ie: docker_process_sql --database=mydb <<<'INSERT ...'
#    ie: docker_process_sql --database=mydb <my-file.sql
# Globals: QUERY_PORT, MASTER_FE_IP, FE_PASSWORD (read)
# Returns: the exit status of the mysql client
# Side effect: switches errexit off (`set +e`) for the rest of the script.
docker_process_sql() {
    set +e
    # The password travels in MYSQL_PWD instead of -p<password>: an empty
    # password no longer degrades to a bare -p (which asks for a password
    # interactively), whitespace in the value cannot split the argument, and
    # the secret is not visible in the process list.
    # Client diagnostics are discarded, as before.
    MYSQL_PWD="${FE_PASSWORD}" mysql -uroot -P"${QUERY_PORT}" -h"${MASTER_FE_IP}" \
        --comments "$@" 2>/dev/null
}

# Poll the FE until this BE is listed by "show backends" with a "true" column
# on the same row, or give up.
# usage: check_be_status [max_attempts] [interval_seconds]
#   max_attempts     - number of polls, default 300
#   interval_seconds - pause between polls, default 1
# Globals: BE_HOST_IP, BE_HEARTBEAT_PORT (read); be_join_status (written)
# Exits the script through doris_error when the BE never shows up.
# Side effect: switches errexit off (`set +e`).
check_be_status() {
    set +e
    local max_attempts="${1:-300}"
    local interval="${2:-1}"
    local attempt
    local be_registered=false

    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        if ((attempt % 20 == 1)); then
            doris_warn "start check be status~"
        fi
        # A row qualifies when it carries the BE IP, the heartbeat port and
        # "true" as whitespace-delimited columns; matching rows are echoed.
        docker_process_sql <<<"show backends;" |
            grep "[[:space:]]${BE_HOST_IP}[[:space:]]" |
            grep "[[:space:]]${BE_HEARTBEAT_PORT}[[:space:]]" |
            grep "[[:space:]]true[[:space:]]"
        be_join_status=$?
        if ((be_join_status == 0)); then
            doris_note "Verify that BE is registered to FE successfully"
            be_registered=true
            break
        fi
        if ((attempt % 20 == 1)); then
            doris_note "register is failed, wait next~"
        fi
        sleep "${interval}"
    done

    # Compare against the literal value: `[[ $flag ]]` is true for any
    # non-empty string, including "false", so the failure was never reported.
    if [[ "${be_registered}" != true ]]; then
        doris_error "Failed to register BE to FE!Tried ${max_attempts} times!Maybe FE Start Failed!"
    fi
}

# usage: docker_process_init_files [file [file [...]]]
#    ie: docker_process_init_files /always-initdb.d/*
# Handle each initializer file according to its extension:
#   *.sh                 - executed when executable, sourced otherwise
#   *.sql                - fed to docker_process_sql
#   *.sql.{bz2,gz,xz,zst} - decompressed and fed to docker_process_sql
#   anything else        - ignored with a warning
docker_process_init_files() {
    local f
    for f in "$@"; do
        if [[ "$f" == *.sh ]]; then
            if [[ -x "$f" ]]; then
                doris_note "$0: running $f"
                "$f"
            else
                doris_note "$0: sourcing $f"
                . "$f"
            fi
        elif [[ "$f" == *.sql ]]; then
            doris_note "$0: running $f"
            docker_process_sql <"$f"
            echo
        elif [[ "$f" == *.sql.bz2 ]]; then
            doris_note "$0: running $f"
            bunzip2 -c "$f" | docker_process_sql
            echo
        elif [[ "$f" == *.sql.gz ]]; then
            doris_note "$0: running $f"
            gunzip -c "$f" | docker_process_sql
            echo
        elif [[ "$f" == *.sql.xz ]]; then
            doris_note "$0: running $f"
            xzcat "$f" | docker_process_sql
            echo
        elif [[ "$f" == *.sql.zst ]]; then
            doris_note "$0: running $f"
            zstd -dc "$f" | docker_process_sql
            echo
        else
            doris_warn "$0: ignoring $f"
        fi
        # Blank separator line after every file, whatever its type.
        echo
    done
}

# Container entry point: start the BE in the background, wait until the FE
# lists it, run the init files on first boot, then stay alive for as long as
# the background BE job runs.
# Arguments: "$@" - optional command exec'd once the BE job has finished
_main() {
    docker_setup_env
    # get init args
    get_doris_args
    # Start Doris BE as a background job.
    # NOTE(review): 2>/dev/null also drops whatever init_be.sh writes to
    # stderr, including its own warnings and errors - confirm this is intended.
    {
        set +e
        bash init_be.sh 2>/dev/null
    } &
    # check BE started status
    check_be_status
    # Quoted so the test stays a well-formed `[ -z STRING ]` even when the
    # value is empty or contains whitespace.
    if [ -z "${DATABASE_ALREADY_EXISTS}" ]; then
        # run script
        docker_process_init_files /docker-entrypoint-initdb.d/*
    fi

    # keep BE started status
    wait
    exec "$@"
}

# Run _main only when this file is executed directly; when it is sourced,
# only the function definitions above are loaded.
if ! _is_sourced; then
    _main "$@"
fi

修改init_be.sh脚本

#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Exit on the first failing command and treat a failure in any stage of a
# pipeline as a failure of the whole pipeline. Several functions below switch
# errexit off again with `set +e`.
set -eo pipefail
# Globs that match nothing expand to an empty list instead of to themselves.
shopt -s nullglob

# Doris install root; the BE data and configuration are addressed as
# ${DORIS_HOME}/be/... throughout this script.
DORIS_HOME="/opt/selectdb-doris"

# Obtain necessary and basic information to complete initialization

# Logging helpers.
# usage: doris_[note|warn|error] $log_msg
#    ie: doris_warn "task may be risky!"
#   out: 2023-01-08T19:08:16+08:00 [Warn] [Entrypoint]: task may be risky!
#
# doris_log LEVEL [TEXT...]
#   Writes "<ISO-8601 timestamp> [LEVEL] [Entrypoint]: TEXT" to stdout.
#   When no TEXT argument is given the message is read from stdin.
doris_log() {
  local level="$1"
  shift
  local message
  if [ "$#" -gt 0 ]; then
    # All remaining arguments form the message, joined by single spaces.
    message="$*"
  else
    message="$(cat)"
  fi
  local stamp="$(date -Iseconds)"
  printf '%s [%s] [Entrypoint]: %s\n' "${stamp}" "${level}" "${message}"
}
# Log an informational message (level "Note") on stdout.
doris_note() { doris_log Note "$@"; }
# Log a warning (level "Warn"); the line goes to stderr.
doris_warn() { doris_log Warn "$@" 1>&2; }
# Log an error (level "ERROR") on stderr, then terminate the script with
# exit status 1.
doris_error() {
  { doris_log ERROR "$@"; } 1>&2
  exit 1
}

# Tell whether this file is being sourced rather than executed.
# Returns 0 only when the direct caller recorded in FUNCNAME is 'source'.
_is_sourced() {
  # Fewer than two frames means there is no caller to inspect.
  if [ "${#FUNCNAME[@]}" -lt 2 ]; then
    return 1
  fi
  [[ "${FUNCNAME[0]}" == '_is_sourced' && "${FUNCNAME[1]}" == 'source' ]]
}

# Detect whether this BE has already been initialised.
# Globals:
#   DORIS_HOME (read)               - Doris install root
#   DATABASE_ALREADY_EXISTS (write) - set to 'true' when the BE data
#                                     directory exists, left untouched otherwise
docker_setup_env() {
  declare -g DATABASE_ALREADY_EXISTS
  local data_dir="${DORIS_HOME}/be/storage/data"
  if [[ -d "${data_dir}" ]]; then
    DATABASE_ALREADY_EXISTS='true'
  fi
}

# Check the variables required for startup.
# FE_SERVERS must be one or more comma separated <name>:<ipv4>:<port> entries;
# a valid value is echoed through doris_warn, anything else ends the script
# through doris_error.
docker_required_variables_env() {
  local fe_servers_re='^.+:[1-2]{0,1}[0-9]{0,1}[0-9]{1}(\.[1-2]{0,1}[0-9]{0,1}[0-9]{1}){3}:[1-6]{0,1}[0-9]{1,4}(,.+:[1-2]{0,1}[0-9]{0,1}[0-9]{1}(\.[1-2]{0,1}[0-9]{0,1}[0-9]{1}){3}:[1-6]{0,1}[0-9]{1,4})*$'
  if ! [[ $FE_SERVERS =~ $fe_servers_re ]]; then
    doris_error "FE_SERVERS rule error!example: \$FE_NAME:\$FE_HOST_IP:\$FE_EDIT_LOG_PORT[,\$FE_NAME:\$FE_HOST_IP:\$FE_EDIT_LOG_PORT]..."
  else
    doris_warn "FE_SERVERS" $FE_SERVERS
  fi
  # BE_ADDR is not validated; the container IP is used for registration instead.
  # if [[ $BE_ADDR =~ ^[1-2]{0,1}[0-9]{0,1}[0-9]{1}(\.[1-2]{0,1}[0-9]{0,1}[0-9]{1}){3}:[1-6]{0,1}[0-9]{1,4}$ ]]; then
  #   doris_warn "BE_ADDR" $BE_ADDR
  # else
  #   doris_error "BE_ADDR rule error!example: \$BE_HOST_IP:\$HEARTBEAT_SERVICE_PORT"
  # fi
}

# Derive every setting needed to register and start this BE, then wait for
# the master FE to answer.
# Globals read:
#   FE_SERVERS  - comma separated <name>:<ip>:<edit_log_port> entries; <name>
#                 is expected to look like fe<N>, N becomes the array index
#   FE_PASSWORD, QUERY_PORT - must be non-empty (enforced through check_arg)
# Globals written:
#   feIpArray, feEditLogPortArray - FE IPs / edit-log ports indexed by N
#   MASTER_FE_IP      - IP of the entry with N == 1
#   BE_HOST_IP        - first non-loopback IPv4 address reported by ifconfig
#   BE_HEARTBEAT_PORT - fixed to 9050
#   PRIORITY_NETWORKS - <first three octets of BE_HOST_IP>.0/24
get_doris_be_args() {
  local entry fe_name fe_id fe_ip fe_edit_log_port
  local -a fe_entries

  IFS=',' read -r -a fe_entries <<<"${FE_SERVERS}"
  for entry in "${fe_entries[@]}"; do
    entry="${entry//[[:space:]]/}"
    IFS=':' read -r fe_name fe_ip fe_edit_log_port _ <<<"${entry}"
    fe_id="${fe_name/fe/}"
    # An empty index would make the array assignments below fail.
    check_arg "tmpFeId" "${fe_id}"
    check_arg "tmpFeIp" "${fe_ip}"
    feIpArray[$fe_id]="${fe_ip}"
    check_arg "tmpFeEditLogPort" "${fe_edit_log_port}"
    feEditLogPortArray[$fe_id]="${fe_edit_log_port}"
  done

  declare -g MASTER_FE_IP BE_HOST_IP BE_HEARTBEAT_PORT PRIORITY_NETWORKS FE_PASSWORD QUERY_PORT
  MASTER_FE_IP="${feIpArray[1]}"
  check_arg "MASTER_FE_IP" "${MASTER_FE_IP}"

  # FE_PASSWORD and QUERY_PORT are used as provided by the environment.
  check_arg "FE_PASSWORD" "${FE_PASSWORD}"
  check_arg "QUERY_PORT" "${QUERY_PORT}"

  # BE_HOST_IP=$(echo "${BE_ADDR}" | awk -F ':' '{ sub(/ /, ""); print$1}')
  # Register with the container's own address. awk reads the whole input
  # (no early exit, so no SIGPIPE under pipefail) and prints only the first
  # non-loopback IPv4 address, so a second interface cannot turn the value
  # into a multi-line string. Both "inet 1.2.3.4" and "inet addr:1.2.3.4"
  # output formats are handled.
  BE_HOST_IP="$(ifconfig -a | awk '
    $1 == "inet" {
      ip = $2
      sub(/^addr:/, "", ip)
      if (ip !~ /^127\./ && !found++) print ip
    }')"
  doris_note "container BE_HOST_IP = ${BE_HOST_IP}"
  check_arg "BE_HOST_IP" "${BE_HOST_IP}"
  # BE_HEARTBEAT_PORT=$(echo "${BE_ADDR}" | awk -F ':' '{ sub(/ /, ""); print$2}')
  BE_HEARTBEAT_PORT=9050
  check_arg "BE_HEARTBEAT_PORT" "${BE_HEARTBEAT_PORT}"

  # Replace the last octet to obtain the enclosing /24 network.
  PRIORITY_NETWORKS="${BE_HOST_IP%.*}.0/24"
  check_arg "priority_networks" "${PRIORITY_NETWORKS}"

  doris_note "feIpArray = ${feIpArray[*]}"
  doris_note "feEditLogPortArray = ${feEditLogPortArray[*]}"
  doris_note "masterFe = ${feIpArray[1]}:${feEditLogPortArray[1]}"
  doris_note "be_addr = ${BE_HOST_IP}:${BE_HEARTBEAT_PORT}"
  doris_note "priority_networks = ${PRIORITY_NETWORKS}"
  # wait fe start
  check_be_status true
}

# Append a priority_networks entry to the BE configuration file.
# Arguments: $1 - network in CIDR notation
# Globals:   DORIS_HOME (read)
add_priority_networks() {
  local be_conf="${DORIS_HOME}/be/conf/be.conf"
  doris_note "add priority_networks ${1} to ${be_conf}"
  # The redirect target is quoted: an unquoted path containing whitespace
  # makes bash reject the redirection as ambiguous.
  printf '%s\n' "priority_networks = ${1}" >>"${be_conf}"
}

# Execute sql script, passed via stdin, against the master FE as root.
# usage: docker_process_sql [mysql-cli-args] <sql_script
# Globals: QUERY_PORT, MASTER_FE_IP, FE_PASSWORD (read)
# Returns: the exit status of the mysql client
# Side effect: switches errexit off (`set +e`) for the rest of the script.
docker_process_sql() {
  set +e
  # The password travels in MYSQL_PWD instead of -p<password>: an empty
  # password no longer degrades to a bare -p (which asks for a password
  # interactively), whitespace in the value cannot split the argument, and
  # the secret is not visible in the process list.
  # Client diagnostics are discarded, as before.
  MYSQL_PWD="${FE_PASSWORD}" mysql -uroot -P"${QUERY_PORT}" -h"${MASTER_FE_IP}" \
    --comments "$@" 2>/dev/null
}

# Register this BE with the FE, retrying until it works or attempts run out.
# usage: register_be_to_fe [max_attempts] [interval_seconds]
#   max_attempts     - number of registration attempts, default 300
#   interval_seconds - pause between attempts, default 1
# Globals: BE_HOST_IP, BE_HEARTBEAT_PORT, BE_ALREADY_EXISTS (read);
#          register_be_status (written)
# A backend that the FE already lists counts as success. Exits the script
# through doris_error when neither outcome is reached.
# Side effect: switches errexit off (`set +e`).
register_be_to_fe() {
  set +e
  local max_attempts="${1:-300}"
  local interval="${2:-1}"
  # Local counter: check_be_status (called below) loops over a global `i`,
  # which used to overwrite this loop's counter and silence the periodic
  # warnings.
  local attempt
  local registered=false

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    docker_process_sql <<<"alter system add backend '${BE_HOST_IP}:${BE_HEARTBEAT_PORT}'"
    register_be_status=$?
    if ((register_be_status == 0)); then
      doris_note "BE successfully registered to FE!"
      registered=true
      break
    fi
    # The statement also fails when the backend is registered already;
    # check_be_status sets BE_ALREADY_EXISTS in that case.
    # NOTE(review): check_be_status runs its own retry loop, so one failed
    # attempt here can take much longer than ${interval} seconds.
    check_be_status
    if [ -n "$BE_ALREADY_EXISTS" ]; then
      doris_warn "Same backend already exists! No need to register again!"
      registered=true
      break
    fi
    if ((attempt % 20 == 1)); then
      doris_warn "register_be_status: ${register_be_status}"
      doris_warn "BE failed registered to FE!"
      doris_note "Register BE to FE is failed. retry."
    fi
    sleep "${interval}"
  done

  # Compare against the literal value: `[[ $flag ]]` is true for any
  # non-empty string, including "false", so the failure was never reported.
  if [[ "${registered}" != true ]]; then
    doris_error "Failed to register BE to FE!Tried ${max_attempts} times!Maybe FE Start Failed!"
  fi
}

# Check whether the passed parameter is empty to avoid subsequent task
# execution failures. Further checks (e.g. for repeated parameters) can be
# added here.
# Arguments: $1 - name used in the error message
#            $2 - value to check
# Exits the script through doris_error when $2 is missing or empty.
check_arg() {
  # Quoted so the value is always tested as one word, even when it contains
  # whitespace or characters that `[` would read as operators.
  if [[ -z "${2:-}" ]]; then
    doris_error "$1 is null!"
  fi
}

# Poll the FE until the expected node is listed.
# usage: check_be_status [true] [max_attempts] [interval_seconds]
#   $1 == true       - wait for MASTER_FE_IP to appear in "show frontends"
#   anything else    - wait for BE_HOST_IP and BE_HEARTBEAT_PORT to appear on
#                      one row of "show backends"; sets BE_ALREADY_EXISTS=true
#   max_attempts     - number of polls, default 300
#   interval_seconds - pause between polls, default 1
# Globals: MASTER_FE_IP, BE_HOST_IP, BE_HEARTBEAT_PORT (read);
#          be_join_status, BE_ALREADY_EXISTS (written)
# Returns: 0 once the node is listed, 1 when every poll failed.
# Side effect: switches errexit off (`set +e`).
check_be_status() {
  set +e
  local check_fe="$1"
  local max_attempts="${2:-300}"
  local interval="${3:-1}"
  # Local counter so that a caller's own loop variable is left alone.
  local attempt

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if [[ "${check_fe}" == true ]]; then
      docker_process_sql <<<"show frontends" | grep "[[:space:]]${MASTER_FE_IP}[[:space:]]"
    else
      docker_process_sql <<<"show backends" |
        grep "[[:space:]]${BE_HOST_IP}[[:space:]]" |
        grep "[[:space:]]${BE_HEARTBEAT_PORT}[[:space:]]"
    fi
    be_join_status=$?
    if ((be_join_status == 0)); then
      if [[ "${check_fe}" == true ]]; then
        doris_note "MASTER FE is started!"
      else
        doris_note "Init Check - Verify that BE is registered to FE successfully"
        BE_ALREADY_EXISTS=true
      fi
      return 0
    fi
    if ((attempt % 20 == 1)); then
      if [[ "${check_fe}" == true ]]; then
        doris_note "MASTER FE is not started. retry."
      else
        doris_note "BE is not register. retry."
      fi
    fi
    sleep "${interval}"
  done
  return 1
}

# Prepare, register and start the BE.
# Arguments: "$@" - optional command exec'd after start_be.sh has returned
_main() {
  docker_setup_env
  docker_required_variables_env
  get_doris_be_args

  # Only touch be.conf on first start, i.e. while no data directory exists.
  if [ -z "$DATABASE_ALREADY_EXISTS" ]; then
    # Quoted so the network is always passed as exactly one argument.
    add_priority_networks "$PRIORITY_NETWORKS"
  fi

  register_be_to_fe
  check_be_status
  doris_note "Ready to start BE!"
  # Runs in the foreground; the lines below execute once it returns.
  start_be.sh --console
  exec "$@"
}

# Run _main only when this file is executed directly; when it is sourced,
# only the function definitions above are loaded.
if ! _is_sourced; then
  _main "$@"
fi

添加注销BE脚本unregister_be.sh

该脚本用于BE下线时自动向FE发送注销命令

#!/bin/bash
# Ask the FE to decommission this BE. Meant to run while the BE is going
# offline, e.g. from a container preStop hook.
#
# Environment:
#   FE_SERVERS  - comma separated <name>:<ip>:<edit_log_port> entries; the IP
#                 of the first entry is the FE that receives the statement
#   FE_PASSWORD - password of the FE root user
#   QUERY_PORT  - FE MySQL-protocol port

# Heartbeat port the BE is addressed by in the decommission statement.
be_heartbeat_port=9050

first_fe="${FE_SERVERS%%,*}"
first_fe="${first_fe// /}"
IFS=':' read -r _ fe_ip _ <<<"${first_fe}"
query_port="${QUERY_PORT}"

# First non-loopback IPv4 address of this container; awk reads the whole
# input and prints a single address even when several interfaces exist.
be_addr="$(ifconfig -a | awk '
    $1 == "inet" {
        ip = $2
        sub(/^addr:/, "", ip)
        if (ip !~ /^127\./ && !found++) print ip
    }')"

if [ -z "${fe_ip}" ] || [ -z "${be_addr}" ]; then
    echo "cannot decommission backend: FE_IP='${fe_ip}', BE_ADDR='${be_addr}'" >&2
    exit 1
fi

# The password is deliberately left out of the log line.
echo "FE_IP=${fe_ip}, QUERY_PORT=${query_port}, BE_ADDR=${be_addr}"

# MYSQL_PWD keeps the password off the command line and also works when the
# password is empty.
MYSQL_PWD="${FE_PASSWORD}" mysql -h"${fe_ip}" -P"${query_port}" -uroot \
    -e "alter system DECOMMISSION backend '${be_addr}:${be_heartbeat_port}'"

创建Dockerfile

# Base image
FROM openjdk:8u342-jdk

# Release name: apache or selectdb
ARG REL_NAME=selectdb
ARG DORIS_VERSION=2.1.4

# JDK location, and the BE bin directory on PATH so the BE scripts resolve by name
ENV JAVA_HOME="/usr/local/openjdk-8/" \
    PATH="/opt/${REL_NAME}-doris/be/bin:$PATH"

# ADD (not COPY) on purpose: a local tar.gz is unpacked into /opt/.
# Replace the archive name as needed.
ADD ${REL_NAME}-doris-${DORIS_VERSION}-bin-x64.tar.gz /opt/

# Install the client tools the scripts call (mysql, ifconfig, ping) in one
# step and drop the apt lists, then keep only the be/ directory of the release.
# mkdir -p tolerates a jdbc_drivers directory that already ships in the release.
RUN apt-get update && \
    apt-get install -y default-mysql-client net-tools iputils-ping && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir /opt/${REL_NAME}-doris && \
    mv /opt/${REL_NAME}-doris-${DORIS_VERSION}-bin-x64/be /opt/${REL_NAME}-doris/be && \
    rm -rf /opt/${REL_NAME}-doris-${DORIS_VERSION}-bin-x64 && \
    mkdir -p /opt/${REL_NAME}-doris/be/jdbc_drivers

# Plain files are copied with COPY; nothing here needs ADD's extraction.
COPY jdbc_drivers/*.jar /opt/${REL_NAME}-doris/be/jdbc_drivers/
COPY init_be.sh unregister_be.sh /opt/${REL_NAME}-doris/be/bin/
COPY entry_point.sh /usr/local/bin/
RUN chmod 755 /opt/${REL_NAME}-doris/be/bin/init_be.sh \
              /opt/${REL_NAME}-doris/be/bin/unregister_be.sh \
              /usr/local/bin/entry_point.sh

ENTRYPOINT ["/usr/local/bin/entry_point.sh"]

构建镜像并推送至镜像仓库

截止当前步骤,Docker镜像构建目录如下:
image.png
执行构建命令:

docker build -t 10.10.10.91:5000/registry/selectdb/doris:2.1.4-be-x86_64 .

推送至镜像仓库:

docker push 10.10.10.91:5000/registry/selectdb/doris:2.1.4-be-x86_64

部署

创建Config挂载doris配置

# ConfigMap holding the BE configuration file for the compute nodes.
# be.conf is a literal block scalar (|-): every line break is kept. A folded
# scalar (>-) would join all settings into one single line.
apiVersion: v1
data:
  be.conf: |-
    PPROF_TMPDIR="$DORIS_HOME/log/"
    sys_log_level = ERROR
    JAVA_OPTS="-XX:+UseG1GC"
    be_port = 9060
    webserver_port = 8040
    heartbeat_service_port = 9050
    brpc_port = 8060
    #Specify node type as calculation node
    be_node_role=computation
    priority_networks = 10.244.0.0/16
    enable_file_cache = true
    file_cache_path = [{"path": "/tmp/filecache","total_size":53687091200,"query_limit": "10737418240"}]
kind: ConfigMap
metadata:
  name: doris-cn0-conf
  namespace: default

部署计算节点

---
# StatefulSet running the Doris compute-node (BE) containers.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: doris-cn-cluster0
  namespace: default
spec:
  podManagementPolicy: OrderedReady
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: doris-cn-cluster0
  # Name of the headless Service that governs this StatefulSet.
  serviceName: doris-cn-cluster0
  template:
    metadata:
      labels:
        app: doris-cn-cluster0
      name: doris-cn-cluster0
    spec:
      containers:
        - env:
            - name: BUILD_TYPE
              value: k8s
            # <name>:<ip>:<edit_log_port>[,...] - parsed by entry_point.sh,
            # init_be.sh and unregister_be.sh.
            - name: FE_SERVERS
              value: 'fe1:10.10.10.151:9010'
            # NOTE(review): the FE root password is stored in plain text here;
            # consider sourcing it from a Secret.
            - name: FE_PASSWORD
              value: 'dorispwd'
            # FE MySQL-protocol port used by the scripts to run SQL.
            - name: QUERY_PORT
              value: '9030'
            - name: TZ
              value: Asia/Shanghai
          image: '10.10.10.91:5000/registry/selectdb/doris:2.1.4-be-x86_64'
          imagePullPolicy: Always
          lifecycle:
            # Decommission this BE on the FE before the container is stopped.
            preStop:
              exec:
                command:
                  - bash
                  - '-c'
                  - /opt/selectdb-doris/be/bin/unregister_be.sh
          name: doris-cn-cluster0
          # The ports match be_port, webserver_port, heartbeat_service_port
          # and brpc_port in the be.conf of ConfigMap doris-cn0-conf.
          ports:
            - containerPort: 9060
              name: be-port
              protocol: TCP
            - containerPort: 8040
              name: webserver-port
              protocol: TCP
            - containerPort: 9050
              name: heartbeat-port
              protocol: TCP
            - containerPort: 8060
              name: brpc-port
              protocol: TCP
          resources:
            limits:
              cpu: '4'
              memory: 8Gi
            requests:
              cpu: '1'
              memory: 2Gi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            # The ConfigMap replaces the whole BE conf directory.
            # NOTE(review): init_be.sh appends priority_networks to be.conf in
            # this directory on first start - confirm the mount is writable.
            - mountPath: /opt/selectdb-doris/be/conf
              name: conf
            - mountPath: /etc/pki
              name: sys
              readOnly: true
      dnsPolicy: ClusterFirst
      imagePullSecrets:
        - name: harbor
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      # NOTE(review): the preStop script has to finish within this window.
      terminationGracePeriodSeconds: 30
      volumes:
        - configMap:
            defaultMode: 420
            name: doris-cn0-conf
          name: conf
        - hostPath:
            path: /etc/pki
            type: ''
          name: sys
  updateStrategy:
    rollingUpdate:
      partition: 0
    type: RollingUpdate
---
# Headless Service (clusterIP: None) for the compute-node pods; its name is
# the serviceName referenced by the StatefulSet doris-cn-cluster0.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: doris-cn-cluster0
  name: doris-cn-cluster0
  namespace: default
spec:
  clusterIP: None
  clusterIPs:
    - None
  internalTrafficPolicy: Cluster
  ipFamilies:
    - IPv4
  ipFamilyPolicy: SingleStack
  # Same four ports as the containerPorts of the StatefulSet.
  ports:
    - name: be-port
      port: 9060
      protocol: TCP
      targetPort: 9060
    - name: webserver-port
      port: 8040
      protocol: TCP
      targetPort: 8040
    - name: heartbeat-port
      port: 9050
      protocol: TCP
      targetPort: 9050
    - name: brpc-port
      port: 8060
      protocol: TCP
      targetPort: 8060
  # Selects the pods created by the StatefulSet template (label app).
  selector:
    app: doris-cn-cluster0
  sessionAffinity: None
  type: ClusterIP

开启HPA

---
# HorizontalPodAutoscaler scaling the StatefulSet doris-cn-cluster0 between
# 3 and 5 replicas on average memory utilization.
# NOTE(review): autoscaling/v2beta2 is not served by newer Kubernetes
# releases, which use autoscaling/v2 - confirm against the cluster version.
apiVersion: autoscaling/v2beta2
kind: HorizontalPodAutoscaler
metadata:
  name: doris-cn-cluster0
  namespace: default
spec:
  maxReplicas: 5
  metrics:
    - resource:
        name: memory
        target:
          # NOTE(review): utilization is presumably measured against the
          # container's memory request (2Gi in the StatefulSet) - confirm.
          averageUtilization: 85
          type: Utilization
      type: Resource
  # NOTE(review): higher than the StatefulSet's own `replicas: 1`, so the
  # autoscaler is expected to raise the replica count to 3 once it is active.
  minReplicas: 3
  scaleTargetRef:
    apiVersion: apps/v1
    kind: StatefulSet
    name: doris-cn-cluster0
1 Answers

很用心的实现,棒!不过对于开箱即用的小白使用门槛有些高,当前的2.0版本的cn 节点是一个实验性产物,并不是完全的存算分离。如果想使用存算分离的话,可以看下doris最新的3.0的版本,以及后续发布的 doris 存算分离版本的 operator