Kubernetes运行hive(五)

目的:

hive 可以基于hadoop mr或者Spark进行高层次的数据处理

前提:

安装之前先要保证hadoop的目录可以为root用户读写:

hadoop fs -chown root:root /

1: 构建镜像

下载依赖

mkdir -p hive/image-build

cd hive/image-build

wget http://download.oracle.com/otn-pub/java/jdk/8u181-b13/96a7b8442fe848ef90c96a2fad6ed6d1/jdk-8u181-linux-x64.tar.gz

wget https://archive.apache.org/dist/hive/hive-3.0.0/apache-hive-3.0.0-bin.tar.gz

download mysql-connector-java-5.1.47.tar.gz

tar -zxvf mysql-connector-java-5.1.47.tar.gz

wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.9.1/hadoop-2.9.1.tar.gz

构建hive镜像

Dockerfile

# Hive 3.0.0 image: CentOS 7.5 base with JDK 8u181, the Hadoop 2.9.1
# distribution and the MySQL JDBC driver for the metastore connection.
FROM centos:7.5.1804

# ADD unpacks local tar.gz archives into the target directory.
ADD jdk-8u181-linux-x64.tar.gz /opt
ADD hadoop-2.9.1.tar.gz /opt
ADD apache-hive-3.0.0-bin.tar.gz /opt

# `which` is presumably required by the Hadoop/Hive launcher scripts.
# The yum cache is cleaned in the same layer to keep the image small, and
# the Hive directory is renamed to drop the "-bin" suffix.
RUN yum install -y which \
    && yum clean all \
    && mv /opt/apache-hive-3.0.0-bin /opt/apache-hive-3.0.0

# The connector tarball is unpacked in the build context by the download
# step, so the jar lives inside the extracted sub-directory. The trailing
# slash makes the destination unambiguously a directory.
COPY mysql-connector-java-5.1.47/mysql-connector-java-5.1.47.jar /opt/apache-hive-3.0.0/lib/

ENV JAVA_HOME=/opt/jdk1.8.0_181 \
    HADOOP_HOME=/opt/hadoop-2.9.1 \
    HADOOP_CONF_DIR=/opt/hadoop-2.9.1/etc/hadoop \
    HIVE_HOME=/opt/apache-hive-3.0.0

# Expose the java, hadoop and hive launchers to interactive shells
# (e.g. `kubectl exec`).
ENV PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$PATH

构建

docker build -t hive .

docker tag hive xxx.xxx.xxx.xxx:5000/hive

docker push xxx.xxx.xxx.xxx:5000/hive

2: 构建DBtool镜像

安装gradle 并编译DB tool

git clone https://github.com/chenlein/database-tools.git

cd database-tools/

wget https://services.gradle.org/distributions/gradle-4.10.2-bin.zip

unzip gradle-4.10.2-bin.zip

mkdir /opt/gradle

mv gradle-4.10.2 /opt/gradle/

edit build.gradle to remove dm driver

"compile group: 'dm', name: 'Dm7JdbcDriver', version: '7.1', classifier: 'jdk17-20170808'"

add the following line to /etc/profile, then run `source /etc/profile` so that it takes effect

export PATH=.:/opt/gradle/gradle-4.10.2/bin:$PATH

gradle --version

gradle build

ls build/distributions/database-tools-1.0-SNAPSHOT.tar

cp build/distributions/database-tools-1.0-SNAPSHOT.tar ./

Dockerfile

# Image that runs the database-tools launcher script as its default command.
FROM java:8
# WORKDIR creates the directory when it does not exist. A separate
# `CMD mkdir` would never run, because only the last CMD in a Dockerfile
# takes effect.
WORKDIR /root/db_tools
# ADD unpacks the local tar archive into the working directory.
ADD database-tools-1.0-SNAPSHOT.tar .
RUN ["chmod", "+x", "./database-tools-1.0-SNAPSHOT/bin/database-tools"]
CMD ["./database-tools-1.0-SNAPSHOT/bin/database-tools"]

编译镜像

docker build -t database-tools:1.0-SNAPSHOT .

docker tag database-tools:1.0-SNAPSHOT 172.2.2.11:5000/database-tools:1.0-SNAPSHOT

docker push 172.2.2.11:5000/database-tools:1.0-SNAPSHOT

3: 部署hive

1)部署local volume 用于hive部署的PVC

local-volumes.yaml

# Static hostPath PersistentVolume intended to back the hive PVC.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: hive-data-1
  labels:
    app: hive
    type: local
spec:
  # NOTE(review): the Recycle reclaim policy is deprecated upstream;
  # confirm whether Retain or Delete is the desired behaviour.
  persistentVolumeReclaimPolicy: Recycle
  accessModes:
    - ReadWriteOnce
  capacity:
    storage: 100Gi
  # Directory on the node's filesystem that holds the data.
  hostPath:
    path: /home/hive/data1

2)部署mysql服务

mysql.yaml

# Root password for the MySQL instance that stores the Hive metastore.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: hive-metadata-mysql-secret
  labels:
    app: hive-metadata-mysql
data:
  # Values under `data` are base64-encoded.
  mysql-root-password: RGFtZW5nQDc3Nw==
---
# Single-replica MySQL 5.7 that serves as the Hive metastore database.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hive-metadata-mysql
  labels:
    app: hive-metadata-mysql
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: hive-metadata-mysql
  template:
    metadata:
      labels:
        app: hive-metadata-mysql
    spec:
      volumes:
        # emptyDir: the database files live only as long as the pod does.
        - name: data
          emptyDir: {}
      initContainers:
        # Removes a lost+found directory from the data dir before MySQL
        # starts.
        - name: remove-lost-found
          image: busybox:1.29.2
          imagePullPolicy: IfNotPresent
          command:
            - "rm"
            - "-rf"
            - "/var/lib/mysql/lost+found"
          volumeMounts:
            - mountPath: /var/lib/mysql
              name: data
      containers:
        - name: mysql
          image: mysql:5.7
          env:
            # Root password is read from the Secret rather than inlined.
            - name: MYSQL_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  key: mysql-root-password
                  name: hive-metadata-mysql-secret
          ports:
            - protocol: TCP
              containerPort: 3306
          volumeMounts:
            - mountPath: /var/lib/mysql
              name: data
---
# Exposes MySQL on port 3306 inside the cluster and, via NodePort, on
# every node.
apiVersion: v1
kind: Service
metadata:
  name: hive-metadata-mysql-service
  labels:
    app: hive-metadata-mysql
spec:
  type: NodePort
  selector:
    app: hive-metadata-mysql
  ports:
    - name: tcp
      targetPort: 3306
      port: 3306

3)部署hive配置

hive-config.yaml

# ConfigMap mounted at /root/bootstrap in the hive container. It carries the
# startup script plus every file that must be copied into $HIVE_HOME/conf.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hive-custom-config-cm
  labels:
    app: hive
data:
  # Container entrypoint: installs the config files, prepares HDFS
  # directories, initialises the metastore schema, then starts HiveServer2
  # and the metastore service.
  bootstrap.sh: |-
    #!/bin/bash
    set -x
    cd /root/bootstrap
    # Apply custom config file context: copy every file except this script
    # into $HIVE_HOME/conf under its own base name.
    for cfg in ./*; do
      if [[ ! "$cfg" =~ bootstrap.sh ]]; then
        echo $cfg
        cat $cfg
        cat $cfg > $HIVE_HOME/conf/${cfg##*/}
      fi
    done
    # Replace hive metadata password placeholder with the value injected via
    # the environment. The expansion is quoted so a password containing
    # whitespace is not word-split (a password containing "/" or "&" would
    # still need escaping for sed).
    sed -i 's/${HIVE_METADATA_PASSWORD}/'"$HIVE_METADATA_PASSWORD"'/g' `grep '${HIVE_METADATA_PASSWORD}' -rl $HIVE_HOME/conf`
    # hive-env.sh has to exist before any Hive service is launched,
    # otherwise the services never pick up HADOOP_CLIENT_OPTS.
    cp $HIVE_HOME/conf/hive-env.sh.template $HIVE_HOME/conf/hive-env.sh && echo "export HADOOP_CLIENT_OPTS=\"-Xmx512m -XX:MaxPermSize=1024m \$HADOOP_CLIENT_OPTS\"" >> $HIVE_HOME/conf/hive-env.sh
    # initSchema
    echo "step 1"
    #if [[ ! -e $HADOOP_CONF_DIR/hive-metastore-initialization.out ]]; then
      echo "step 2"
      $HADOOP_HOME/bin/hadoop fs -mkdir -p hdfs://172.2.2.11:9000/tmp
      $HADOOP_HOME/bin/hadoop fs -mkdir -p hdfs://172.2.2.11:9000/user/hive/warehouse
      $HADOOP_HOME/bin/hadoop fs -chmod g+w hdfs://172.2.2.11:9000/tmp
      $HADOOP_HOME/bin/hadoop fs -chmod g+w hdfs://172.2.2.11:9000/user/hive/warehouse
      $HIVE_HOME/bin/schematool -dbType mysql -initSchema --verbose &> $HADOOP_CONF_DIR/hive-metastore-initialization.out
    #fi
    echo "step 3"
    $HIVE_HOME/bin/hiveserver2 &
    $HIVE_HOME/bin/hive --service metastore &
    # keep running
    sleep infinity
  # Copied into $HIVE_HOME/conf by bootstrap.sh. ${HIVE_METADATA_PASSWORD} is
  # a placeholder that bootstrap.sh substitutes at container start.
  hive-site.xml: |-
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>${HIVE_METADATA_PASSWORD}</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <!-- "&" must be written as "&amp;" inside XML text -->
        <value>jdbc:mysql://hive-metadata-mysql-service:3306/metastore?createDatabaseIfNotExist=true&amp;useSSL=false</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
      </property>
      <property>
        <name>system:java.io.tmpdir</name>
        <value>/tmp</value>
      </property>
      <property>
        <name>system:user.name</name>
        <value>hive</value>
      </property>
      <property>
        <name>hive.server2.authentication</name>
        <value>NOSASL</value>
      </property>
      <property>
        <name>hive.metastore.schema.verification</name>
        <value>false</value>
      </property>
      <property>
        <name>datanucleus.fixedDatastore</name>
        <value>false</value>
      </property>
      <property>
        <name>datanucleus.autoCreateSchema</name>
        <value>true</value>
      </property>
      <property>
        <name>datanucleus.autoCreateTables</name>
        <value>true</value>
      </property>
      <property>
        <name>datanucleus.autoCreateColumns</name>
        <value>true</value>
      </property>
      <property>
        <name>datanucleus.schema.autoCreateAll</name>
        <value>true</value>
        <description>creates necessary schema on a startup if one doesn't exist. set this to false, after creating it once</description>
      </property>
    </configuration>

4)部署hive

hive-deploy.yaml


# SQL script mounted into the database-tools init container at
# /root/db_tools/script.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hive-metastore-database
  labels:
    app: hive
data:
  # NOTE(review): ${IDENTIFIED} is presumably substituted by database-tools
  # from the IDENTIFIED environment variable - confirm against the tool.
  execute.sql: |-
    -- create database (IF NOT EXISTS keeps the script re-runnable when the
    -- init container executes again after a pod restart)
    CREATE DATABASE IF NOT EXISTS metastore DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
    -- create user and grant authorization
    GRANT ALL ON metastore.* TO 'hive'@'%' IDENTIFIED BY '${IDENTIFIED}';
---
# Credentials consumed by the hive Deployment: the DBA password used by the
# init container and the password of the `hive` metastore user.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: hive-metastore-secret
  labels:
    app: hive
data:
  # Values under `data` are base64-encoded.
  database-user-password: RGFtZW5nQDc3Nw==
  database-dba-password: RGFtZW5nQDc3Nw==
---
# Hive Deployment: an init container prepares the metastore database in
# MySQL, then the main container runs bootstrap.sh from the mounted ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hive
  # Same label as the other hive resources so `-l app=hive` selects it too.
  labels:
    app: hive
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: hive
  template:
    metadata:
      labels:
        app: hive
    spec:
      # Pins the pod to one node - presumably the node that holds the
      # hostPath volume; confirm the node name for your cluster.
      nodeName: k8s-node-01
      initContainers:
        # Runs the SQL script from the hive-metastore-database ConfigMap
        # against MySQL as root.
        - name: init-dababase
          image: 172.2.2.11:5000/database-tools:1.0-SNAPSHOT
          env:
            - name: DRIVER_NAME
              value: "com.mysql.jdbc.Driver"
            - name: URL
              value: "jdbc:mysql://hive-metadata-mysql-service:3306/mysql?useUnicode=true&characterEncoding=utf8&useSSL=false"
            - name: USERNAME
              value: "root"
            - name: PASSWORD
              valueFrom:
                secretKeyRef:
                  name: hive-metastore-secret
                  key: database-dba-password
            - name: IDENTIFIED
              valueFrom:
                secretKeyRef:
                  name: hive-metastore-secret
                  key: database-user-password
          volumeMounts:
            - name: init-dababase-volume
              mountPath: /root/db_tools/script
      containers:
        - name: hive
          image: 172.2.2.11:5000/hive
          # ConfigMap volumes are read-only, so the script is copied to /tmp
          # before being made executable.
          command: ["bash", "-c", "cp /root/bootstrap/bootstrap.sh /tmp/ && chmod +x /tmp/bootstrap.sh && /tmp/bootstrap.sh"]
          ports:
            - containerPort: 10000
            - containerPort: 10002
            - containerPort: 9083
          env:
            - name: HADOOP_CONF_DIR
              value: /opt/hadoop-2.9.1/etc/hadoop
            - name: HIVE_METADATA_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: hive-metastore-secret
                  key: database-user-password
          volumeMounts:
            - name: hadoop-config-volume
              mountPath: /etc/hadoop
            - name: hive-custom-config-volume
              mountPath: /root/bootstrap
          # Ready once the HiveServer2 port (10000) accepts TCP connections.
          readinessProbe:
            initialDelaySeconds: 20
            periodSeconds: 5
            tcpSocket:
              port: 10000
      volumes:
        - name: hadoop-config-volume
          persistentVolumeClaim:
            claimName: hive-data
        - name: hive-custom-config-volume
          configMap:
            name: hive-custom-config-cm
        - name: init-dababase-volume
          configMap:
            name: hive-metastore-database
---
# Claim used by the hive Deployment; meant to bind to a pre-created static
# PersistentVolume.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: hive-data
  labels:
    app: hive
spec:
  # An explicit empty class restricts binding to PVs that have no
  # storageClassName. Without it, a cluster with a default StorageClass
  # would try to provision a new volume dynamically instead.
  storageClassName: ""
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Gi
---
# NodePort Service that exposes the three ports opened by the hive container.
apiVersion: v1
kind: Service
metadata:
  name: hive-service
  labels:
    app: hive
spec:
  type: NodePort
  selector:
    app: hive
  ports:
    - name: thrift
      port: 10000
      targetPort: 10000
    - name: webui
      port: 10002
      targetPort: 10002
    - name: metastore
      port: 9083
      targetPort: 9083

4: 查看部署结果

Pod

kubectl get pods | grep hive

service

kubectl get svc | grep hive-service

5: 运行

kubectl exec -it hive-xxxxx -- bash

发布了117 篇原创文章 · 获赞 24 · 访问量 11万+

猜你喜欢

转载自blog.csdn.net/CodeAsWind/article/details/104624945