python 连接 hive数据库环境搭建

首先需要安装下文列出的 Python 包(我用的是 Python 2):

在安装Python包之前需要安装一些依赖工具:

Debian/Ubuntu: apt-get install python-dev libsasl2-dev gcc
CentOS/RHEL: yum install gcc-c++ python-devel.x86_64 cyrus-sasl-devel.x86_64 mysql-devel
依赖工具装好之后,需要安装的 Python 包如下:
bitarray-0.8.3.tar.gz
impyla-0.14.0.tar.gz
MySQL-python-1.2.5.zip

新建HiveHelper.py
# -*- coding: utf-8 -*-
from impala.dbapi import connect

class HiveHelper(object):
    """Thin wrapper around one impyla DB-API connection and one cursor.

    The connection is opened as soon as the object is constructed.
    """

    def __init__(self, host='', port=10000, database='', user='',
                 password='', auth_mechanism=''):
        """Store the connection settings and connect immediately.

        Args:
            host: server host name or IP address.
            port: server port. The original listing left this value blank;
                10000 is the usual HiveServer2 port -- adjust it to match
                your deployment.
            database: database to use.
            user: login user name.
            password: login password.
            auth_mechanism: passed through unchanged to
                impala.dbapi.connect.
        """
        self.host = host
        self.port = port
        self.database = database
        self.user = user
        self.password = password
        self.auth_mechanism = auth_mechanism
        self.connect()

    def connect(self):
        """Open the connection and create the cursor used by all queries.

        Sets self.conn and self.cursor.
        """
        self.conn = connect(
            host=self.host,
            port=self.port,
            database=self.database,
            user=self.user,
            password=self.password,
            auth_mechanism=self.auth_mechanism)
        self.cursor = self.conn.cursor()

    def close(self):
        """Close the cursor and then the connection.

        The connection is closed even if closing the cursor fails.
        """
        try:
            self.cursor.close()
        finally:
            self.conn.close()

    def query(self, sql):
        """Run a statement and fetch every row it produces.

        Args:
            sql: statement text to execute.
        Returns:
            Whatever cursor.fetchall() returns, or an empty tuple when the
            statement fails (the error is printed, not raised).
        """
        result = ()
        try:
            self.cursor.execute(sql)
            result = self.cursor.fetchall()
        except Exception as e:
            # print(e) works on Python 2 and 3 and, unlike e.message,
            # is not empty for exceptions built with several arguments.
            print(e)
        return result

    def execute(self, sql):
        """Run a statement that returns no rows and commit it.

        Errors are printed rather than raised, matching query().

        Args:
            sql: statement text to execute.
        """
        try:
            self.cursor.execute(sql)
            self.conn.commit()
        except Exception as e:
            print(e)
            try:
                self.conn.rollback()
            except Exception:
                # Best effort only: drivers for engines without
                # transactions may raise from rollback() itself, and that
                # must not hide the error reported above.
                pass

  

新建HiveSearch.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

import os,sys
import re
import json
import math
import time
import operator
import urlparse
import threading
import HiveHelper
import requests
import calendar
import datetime


class PcdnSearch(object):
    """Collects per-GUID flow data (and, once implemented, punch data)."""

    def __init__(self):
        """Open the Hive connection and create the empty result dicts."""
        self.sql = HiveHelper.HiveHelper()
        self.punch_data = {}
        self.flow_data = {}

    def get_flow_data(self, time, clientid):
        """Query flow rows and store (flow, band) per GUID in self.flow_data.

        Rows that are too short or whose flow value is not numeric are
        reported and skipped, so one bad row does not discard the rest.

        Args:
            time: run-time selector; unused until the SQL is filled in.
            clientid: client-id selector; unused until the SQL is filled in.
        """
        # TODO: the query text was left empty in the original listing.
        sql = ""

        try:
            rows = self.sql.query(sql)
        except Exception as e:
            print('flow query failed: %s' % e)
            return

        for item in rows:
            try:
                guid = item[1]
                flow = item[3]
                # NOTE(review): looks like bytes per hour -> Mbit/s with a
                # 10% margin; confirm the units against the real query.
                band = round(float(flow) * 8 / 1000 / 1000 / 3600 * 1.1, 2)
                self.flow_data[guid] = (flow, band)
            except (IndexError, TypeError, ValueError) as e:
                print('skip malformed flow row %r: %s' % (item, e))

    def get_punch_data(self, time, clientid):
        """Placeholder: the original listing only declares an empty query.

        Args:
            time: run-time selector (currently unused).
            clientid: client-id selector (currently unused).
        """
        # TODO: the query text and the row handling were omitted upstream.
        sql = ""

    def get_result(self):
        """Placeholder: the body was omitted in the original listing."""
        # TODO: combine self.punch_data and self.flow_data as required.
        pass


if __name__ == '__main__':
    # Usage: python HiveSearch.py <run_time> <client_id>
    if len(sys.argv) != 3:
        print('param error')
        # Non-zero status so callers can tell the run was rejected.
        sys.exit(1)

    run_time = sys.argv[1]
    try:
        client_id = int(sys.argv[2])
    except ValueError:
        # A non-numeric client id is an argument error, not a crash.
        print('param error')
        sys.exit(1)

    cb = PcdnSearch()
    cb.get_punch_data(run_time, client_id)
    cb.get_flow_data(run_time, client_id)
    cb.get_result()

  





猜你喜欢

转载自www.cnblogs.com/sweetsunnyflower/p/11319993.html
今日推荐