HBase读写实例

1.基于python,需要thrift

2.将不同手机号段的MD5值存入HBase,提供MD5彩虹表

3.查询速度检测

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import hashlib,time
from thrift.transport.TSocket import TSocket
from thrift.transport.TTransport import TBufferedTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
import threading
from hbase.ttypes import *

def get_token(phone_number):
    """Return the hexadecimal MD5 digest of a phone number.

    Args:
        phone_number: The phone number as a byte string or a text string.
            Text strings are encoded as UTF-8 before hashing.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest string.
    """
    # hashlib hashes bytes only: encode text input so that non-ASCII unicode
    # (Python 2) and str (Python 3) values do not raise. ASCII input yields
    # the same digest as hashing the raw byte string.
    if not isinstance(phone_number, bytes):
        phone_number = phone_number.encode("utf-8")
    return hashlib.md5(phone_number).hexdigest()
def putHbaseData(tableName):
    print "putHbaseData---tableName:",tableName
    host = 'localhost'
    port = 9090
    cf = "cf"
    phone_col = "phoneNumber"
    transport = TBufferedTransport(TSocket(host, port))
    transport.open()
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    nameLists = client.getTableNames()
    if tableName not in nameLists:
        print "create table:",tableName
        contents = ColumnDescriptor(name=cf + ":", maxVersions=1)
        client.createTable(tableName, [contents])
    else:
        print "nametable already exists:",tableName

    minNumInt = int(tableName + "00000000")
    maxNumInt = int(str(int(tableName) + 1) + "00000000")
    #maxNumInt = int(str(int(tableName) ) + "00000010")

    #print "put data to table:", tableName
    while minNumInt < maxNumInt:
        md5Str = get_token(str(minNumInt))
        mutations = [Mutation(column=cf + ":" + phone_col, value=str(minNumInt))]
        #print tableName,md5Str,minNumInt
        client.mutateRow(tableName, md5Str, mutations)
        minNumInt += 1
    transport.close()
def getHbaseData(tableName,rowKey):
    host = 'localhost'
    port = 9090
    cf = "cf"
    col = "phoneNumber"
    transport = TBufferedTransport(TSocket(host, port))
    transport.open()
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    result = client.getRow(tableName, rowKey)
    print result
    for r in result:
        print 'the row is ', r.row
        print 'the values is ', r.columns.get(cf+":"+col).value
    transport.close()

def run():
    #prePhoneStr = "133/153/180/181/189/177/130/131/132/155/156/185/186/145/176/134/135/136/137/138/139/150/151/152/157/158/159/182/183/184/187/188/147/178"
    prePhoneStr = "139/175"
    prePhoneNumbers=prePhoneStr.split("/")
    threads = []
    print "prePhoneNumbers:",prePhoneNumbers
    for tableName in prePhoneNumbers:
        threadId = threading.Thread(target=putHbaseData, args=(tableName,),name="threadName"+tableName)
        threads.append(threadId)
    print "threads:",threads
    print "AAAAAAA"
    for t in threads:
        print "BBBBBBB",t
        t.setDaemon(True)
        t.start()
    for t in threads:
        t.join()
    print "Exiting Main Thread"

if __name__ == '__main__':
    print "start ....."
    start_time = time.time()
    run()
    end_time = time.time()
    t = end_time - start_time
    print "脚本用时:", t

发布了114 篇原创文章 · 获赞 18 · 访问量 3万+

猜你喜欢

转载自blog.csdn.net/WangYouJin321/article/details/100259093