本文通过 Python 代码调用 YARN RESTful API，定时采集各队列的 vcore 和 memory 使用量并存储到 TiDB，然后使用 Apache Zeppelin 通过 JDBC 读取数据，简单绘制 vcore 和 memory 使用率的趋势图。这样可以观察集群 vcore 和 memory 的分配情况，防止其中一项成为集群或者队列的性能瓶颈。

一、python代码：

import json
import urllib2
import datetime
# yarn rest api:
# http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Writeable_APIs

def _fetchHaState(master):
    """Return the haState reported by one ResourceManager, or None if unreachable.

    Calls ``http://<master>/ws/v1/cluster/info`` and reads
    ``clusterInfo.haState`` from the JSON response.

    :param master: ``host:port`` of the ResourceManager web endpoint.
    :return: the haState string, or None when the HTTP request fails.
    """
    try:
        response=urllib2.urlopen("http://"+master+"/ws/v1/cluster/info")
        jsonstring=response.read()
    except urllib2.URLError as e:
        # A ResourceManager that is down must not prevent the other one from
        # being recognised as active, so report it and carry on.
        print(master +" resourcemanager is unreachable :%s"%e)
        return None
    print(jsonstring)
    state=json.loads(jsonstring)['clusterInfo']['haState']
    print(master +" resourcemanager state is :"+state)
    return state


def getActiveRN(master1,master2):
    """Return whichever of the two ResourceManagers reports haState ACTIVE.

    Both masters are queried (master1 first); master1 wins if both claim to
    be active.

    :param master1: ``host:port`` of the first ResourceManager.
    :param master2: ``host:port`` of the second ResourceManager.
    :return: the ``host:port`` string of the active ResourceManager.
    :raises Exception: if neither master reports ACTIVE (including the case
        where both are unreachable).
    """
    # Query both up front so the state of each master is always logged.
    states=[(master,_fetchHaState(master)) for master in (master1,master2)]
    for master,state in states:
        if state=='ACTIVE':
            print("active master is "+master)
            return master
    raise Exception("no active resourcemanager in %s,%s "%(master1,master2))


def getClusterScheduler(activeResourceManger):
    """Fetch the scheduler description from the given ResourceManager.

    :param activeResourceManger: ``host:port`` of the ResourceManager to query.
    :return: the parsed JSON body of ``/ws/v1/cluster/scheduler``.
    """
    # Use the address supplied by the caller rather than a module-level
    # variable, so the request always goes to the ResourceManager that was
    # selected as active.
    response=urllib2.urlopen("http://"+activeResourceManger+"/ws/v1/cluster/scheduler")
    jsonstring=response.read()
    print(jsonstring)
    jsonarray=json.loads(jsonstring)
    print(jsonarray)
    return jsonarray


def getQueueInfo(queuename,ClusterScheduler):
    """Look up a top-level queue in the parsed scheduler response.

    :param queuename: value to match against each entry's ``queueName``.
    :param ClusterScheduler: parsed scheduler JSON; the queue list is read
        from ``['scheduler']['schedulerInfo']['queues']['queue']``.
    :return: the first matching queue dict, or None if no queue matches.
    """
    jsonarray=ClusterScheduler['scheduler']['schedulerInfo']['queues']['queue']
    print("**************** %s scheduler info :%s"%(queuename,jsonarray))

    print("*********************************************************")

    # Debug output only: guard the index so a cluster with fewer than two
    # top-level queues does not fail with IndexError before the search runs.
    if len(jsonarray)>1:
        print("**************** %s scheduler1 info :%s"%(queuename,jsonarray[1]))
    for queue in jsonarray:
        if queue['queueName']==queuename:
            print("find queuename:%s info %s"%(queuename,queue))
            return queue
    return None

def findSubQueueInfo(queuename,parenetClusterScheduler):
    """Look up a child queue by name inside a parent queue's info dict.

    :param queuename: value to match against each child's ``queueName``.
    :param parenetClusterScheduler: parent queue dict; its children are read
        from ``['queues']['queue']``.
    :return: the first matching child dict, or None if no child matches.
    """
    print("*********************begin findSubQueueInfo:%s**********"%queuename)
    for child in parenetClusterScheduler['queues']['queue']:
        if child['queueName']!=queuename:
            continue
        print("*********************finish findSubQueueInfo:%s**********" % queuename)
        return child
    return None

def clusteMetrics(activeResourceManger):
    """Fetch cluster-wide metrics from the given ResourceManager.

    :param activeResourceManger: ``host:port`` of the ResourceManager to query.
    :return: the parsed JSON body of ``/ws/v1/cluster/metrics``.
    """
    metricsUrl = "http://" + activeResourceManger + "/ws/v1/cluster/metrics"
    body = urllib2.urlopen(metricsUrl).read()
    return json.loads(body)


if __name__ == "__main__":
    # Collects the current vcore/memory usage of the configured queues and
    # appends one SQL INSERT statement per queue, plus one 'root' summary row,
    # to QueueInfo.txt.
    master1="192.168.240.1:8088"
    master2="192.168.240.2:8088"
    parenetQueue=['sto','dm','bd','event']
    bdchildrenQueue = ['vip', 'tenhive', 'AthenaSysService', 'default']
    activemaster=getActiveRN(master1,master2)
    clustemetrics=clusteMetrics(activemaster)

    # Cluster-wide capacity: the denominator of every percentage below.
    allvcore = clustemetrics['clusterMetrics']['totalVirtualCores']
    allmemory = clustemetrics['clusterMetrics']['totalMB']

    clusterScheduler=getClusterScheduler(activemaster)
    bd=getQueueInfo('bd',clusterScheduler)

    # All rows of one run share the same, already SQL-quoted, timestamp.
    now_time='\''+datetime.datetime.strftime(datetime.datetime.now(),'%Y-%m-%d %H:%M:%S')+'\''

    insertTemplate=("insert into yarn_monitor.yarn_vcore_memory_monitor"
                    "(curr_time,queuename,currentAllvcore,currentAllmemory,"
                    "currentAllvcorePercentage,currentAllmemoryPercentage) "
                    "VALUES (%s,%s,%s,%s,%s,%s);\n")

    def formatInsert(queuename,vcore,memory):
        # queuename must already carry its SQL quotes. The "* 1.0" forces
        # float division under Python 2.
        return insertTemplate % (
            now_time,queuename,vcore,memory,
            vcore*1.0/allvcore,memory*1.0/allmemory)

    # Totals over the top-level queues only; they feed the 'root' row.
    currentAllvcore=0
    currentAllmemory=0

    # "with" guarantees the file is closed even if a queue lookup fails.
    with open("QueueInfo.txt", "a+") as fo:
        for i in parenetQueue:
            queueInfo = getQueueInfo( i, clusterScheduler)
            vcore=queueInfo['resourcesUsed']['vCores']
            memory=queueInfo['resourcesUsed']['memory']
            currentAllvcore=currentAllvcore+vcore
            currentAllmemory=currentAllmemory+memory
            # Each row records this queue's own usage, not the running total.
            fo.write(formatInsert('\'root.'+i+'\'',vcore,memory))

        for i in bdchildrenQueue :
            queueInfo = findSubQueueInfo(i, bd)
            # Children of 'bd' are not added to the totals: 'bd' itself was
            # summed in the loop above, so adding them again would
            # double-count its usage in the 'root' row.
            fo.write(formatInsert('\'root.bd.' + i + '\'',
                                  queueInfo['resourcesUsed']['vCores'],
                                  queueInfo['resourcesUsed']['memory']))

        currentAllvcorePercentage=currentAllvcore*1.0/allvcore
        currentAllmemoryPercentage=currentAllmemory*1.0/allmemory

        print("currentAllvcore = %s"%currentAllvcore)
        print("currentAllallmemory = %s"%currentAllmemory)
        print("currentAllvcorePercentage = %s"%currentAllvcorePercentage)
        print("currentAllallmemoryPercentage = %s"%currentAllmemoryPercentage)

        fo.write(formatInsert('\'root\'',currentAllvcore,currentAllmemory))

二、zeppelin TIDB jdbc 配置

创建TIDB 表并设置zeppelin jdbc 连接参数：

-- Target table for the INSERT statements generated by the Python collector:
-- one row per queue name per collection run, with the used vcores / memory
-- and their share of the cluster totals. `id` is auto-generated and starts
-- at 30001.
CREATE TABLE `yarn_vcore_memory_monitor` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `curr_time` datetime NOT NULL,
  `queuename` varchar(50) DEFAULT NULL,
  `currentAllvcore` int(11) DEFAULT NULL,
  `currentAllmemory` int(11) DEFAULT NULL,
  `currentAllvcorePercentage` double DEFAULT NULL,
  `currentAllmemoryPercentage` double DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=30001;

三、zeppelin查看数据

-- Usage percentages of every row sampled during the current day
-- (both bounds are exclusive).
SELECT id, currentAllvcorePercentage, currentAllmemoryPercentage
FROM yarn_monitor.yarn_vcore_memory_monitor
WHERE curr_time < DATE_FORMAT(date_add(curdate(), INTERVAL 1 DAY), '%Y-%m-%d %H:%i:%S') # upper bound: 00:00:00 tomorrow
	AND curr_time > DATE_FORMAT(curdate(), '%Y-%m-%d %H:%i:%S');# lower bound: 00:00:00 today

观察下面vcore和memory使用百分比趋势图，如果其中一项成为性能瓶颈，需要增加相应队列的资源。

Python采集Yarn Queue信息

一、python代码：

2、zeppelin TIDB jdbc 配置

猜你喜欢