Configuring YARN Node Label Scheduling

Goal (tested on a cluster with only two NodeManager nodes): create two node labels, normal and highmem, and configure two queues, dev and prd, where dev can use at most 50% of the cluster's resources and prd can use up to 100%.

After making the configuration changes below, restart YARN, then create the labels and assign them to the nodes.

Create the labels:
yarn rmadmin -addToClusterNodeLabels "normal,highmem"

Assign a label to each node (hostname01/hostname02 are placeholders; 45454 matches the NodeManager port set below):
yarn rmadmin -replaceLabelsOnNode "hostname01:45454=normal hostname02:45454=highmem"
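
To confirm the labels took effect, the following can be used (yarn cluster is available on Hadoop 2.8+; the node ID is illustrative), or check the Nodes page of the ResourceManager web UI:
yarn cluster --list-node-labels
yarn node -status hostname01:45454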

The reference configuration is as follows.

1. Add the following properties to yarn-site.xml:

  <property>
    <name>yarn.nodemanager.address</name>
    <value>0.0.0.0:45454</value>
  </property>
  <property>
    <name>yarn.node-labels.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.node-labels.fs-store.root-dir</name>
    <value>/user/node-label</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>
  <property>
    <name>yarn.node-labels.manager-class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager</value>
  </property>
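
Note: with fs.defaultFS pointing at HDFS, yarn.node-labels.fs-store.root-dir resolves to an HDFS path, so the user running the ResourceManager must be able to create and write that directory. Creating it up front avoids permission errors on restart; a minimal sketch, assuming the ResourceManager runs as the yarn user:

hdfs dfs -mkdir -p /user/node-label
hdfs dfs -chown -R yarn /user/node-label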

2. Modify capacity-scheduler.xml:

<configuration>

  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>10000</value>
    <description>
      Maximum number of applications that can be pending and running.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.8</value>
    <description>
      Maximum percent of resources in the cluster which can be used to run
      application masters i.e. controls number of concurrent running
      applications.
    </description>
  </property>
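  <!-- The stock default is 0.1; 0.8 is used here so several ApplicationMasters
       can run concurrently on a two-node test cluster. -->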

  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
    <description>
      The ResourceCalculator implementation to be used to compare
      Resources in the scheduler.
      The default i.e. DefaultResourceCalculator only uses Memory while
      DominantResourceCalculator uses dominant-resource to compare
      multi-dimensional resources such as Memory, CPU etc.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>dev,prd</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.dev.capacity</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.prd.capacity</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.dev.maximum-capacity</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.prd.maximum-capacity</name>
    <value>100</value>
  </property>
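  <!-- Matches the stated goal: dev is capped at 50% of the cluster, while prd
       can elastically grow to 100%. -->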
  <property>
    <name>yarn.scheduler.capacity.root.accessible-node-labels</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.dev.accessible-node-labels</name>
    <value>normal</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.prd.accessible-node-labels</name>
    <value>highmem</value>
  </property>
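  <!-- For each label, the per-label capacities of the queues that can access it
       must sum to 100 at every level of the hierarchy; root's per-label capacity
       is therefore 100. -->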
  <property>
    <name>yarn.scheduler.capacity.root.accessible-node-labels.normal.capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.accessible-node-labels.highmem.capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.dev.accessible-node-labels.normal.capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.prd.accessible-node-labels.highmem.capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.dev.default-node-label-expression</name>
    <value>normal</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.prd.default-node-label-expression</name>
    <value>highmem</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.dev.state</name>
    <value>RUNNING</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.prd.state</name>
    <value>RUNNING</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.dev.acl_submit_applications</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.prd.acl_submit_applications</name>
    <value>*</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.dev.acl_administer_queue</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.prd.acl_administer_queue</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.node-locality-delay</name>
    <value>2</value>
    <description>
      Number of missed scheduling opportunities after which the CapacityScheduler
      attempts to schedule rack-local containers. Typically this should be set to
      the number of nodes in the cluster (2 in this test setup); the stock default
      of 40 approximates the number of nodes in one rack.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings</name>
    <value></value>
    <description>
      A list of mappings that will be used to assign jobs to queues.
      The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*.
      Typically this list will be used to map users to queues; for example,
      u:%user:%user maps all users to queues with the same name as the user.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
    <value>false</value>
    <description>
      If a queue mapping is present, will it override the value specified
      by the user? This can be used by administrators to place jobs in queues
      that are different than the one specified by the user.
      The default is false.
    </description>
  </property>

</configuration>
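
After editing capacity-scheduler.xml, the queue changes can also be applied without a full restart:

yarn rmadmin -refreshQueues

Note that on many Hadoop versions the CapacityScheduler rejects queue settings that reference labels not yet known to the cluster, so run the -addToClusterNodeLabels step before loading or refreshing this file.

As an end-to-end check, a job submitted to each queue should land only on its partition, since default-node-label-expression routes containers automatically. A sketch using the stock MapReduce examples jar (the jar path varies by installation):

yarn jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi -Dmapreduce.job.queuename=prd 2 10

While this job runs, its containers should appear only on the node labeled highmem (visible on the ResourceManager web UI, by default on port 8088).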

Reposted from blog.csdn.net/xy908325/article/details/84984182