一.写一个简单的Yarn应用
1.Yarn Client
// The client obtains the app id over RPC.
// Initialize and start the YarnClient.
YarnClient yarnClient = YarnClient.createYarnClient();
Configuration conf = new Configuration();
yarnClient.init(conf);
yarnClient.start();
// Use the YarnClient to create an application and obtain its application id.
YarnClientApplication app = yarnClient.createApplication();
// appResponse carries cluster information, e.g. the minimum and maximum resource values of the cluster.
GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
// Read the app id from the submission context.
ApplicationId appId = appContext.getApplicationId();
/**
 * 1. A major difficulty of writing a client is populating the ApplicationSubmissionContext.
 *    It holds everything the RM needs to launch the AM: app id, app name, queue,
 *    priority and user info.
 * 2. The ContainerLaunchContext describes the container used to run the AM:
 *    local resources (e.g. jars, input files), environment settings (e.g. the classpath),
 *    the command to execute and security tokens.
 */
// Basic application settings such as its name. appContext is the variable already
// obtained from app.getApplicationSubmissionContext() when the app id was read;
// it is reused here because declaring it a second time does not compile.
appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
appContext.setApplicationName(appName);
// Local resources of the AM: its jar and, when configured, the log4j properties.
final String appIdText = appId.toString();
Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
FileSystem fs = FileSystem.get(conf);
addToLocalResources(fs, appMasterJar, appMasterJarPath, appIdText, localResources, null);
if (!log4jPropFile.isEmpty()) {
  addToLocalResources(fs, log4jPropFile, log4jPath, appIdText, localResources, null);
}

// The shell script must be available on the containers, so a copy is placed on
// HDFS where the YARN framework can see it. It has to be a cluster-visible
// resource for the task containers; it is not registered as a local resource of
// the AM because the AM itself does not need it.
String hdfsShellScriptLocation = "";
long hdfsShellScriptLen = 0;
long hdfsShellScriptTimestamp = 0;
if (!shellScriptPath.isEmpty()) {
  Path localScript = new Path(shellScriptPath);
  Path remoteScript =
      new Path(fs.getHomeDirectory(), appName + "/" + appIdText + "/" + SCRIPT_PATH);
  // arguments: keep the local source (delSrc=false), overwrite the destination (overwrite=true)
  fs.copyFromLocalFile(false, true, localScript, remoteScript);
  hdfsShellScriptLocation = remoteScript.toUri().toString();
  FileStatus remoteStatus = fs.getFileStatus(remoteScript);
  hdfsShellScriptLen = remoteStatus.getLen();
  hdfsShellScriptTimestamp = remoteStatus.getModificationTime();
}

// The shell command and its arguments are passed as content to addToLocalResources.
if (!shellCommand.isEmpty()) {
  addToLocalResources(fs, null, shellCommandPath, appIdText, localResources, shellCommand);
}
if (shellArgs.length > 0) {
  String joinedShellArgs = StringUtils.join(shellArgs, " ");
  addToLocalResources(fs, null, shellArgsPath, appIdText, localResources, joinedShellArgs);
}
// Environment variables for the container that runs the Application Master.
Map<String, String> env = new HashMap<String, String>();
// Publish the shell script's location, timestamp and length through the
// environment; the AM uses this information to create the local resource for
// the containers that execute the script.
env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
// Build the classpath: the inherited CLASSPATH, every file in the working
// directory ("./*"), the configured YARN application classpath entries and the
// log4j properties file.
StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
    .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
for (String c : conf.getStrings(
    YarnConfiguration.YARN_APPLICATION_CLASSPATH,
    YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
  classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
  classPathEnv.append(c.trim());
}
classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append(
    "./log4j.properties");
// Export the assembled classpath. Without this entry the value built above is
// never handed to the AM container.
env.put("CLASSPATH", classPathEnv.toString());
// Assemble the command line that launches the Application Master.
LOG.info("Setting up app master command");
List<CharSequence> amArgs = new ArrayList<CharSequence>(30);
// Java executable resolved through JAVA_HOME.
amArgs.add(Environment.JAVA_HOME.$$() + "/bin/java");
// Maximum heap size of the AM.
amArgs.add("-Xmx" + amMemory + "m");
// Main class of the AM.
amArgs.add(appMasterMainClass);
// Arguments handed to the AM.
amArgs.add("--container_memory " + containerMemory);
amArgs.add("--container_vcores " + containerVirtualCores);
amArgs.add("--num_containers " + numContainers);
amArgs.add("--priority " + shellCmdPriority);
for (Map.Entry<String, String> shellVar : shellEnv.entrySet()) {
  amArgs.add("--shell_env " + shellVar.getKey() + "=" + shellVar.getValue());
}
if (debugFlag) {
  amArgs.add("--debug");
}
// Redirect stdout and stderr of the AM into the container log directory.
amArgs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
amArgs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

// Join the pieces into the final command; every piece is followed by one space.
StringBuilder joined = new StringBuilder();
for (int i = 0; i < amArgs.size(); i++) {
  joined.append(amArgs.get(i)).append(" ");
}
final String amCommand = joined.toString();
LOG.info("Completed setting up app master command " + amCommand);
List<String> commands = new ArrayList<String>();
commands.add(amCommand);
// Launch context of the AM container, built from the local resources,
// environment and command; the remaining three arguments are left null here.
ContainerLaunchContext amContainer =
    ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null);
// Declare the required resource types, i.e. memory and vcores.
appContext.setResource(Resource.newInstance(amMemory, amVCores));
// Service data is a binary blob that is passed on to the application.
// amContainer.setServiceData(serviceData);
// Set up the security tokens; only needed when security is enabled.
if (UserGroupInformation.isSecurityEnabled()) {
  // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
  Credentials credentials = new Credentials();
  String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
  // The RM principal is used as the token renewer, so it must be configured.
  if (tokenRenewer == null || tokenRenewer.length() == 0) {
    throw new IOException(
        "Can't get Master Kerberos principal for the RM to use as renewer");
  }
  // Only tokens for the default file system are obtained.
  final Token<?>[] tokens =
      fs.addDelegationTokens(tokenRenewer, credentials);
  if (tokens != null) {
    for (Token<?> token : tokens) {
      LOG.info("Got dt for " + fs.getUri() + "; " + token);
    }
  }
  // Serialize the credentials and attach them to the AM container; the buffer
  // is wrapped up to getLength() so unused capacity is not included.
  DataOutputBuffer dob = new DataOutputBuffer();
  credentials.writeTokenStorageToStream(dob);
  ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
  amContainer.setTokens(fsTokens);
}
// Attach the AM container specification to the submission context.
appContext.setAMContainerSpec(amContainer);
// Priority of the application.
appContext.setPriority(Priority.newInstance(amPriority));
// YARN queue the application is placed in.
appContext.setQueue(amQueue);
// Submit the application.
yarnClient.submitApplication(appContext);
在提交app之后,RM接收Job,并为AM分配Container,然后设置环境并在Container中启动AM。客户端可以通过app id获得应用的运行信息。
// Fetch the report of the submitted application, looked up by its app id.
ApplicationReport report = yarnClient.getApplicationReport(appId);
当然,如果某个应用太消耗资源或运行时间过长,希望杀死它。可以通过YarnClient的killApplication方法杀死应用。实际上是通知RM给AM发送kill命令。
// Request that the application identified by appId be killed.
yarnClient.killApplication(appId);
2.Yarn AM
-
AM是Job实际拥有者,通过YarnClient设置各种参数,由RM启动它。
-
由于AM也运行在Container中,并且可能与其他Container共享同一台主机,因此不能对可使用的端口做任何假设。例如,不能假定预定义的端口8888一定可用,因为该端口可能已被其他应用占用。
-
当AM启动后,AM可以查询到自身所在的NodeManager信息和其他环境参数。
-
AM和RM通信需要携带ApplicationAttemptId参数。
// Derive the ApplicationAttemptId from the container id found in the environment.
Map<String, String> envs = System.getenv();
String amContainerIdText = envs.get(ApplicationConstants.AM_CONTAINER_ID_ENV);
if (null == amContainerIdText) {
  // the framework is expected to always set the container id in the env
  throw new IllegalArgumentException("ContainerId not set in the environment");
}
ContainerId containerId = ConverterUtils.toContainerId(amContainerIdText);
ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
// Once the AM is initialized it can start two clients, one talking to the RM and
// one talking to the NMs. Each is given a callback handler that processes its events.
AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
// NOTE(review): the first argument (1000) is presumably the heartbeat interval in milliseconds — confirm.
amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
amRMClient.init(conf);
amRMClient.start();
// Client used to talk to the NodeManagers, with its own callback handler.
containerListener = createNMCallbackHandler();
nmClientAsync = new NMClientAsyncImpl(containerListener);
nmClientAsync.init(conf);
nmClientAsync.start();
// The AM must let the RM know through heartbeats that it is still alive.
// Start by registering the AM with the RM.
appMasterHostname = NetUtils.getHostname();
RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(
    appMasterHostname, appMasterRpcPort, appMasterTrackingUrl);
// The registration response carries the maximum resource capability (memory and
// virtual cores) and the containers left over from previous attempts.
int maxMem = response.getMaximumResourceCapability().getMemory();
int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
// Declared once only; a second declaration of the same local does not compile.
List<Container> previousAMRunningContainers =
    response.getContainersFromPreviousAttempts();
LOG.info("Received " + previousAMRunningContainers.size()
    + " previous AM's running containers on AM registration.");
// The AM may request several containers to run its job. Containers from
// previous attempts are subtracted from the number to request.
int numTotalContainersToRequest =
    numTotalContainers - previousAMRunningContainers.size();
for (int i = 0; i < numTotalContainersToRequest; ++i) {
  // setupContainerAskForRM is where memory, vcores and priority are set
  ContainerRequest containerAsk = setupContainerAskForRM();
  amRMClient.addContainerRequest(containerAsk);
}
AM向RM申请资源后,可通过AMRMClientAsync对象来异步启动Container。在AM处理onContainersAllocated回调函数时,需要初始化设置ContainerLaunchContext 。设置过程繁琐,为了不阻塞AM,我们另启动一个线程处理设置参数和启动Container。
/**
 * Callback receiving the containers the RM has allocated. The allocation
 * counter is increased, then every container is launched from a thread of its own.
 *
 * @param allocatedContainers containers allocated by the RM in this response
 */
@Override
public void onContainersAllocated(List<Container> allocatedContainers) {
  final int allocatedCount = allocatedContainers.size();
  LOG.info("Got response from RM for container ask, allocatedCnt=" + allocatedCount);
  numAllocatedContainers.addAndGet(allocatedCount);
  for (Container granted : allocatedContainers) {
    // Launching runs on a separate thread so the calling thread is not blocked;
    // the containers may not all be allocated in one go.
    Thread launcher = new Thread(new LaunchContainerRunnable(granted, containerListener));
    launchThreads.add(launcher);
    launcher.start();
  }
}
/** Implementation of the launch thread: the AM talks to the NodeManager through NMClientAsync to start the container. **/
// Compose the command executed on the container; every piece is followed by a
// single space.
StringBuilder launchLine = new StringBuilder();
// The executable command.
launchLine.append(shellCommand).append(" ");
// The script path, when a script was given: ExecBatScripStringtPath if
// Shell.WINDOWS is set, ExecShellStringPath otherwise.
if (!scriptPath.isEmpty()) {
  if (Shell.WINDOWS) {
    launchLine.append(ExecBatScripStringtPath);
  } else {
    launchLine.append(ExecShellStringPath);
  }
  launchLine.append(" ");
}
// The shell arguments.
launchLine.append(shellArgs).append(" ");
// Redirect stdout and stderr into the container log directory.
launchLine.append("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout").append(" ");
launchLine.append("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr").append(" ");

// The fully assembled command.
List<String> commands = new ArrayList<String>();
commands.add(launchLine.toString());

// Launch context: local resources, environment, commands and tokens.
ContainerLaunchContext launchContext = ContainerLaunchContext.newInstance(
    localResources, shellEnv, commands, null, allTokens.duplicate(), null);
containerListener.addContainer(container.getId(), container);
nmClientAsync.startContainerAsync(container, launchContext);
/**
 * Reports progress as part of the AM heartbeat; the returned value is delivered
 * to the RM on the next heartbeat.
 *
 * @return number of completed containers divided by the total number of containers
 */
@Override
public float getProgress() {
  return (float) numCompletedContainers.get() / numTotalContainers;
}
/** After all tasks have completed, the AM unregisters itself from YARN and stops all of its client objects. **/
// Stop the NodeManager client as well; it is started together with amRMClient
// and would otherwise be left running.
nmClientAsync.stop();
try {
  amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
} catch (YarnException ex) {
  // A failed unregistration is logged only, so the RM client is still stopped below.
  LOG.error("Failed to unregister application", ex);
} catch (IOException e) {
  LOG.error("Failed to unregister application", e);
}
amRMClient.stop();
二.Yarn Shell命令
1.常用命令
1.1 yarn jar
描述:运行一个jar文件。用户可以把yarn代码打包到一个jar文件中,然后使用这个命令执行。
用法:yarn jar <jar> [mainClass] args...
1.2 yarn application
描述:打印应用程序的报告,或kill掉应用程序。
用法:yarn application <options>
命令选项 描述
-applicationId ApplicationId 指定一个应用程序ID(application id)
-appOwner AppOwner 指定一个 程序拥有者(application owner)
-containerId ContainerId 指定一个容器ID(container id)
-nodeAddress NodeAddress 指定一个节点地址( node address)
1.3 yarn node
描述:打印节点报告
用法:yarn node <options>
命令选项 描述
-status NodeId 指定一个节点ID(node id)
-list 列出所有节点列表信息
1.4 yarn logs
描述:转储容器日志
用法:yarn logs <options>
命令选项 描述
-applicationId ApplicationId 指定一个应用程序id( application id)
-appOwner AppOwner 指定一个程序拥有者( application owner)
-containerId ContainerId 指定一个容器id(container id)
-nodeAddress NodeAddress 指定一个节点地址( node address)
1.5 yarn classpath
描述:打印获取Hadoop的jar及其所需库所需要的类路径
用法:yarn classpath
1.6 yarn version
描述:打印版本信息
用法:yarn version
2.管理命令
2.1 启动资源管理
用法:yarn resourcemanager
2.2 启动节点管理
用法:yarn nodemanager
2.3 启动代理服务
用法:yarn proxyserver