HDFS startup script
start-dfs.sh
# My Hadoop install location: /opt/hadoop-2.7.7
# Hadoop sbin location, e.g. /opt/hadoop-2.7.7/sbin ($HADOOP_HOME/sbin)
bin=$(dirname "${BASH_SOURCE-$0}")
# Resolve the sbin directory to an absolute path
bin=$(cd "$bin"; pwd)
# Directory holding the shared config scripts, e.g. /opt/hadoop-2.7.7/sbin/../libexec
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
# Honor a user-supplied HADOOP_LIBEXEC_DIR; otherwise fall back to the default above
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Source hdfs-config.sh from libexec to set up the HDFS environment
# (quoted so a path containing spaces does not break the source command)
. "$HADOOP_LIBEXEC_DIR/hdfs-config.sh"
hdfs-config.sh
# Path of the invoking script, e.g. /opt/hadoop-2.7.7/sbin/start-dfs.sh
bin=$(which "$0")
# Its directory: /opt/hadoop-2.7.7/sbin
bin=$(dirname "${bin}")
# Resolve to an absolute path: /opt/hadoop-2.7.7/sbin
bin=$(cd "$bin"; pwd)
# libexec directory: /opt/hadoop-2.7.7/sbin/../libexec
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Locate hadoop-config.sh, trying each candidate location in turn
# (-e tests that the file — or directory — exists)
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
  # Found next to this script: source it to set up the common environment
  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
  . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
  . "$HADOOP_HOME"/libexec/hadoop-config.sh
else
  # hadoop-config.sh was not found anywhere: report on stderr and fail
  # with a non-zero status (the original echoed to stdout and exited 0)
  echo "Hadoop common not found." >&2
  exit 1
fi
hadoop-config.sh
# 脚本的位置 /opt/hadoop-2.7.7/sbin/../libexec/hadoop-config.sh this="${BASH_SOURCE-$0}" # /opt/hadoop-2.7.7/libexec common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P) # 脚本名字 hadoop-config.sh script="$(basename -- "$this")" # 脚本的绝对路径: /opt/hadoop-2.7.7/libexec/hadoop-config.sh this="$common_bin/$script" # 文件 hadoop-layout.sh 是普通文件 执行(我目录下没有,后续补充) [ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh" # 1. hadoop 一些库的路径 不粘贴了 # 2. 确定hadoop 安装的根目录 HADOOP_PREFIX # 3. 检查参数 # 4. 设置日志级别,默认INFO # 5. 设置备用的conf目录 # 6. 可以通过参数指明host # 7. 可以通过参数指明hostname # 8. 确定是不是cygwin环境 # 9. 校验确认不是ipv6 only # 10.尝试设置JAVA_HOME ,如果没设置的话 # 11.设置一堆classpath 和 conf 目录 # 总之就是这个脚本就是确认运行环境
# Back to the start-dfs.sh script.
# Get arguments: besides plain startup, start-dfs.sh accepts two
# extra operations, -upgrade and -rollback.
if [[ $# -ge 1 ]]; then
  startOpt="$1"
  shift
  case "$startOpt" in
    -upgrade)
      nameStartOpt="$startOpt"
      ;;
    -rollback)
      dataStartOpt="$startOpt"
      ;;
    *)
      # NOTE(review): $usage is defined earlier in the full script,
      # outside this excerpt.
      echo $usage
      exit 1
      ;;
  esac
fi

# Add other possible options. For a default startup nameStartOpt stays
# empty (no extra arguments).
nameStartOpt="$nameStartOpt $@"

# Start the namenodes
#---------------------------------------------------------
# namenodes
NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)

echo "Starting namenodes on [$NAMENODES]"

"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
  --config "$HADOOP_CONF_DIR" \
  --hostnames "$NAMENODES" \
  --script "$bin/hdfs" start namenode $nameStartOpt

# Start the datanodes
#---------------------------------------------------------
# datanodes (using default slaves file)
if [ -n "$HADOOP_SECURE_DN_USER" ]; then
  # Secure cluster: datanodes must be started separately as root
  echo \
    "Attempting to start secure cluster, skipping datanodes. " \
    "Run start-secure-dns.sh as root to complete startup."
else
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --script "$bin/hdfs" start datanode $dataStartOpt
fi

# Start the secondary namenodes
#---------------------------------------------------------
# secondary namenodes (if any)
SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)

if [ -n "$SECONDARY_NAMENODES" ]; then
  echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"

  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --hostnames "$SECONDARY_NAMENODES" \
    --script "$bin/hdfs" start secondarynamenode
fi

# Start the quorum journal nodes (used in HA clusters to
# synchronize namenode edit logs)
#---------------------------------------------------------
# quorumjournal nodes (if any)
SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)

case "$SHARED_EDITS_DIR" in
qjournal://*)
  # Extract the host list from a URI like qjournal://h1:8485;h2:8485/cluster
  JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
  echo "Starting journal nodes [$JOURNAL_NODES]"
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --hostnames "$JOURNAL_NODES" \
    --script "$bin/hdfs" start journalnode ;;
esac

# Start zkfc — used for automatic failover in HA clusters
#---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled
AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
  echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]"
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --hostnames "$NAMENODES" \
    --script "$bin/hdfs" start zkfc
fi
Notice that every service above is ultimately started through hadoop-daemons.sh, which is passed three options: --config (the configuration directory), --hostnames (the servers on which to start the daemon), and --script (the script that actually manages the process). Let's now look at those two scripts: hadoop-daemons.sh and hdfs.
hadoop-daemons.sh
# hadoop-daemons.sh — run a hadoop-daemon.sh command on every slave host.
usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."

# If not enough args are specified, show usage and fail
# (quoted so the message is printed as one word-safe string)
if [ $# -le 1 ]; then
  echo "$usage"
  exit 1
fi

bin=$(dirname "${BASH_SOURCE-$0}")
bin=$(cd "$bin"; pwd)

DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Re-source hadoop-config.sh to (re)establish the environment
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"

# Run slaves.sh, which ssh-es to every slave and there executes:
#   cd "$HADOOP_PREFIX" ; hadoop-daemon.sh --config "$HADOOP_CONF_DIR" "$@"
# (note: hadoop-daemon.sh — singular — is the per-node script; the escaped
# \; is passed through literally so the remote shell sees two commands)
exec "$bin/slaves.sh" --config "$HADOOP_CONF_DIR" cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config "$HADOOP_CONF_DIR" "$@"
slaves.sh
# 摘抄了关键部分的代码,遍历 子节点 然后ssh 过去执行hadoop-daemon.sh # Where to start the script, see hadoop-config.sh # (it set up the variables based on command line options) if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then SLAVE_NAMES=$HADOOP_SLAVE_NAMES else SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves} SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed 's/#.*$//;/^$/d') fi # start the daemons for slave in $SLAVE_NAMES ; do ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
hadoop-daemon.sh
# First section: determine which helper script to use and which node type
# is being operated on; e.g. when starting a namenode, command = namenode.
hadoopScript="$HADOOP_PREFIX"/bin/hadoop
if [ "--script" = "$1" ]
then
  shift
  hadoopScript=$1
  shift
fi
startStop=$1
shift
command=$1
shift

# (Middle section — elided in this excerpt — sets the log file path, the
# pid file path, and related variables.)

# Third section: start or stop the daemon.
case $startStop in

  # Start the process
  (start)

    # Make sure the pid directory exists and is writable
    [ -w "$HADOOP_PID_DIR" ] ||  mkdir -p "$HADOOP_PID_DIR"

    # Refuse to start if a live process already owns the pid file
    if [ -f $pid ]; then
      if kill -0 `cat $pid` > /dev/null 2>&1; then
        echo $command running as process `cat $pid`.  Stop it first.
        exit 1
      fi
    fi

    if [ "$HADOOP_MASTER" != "" ]; then
      echo rsync from $HADOOP_MASTER
      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
    fi

    hadoop_rotate_log $log
    echo starting $command, logging to $log
    cd "$HADOOP_PREFIX"
    case $command in
      namenode|secondarynamenode|datanode|journalnode|dfs|dfsadmin|fsck|balancer|zkfc)
        if [ -z "$HADOOP_HDFS_HOME" ]; then
          hdfsScript="$HADOOP_PREFIX"/bin/hdfs
        else
          hdfsScript="$HADOOP_HDFS_HOME"/bin/hdfs
        fi
        # Adjust the scheduling priority with nice and run the hdfs command.
        # When starting the namenode, $command = namenode and "$@" is empty,
        # so this is effectively: hdfs namenode
        nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
      ;;
      (*)
        nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
      ;;
    esac
    echo $! > $pid
    sleep 1
    head "$log"
    # capture the ulimit output
    if [ "true" = "$starting_secure_dn" ]; then
      echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
      # capture the ulimit info for the appropriate user
      su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
    elif [ "true" = "$starting_privileged_nfs" ]; then
      echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
      su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
    else
      echo "ulimit -a for user $USER" >> $log
      ulimit -a >> $log 2>&1
    fi
    sleep 3;
    # If the daemon died within 3 seconds, propagate the failure
    if ! ps -p $! > /dev/null ; then
      exit 1
    fi
  ;;

  # Stop the process
  # (fix: the excerpt had dropped this case label, leaving the stop logic
  # dangling between ";;" and the next pattern — syntactically invalid)
  (stop)

    if [ -f $pid ]; then
      TARGET_PID=`cat $pid`
      if kill -0 $TARGET_PID > /dev/null 2>&1; then
        echo stopping $command
        kill $TARGET_PID
        sleep $HADOOP_STOP_TIMEOUT
        # Escalate to SIGKILL only if the graceful TERM did not work
        if kill -0 $TARGET_PID > /dev/null 2>&1; then
          echo "$command did not stop gracefully after $HADOOP_STOP_TIMEOUT seconds: killing with kill -9"
          kill -9 $TARGET_PID
        fi
      else
        echo no $command to stop
      fi
      rm -f $pid
    else
      echo no $command to stop
    fi
  ;;

  (*)
    echo $usage
    exit 1
  ;;

esac
hdfs
# Excerpt from bin/hdfs: map the requested COMMAND onto its Java main class.
if [ "$COMMAND" = "namenode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
elif [ "$COMMAND" = "zkfc" ] ; then
  CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
elif [ "$COMMAND" = "secondarynamenode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
elif [ "$COMMAND" = "datanode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
  if [ "$starting_secure_dn" = "true" ]; then
    HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
  else
    HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
  fi
elif [ "$COMMAND" = "journalnode" ] ; then
  CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"
# fix: the excerpt dropped the closing fi, leaving the if/elif chain
# unterminated (the full script has more elif branches, elided here)
fi

# run it — the entry point that launches the Java class
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"

#### Summary of the main classes run per command:
# namenode          -> org.apache.hadoop.hdfs.server.namenode.NameNode
# datanode          -> org.apache.hadoop.hdfs.server.datanode.DataNode
# secondarynamenode -> org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode
# version           -> org.apache.hadoop.util.VersionInfo
To sum up
HDFS is started by start-dfs.sh, which calls hdfs-config.sh (and, through it, hadoop-config.sh) to set up the environment. It then invokes hadoop-daemons.sh, which uses slaves.sh to ssh into each node and run hadoop-daemon.sh there. hadoop-daemon.sh handles the process-level concerns (pid file, logs, niceness) and finally calls the hdfs script, which selects the Java main class for the requested command (NameNode, DataNode, SecondaryNameNode, ...) and execs the JVM to run it.