HDFS Source Code Analysis: What Exactly Happens in HDFS After We Call mkdir (Part 6)

1. We take a scenario-driven approach, so first we write a short test program

package org.apache.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

/**
 * Copyright (c) 2019 leyou ALL Rights Reserved
 * Project: hadoop-main
 * Package: org.apache.hadoop
 * Version: 1.0
 *
 * @author qingzhi.wu
 * @date 2020/7/6 20:05
 */
public class TestHDFS {
    public static void main(String[] args) throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.newInstance(configuration);

        //scenario-driven approach (the metadata update flow)
        fileSystem.mkdirs(new Path("/user/hive/warehouse/test/my"));
        /**
         * Let's make a guess based on the source code we have read so far:
         * where does this mkdirs actually execute? It has to be on the RPC
         * server side, which means NameNodeRpcServer. So why can't we just
         * write
         *   NameNode namenode = new NameNode(conf);
         *   namenode.mkdirs(new Path("xx"));
         * Because the client never talks to a NameNode object directly;
         * this FileSystem must obtain an RPC proxy internally.
         */

    }
}

From here we can just keep clicking through the calls in the IDE to see what it actually does.
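
Before we start clicking, we can quickly confirm that the concrete class behind FileSystem really is DistributedFileSystem. A minimal sketch, assuming core-site.xml points fs.defaultFS at an HDFS cluster (hdfs://localhost:9000 is just a placeholder address):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import java.io.IOException;

public class WhichFileSystem {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // FileSystem picks the implementation class from the URI scheme of
        // fs.defaultFS; the hdfs:// scheme maps to DistributedFileSystem
        FileSystem fs = FileSystem.newInstance(conf);
        // expected: org.apache.hadoop.hdfs.DistributedFileSystem
        System.out.println(fs.getClass().getName());
    }
}

So every call on fs, including mkdirs, actually lands in DistributedFileSystem.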

2. What exactly happens when HDFS creates a directory?

2.1 The mkdirs method of DistributedFileSystem

  /**
   * Create a directory and its parent directories.
   *
   * See {@link FsPermission#applyUMask(FsPermission)} for details of how
   * the permission is applied.
   *
   * @param f           The path to create
   * @param permission  The permission.  See FsPermission#applyUMask for 
   *                    details about how this is used to calculate the
   *                    effective permission.
   */
  @Override
  public boolean mkdirs(Path f, FsPermission permission) throws IOException {
    return mkdirsInternal(f, permission, true);
  }

2.2 mkdirsInternal

private boolean mkdirsInternal(Path f, final FsPermission permission,
      final boolean createParent) throws IOException {
    statistics.incrementWriteOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<Boolean>() {
      @Override
      public Boolean doCall(final Path p)
          throws IOException, UnresolvedLinkException {
        //TODO key code: delegate to DFSClient#mkdirs
        return dfs.mkdirs(getPathName(p), permission, createParent);
      }

      @Override
      public Boolean next(final FileSystem fs, final Path p)
          throws IOException {
        // FileSystem doesn't have a non-recursive mkdir() method
        // Best we can do is error out
        if (!createParent) {
          throw new IOException("FileSystem does not support non-recursive"
              + "mkdir");
        }
        return fs.mkdirs(p, permission);
      }
    }.resolve(this, absF);
  }
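
Between 2.2 and 2.3 there is one small intermediate hop that the headings skip: dfs in doCall above is a DFSClient, and its public mkdirs first applies the client umask, then delegates. A paraphrased sketch of that hop, based on the 2.7.x branch (exact code may differ slightly across versions):

  // DFSClient (sketch): mask the permission with the client's umask,
  // then fall through to primitiveMkdir, which performs the actual RPC
  public boolean mkdirs(String src, FsPermission permission,
      boolean createParent) throws IOException {
    final FsPermission masked = applyUMask(permission);
    return primitiveMkdir(src, masked, createParent);
  }

  private FsPermission applyUMask(FsPermission permission) {
    if (permission == null) {
      permission = FsPermission.getDefault();
    }
    return permission.applyUMask(dfsClientConf.uMask);
  }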

2.3 The primitiveMkdir method of DFSClient

 /**
   * Same as {@link #mkdirs(String, FsPermission, boolean)} except
   * that the permission has already been masked against the umask.
   */
  public boolean primitiveMkdir(String src, FsPermission absPermission, 
    boolean createParent)
    throws IOException {
    checkOpen();
    if (absPermission == null) {
      absPermission = 
        FsPermission.getDefault().applyUMask(dfsClientConf.uMask);
    } 

    if(LOG.isDebugEnabled()) {
      LOG.debug(src + ": masked=" + absPermission);
    }
    TraceScope scope = Trace.startSpan("mkdir", traceSampler);
    try {
      //TODO this goes over Hadoop RPC and invokes the method on the server side
      return namenode.mkdirs(src, absPermission, createParent);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     InvalidPathException.class,
                                     FileAlreadyExistsException.class,
                                     FileNotFoundException.class,
                                     ParentNotDirectoryException.class,
                                     SafeModeException.class,
                                     NSQuotaExceededException.class,
                                     DSQuotaExceededException.class,
                                     UnresolvedPathException.class,
                                     SnapshotAccessControlException.class);
    } finally {
      scope.close();
    }
  }
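
Note that the namenode field used above is not a NameNode object at all; it is a ClientProtocol proxy. A hedged sketch of how such a proxy is obtained (simplified; the real wiring lives in the DFSClient constructor, and nameNodeUri is assumed to come from fs.defaultFS):

  // sketch only: build a ClientProtocol proxy for the NameNode
  NameNodeProxies.ProxyAndInfo<ClientProtocol> proxyInfo =
      NameNodeProxies.createProxy(conf, nameNodeUri, ClientProtocol.class);
  ClientProtocol namenode = proxyInfo.getProxy();
  // from here on, namenode.mkdirs(...) is serialized over Hadoop RPC
  // and executed inside NameNodeRpcServer on the server side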

2.4 The mkdirs method in NameNodeRpcServer

@Override // ClientProtocol
  public boolean mkdirs(String src, FsPermission masked, boolean createParent)
      throws IOException {
    checkNNStartup();
    if(stateChangeLog.isDebugEnabled()) {
      stateChangeLog.debug("*DIR* NameNode.mkdirs: " + src);
    }
    if (!checkPathLength(src)) {
      throw new IOException("mkdirs: Pathname too long.  Limit " 
                            + MAX_PATH_LENGTH + " characters, " + MAX_PATH_DEPTH + " levels.");
    }
    
    //TODO create the directory through FSNamesystem
    return namesystem.mkdirs(src,
        new PermissionStatus(getRemoteUser().getShortUserName(),
            null, masked), createParent);
  }

2.5 The mkdirs method in FSNamesystem

  /**
   * Create all the necessary directories
   */
  boolean mkdirs(String src, PermissionStatus permissions,
      boolean createParent) throws IOException {
    HdfsFileStatus auditStat = null;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      //TODO check whether the NameNode is in safe mode
      checkNameNodeSafeMode("Cannot create directory " + src);
      //TODO Hadoop takes an object-oriented approach: each operation is encapsulated as its own class
      auditStat = FSDirMkdirOp.mkdirs(this, src, permissions, createParent);
    } catch (AccessControlException e) {
      logAuditEvent(false, "mkdirs", src);
      throw e;
    } finally {
      writeUnlock();
    }
    //TODO key point: persist the metadata edit log
    getEditLog().logSync();
    logAuditEvent(true, "mkdirs", src, null, auditStat);
    return true;
  }
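
getEditLog().logSync() is where the double-buffer mechanism mentioned below in section 2.7 comes in. The following is a conceptual toy, not the actual FSEditLog/EditsDoubleBuffer code: handler threads append edits cheaply to the current in-memory buffer, and logSync swaps the two buffers so the expensive disk flush happens off the append path.

public class DoubleBufferSketch {
    private StringBuilder bufCurrent = new StringBuilder(); // receives new edits
    private StringBuilder bufReady = new StringBuilder();   // being flushed

    // called by many handler threads: an in-memory append, very cheap
    public synchronized void logEdit(String op) {
        bufCurrent.append(op).append('\n');
    }

    // swap the buffers, then flush the ready buffer to disk
    public void logSync() {
        synchronized (this) {
            StringBuilder tmp = bufReady;
            bufReady = bufCurrent;
            bufCurrent = tmp;
        }
        flushToDisk(bufReady); // slow I/O runs outside the append lock
        bufReady.setLength(0);
        // a real implementation also coordinates concurrent syncers;
        // that bookkeeping is omitted here
    }

    private void flushToDisk(StringBuilder buf) {
        // in the real NameNode this writes to the edit log files
    }
}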

2.6 FSDirMkdirOp.mkdirs

static HdfsFileStatus mkdirs(FSNamesystem fsn, String src,
      PermissionStatus permissions, boolean createParent) throws IOException {

    //TODO get the directory tree
    /**
     * FSDirectory is the directory tree, the thing you are listing with:
     *
     * hadoop fs -ls /
     *
     * hdfs dfs -ls /
     *
     */
    FSDirectory fsd = fsn.getFSDirectory();
    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
    }
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException(src);
    }
    FSPermissionChecker pc = fsd.getPermissionChecker();
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    fsd.writeLock();
    try {
      //TODO resolve the path of the directory to create, e.g. /user/hive/warehouse/data/mytable
      /**
       * hadoop fs -mkdir /user/hive/warehouse/data/mytable
       */
      src = fsd.resolvePath(pc, src, pathComponents);
      INodesInPath iip = fsd.getINodesInPath4Write(src);
      if (fsd.isPermissionEnabled()) {
        fsd.checkTraverse(pc, iip);
      }

      /**
       * Say /user/hive/warehouse already exists and we want to create
       * /user/hive/warehouse/data/mytable. getLastINode() returns the INode
       * at the final path component, which is null here because mytable does
       * not exist yet; the deepest existing ancestor (the warehouse INode)
       * is what existing/getExistingINodes() captures below.
       */
      final INode lastINode = iip.getLastINode();
      if (lastINode != null && lastINode.isFile()) {
        throw new FileAlreadyExistsException("Path is not a directory: " + src);
      }

      INodesInPath existing = lastINode != null ? iip : iip.getExistingINodes();
      if (lastINode == null) {
        if (fsd.isPermissionEnabled()) {
          fsd.checkAncestorAccess(pc, iip, FsAction.WRITE);
        }

        if (!createParent) {
          fsd.verifyParentDir(iip, src);
        }

        // validate that we have enough inodes. This is, at best, a
        // heuristic because the mkdirs() operation might need to
        // create multiple inodes.
        fsn.checkFsObjectLimit();

        /**
         * Already exists: /user/hive/warehouse
         * To create:      /user/hive/warehouse/data/mytable
         * So the directories that still need creating are data/mytable
         */
        List<String> nonExisting = iip.getPath(existing.length(),
            iip.length() - existing.length());
        int length = nonExisting.size();
        //TODO creating multiple levels of directories goes through here
        if (length > 1) {
          List<String> ancestors = nonExisting.subList(0, length - 1);
          // Ensure that the user can traversal the path by adding implicit
          // u+wx permission to all ancestor directories
          existing = createChildrenDirectories(fsd, existing, ancestors,
              addImplicitUwx(permissions, permissions));
          if (existing == null) {
            throw new IOException("Failed to create directory: " + src);
          }
        }
        //TODO the final single directory is created here (mytable in our example)
        if ((existing = createChildrenDirectories(fsd, existing,
            nonExisting.subList(length - 1, length), permissions)) == null) {
          throw new IOException("Failed to create directory: " + src);
        }
      }
      return fsd.getAuditFileInfo(existing);
    } finally {
      fsd.writeUnlock();
    }
  }
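
To make the split between existing and nonExisting concrete, here is a plain Java walk-through of the scenario from the comments (illustrative only, not Hadoop code): /user/hive/warehouse already exists and we want /user/hive/warehouse/data/mytable.

import java.util.Arrays;
import java.util.List;

public class MkdirSplitExample {
    public static void main(String[] args) {
        // components of the target path that do not exist yet
        List<String> nonExisting = Arrays.asList("data", "mytable");
        int length = nonExisting.size(); // 2, so the multi-level branch runs
        // every level except the last gets implicit u+wx permissions
        List<String> ancestors = nonExisting.subList(0, length - 1);
        System.out.println("ancestors (implicit u+wx): " + ancestors); // [data]
        // the last level is created with the caller's own permissions
        System.out.println("final dir: "
            + nonExisting.subList(length - 1, length)); // [mytable]
    }
}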

2.7 The FSDirectory class comment and key fields

/**
 * Both FSDirectory and FSNamesystem manage the state of the namespace.
 * FSDirectory is a pure in-memory data structure, all of whose operations
 * happen entirely in memory. In contrast, FSNamesystem persists the operations
 * to the disk.
 *
 * FSDirectory and FSNamesystem both manage the state of the namespace:
 *
 * 1. FSDirectory is a data structure that lives entirely in memory: the in-memory directory tree.
 *
 * 2. FSNamesystem additionally persists the metadata operations to disk (edits are written to memory first, then flushed to disk: the double-buffer mechanism).
 *
 * @see org.apache.hadoop.hdfs.server.namenode.FSNamesystem
 **/
 //the root directory of the tree, i.e. /
  INodeDirectory rootDir;
  /** static factory that creates the root INodeDirectory */
  private static INodeDirectory createRoot(FSNamesystem namesystem) {
    final INodeDirectory r = new INodeDirectory(
        INodeId.ROOT_INODE_ID,
        INodeDirectory.ROOT_NAME,//empty, because the root directory has no name
        namesystem.createFsOwnerPermissions(new FsPermission((short) 0755)),
        0L);
    r.addDirectoryWithQuotaFeature(
        new DirectoryWithQuotaFeature.Builder().
            nameSpaceQuota(DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA).
            storageSpaceQuota(DirectoryWithQuotaFeature.DEFAULT_STORAGE_SPACE_QUOTA).
            build());
    r.addSnapshottableFeature();
    r.setSnapshotQuota(0);
    return r;
  }
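
A toy model of this structure in plain Java (nothing here is Hadoop code): the tree hangs off a root directory whose name is empty, directories keep child lists, and files are leaves, mirroring INodeDirectory and INodeFile.

import java.util.ArrayList;
import java.util.List;

public class ToyNamespace {
    static class INode {
        final String name;
        INode(String name) { this.name = name; }
    }
    static class INodeDir extends INode {
        final List<INode> children = new ArrayList<>();
        INodeDir(String name) { super(name); }
    }
    static class INodeFile extends INode {
        INodeFile(String name) { super(name); }
    }

    public static void main(String[] args) {
        INodeDir root = new INodeDir("");      // the root has an empty name
        INodeDir user = new INodeDir("user");  // /user
        root.children.add(user);
        user.children.add(new INodeFile("data.txt")); // /user/data.txt
        System.out.println("children of /user: " + user.children.size());
    }
}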

2.8 createChildrenDirectories

/**
   * Create the directory {@code parent} / {@code children} and all ancestors
   * along the path.
   *
   * @param fsd FSDirectory
   * @param existing The INodesInPath instance containing all the existing
   *                 ancestral INodes
   * @param children The relative path from the parent towards children,
   *                 starting with "/"
   * @param perm the permission of the directory. Note that all ancestors
   *             created along the path has implicit {@code u+wx} permissions.
   *
   * @return {@link INodesInPath} which contains all inodes to the
   * target directory, After the execution parentPath points to the path of
   * the returned INodesInPath. The function return null if the operation has
   * failed.
   */
  private static INodesInPath createChildrenDirectories(FSDirectory fsd,
      INodesInPath existing, List<String> children, PermissionStatus perm)
      throws IOException {
    assert fsd.hasWriteLock();

    for (String component : children) {
      //TODO create the directories one level at a time
      //TODO if only a single directory is being created, this loop runs only once
      existing = createSingleDirectory(fsd, existing, component, perm);
      if (existing == null) {
        return null;
      }
    }
    return existing;
  }

2.9 createSingleDirectory

private static INodesInPath createSingleDirectory(FSDirectory fsd,
      INodesInPath existing, String localName, PermissionStatus perm)
      throws IOException {
    assert fsd.hasWriteLock();

    //TODO update the directory tree, which lives in memory
    //this updates the in-memory data
    existing = unprotectedMkdir(fsd, fsd.allocateNewInodeId(), existing,
        localName.getBytes(Charsets.UTF_8), perm, null, now());
    if (existing == null) {
      return null;
    }

    final INode newNode = existing.getLastINode();
    // Directory creation also count towards FilesCreated
    // to match count of FilesDeleted metric.
    NameNode.getNameNodeMetrics().incrFilesCreated();

    String cur = existing.getPath();

    //TODO record the metadata change durably (it is first written to memory)
    //append a metadata entry to the edit log
    fsd.getEditLog().logMkDir(cur, newNode);
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("mkdirs: created directory " + cur);
    }
    return existing;
  }

2.10 unprotectedMkdir

  /**
   * create a directory at path specified by parent
   */
  private static INodesInPath unprotectedMkdir(FSDirectory fsd, long inodeId,
      INodesInPath parent, byte[] name, PermissionStatus permission,
      List<AclEntry> aclEntries, long timestamp)
      throws QuotaExceededException, AclException, FileAlreadyExistsException {
    assert fsd.hasWriteLock();
    assert parent.getLastINode() != null;
    if (!parent.getLastINode().isDirectory()) {
      throw new FileAlreadyExistsException("Parent path is not a directory: " +
          parent.getPath() + " " + DFSUtil.bytes2String(name));
    }
    /**
     * FSDirectory is the file directory tree, rooted at /
     * INodeDirectory represents a directory
     * INodeFile represents a file
     */
    //TODO wrap the new directory as an INodeDirectory
    final INodeDirectory dir = new INodeDirectory(inodeId, name, permission,
        timestamp);

    //TODO attach the new node to the directory tree at the target location
    INodesInPath iip = fsd.addLastINode(parent, dir, true);
    if (iip != null && aclEntries != null) {
      AclStorage.updateINodeAcl(dir, aclEntries, Snapshot.CURRENT_STATE_ID);
    }
    return iip;
  }

2.11 addLastINode

/**
   * Add a child to the end of the path specified by INodesInPath.
   * @return an INodesInPath instance containing the new INode
   */
  @VisibleForTesting
  public INodesInPath addLastINode(INodesInPath existing, INode inode,
      boolean checkQuota) throws QuotaExceededException {
    assert existing.getLastINode() != null &&
        existing.getLastINode().isDirectory();

    final int pos = existing.length();
    // Disallow creation of /.reserved. This may be created when loading
    // editlog/fsimage during upgrade since /.reserved was a valid name in older
    // release. This may also be called when a user tries to create a file
    // or directory /.reserved.
    if (pos == 1 && existing.getINode(0) == rootDir && isReservedName(inode)) {
      throw new HadoopIllegalArgumentException(
          "File name \"" + inode.getLocalName() + "\" is reserved and cannot "
              + "be created. If this is during upgrade change the name of the "
              + "existing file or directory to another name before upgrading "
              + "to the new release.");
    }

    //TODO get the parent directory (warehouse in the example)
    final INodeDirectory parent = existing.getINode(pos - 1).asDirectory();
    // The filesystem limits are not really quotas, so this check may appear
    // odd. It's because a rename operation deletes the src, tries to add
    // to the dest, if that fails, re-adds the src from whence it came.
    // The rename code disables the quota when it's restoring to the
    // original location because a quota violation would cause the item
    // to go "poof".  The fs limits must be bypassed for the same reason.
    if (checkQuota) {
      final String parentPath = existing.getPath(pos - 1);
      verifyMaxComponentLength(inode.getLocalNameBytes(), parentPath);
      verifyMaxDirItems(parent, parentPath);
    }
    // always verify inode name
    verifyINodeName(inode.getLocalNameBytes());

    final QuotaCounts counts = inode.computeQuotaUsage(getBlockStoragePolicySuite());
    updateCount(existing, pos, counts, checkQuota);

    boolean isRename = (inode.getParent() != null);
    boolean added;
    try {
      //TODO add the child under the parent directory
      added = parent.addChild(inode, true, existing.getLatestSnapshotId());
    } catch (QuotaExceededException e) {
      updateCountNoQuotaCheck(existing, pos, counts.negation());
      throw e;
    }
    if (!added) {
      updateCountNoQuotaCheck(existing, pos, counts.negation());
      return null;
    } else {
      if (!isRename) {
        AclStorage.copyINodeDefaultAcl(inode);
      }
      addToInodeMap(inode);
    }
    return INodesInPath.append(existing, inode, inode.getLocalNameBytes());
  }

2.12 parent.addChild

  /**
   * Add a child inode to the directory.
   * 
   * @param node INode to insert
   * @param setModTime set modification time for the parent node
   *                   not needed when replaying the addition and 
   *                   the parent already has the proper mod time
   * @return false if the child with this name already exists; 
   *         otherwise, return true;
   */
  public boolean addChild(INode node, final boolean setModTime,
      final int latestSnapshotId) throws QuotaExceededException {
    final int low = searchChildren(node.getLocalNameBytes());
    if (low >= 0) {
      return false;
    }

    if (isInLatestSnapshot(latestSnapshotId)) {
      // create snapshot feature if necessary
      DirectoryWithSnapshotFeature sf = this.getDirectoryWithSnapshotFeature();
      if (sf == null) {
        sf = this.addSnapshotFeature(null);
      }
      return sf.addChild(this, node, setModTime, latestSnapshotId);
    }
    //TODO add the child node
    addChild(node, low);
    if (setModTime) {
      // update modification time of the parent directory
      updateModificationTime(node.getModificationTime(), latestSnapshotId);
    }
    return true;
  }

2.13 addChild

  /**
   * Add the node to the children list at the given insertion point.
   * The basic add method which actually calls children.add(..).
   */
  private void addChild(final INode node, final int insertionPoint) {
    if (children == null) {
      children = new ArrayList<INode>(DEFAULT_FILES_PER_DIRECTORY);
    }
    node.setParent(this);

    //TODO insert one more INode into the children list
    children.add(-insertionPoint - 1, node);

    if (node.getGroupName() == null) {
      node.setGroup(getGroupName());
    }
  }
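
The odd-looking index in children.add(-insertionPoint - 1, node) follows the Collections.binarySearch convention: when the key is missing, searchChildren returns -(insertionPoint) - 1, so negating and subtracting one recovers the slot that keeps the list sorted. A plain Java illustration:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class InsertionPointExample {
    public static void main(String[] args) {
        List<String> children = new ArrayList<>(Arrays.asList("a", "c"));
        int low = Collections.binarySearch(children, "b"); // returns -2
        if (low < 0) {
            children.add(-low - 1, "b"); // -(-2) - 1 == 1: insert at index 1
        }
        System.out.println(children); // [a, b, c], still sorted
    }
}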

2.14 The key field of INodeDirectory, an important subclass of INode

  /**
   * This is an important field:
   * the child nodes are stored in this list.
   * A child can be a directory or a file,
   * modeled after Linux:
   * INodeDirectory stores a directory,
   * INodeFile stores a file.
   */
  private List<INode> children = null;

As you can see above, the directory tree is ultimately built out of these children lists: every directory is an INodeDirectory holding an ArrayList of INodes.

3. Summary

  • When we create a directory in HDFS from a Java client, the call first goes to the mkdirs method of FileSystem
  • What actually runs is the override in its subclass DistributedFileSystem
  • DistributedFileSystem then goes through DFSClient, which holds an RPC proxy for NameNodeRpcServer and invokes the server-side create-directory method
  • The FSNamesystem metadata manager first checks whether the NameNode is in safe mode; the previous article covered when that happens (the 100 MB resource threshold, the 0.999f block threshold, and the DataNode count)
  • HDFS encapsulates each operation as its own class, e.g. FSDirMkdirOp
  • It fetches the directory tree (FSDirectory)
  • It splits off the portion of the path whose parent directories already exist
  • Each directory level to be created is wrapped as an INodeDirectory (a file would be an INodeFile)
  • The newly created directories are then attached one by one under their parents
  • Finally the metadata edits are flushed to disk (covered in a later article)


Reposted from blog.csdn.net/weixin_43704599/article/details/107192462