Java API notes: merging small files into HDFS.

Prepare the relevant sample documents/files yourself before running.

package com.hadoop.hdfs;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IOUtils;
/**
 * Merges many small local text files into one file per date directory on HDFS.
 *
 * <p>For each date directory under the local input path (e.g. {@code E://Hadoop/73/2019-10-31}),
 * every {@code .txt} file inside it is concatenated into a single HDFS file named after the
 * date (dashes removed), e.g. {@code hdfs://master:9000/20191031/20191031.txt}.
 *
 * <p>NOTE(review): the HDFS address ({@code hdfs://master:9000}), the local input path
 * ({@code E://Hadoop/73}) and the HDFS output directory are hard-coded — adjust them to
 * your own environment before running.
 */
public class MergeSmallFilesToHDFS {

    /** Handle to the remote HDFS file system. */
    private static FileSystem fs = null;
    /** Handle to the local file system. */
    private static FileSystem local = null;

    public static void main(String[] args) throws IOException, URISyntaxException {
        list();
    }

    /**
     * Collects the local small files and uploads the merged result to HDFS.
     *
     * @throws IOException        if reading local files or writing to HDFS fails
     * @throws URISyntaxException if the hard-coded HDFS URI is malformed
     */
    public static void list() throws IOException, URISyntaxException {
        // Read the Hadoop configuration (core-site.xml etc. on the classpath).
        Configuration conf = new Configuration();
        // File system access point. NOTE: change hdfs://master:9000 to your own HDFS address.
        URI uri = new URI("hdfs://master:9000");
        // Obtain the HDFS file system object.
        fs = FileSystem.get(uri, conf);
        // Obtain the local file system object.
        local = FileSystem.getLocal(conf);
        // List the date directories under the input path, filtering out .svn entries.
        // NOTE: change E://Hadoop/73 to your own local path.
        FileStatus[] dirstatus = local.globStatus(
                new Path("E://Hadoop/73/*"), new RegexExcludePathFilter("^.*svn$"));
        // Convert the statuses to the plain paths of all date directories.
        Path[] dirs = FileUtil.stat2Paths(dirstatus);
        FSDataOutputStream out = null;
        FSDataInputStream in = null;
        for (Path dir : dirs) {
            // Directory name like 2019-10-31 -> file name 20191031.
            String fileName = dir.getName().replace("-", "");
            // Accept only the .txt files inside this date directory.
            FileStatus[] localStatus = local.globStatus(
                    new Path(dir + "/*"), new RegexAcceptPathFilter("^.*txt$"));
            // All matching file paths inside the date directory.
            Path[] listedPaths = FileUtil.stat2Paths(localStatus);
            // Output path. NOTE: change hdfs://master:9000/20191031/ to your own HDFS directory.
            Path block = new Path("hdfs://master:9000/20191031/" + fileName + ".txt");
            System.out.println("merged file name: " + fileName + ".txt");
            // Open the output stream on HDFS.
            out = fs.create(block);
            for (Path p : listedPaths) {
                in = local.open(p); // open the input stream
                IOUtils.copyBytes(in, out, 4096, false); // copy data (keep streams open)
                // Close the input stream after each source file.
                in.close();
            }
            if (out != null) {
                // Close the output stream for this merged file.
                out.close();
            }
        }
    }

    /**
     * {@link PathFilter} that REJECTS every path whose string form matches the given regex.
     */
    public static class RegexExcludePathFilter implements PathFilter {
        private final String regex;

        public RegexExcludePathFilter(String regex) {
            this.regex = regex;
        }

        @Override
        public boolean accept(Path path) {
            boolean flag = path.toString().matches(regex);
            return !flag;
        }
    }

    /**
     * {@link PathFilter} that ACCEPTS only paths whose string form matches the given regex.
     */
    public static class RegexAcceptPathFilter implements PathFilter {
        private final String regex;

        public RegexAcceptPathFilter(String regex) {
            this.regex = regex;
        }

        @Override
        public boolean accept(Path path) {
            boolean flag = path.toString().matches(regex);
            return flag;
        }
    }
}

Related reading

Source: blog.51cto.com/14572091/2446947