// NOTE: create the relevant local directories yourself before running this example.
package com.hadoop.hdfs;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IOUtils;
/**
 * Merges small local files into larger per-date files on HDFS.
 */
public class MergeSmallFilesToHDFS {
	/** Handle to the target (HDFS) file system; initialized in {@link #list()}. */
	private static FileSystem fs = null;
	/** Handle to the source (local) file system; initialized in {@link #list()}. */
	private static FileSystem local = null;

	public static void main(String[] args) throws IOException, URISyntaxException {
		list();
	}

	/**
	 * Collects the local per-date directories and uploads their merged contents to HDFS.
	 *
	 * <p>Each local sub-directory named like {@code 2019-10-31} has all of its
	 * {@code .txt} files concatenated into a single {@code 20191031.txt} on HDFS.
	 *
	 * <p>NOTE(review): the HDFS address ({@code hdfs://master:9000}) and the local
	 * source path ({@code E://Hadoop/73}) are hard-coded — adjust them to your own
	 * environment before running.
	 *
	 * @throws IOException        on any local or HDFS file-system access failure
	 * @throws URISyntaxException if the hard-coded HDFS URI is malformed
	 */
	public static void list() throws IOException, URISyntaxException {
		// Read the Hadoop file-system configuration.
		Configuration conf = new Configuration();
		// File-system access point. NOTE: change hdfs://master:9000 to your own HDFS address.
		URI uri = new URI("hdfs://master:9000");
		// Create the FileSystem handles.
		fs = FileSystem.get(uri, conf);
		local = FileSystem.getLocal(conf);
		// List the date sub-directories, filtering out svn metadata directories.
		// NOTE: change E://Hadoop/73 to your own local source path.
		FileStatus[] dirStatus = local.globStatus(new Path("E://Hadoop/73/*"),
				new RegexExcludePathFilter("^.*svn$"));
		// All directory paths under the source directory.
		Path[] dirs = FileUtil.stat2Paths(dirStatus);
		for (Path dir : dirs) {
			// Directory name like 2019-10-31 becomes the output file name 20191031.
			String fileName = dir.getName().replace("-", "");
			// Accept only the .txt files inside the date directory.
			FileStatus[] localStatus = local.globStatus(new Path(dir + "/*"),
					new RegexAcceptPathFilter("^.*txt$"));
			// All matching file paths inside the date directory.
			Path[] listedPaths = FileUtil.stat2Paths(localStatus);
			// Output path. NOTE: change hdfs://master:9000/20191031 to your own HDFS directory.
			Path block = new Path("hdfs://master:9000/20191031/" + fileName + ".txt");
			System.out.println("merged file name: " + fileName + ".txt");
			// try-with-resources guarantees both streams are closed even when
			// copyBytes throws (the original leaked them on the error path).
			try (FSDataOutputStream out = fs.create(block)) {
				for (Path p : listedPaths) {
					try (FSDataInputStream in = local.open(p)) {
						// false: do not auto-close the streams; try-with-resources handles it.
						IOUtils.copyBytes(in, out, 4096, false);
					}
				}
			}
		}
	}

	/**
	 * A {@link PathFilter} that rejects any path matching the given regular expression.
	 */
	public static class RegexExcludePathFilter implements PathFilter {
		private final String regex;

		public RegexExcludePathFilter(String regex) {
			this.regex = regex;
		}

		@Override
		public boolean accept(Path path) {
			return !path.toString().matches(regex);
		}
	}

	/**
	 * A {@link PathFilter} that accepts only paths matching the given regular expression.
	 */
	public static class RegexAcceptPathFilter implements PathFilter {
		private final String regex;

		public RegexAcceptPathFilter(String regex) {
			this.regex = regex;
		}

		@Override
		public boolean accept(Path path) {
			return path.toString().matches(regex);
		}
	}
}