HDFS I/O Operations
1. Reading data from HDFS through a URL
public class URLCat {
    private static final String HDFS_PATH = "hdfs://localhost:9000/user/zhang/test/README.txt";

    // setURLStreamHandlerFactory may be called at most once per JVM, so the
    // HDFS stream handler is registered in a static initializer.
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) {
        InputStream inputStream = null;
        try {
            inputStream = new URL(HDFS_PATH).openStream();
            // copy in 4 KB chunks; false = do not close the streams automatically
            IOUtils.copyBytes(inputStream, System.out, 4096, false);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(inputStream);
        }
    }
}
2. Reading data from HDFS with the FileSystem API
public class FileSystemCat {
private static final String HDFS_URI = "hdfs://localhost:9000";
private static final String PATH = "/user/zhang/test/README.txt";
public static void main(String[] args) throws IOException {
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), new Configuration());
FSDataInputStream inputStream = fileSystem.open(new Path(PATH));
IOUtils.copyBytes(inputStream, System.out, 4096, false);
IOUtils.closeStream(inputStream);
}
}
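Once a FileSystem handle exists, it can also answer metadata questions without opening the file. The following is a minimal sketch, not part of the original example, reusing the same URI and path and the imports from the class above; getFileStatus returns the information the NameNode keeps for the file:
FileSystem fileSystem = FileSystem.get(URI.create("hdfs://localhost:9000"), new Configuration());
FileStatus status = fileSystem.getFileStatus(new Path("/user/zhang/test/README.txt"));
// Length, block size, replication factor and modification time are all held by the NameNode.
System.out.println("length:      " + status.getLen());
System.out.println("block size:  " + status.getBlockSize());
System.out.println("replication: " + status.getReplication());
System.out.println("modified:    " + status.getModificationTime());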
3. Re-reading data with FSDataInputStream and seek()
public class FileSystemDoubleCat {
public static final String HDFS_URI = "hdfs://localhost:9000";
public static final String PATH = "/user/zhang/test/README.txt";
public static void main(String[] args) throws IOException {
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), new Configuration());
FSDataInputStream inputStream = fileSystem.open(new Path(PATH));
IOUtils.copyBytes(inputStream, System.out, 4096, false);
        // seek() moves back to the start of the file so it can be streamed a second time
        inputStream.seek(0);
IOUtils.copyBytes(inputStream, System.out, 4096, false);
IOUtils.closeStream(inputStream);
}
}
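seek() is a relatively expensive call and it moves the stream's current position. FSDataInputStream also implements PositionedReadable, so a fixed range can be read from an absolute offset without disturbing that position. A minimal sketch on the same file, assuming the setup and imports of the example above (readFully throws EOFException if fewer bytes remain than requested):
FileSystem fileSystem = FileSystem.get(URI.create("hdfs://localhost:9000"), new Configuration());
FSDataInputStream inputStream = fileSystem.open(new Path("/user/zhang/test/README.txt"));
byte[] buffer = new byte[16];
// Read 16 bytes starting at offset 0; the stream's current position is left unchanged.
inputStream.readFully(0, buffer);
System.out.println(new Text(buffer));
IOUtils.closeStream(inputStream);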
4. Copying a file to another location in HDFS: FSDataOutputStream and Progressable
public class FileCopyWirteProgress {
public static final String HDFS_URI = "hdfs://localhost:9000";
public static final String SRC_PATH = "/user/zhang/test/README.txt";
public static final String DESC_PATH = "/user/zhang/test/abc.txt";
public static void main(String[] args) throws IOException {
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), new Configuration());
FSDataInputStream inputStream = fileSystem.open(new Path(SRC_PATH));
        // The Progressable callback is invoked periodically while data is written,
        // so the client can report progress on a long-running copy.
        FSDataOutputStream outputStream = fileSystem.create(new Path(DESC_PATH), new Progressable() {
            public void progress() {
                System.out.println("writing...");
            }
        });
        // true = close both streams once the copy finishes
        IOUtils.copyBytes(inputStream, outputStream, 4096, true);
}
}
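When progress reporting is not needed, the same HDFS-to-HDFS copy can be done with the FileUtil helper instead of hand-copying the streams. A minimal sketch using the same source and destination paths (assumes the imports of the example above):
Configuration configuration = new Configuration();
FileSystem fileSystem = FileSystem.get(URI.create("hdfs://localhost:9000"), configuration);
// The boolean flag controls whether the source file is deleted after the copy.
boolean copied = FileUtil.copy(fileSystem, new Path("/user/zhang/test/README.txt"),
                               fileSystem, new Path("/user/zhang/test/abc.txt"),
                               false, configuration);
System.out.println("copy finished: " + copied);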
5. ListStatus: listing file paths
public class ListStatus {
public static final String HDFS_URI = "hdfs://localhost:9000";
public static void main(String[] args) throws URISyntaxException, IOException {
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), new Configuration());
Path path = fileSystem.getHomeDirectory();
System.out.println("home path: " + path.getName());
        // listStatus() with a PathFilter returns only the entries the filter accepts
        FileStatus[] fileStatuses = fileSystem.listStatus(path, new PathFilter() {
public boolean accept(Path path) {
System.out.println("raw path: " + path);
if (path.toString().contains("test")) {
System.out.println("-----------------------");
return true;
}
return false;
}
});
        // FileUtil.stat2Paths() converts the FileStatus array into an array of Paths
        Path[] paths = FileUtil.stat2Paths(fileStatuses);
for (Path p : paths) {
System.out.println("path: " + p);
}
}
}
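listStatus with a PathFilter is one way to restrict the result; FileSystem.globStatus accepts a shell-style wildcard pattern directly and can also take a PathFilter. A minimal sketch, assuming the same layout under /user/zhang as in the example above:
FileSystem fileSystem = FileSystem.get(URI.create("hdfs://localhost:9000"), new Configuration());
// Match every entry directly under the home directory whose name contains "test".
FileStatus[] statuses = fileSystem.globStatus(new Path("/user/zhang/*test*"));
if (statuses != null) {
    for (Path p : FileUtil.stat2Paths(statuses)) {
        System.out.println("glob path: " + p);
    }
}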
6. Compressor: compression
public class EncodeCompressor {
public static final String HDFS_URI = "hdfs://localhost:9000";
public static final String PATH = "/user/zhang/test/README.txt";
public static void main(String[] args) throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
        FSDataInputStream inputStream = fileSystem.open(new Path(PATH));
        // Create the codec through ReflectionUtils so the Configuration is injected
        // (GzipCodec is Configurable and uses it to choose a zlib implementation).
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, configuration);
        // Wrap a local file with a compressing stream and copy the HDFS file into it.
        CompressionOutputStream outputStream = codec.createOutputStream(new FileOutputStream("README.txt.gz"));
IOUtils.copyBytes(inputStream, outputStream, 4096, true);
}
}
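The example above writes README.txt.gz to the local working directory. The same codec can wrap an output stream returned by fileSystem.create(), so the compressed copy is written back into HDFS instead. A minimal sketch (the destination path is chosen for illustration; imports as in the example above):
Configuration configuration = new Configuration();
FileSystem fileSystem = FileSystem.get(URI.create("hdfs://localhost:9000"), configuration);
FSDataInputStream inputStream = fileSystem.open(new Path("/user/zhang/test/README.txt"));
CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, configuration);
// fileSystem.create() returns an HDFS output stream; the codec adds gzip compression on top.
CompressionOutputStream outputStream =
        codec.createOutputStream(fileSystem.create(new Path("/user/zhang/test/README.txt.gz")));
IOUtils.copyBytes(inputStream, outputStream, 4096, true);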
7. Decompressor: decompression
public class DecodeDecompressor {
public static final String HDFS_URI = "hdfs://localhost:9000";
public static final String PATH = "/user/zhang/test/README.txt.gz";
public static void main(String[] args) throws IOException {
Configuration configuration = new Configuration();
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
Path path = new Path(PATH);
FSDataInputStream inputStream = fileSystem.open(path);
        CompressionCodecFactory factory = new CompressionCodecFactory(configuration);
        // getCodec() infers the codec from the file name extension (.gz -> GzipCodec)
        CompressionCodec codec = factory.getCodec(path);
if (codec != null) {
CompressionInputStream compressionInputStream = codec.createInputStream(inputStream);
IOUtils.copyBytes(compressionInputStream, new FileOutputStream("readme.txt"), 4096, true);
}
}
}
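Rather than hard-coding the output name (readme.txt above), CompressionCodecFactory.removeSuffix can strip the codec's default extension from the input path, which is handy when the input name is not known in advance. A minimal sketch:
CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
Path path = new Path("/user/zhang/test/README.txt.gz");
CompressionCodec codec = factory.getCodec(path);
// getDefaultExtension() is ".gz" for GzipCodec, so the result is .../README.txt
String outputName = CompressionCodecFactory.removeSuffix(path.toString(), codec.getDefaultExtension());
System.out.println(outputName);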
8. CodecPool: a pool of reusable compressors and decompressors
public class CodecPoolTest {
public static final String HDFS_URI = "hdfs://localhost:9000";
public static final String DE_PATH = "/user/zhang/test/README.txt.gz";
public static final String EN_PATH = "/user/zhang/test/README.txt";
public static void main(String[] args) throws IOException {
en();
de();
}
public static void en() throws IOException {
Configuration configuration = new Configuration();
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
Path path = new Path(EN_PATH);
FSDataInputStream inputStream = fileSystem.open(path);
        // Create the codec through ReflectionUtils so the Configuration is injected.
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, configuration);
        // Borrow a Compressor from the pool instead of allocating a new one.
        Compressor compressor = CodecPool.getCompressor(codec);
        try {
            CompressionOutputStream outputStream = codec.createOutputStream(new FileOutputStream("en_readme.txt.gz"), compressor);
            IOUtils.copyBytes(inputStream, outputStream, 4096, true);
        } finally {
            // Return the compressor to the pool so it can be reused.
            CodecPool.returnCompressor(compressor);
        }
}
public static void de() throws IOException {
Configuration configuration = new Configuration();
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
Path path = new Path(DE_PATH);
FSDataInputStream inputStream = fileSystem.open(path);
CompressionCodecFactory factory = new CompressionCodecFactory(configuration);
CompressionCodec codec = factory.getCodec(path);
        // Borrow a Decompressor from the pool and return it once the copy is done.
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        try {
            CompressionInputStream codecInputStream = codec.createInputStream(inputStream, decompressor);
            IOUtils.copyBytes(codecInputStream, new FileOutputStream("de_readme.txt"), 4096, true);
        } finally {
            CodecPool.returnDecompressor(decompressor);
        }
}
}
9. SequenceFile: writing and reading data
public class SequenceFileDemo {
public static String[] strings = new String[]{"hello", "java", "python", "hadoop", "scala"};
public static final String HDFS_URI = "hdfs://localhost:9000";
public static final String PATH = "/user/zhang/test/sequenceFile.txt";
    public static void main(String[] args) throws IOException {
        writeSeqenceFile();
        readSeqenceFile();
    }
private static void readSeqenceFile() throws IOException {
Configuration configuration = new Configuration();
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, new Path(PATH), configuration);
Class<?> keyClass = reader.getKeyClass();
Class<?> valueClass = reader.getValueClass();
Writable key = (Writable) ReflectionUtils.newInstance(keyClass, configuration);
Writable value = (Writable) ReflectionUtils.newInstance(valueClass, configuration);
        long position = reader.getPosition();   // byte offset of the first record
        while (reader.next(key, value)) {
            // syncSeen() is true when the last call to next() crossed a sync marker
            String syncSeen = reader.syncSeen() ? "*" : "";
            System.out.println(position + " syncSeen: " + syncSeen + " key: " + key + " value: " + value);
            position = reader.getPosition();     // start of the next record
}
IOUtils.closeStream(reader);
}
private static void writeSeqenceFile() throws IOException {
Configuration configuration = new Configuration();
FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
IntWritable key = new IntWritable();
Text value = new Text();
SequenceFile.Writer writer = SequenceFile.createWriter(fileSystem, configuration, new Path(PATH), key.getClass(), value.getClass());
for (int i = 0; i < 100; i++) {
key.set(100 - i);
value.set(strings[i % strings.length]);
            // getLength() is the current file position, i.e. where this record will start
            System.out.println(writer.getLength() + " key: " + key + " value: " + value);
writer.append(key, value);
}
IOUtils.closeStream(writer);
}
}
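The createWriter and Reader forms used above take a FileSystem directly; newer Hadoop releases mark them deprecated in favour of Option-based overloads. A minimal sketch of the same write with the Option API, assuming Hadoop 2.x:
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS", "hdfs://localhost:9000");
// The target file, key class and value class are passed as writer options.
SequenceFile.Writer writer = SequenceFile.createWriter(configuration,
        SequenceFile.Writer.file(new Path("/user/zhang/test/sequenceFile.txt")),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class));
writer.append(new IntWritable(1), new Text("hello"));
writer.close();
Reading works the same way with new SequenceFile.Reader(configuration, SequenceFile.Reader.file(path)).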