HDFS的IO操作(Java代码)

HDFS的IO操作

1. 通过URL读取HDFS中的数据

public class URLCat {
    private static final String HDFS_PATH = "hdfs://localhost:9000/user/zhang/test/README.txt";

    static {
        // Register the HDFS handler so java.net.URL understands hdfs:// URLs.
        // NOTE: setURLStreamHandlerFactory may only be called once per JVM.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * Reads a file from HDFS through a plain java.net.URL and copies it to stdout.
     * try-with-resources guarantees the stream is closed even if copyBytes throws.
     */
    public static void main(String[] args) {
        try (InputStream inputStream = new URL(HDFS_PATH).openStream()) {
            // 'false' = do not close System.out after copying
            IOUtils.copyBytes(inputStream, System.out, 4096, false);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

2. 通过FileSystem的方式获取文件 —— FSDataInputStream

class FileSystemCat {
    private static final String HDFS_URI = "hdfs://localhost:9000";
    private static final String PATH = "/user/zhang/test/README.txt";

    /**
     * Opens an HDFS file via the FileSystem API (FSDataInputStream) and copies
     * it to stdout.
     *
     * Bug fixed: the original leaked the stream when copyBytes threw, because
     * closeStream was not inside a finally block.
     */
    public static void main(String[] args) throws IOException {
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), new Configuration());
        FSDataInputStream inputStream = null;
        try {
            inputStream = fileSystem.open(new Path(PATH));
            // 'false' = leave System.out open after copying
            IOUtils.copyBytes(inputStream, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(inputStream);
        }
    }
}

3. FSDataInputStream的seek(position)使用

public class FileSystemDoubleCat {
    public static final String HDFS_URI = "hdfs://localhost:9000";
    public static final String PATH = "/user/zhang/test/README.txt";

    /**
     * Prints an HDFS file twice: FSDataInputStream supports random access, so
     * seek(0) rewinds to the start for the second pass.
     *
     * Bug fixed: the original leaked the stream when copyBytes or seek threw;
     * the close now lives in a finally block.
     */
    public static void main(String[] args) throws IOException {
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), new Configuration());
        FSDataInputStream inputStream = null;
        try {
            inputStream = fileSystem.open(new Path(PATH));
            IOUtils.copyBytes(inputStream, System.out, 4096, false);
            // Rewind to the beginning of the file and print it again
            inputStream.seek(0);
            IOUtils.copyBytes(inputStream, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(inputStream);
        }
    }
}

4. 将HDFS中的文件复制到另一个位置 —— FSDataOutputStream 与 Progressable

public class FileCopyWirteProgress {
    public static final String HDFS_URI = "hdfs://localhost:9000";
    public static final String SRC_PATH = "/user/zhang/test/README.txt";
    public static final String DESC_PATH = "/user/zhang/test/abc.txt";

    public static void main(String[] args) throws IOException {
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), new Configuration());
        FSDataInputStream inputStream = fileSystem.open(new Path(SRC_PATH));
        # Progressable记录文件写入过程,做标记
        FSDataOutputStream outputStream = fileSystem.create(new Path(DESC_PATH), new Progressable() {
            public void progress() {
                System.out.println("正在读取,请稍后...");
            }
        });
        IOUtils.copyBytes(inputStream, outputStream, 4096, true);
    }
}

5. ListStatus 列出文件路径

public class ListStatus {
    public static final String HDFS_URI = "hdfs://localhost:9000";

    /**
     * Lists entries under the user's HDFS home directory whose path contains
     * "test", printing every candidate path and then the accepted results.
     */
    public static void main(String[] args) throws URISyntaxException, IOException {
        FileSystem fs = FileSystem.get(URI.create(HDFS_URI), new Configuration());
        Path home = fs.getHomeDirectory();
        System.out.println("home path: " + home.getName());
        // The PathFilter decides which children of the home directory are kept
        PathFilter testFilter = new PathFilter() {
            public boolean accept(Path candidate) {
                System.out.println("raw path: " + candidate);
                boolean matches = candidate.toString().contains("test");
                if (matches) {
                    System.out.println("-----------------------");
                }
                return matches;
            }
        };
        FileStatus[] statuses = fs.listStatus(home, testFilter);
        // Convert the FileStatus entries back into plain Paths for printing
        for (Path accepted : FileUtil.stat2Paths(statuses)) {
            System.out.println("path: " + accepted);
        }
    }
}

6. Compressor 压缩

public class EncodeCompressor {
    public static final String HDFS_URI = "hdfs://localhost:9000";
    public static final String PATH = "/user/zhang/test/README.txt";

    /**
     * Reads a file from HDFS and writes it gzip-compressed to the local disk.
     *
     * Bug fixed: GzipCodec is Configurable; constructing it with 'new' leaves
     * its Configuration null, which can trigger a NullPointerException inside
     * createOutputStream. ReflectionUtils.newInstance injects the Configuration.
     */
    public static void main(String[] args) throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
        FSDataInputStream inputStream = fileSystem.open(new Path(PATH));
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, configuration);
        CompressionOutputStream outputStream = codec.createOutputStream(new FileOutputStream("README.txt.gz"));
        // 'true' = close both streams when the copy completes
        IOUtils.copyBytes(inputStream, outputStream, 4096, true);
    }
}

7. Decompressor 解压缩

public class DecodeDecompressor {

    public static final String HDFS_URI = "hdfs://localhost:9000";
    public static final String PATH = "/user/zhang/test/README.txt.gz";

    /**
     * Infers the compression codec from the file name suffix and decompresses
     * the HDFS file to a local "readme.txt".
     *
     * Bug fixed: when no codec matched, the original silently did nothing and
     * leaked the open input stream; now it reports the problem and closes it.
     */
    public static void main(String[] args) throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
        Path path = new Path(PATH);
        FSDataInputStream inputStream = fileSystem.open(path);
        // The factory picks the codec from the file extension (.gz -> GzipCodec)
        CompressionCodecFactory factory = new CompressionCodecFactory(configuration);
        CompressionCodec codec = factory.getCodec(path);
        if (codec != null) {
            CompressionInputStream compressionInputStream = codec.createInputStream(inputStream);
            // 'true' = close both streams when the copy completes
            IOUtils.copyBytes(compressionInputStream, new FileOutputStream("readme.txt"), 4096, true);
        } else {
            System.err.println("No compression codec found for " + path);
            IOUtils.closeStream(inputStream);
        }
    }
}

8. CodecPool 压缩池

public class CodecPoolTest {
    public static final String HDFS_URI = "hdfs://localhost:9000";
    public static final String DE_PATH = "/user/zhang/test/README.txt.gz";
    public static final String EN_PATH = "/user/zhang/test/README.txt";

    public static void main(String[] args) throws IOException {
        en();
        de();
    }

    /**
     * Compresses EN_PATH to a local gzip file using a pooled Compressor.
     *
     * Bugs fixed: the Compressor borrowed from CodecPool was never returned
     * (defeating the point of pooling); GzipCodec was constructed without a
     * Configuration (possible NPE); the unused CompressionCodecFactory was
     * removed.
     */
    public static void en() throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
        Path path = new Path(EN_PATH);
        FSDataInputStream inputStream = fileSystem.open(path);
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, configuration);
        Compressor compressor = CodecPool.getCompressor(codec);
        try {
            CompressionOutputStream outputStream = codec.createOutputStream(new FileOutputStream("en_readme.txt.gz"), compressor);
            // 'true' = close both streams when the copy completes
            IOUtils.copyBytes(inputStream, outputStream, 4096, true);
        } finally {
            // Always hand the compressor back so the pool can reuse it
            CodecPool.returnCompressor(compressor);
        }
    }

    /**
     * Decompresses DE_PATH to a local file using a pooled Decompressor.
     *
     * Bug fixed: the Decompressor borrowed from CodecPool was never returned.
     */
    public static void de() throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
        Path path = new Path(DE_PATH);
        FSDataInputStream inputStream = fileSystem.open(path);
        // Pick the codec from the file extension (.gz -> GzipCodec)
        CompressionCodecFactory factory = new CompressionCodecFactory(configuration);
        CompressionCodec codec = factory.getCodec(path);
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        try {
            CompressionInputStream codecInputStream = codec.createInputStream(inputStream, decompressor);
            // 'true' = close both streams when the copy completes
            IOUtils.copyBytes(codecInputStream, new FileOutputStream("de_readme.txt"), 4096, true);
        } finally {
            // Always hand the decompressor back so the pool can reuse it
            CodecPool.returnDecompressor(decompressor);
        }
    }
}

9. SequenceFile 写入和读取数据

public class SequenceFileDemo {
    // Sample values cycled through as SequenceFile record values
    public static String[] strings = new String[]{"hello", "java", "python", "hadoop", "scala"};
    public static final String HDFS_URI = "hdfs://localhost:9000";
    public static final String PATH = "/user/zhang/test/sequenceFile.txt";

    public static void main(String[] args) throws IOException {
        writeSequenceFile();
//        readSequenceFile();
    }

    /**
     * Reads back the SequenceFile, printing each record's byte position, sync
     * marker flag, key and value.
     *
     * Fixes: method name typo (Seqence -> Sequence); the reader is now closed
     * in a finally block so it is released even if iteration fails.
     */
    private static void readSequenceFile() throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
        SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, new Path(PATH), configuration);
        try {
            // Instantiate key/value holders from the types stored in the file header
            Class<?> keyClass = reader.getKeyClass();
            Class<?> valueClass = reader.getValueClass();
            Writable key = (Writable) ReflectionUtils.newInstance(keyClass, configuration);
            Writable value = (Writable) ReflectionUtils.newInstance(valueClass, configuration);
            long position = 0;
            while (reader.next(key, value)) {
                // '*' marks records that immediately follow a sync point
                String s = reader.syncSeen() ? "*" : "";
                System.out.println(position + " syncSeen: " + s + "  key: " + key + "   value: " + value);
                position = reader.getPosition();
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }

    /**
     * Writes 100 (IntWritable, Text) records to PATH, logging the current file
     * length before each append.
     *
     * Fixes: method name typo (Seqence -> Sequence); the writer is now closed
     * in a finally block so buffered data is flushed even if an append fails.
     */
    private static void writeSequenceFile() throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(HDFS_URI), configuration);
        IntWritable key = new IntWritable();
        Text value = new Text();
        SequenceFile.Writer writer = SequenceFile.createWriter(fileSystem, configuration, new Path(PATH), key.getClass(), value.getClass());
        try {
            for (int i = 0; i < 100; i++) {
                key.set(100 - i);
                value.set(strings[i % strings.length]);
                System.out.println(writer.getLength() + "     key: " + key + "    value: " + value);
                writer.append(key, value);
            }
        } finally {
            IOUtils.closeStream(writer);
        }
    }
}

猜你喜欢

转载自blog.csdn.net/qq_33689414/article/details/79651378