Hadoop: The Definitive Guide Study Notes (Part 2)

These are my notes from studying Chapter 4 of Hadoop: The Definitive Guide, mainly working through the example programs in the chapter; some implementations have been modified.

1 Compression

1.1 Compressing standard input to standard output

Create a new class StreamCompressor:

package com.tuan.hadoopLearn.io;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.IOException;

public class StreamCompressor {
    public static void main(String[] args) {
        String codecClassName = args[0];
        Class<?> codecClass = null;
        try {
            codecClass = Class.forName(codecClassName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration());
        CompressionOutputStream out = null;
        try {
            out = codec.createOutputStream(System.out);
            // Copy stdin into the compressed stream without closing it, then finish()
            // so the codec writes its trailer to standard output
            IOUtils.copyBytes(System.in, out, 4096, false);
            out.finish();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

The cmd command below redirects the compressed standard output into a .gz file. After running it, a test.gz appears in the directory; decompressing it yields a text file.

echo Test | hadoop jar hadoopLearn-0.0.1-SNAPSHOT.jar com.tuan.hadoopLearn.io.StreamCompressor org.apache.hadoop.io.compress.GzipCodec > test.gz

1.2 Decompressing a compressed file

Create a new class FileDecompressor:

package com.tuan.hadoopLearn.io;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

public class FileDecompressor {
    public static void main(String[] args) {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create(uri), conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
        Path path = new Path(uri);
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(path);
        if (codec == null) {
            System.out.println("No suitable codec");
            System.exit(1);
        }
        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
        InputStream in = null;
        OutputStream out = null;
        try {
            in = codec.createInputStream(fs.open(path));
            out = fs.create(new Path(outputUri));
            IOUtils.copyBytes(in, out, conf);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
        }

    }
}

Then upload a gzip-compressed file to the Hadoop cluster and run the cmd command:

hadoop jar hadoopLearn-0.0.1-SNAPSHOT.jar com.tuan.hadoopLearn.io.FileDecompressor hdfs:/test/test.gz

And then it failed spectacularly. I had used Xshell to transfer a .gz file from my desktop to my server and then uploaded it to the Hadoop cluster. After hitting this error, I tried to decompress the copy on the server and got "not in gzip format". Some searching turned up the cause: the rz command transfers in ASCII mode by default, which corrupts some compressed files; switching to binary transfer fixes it, using rz -be (https://www.jianshu.com/p/489dfea6d652).

I tried that, but the transfer simply failed and nothing would upload. After being confused for a while I realized the old test.gz was still sitting there. Deleting the previous test.gz, re-uploading with rz -be, copying it to the Hadoop cluster, and re-running the command finally decompressed it successfully.
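
Before involving Hadoop at all, a quick way to check whether a file was corrupted in transit is to look at its first two bytes: a valid gzip file always starts with the magic bytes 0x1f 0x8b. A minimal sketch (the class and the default file name are just examples, not part of the original project):

package com.tuan.hadoopLearn.io;

import java.io.FileInputStream;
import java.io.IOException;

public class GzipMagicCheck {
    public static void main(String[] args) throws IOException {
        String file = args.length > 0 ? args[0] : "test.gz";  // example default
        try (FileInputStream in = new FileInputStream(file)) {
            int b1 = in.read();
            int b2 = in.read();
            // Every gzip stream begins with the two magic bytes 0x1f 0x8b
            if (b1 == 0x1f && b2 == 0x8b) {
                System.out.println(file + " looks like a gzip file");
            } else {
                System.out.println(file + " is not in gzip format (first bytes: "
                        + Integer.toHexString(b1) + " " + Integer.toHexString(b2) + ")");
            }
        }
    }
}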

1.3 Using a compressor pool

Create a new class PooledStreamCompressor:

package com.tuan.hadoopLearn.io;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.IOException;

public class PooledStreamCompressor {
    public static void main(String[] args) {
        String codecClassName = args[0];
        Class<?> codecClass = null;
        try {
            codecClass = Class.forName(codecClassName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        Configuration conf = new Configuration();
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        Compressor compressor = CodecPool.getCompressor(codec, conf);
        try {
            CompressionOutputStream out = codec.createOutputStream(System.out, compressor);
            IOUtils.copyBytes(System.in, out, 4096, false);
            out.finish();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Return the compressor to the pool so it can be reused
            CodecPool.returnCompressor(compressor);
        }
    }
}

The cmd command:

echo hello | hadoop jar hadoopLearn-0.0.1-SNAPSHOT.jar com.tuan.hadoopLearn.io.PooledStreamCompressor  org.apache.hadoop.io.compress.GzipCodec > hello.gz

1.4 Using compression in MapReduce

Use the StreamCompressor written above to compress input.txt, then upload it to the cluster:

type input.txt | hadoop jar hadoopLearn-0.0.1-SNAPSHOT.jar com.tuan.hadoopLearn.io.StreamCompressor org.apache.hadoop.io.compress.GzipCodec > input.gz

Create a new class MaxTemperatureWithCompression:

package com.tuan.hadoopLearn.mapreduce;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MaxTemperatureWithCompression {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length != 2) {
            System.err.println("Usage: MaxTemperature <input path> <output path");
            System.exit(1);
        }

        Job job = new Job();
        job.setJarByClass(MaxTemperatureWithCompression.class);
        job.setJobName("Max Temperature");

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
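
The same output compression can also be switched on through configuration properties instead of the FileOutputFormat helper methods. A sketch of that variant (assuming the property names used by Hadoop 2/3 releases; this class is not part of the original project):

package com.tuan.hadoopLearn.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;

public class CompressionByProperties {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Equivalent to FileOutputFormat.setCompressOutput(job, true)
        conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
        // Equivalent to FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class)
        conf.setClass("mapreduce.output.fileoutputformat.compress.codec",
                GzipCodec.class, CompressionCodec.class);

        Job job = Job.getInstance(conf, "Max Temperature");
        // ... the rest of the job setup is identical to MaxTemperatureWithCompression
    }
}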

The cmd command:

hadoop jar hadoopLearn-0.0.1-SNAPSHOT.jar com.tuan.hadoopLearn.mapreduce.MaxTemperatureWithCompression /mapreduce/input.gz /mapreduce/output.gz

The output directory ends up containing a _SUCCESS marker plus the gzip-compressed reducer output, part-r-00000.gz.

2 Serialization

2.1 Writable

I threw together a quick test class:

package com.tuan.hadoopLearn.io;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;
import org.junit.jupiter.api.Test;

import java.io.*;

import static org.junit.jupiter.api.Assertions.assertEquals;


class WritableTest{
    private static IntWritable writable = new IntWritable(163);

    private static byte[] serialize (Writable writable) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(out);
        writable.write(dataOut);
        dataOut.close();
        return out.toByteArray();
    }

    // Deserialization reads the bytes back into the shared IntWritable instance above
    private static Writable deserialize(byte[] bytes) throws IOException {
        ByteArrayInputStream in = new ByteArrayInputStream(bytes);
        DataInputStream dataIn = new DataInputStream(in);
        writable.readFields(dataIn);
        dataIn.close();
        return writable;
    }
    
    @Test
    void serializeTest() throws IOException {
        byte[] bytes = serialize(writable);
        assertEquals(4, bytes.length);
        assertEquals("000000a3", StringUtils.byteToHexString(bytes));

        IntWritable deserializedWritable = (IntWritable) deserialize(bytes);
        assertEquals(163, deserializedWritable.get());
    }
}
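
The book also compares Writables both as objects and directly on their serialized byte representations via WritableComparator. A minimal sketch along those lines (this extra demo class is my addition, reusing the same serialize helper as above):

package com.tuan.hadoopLearn.io;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparator;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class ComparatorDemo {
    // Same serialization helper as in WritableTest
    private static byte[] serialize(Writable writable) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(out);
        writable.write(dataOut);
        dataOut.close();
        return out.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        RawComparator<IntWritable> comparator = WritableComparator.get(IntWritable.class);

        IntWritable w1 = new IntWritable(163);
        IntWritable w2 = new IntWritable(67);

        // Compare as objects
        System.out.println(comparator.compare(w1, w2));  // positive, since 163 > 67

        // Compare the serialized bytes directly, without deserializing
        byte[] b1 = serialize(w1);
        byte[] b2 = serialize(w2);
        System.out.println(comparator.compare(b1, 0, b1.length, b2, 0, b2.length));  // also positive
    }
}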

2.2 Iterating over Text

Create a new class TextIterator:

package com.tuan.hadoopLearn.io;

import org.apache.hadoop.io.Text;

import java.nio.ByteBuffer;

public class TextIterator {
    public static void main(String[] args) {
        Text t = new Text("\u0041\u00DF\u6771\uD801\uDC00");
        ByteBuffer buf = ByteBuffer.wrap(t.getBytes(), 0, t.getLength());
        int cp;
        while(buf.hasRemaining() && (cp = Text.bytesToCodePoint(buf)) != -1) {
            System.out.println(Integer.toHexString(cp));
        }
    }
}

Running it prints each code point in hex: 41, df, 6771, and 10400 (the surrogate pair at the end decodes to the single code point U+10400).

2.3 Implementing a custom Writable

Not much to add here; I wrote a main method to verify the custom Comparator.

package com.tuan.hadoopLearn.io;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableUtils;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class TextPair implements WritableComparable<TextPair> {
    private Text first;
    private Text second;

    public TextPair() {
        set(new Text(), new Text());
    }

    public TextPair(String first, String second) {
        set(new Text(first), new Text(second));
    }

    public TextPair(Text first, Text second) {
        set(first, second);
    }

    public void set(Text first, Text second) {
        this.first = first;
        this.second = second;
    }

    public Text getFirst() {
        return first;
    }

    public Text getSecond() {
        return second;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        first.write(out);
        second.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        first.readFields(in);
        second.readFields(in);
    }

    @Override
    public int compareTo(TextPair tp) {
        int cmp = first.compareTo(tp.first);
        if (cmp != 0) {
            return cmp;
        }
        return second.compareTo(tp.second);
    }

    @Override
    public String toString() {
        return first + "\t" + second;
    }

    @Override
    public boolean equals(Object o) {
        if (o instanceof TextPair) {
            TextPair tp = (TextPair) o;
            return first.equals(tp.first) && second.equals(tp.second);
        }
        return false;
    }

    @Override
    public int hashCode() {
        return first.hashCode() * 163 + second.hashCode();
    }

    public static class Comparator extends WritableComparator {
        private static final Text.Comparator TEXT_COMPARATOR = new Text.Comparator();

        Comparator() {
            super(TextPair.class);
        }

        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            try {
                // Each serialized Text begins with a vint-encoded length, so use readVInt
                int firstL1 = WritableUtils.decodeVIntSize(b1[s1]) + readVInt(b1, s1);
                int firstL2 = WritableUtils.decodeVIntSize(b2[s2]) + readVInt(b2, s2);
                int cmp = TEXT_COMPARATOR.compare(b1, s1, firstL1, b2, s2, firstL2);
                if (cmp != 0) {
                    return cmp;
                }
                return TEXT_COMPARATOR.compare(b1, s1 + firstL1, l1 - firstL1, b2, s2 + firstL2, l2 - firstL2);
            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }
    }

    static {
        // Register the raw comparator when TextPair is loaded, so that
        // WritableComparator.get(TextPair.class) returns it (as in the book)
        WritableComparator.define(TextPair.class, new Comparator());
    }

    public static void main(String[] args) {
        TextPair tp1 = new TextPair("Nothing", "True");
        TextPair tp2 = new TextPair("Everything", "Permitted");
        WritableComparator comparator = WritableComparator.get(TextPair.class);
        System.out.println(comparator.compare(tp1, tp2));
    }
}

3 Avro

3.1 Reading and writing Avro data

Under the Resources directory (I am using IntelliJ IDEA), create a file named StringPair.avsc; the program then builds a StringPair record, writes it to an in-memory buffer, reads it back, and prints it to the console.

{
    "type": "record",
    "name": "StringPair",
    "doc": "A pair of strings.",
    "fields": [
        {"name": "left", "type": "string"},
        {"name": "right", "type": "string"}
    ]
}
package com.tuan.hadoopLearn.io;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.*;
import org.apache.avro.util.Utf8;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

public class AvroString {
    public void stringProcess() throws IOException {
        Schema.Parser parser = new Schema.Parser();
        Schema schema = parser.parse(getClass().getResourceAsStream("/StringPair.avsc"));
        GenericRecord datum = new GenericData.Record(schema);
        datum.put("left", new Utf8("Work in the dark"));
        datum.put("right", new Utf8("To serve the light"));

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
        Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        writer.write(datum, encoder);
        encoder.flush();

        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
        Decoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
        GenericRecord newDatum = reader.read(null, decoder);
        System.out.println(newDatum);
    }

    public static void main(String[] args) {
        try {
            new AvroString().stringProcess();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Running it prints the record as JSON, roughly {"left": "Work in the dark", "right": "To serve the light"}.

3.2 Reading and writing Avro files

Similarly, build a datum, write it to a file, read it back, and print it to the console:

package com.tuan.hadoopLearn.io;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.util.Utf8;

import java.io.File;
import java.io.IOException;

public class AvroFile {
    public void fileProcess() throws IOException {
        Schema.Parser parser = new Schema.Parser();
        Schema schema = parser.parse(getClass().getResourceAsStream("/StringPair.avsc"));
        GenericRecord datum = new GenericData.Record(schema);
        datum.put("left", new Utf8("Work in the dark"));
        datum.put("right", new Utf8("To serve the light"));

        DatumWriter<GenericRecord> writer = new GenericDatumWriter<>();
        DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(writer);
        File file = new File("data.avro");
        fileWriter.create(schema, file);
        fileWriter.append(datum);
        fileWriter.close();

        DatumReader<GenericRecord> reader = new GenericDatumReader<>();
        DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(file, reader);
        GenericRecord newDatum = fileReader.next();
        System.out.println(newDatum);
    }
    public static void main(String[] args) {
        try {
            new AvroFile().fileProcess();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Running it prints the record read back from the file, the same JSON as in 3.1.

The generated data.avro file appears in the project's working directory.

3.3 Max temperature with Avro input and output

Here Avro is used as the input and output format of a MapReduce job. I hit a lot of pitfalls and spent two days getting it to run; the full story is in another post of mine: https://mp.csdn.net/postedit/81184615

First create a TemperaturePair.avsc file. The file extension must be exactly .avsc; my first attempt kept failing to compile because the extension was wrong.

{
    "type": "record",
    "name": "TemperaturePair",
    "doc": "A weather reading.",
    "fields": [
        {"name": "year", "type": "int"},
        {"name": "temperature", "type": "int"}
    ]
}

Add the Avro plugin to pom.xml, where sourceDirectory points to the directory holding the .avsc files and outputDirectory points to where the generated classes should go:

<plugin>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro-maven-plugin</artifactId>
    <version>1.7.7</version>
    <executions>
        <execution>
            <phase>generate-sources</phase>
            <goals>
                <goal>schema</goal>
            </goals>
            <configuration> 
                <sourceDirectory>${project.basedir}/src/main/Resources/</sourceDirectory>
                <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
            </configuration>
        </execution>
    </executions>
</plugin>

The final schema file TemperaturePair.avsc, this time with a namespace so that the generated class lands in the right package:

{
    "namespace": "com.tuan.hadoopLearn.avro",
    "type": "record",
    "name": "TemperaturePair",
    "doc": "A weather reading.",
    "fields": [
        {"name": "year", "type": "int"},
        {"name": "temperature", "type": "int"}
    ]
}

Once this is configured, run a Maven compile; the TemperaturePair class is generated under outputDirectory.
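
For orientation, the generated com.tuan.hadoopLearn.avro.TemperaturePair is an Avro specific record, and the code below relies only on a small part of its surface: a TemperaturePair(Integer year, Integer temperature) constructor, getYear() and getTemperature() accessors, and the static getClassSchema() method that is later passed to AvroJob.setInputKeySchema.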

Create a class CreateAvroInput to generate input.avro, the input file for the MapReduce job. You could also write JSON first and convert it with avro-tools, but that felt like extra work; for learning purposes, keep it simple. Once generated, upload input.avro to the cluster.

package com.tuan.hadoopLearn.io;

import com.tuan.hadoopLearn.avro.TemperaturePair;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;

import java.io.File;
import java.io.IOException;

public class CreateAvroInput {
    public void create() throws IOException {
        DatumWriter<TemperaturePair> datumWriter = new SpecificDatumWriter<>(TemperaturePair.class);
        DataFileWriter<TemperaturePair> dataFileWriter = new DataFileWriter<>(datumWriter);
        Schema.Parser parser = new Schema.Parser();
        Schema schema = parser.parse(getClass().getResourceAsStream("/TemperaturePair.avsc"));
        dataFileWriter.create(schema, new File("input.avro"));
        dataFileWriter.append(new TemperaturePair(1993, 87));
        dataFileWriter.append(new TemperaturePair(1993, 25));
        dataFileWriter.append(new TemperaturePair(1992, 37));
        dataFileWriter.append(new TemperaturePair(1995, 74));
        dataFileWriter.append(new TemperaturePair(1992, 38));
        dataFileWriter.append(new TemperaturePair(1993, 103));
        dataFileWriter.close();
    }
    public static void main(String[] args) throws IOException {
        new CreateAvroInput().create();
    }
}

Then the main MapReduce program:

package com.tuan.hadoopLearn.mapreduce;

import java.io.IOException;

import com.tuan.hadoopLearn.avro.TemperaturePair;
import com.tuan.hadoopLearn.utils.JarUtils;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class AvroMaxTemperature extends Configured implements Tool {

    public static class AvroMaxTemperatureMapper extends
            Mapper<AvroKey<TemperaturePair>, NullWritable, IntWritable, IntWritable> {

        @Override
        public void map(AvroKey<TemperaturePair> key, NullWritable value, Context context)
                throws IOException, InterruptedException {
            Integer year = key.datum().getYear();
            Integer temperature = key.datum().getTemperature();
            context.write(new IntWritable(year), new IntWritable(temperature));
        }
    }

    public static class AvroMaxTemperatureReducer extends
            Reducer<IntWritable, IntWritable, AvroKey<Integer>, AvroValue<Integer>> {

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,
                           Context context) throws IOException, InterruptedException {

            Integer max = 0;  // fine here because all sample temperatures are non-negative
            for (IntWritable value : values) {
                max = Math.max(max, value.get());
            }
            context.write(new AvroKey<Integer>(key.get()), new AvroValue<Integer>(max));
        }
    }

    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: MapReduceMaxTemperature <input path> <output path>");
            return -1;
        }

        Configuration conf = getConf();
        // JarUtils is a small helper (from the companion post) that adds the avro-mapred jar to the job's classpath
        JarUtils.addTmpJar("C:/Software/Hadoop-3.0.3/lib/avro/avro-mapred-1.7.7-hadoop2.jar", conf);
        Job job = new Job(conf);
        job.setJarByClass(AvroMaxTemperature.class);
        job.setJobName("Avro Max Temperature");

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setInputFormatClass(AvroKeyInputFormat.class);
        job.setMapperClass(AvroMaxTemperatureMapper.class);
        AvroJob.setInputKeySchema(job, TemperaturePair.getClassSchema());
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
        job.setReducerClass(AvroMaxTemperatureReducer.class);
        AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.INT));
        AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

        return (job.waitForCompletion(true) ? 0 : 1);
    }

     public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new AvroMaxTemperature(), args);
        System.exit(res);
    }
}

The cmd command:

hadoop jar hadoopLearn-0.0.1-SNAPSHOT.jar com.tuan.hadoopLearn.mapreduce.AvroMaxTemperature /mapreduce/input.avro /mapreduce/avroOutput

Pull the output down from HDFS and open it with avro-tools:

java -jar avro-tools-1.7.7.jar tojson avroOutput/part-r-00000.avro
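
Given the sample records written by CreateAvroInput, the JSON dump should show one record per year with its maximum temperature, along the lines of {"key": 1992, "value": 38}, {"key": 1993, "value": 103} and {"key": 1995, "value": 74} (AvroKeyValueOutputFormat wraps each pair in a generic key/value record).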

4 Reading and writing SequenceFiles

Create a class SequenceFileDemo that writes a SequenceFile and then reads it back, printing to the console:

package com.tuan.hadoopLearn.io;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;

import java.io.IOException;
import java.net.URI;

public class SequenceFileDemo {
    private static final String[] DATA = {
        "Jingle bells, jingle bells",
        "Jingle all the way",
        "Oh! what fun it is to ride",
        "In a one-horse open sleigh"
    };

    public void write(String uri) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);

        IntWritable key = new IntWritable();
        Text value = new Text();
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < 10; i ++) {
            key.set(10 - i);
            value.set(DATA[i % 4]);
            writer.append(key, value);
        }
        IOUtils.closeStream(writer);
    }

    public void read(String uri) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            System.out.println(key + " : " + value);
        }
    }

    public static void main(String[] args) throws IOException {
        String uri = args[0];
        SequenceFileDemo demo = new SequenceFileDemo();
        demo.write(uri);
        demo.read(uri);
    }
}
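
If you also want to see where each record starts in the file and which reads crossed a sync point, the read loop can be extended slightly. A sketch of an alternative read method, my addition rather than part of the original class (drop it into SequenceFileDemo; getPosition() and syncSeen() are existing SequenceFile.Reader methods):

    public void readWithPositions(String uri) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            IntWritable key = new IntWritable();
            Text value = new Text();
            long position = reader.getPosition();
            while (reader.next(key, value)) {
                // '*' marks a record that immediately follows a sync marker
                String syncSeen = reader.syncSeen() ? "*" : "";
                System.out.printf("[%s%s]\t%s\t%s%n", position, syncSeen, key, value);
                position = reader.getPosition();  // start of the next record
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }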

The cmd command:

hadoop jar hadoopLearn-0.0.1-SNAPSHOT.jar com.tuan.hadoopLearn.io.SequenceFileDemo hdfs:/SequenceFile/sequence


Reposted from blog.csdn.net/jiangxuege/article/details/81094255