Avro 数据序列化

Apache Avro 是一个独立于编程语言的数据序列化系统。旨在解决Hadoop中Writable类型的不足:缺乏语言的可移植性。Avro 模式通常用json来写,数据通常采用二进制格式编码。

Avro的使用

Avro 的使用可以分为两种:编译Schema和非编译Schema

编译Schema
  • 定义schema:emp.avsc
//json格式的文件
{
    "namespace": "tutorialspoint.com",
    "type": "record",
    "name": "emp",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "id", "type": "int"},
        {"name": "salary", "type": "int"},
        {"name": "age", "type": "int"},
        {"name": "address", "type": "string"}
    ]
}
  • 编译Schema,生成Java类,并将源码导入idea
// . 表示当前目录
cmd> java -jar avro-tools-1.8.2.jar compile schema emp.avsc .

//在idea中导入依赖
<dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro</artifactId>
    <version>1.8.2</version>
</dependency>

image

image

编译后的源码如下:

package Tutorialspoint;

import org.apache.avro.specific.SpecificData;
import org.apache.avro.message.BinaryMessageEncoder;
import org.apache.avro.message.BinaryMessageDecoder;
import org.apache.avro.message.SchemaStore;

@SuppressWarnings("all")
@org.apache.avro.specific.AvroGenerated
public class Employee extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
  private static final long serialVersionUID = -8873171083721622992L;
  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Employee\",\"namespace\":\"Tutorialspoint\",\"fields\":[{\"name\":\"Name\",\"type\":\"string\"},{\"name\":\"age\",\"type\":\"int\"}]}");
  public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }

  private static SpecificData MODEL$ = new SpecificData();

  private static final BinaryMessageEncoder<Employee> ENCODER =
      new BinaryMessageEncoder<Employee>(MODEL$, SCHEMA$);

  private static final BinaryMessageDecoder<Employee> DECODER =
      new BinaryMessageDecoder<Employee>(MODEL$, SCHEMA$);

  /**
   * Return the BinaryMessageDecoder instance used by this class.
   */
  public static BinaryMessageDecoder<Employee> getDecoder() {
    return DECODER;
  }

  /**
   * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}.
   * @param resolver a {@link SchemaStore} used to find schemas by fingerprint
   */
  public static BinaryMessageDecoder<Employee> createDecoder(SchemaStore resolver) {
    return new BinaryMessageDecoder<Employee>(MODEL$, SCHEMA$, resolver);
  }

  /** Serializes this Employee to a ByteBuffer. */
  public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException {
    return ENCODER.encode(this);
  }

  /** Deserializes a Employee from a ByteBuffer. */
  public static Employee fromByteBuffer(
      java.nio.ByteBuffer b) throws java.io.IOException {
    return DECODER.decode(b);
  }

  @Deprecated public CharSequence Name;
  @Deprecated public int age;

  /**
   * Default constructor.  Note that this does not initialize fields
   * to their default values from the schema.  If that is desired then
   * one should use <code>newBuilder()</code>.
   */
  public Employee() {}

  /**
   * All-args constructor.
   * @param Name The new value for Name
   * @param age The new value for age
   */
  public Employee(CharSequence Name, Integer age) {
    this.Name = Name;
    this.age = age;
  }

  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
  // Used by DatumWriter.  Applications should not call.
  public Object get(int field$) {
    switch (field$) {
    case 0: return Name;
    case 1: return age;
    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
    }
  }

  // Used by DatumReader.  Applications should not call.
  @SuppressWarnings(value="unchecked")
  public void put(int field$, Object value$) {
    switch (field$) {
    case 0: Name = (CharSequence)value$; break;
    case 1: age = (Integer)value$; break;
    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
    }
  }

  /**
   * Gets the value of the 'Name' field.
   * @return The value of the 'Name' field.
   */
  public CharSequence getName() {
    return Name;
  }

  /**
   * Sets the value of the 'Name' field.
   * @param value the value to set.
   */
  public void setName(CharSequence value) {
    this.Name = value;
  }

  /**
   * Gets the value of the 'age' field.
   * @return The value of the 'age' field.
   */
  public Integer getAge() {
    return age;
  }

  /**
   * Sets the value of the 'age' field.
   * @param value the value to set.
   */
  public void setAge(Integer value) {
    this.age = value;
  }

  /**
   * Creates a new Employee RecordBuilder.
   * @return A new Employee RecordBuilder
   */
  public static Builder newBuilder() {
    return new Builder();
  }

  /**
   * Creates a new Employee RecordBuilder by copying an existing Builder.
   * @param other The existing builder to copy.
   * @return A new Employee RecordBuilder
   */
  public static Builder newBuilder(Builder other) {
    return new Builder(other);
  }

  /**
   * Creates a new Employee RecordBuilder by copying an existing Employee instance.
   * @param other The existing instance to copy.
   * @return A new Employee RecordBuilder
   */
  public static Builder newBuilder(Employee other) {
    return new Builder(other);
  }

  /**
   * RecordBuilder for Employee instances.
   */
  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Employee>
    implements org.apache.avro.data.RecordBuilder<Employee> {

    private CharSequence Name;
    private int age;

    /** Creates a new Builder */
    private Builder() {
      super(SCHEMA$);
    }

    /**
     * Creates a Builder by copying an existing Builder.
     * @param other The existing Builder to copy.
     */
    private Builder(Builder other) {
      super(other);
      if (isValidValue(fields()[0], other.Name)) {
        this.Name = data().deepCopy(fields()[0].schema(), other.Name);
        fieldSetFlags()[0] = true;
      }
      if (isValidValue(fields()[1], other.age)) {
        this.age = data().deepCopy(fields()[1].schema(), other.age);
        fieldSetFlags()[1] = true;
      }
    }

    /**
     * Creates a Builder by copying an existing Employee instance
     * @param other The existing instance to copy.
     */
    private Builder(Employee other) {
            super(SCHEMA$);
      if (isValidValue(fields()[0], other.Name)) {
        this.Name = data().deepCopy(fields()[0].schema(), other.Name);
        fieldSetFlags()[0] = true;
      }
      if (isValidValue(fields()[1], other.age)) {
        this.age = data().deepCopy(fields()[1].schema(), other.age);
        fieldSetFlags()[1] = true;
      }
    }

    /**
      * Gets the value of the 'Name' field.
      * @return The value.
      */
    public CharSequence getName() {
      return Name;
    }

    /**
      * Sets the value of the 'Name' field.
      * @param value The value of 'Name'.
      * @return This builder.
      */
    public Builder setName(CharSequence value) {
      validate(fields()[0], value);
      this.Name = value;
      fieldSetFlags()[0] = true;
      return this;
    }

    /**
      * Checks whether the 'Name' field has been set.
      * @return True if the 'Name' field has been set, false otherwise.
      */
    public boolean hasName() {
      return fieldSetFlags()[0];
    }


    /**
      * Clears the value of the 'Name' field.
      * @return This builder.
      */
    public Builder clearName() {
      Name = null;
      fieldSetFlags()[0] = false;
      return this;
    }

    /**
      * Gets the value of the 'age' field.
      * @return The value.
      */
    public Integer getAge() {
      return age;
    }

    /**
      * Sets the value of the 'age' field.
      * @param value The value of 'age'.
      * @return This builder.
      */
    public Builder setAge(int value) {
      validate(fields()[1], value);
      this.age = value;
      fieldSetFlags()[1] = true;
      return this;
    }

    /**
      * Checks whether the 'age' field has been set.
      * @return True if the 'age' field has been set, false otherwise.
      */
    public boolean hasAge() {
      return fieldSetFlags()[1];
    }


    /**
      * Clears the value of the 'age' field.
      * @return This builder.
      */
    public Builder clearAge() {
      fieldSetFlags()[1] = false;
      return this;
    }

    @SuppressWarnings("unchecked")
    public Employee build() {
      try {
        Employee record = new Employee();
        record.Name = fieldSetFlags()[0] ? this.Name : (CharSequence) defaultValue(fields()[0]);
        record.age = fieldSetFlags()[1] ? this.age : (Integer) defaultValue(fields()[1]);
        return record;
      } catch (Exception e) {
        throw new org.apache.avro.AvroRuntimeException(e);
      }
    }
  }

  @SuppressWarnings("unchecked")
  private static final org.apache.avro.io.DatumWriter<Employee>
    WRITER$ = (org.apache.avro.io.DatumWriter<Employee>)MODEL$.createDatumWriter(SCHEMA$);

  @Override public void writeExternal(java.io.ObjectOutput out)
    throws java.io.IOException {
    WRITER$.write(this, SpecificData.getEncoder(out));
  }

  @SuppressWarnings("unchecked")
  private static final org.apache.avro.io.DatumReader<Employee>
    READER$ = (org.apache.avro.io.DatumReader<Employee>)MODEL$.createDatumReader(SCHEMA$);

  @Override public void readExternal(java.io.ObjectInput in)
    throws java.io.IOException {
    READER$.read(this, SpecificData.getDecoder(in));
  }

}
  • 串行化
/**
 * 串行化
 */
@Test
public void write() throws IOException {
    //创建Writer对象
    SpecificDatumWriter empDatumWriter = new SpecificDatumWriter<Employee>(Employee.class);
    //写入文件
    DataFileWriter<Employee> empFileWriter = new DataFileWriter<Employee>(empDatumWriter);
    //创建对象
    Employee e = new Employee();
    e.setAge(12);
    e.setName("Bob");
    //串行化数据到磁盘
    empFileWriter.create(e.getSchema(), new File("F:/hadoop/avro/e.avro"));
    //添加数据
    empFileWriter.append(e);
    empFileWriter.append(e);
    empFileWriter.append(e);

    //关闭流
    empFileWriter.close();
}
  • 反串行化
/**
 * 反串行化
 */
@Test
public void read() throws IOException {
    //创建Reader对象
    SpecificDatumReader empDatumWriter = new SpecificDatumReader(Employee.class);
    //写入文件
    DataFileReader<Employee> dataReader = new DataFileReader<Employee>(new File("F:/hadoop/avro/e.avro"), empDatumWriter);

    Iterator<Employee> it = dataReader.iterator();
    while(it.hasNext()) {
        System.out.println(it.next());
    }
}

结果:
{"Name": "Bob", "age": 12}
{"Name": "Bob", "age": 12}
{"Name": "Bob", "age": 12}
非编译Schema

可以不用编译 emp.avsc 文件,直接使用

  • 串行化
/**
 * 直接使用Schema文件进行读写,不用编译
 */
@Test
public void writeInSchme() throws IOException {
    //指定定义的avsc文件
    Schema schema = new Schema.Parser().parse(new File("F:/hadoop/avro/emp.avsc"));

    //创建
    GenericRecord e = new GenericData.Record(schema);
    //设置Javabean属性
    e.put("Name", "tomas");
    e.put("age", 25);

    DatumWriter<GenericRecord> w1 = new SpecificDatumWriter(schema);
    DataFileWriter<GenericRecord> w2 = new DataFileWriter(w1);
    w2.create(schema, new File("F:/hadoop/avro/e2.avro"));
    w2.append(e);
    w2.append(e);
    w2.append(e);
    w2.close();
}
  • 反串行化
/**
 * 反串行avro数据
 */
@Test
public void readInSchema() throws  Exception {
    //指定定义的avsc文件。
    Schema schema = new Schema.Parser().parse(new File("F:/hadoop/avro/emp.avsc"));

    GenericRecord e1 = new GenericData.Record(schema);
    DatumReader r1 = new SpecificDatumReader (schema);
    DataFileReader r2 = new DataFileReader(new File("F:/hadoop/avro/e2.avro"),r1);
    while(r2.hasNext()){
        GenericRecord rec = (GenericRecord)r2.next();
        System.out.println(rec.get("Name"));
    }
    r2.close();
}

结果:
tomas
tomas
tomas

猜你喜欢

转载自blog.csdn.net/king123456man/article/details/81676219