Hadoop————串行化

1、什么是串行化
串行化简而言之就是将对象转成字节流(二进制格式),以便存放在磁盘上或通过网络传输;这样可以减小存储空间和网络传输量,提高效率。串行化在分布式编程中尤为重要,如果不进行串行化,传输效率将受到限制。
2、google的protobuf

1.下载google protobuf.

protoc-2.5.0-win32.zip

添加pom.xml依赖

<!-- Maven dependencies for the protobuf example that follows. -->
<dependencies>
        <!-- Protobuf Java runtime; the version is the same 2.5.0 as the protoc download named above. -->
        <dependency>
                <groupId>com.google.protobuf</groupId>
                <artifactId>protobuf-java</artifactId>
                <version>2.5.0</version>
        </dependency>
        <!-- JUnit 4, which provides the @Test annotation used by the example test classes. -->
        <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <version>4.11</version>
        </dependency>
</dependencies>

2.设计对象
3.描述对象

// Address-book message definitions (proto2 syntax: required/optional/repeated labels).
package tutorial;
        // Java package and outer wrapper class name used for the generated Java source.
        option java_package = "com.example.tutorial";
        option java_outer_classname = "AddressBookProtos";
        // A single contact. The number after '=' on each field is its tag.
        message Person {
            required string name = 1;
            required int32 id = 2;
            optional string email = 3;
            // Kind of phone line a PhoneNumber refers to.
            enum PhoneType {
                MOBILE = 0;
                HOME = 1;
                WORK = 2;
            }
            // One phone entry; type falls back to HOME when it is not set.
            message PhoneNumber {
                required string number = 1;
                optional PhoneType type = 2 [default = HOME];
            }
            // Zero or more phone numbers for this person.
            repeated PhoneNumber phone = 4;
        }
        // Top-level container: a list of Person entries.
        message AddressBook {
            repeated Person person = 1;
        }

4.编译描述

cmd>protoc --java_out . xxx.proto

5.导入源代码到项目中

6.使用对象

/**
 * Round-trips a {@code Person} message through a file using the classes that protoc
 * generated into {@code AddressBookProtos}.
 *
 * <p>{@link #read()} parses the file produced by {@link #write()}, so {@code write()} has to
 * have run at least once before {@code read()} can succeed.
 */
public class TestProtoBuf {

    /** File that {@link #write()} produces and {@link #read()} consumes. */
    private static final String DATA_FILE = "d:/prototbuf.data";

    /**
     * Builds one {@code Person} with a single HOME phone number and serializes it to
     * {@link #DATA_FILE}.
     *
     * @throws Exception if the file cannot be opened or written
     */
    @Test
    public void write() throws Exception {
        AddressBookProtos.Person.PhoneNumber phone =
                AddressBookProtos.Person.PhoneNumber.newBuilder()
                        .setNumber("+351 999 999 999")
                        .setType(AddressBookProtos.Person.PhoneType.HOME)
                        .build();

        // NOTE(review): "[email protected]" looks like an address that was masked when the
        // article was scraped; it is kept as-is here - substitute a real address if needed.
        AddressBookProtos.Person john = AddressBookProtos.Person.newBuilder()
                .setId(12345)
                .setName("tomas")
                .setEmail("[email protected]")
                .addPhone(phone)
                .build();

        // try-with-resources so the stream is flushed and closed even if writeTo throws.
        try (FileOutputStream out = new FileOutputStream(DATA_FILE)) {
            john.writeTo(out);
        }
    }

    /**
     * Parses the {@code Person} stored in {@link #DATA_FILE} and prints its name.
     *
     * @throws Exception if the file is missing, unreadable, or not a valid Person message
     */
    @Test
    public void read() throws Exception {
        // try-with-resources so the file handle is released once parsing is done.
        try (FileInputStream in = new FileInputStream(DATA_FILE)) {
            AddressBookProtos.Person john = AddressBookProtos.Person.parseFrom(in);
            System.out.println(john.getName());
        }
    }
}

3、avro
1.数据串行化系统
2.自描述语言.
数据结构和数据都存在文件中。跨语言。
使用JSON格式定义schema(数据结构),数据本身以紧凑的二进制格式存储。
3.可压缩 + 可切割。

使用avro
a)定义schema
b)编译schema,生成java类

emp.avsc

{
    "namespace": "Tutorialspoint",
    "type": "record",
    "name": "Employee",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "id", "type": "int"},
        {"name": "salary", "type": "int"},
        {"name": "age", "type": "int"},
        {"name": "address", "type": "string"}
    ]
}

生成java类:编译后会生成Tutorialspoint目录(目录名由schema中的namespace决定),目录中包含Employee.java(类名由schema中的name决定)

cmd>java -jar avro-tools-1.7.7.jar compile schema emp.avsc .

c)使用java类

d)单元测试

package cn.ctgu.avrodemo.test;

import Tutorialspoint.Employee;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.junit.Test;

import java.io.File;
import java.util.Iterator;

/**
 * Avro serialization examples in two flavours:
 * <ul>
 *   <li>{@link #write()} / {@link #read()} use the {@code Employee} class generated from the
 *       schema by avro-tools;</li>
 *   <li>{@link #writeInSchema()} / {@link #readInSchema()} parse the {@code .avsc} file at
 *       runtime and work with {@link GenericRecord}, so no code generation is needed.</li>
 * </ul>
 *
 * <p>Each read test consumes the file produced by the matching write test, so the write
 * test has to have run at least once beforehand.
 */
public class TestAvro {

    /** Container file written and read through the generated {@code Employee} class. */
    private static final String SPECIFIC_DATA_FILE = "J:\\Program\\file\\avro\\el.avro";

    /** Container file written and read through {@link GenericRecord}. */
    private static final String GENERIC_DATA_FILE = "J:\\Program\\file\\avro\\el2.avro";

    /** Schema definition parsed at runtime by the schema-only tests. */
    private static final String SCHEMA_FILE = "F:\\徐培成——spark\\线路一全\\05-avro和protobuf\\emp.avsc";

    /**
     * Serializes four copies of one {@code Employee} to {@link #SPECIFIC_DATA_FILE}.
     *
     * @throws Exception if the file cannot be created or a record cannot be appended
     */
    @Test
    public void write() throws Exception {
        // The schema declares every field without a default, so each one must be set;
        // an unset string field (null) makes append() fail.
        // NOTE(review): id/salary/address values are samples; the setters are assumed to
        // exist on the generated class because emp.avsc declares those fields.
        Employee employee = new Employee();
        employee.setName("tomas");
        employee.setId(1);
        employee.setSalary(3000);
        employee.setAge(12);
        employee.setAddress("beijing");

        DatumWriter<Employee> datumWriter = new SpecificDatumWriter<Employee>(Employee.class);
        // try-with-resources closes the container file even if create/append throws.
        try (DataFileWriter<Employee> fileWriter = new DataFileWriter<Employee>(datumWriter)) {
            fileWriter.create(employee.getSchema(), new File(SPECIFIC_DATA_FILE));
            for (int i = 0; i < 4; i++) {
                fileWriter.append(employee);
            }
        }
    }

    /**
     * Deserializes every {@code Employee} in {@link #SPECIFIC_DATA_FILE} and prints its name.
     *
     * @throws Exception if the file is missing or cannot be decoded
     */
    @Test
    public void read() throws Exception {
        DatumReader<Employee> datumReader = new SpecificDatumReader<Employee>(Employee.class);
        // The reader holds an open file handle, so it is closed via try-with-resources.
        try (DataFileReader<Employee> fileReader =
                new DataFileReader<Employee>(new File(SPECIFIC_DATA_FILE), datumReader)) {
            for (Employee employee : fileReader) {
                System.out.println(employee.getName());
            }
        }
    }

    /**
     * Writes five copies of one record to {@link #GENERIC_DATA_FILE} using only the schema
     * file; no generated classes are required.
     *
     * @throws Exception if the schema cannot be parsed or the data file cannot be written
     */
    @Test
    public void writeInSchema() throws Exception {
        Schema schema = new Schema.Parser().parse(new File(SCHEMA_FILE));

        // GenericRecord plays the role of Employee. Keys are case-sensitive and must match
        // the field names declared in the schema ("name", not "Name"); every field without
        // a default has to be populated before the record can be appended.
        GenericRecord employee = new GenericData.Record(schema);
        employee.put("name", "ramu");
        employee.put("id", 1);
        employee.put("salary", 3000);
        employee.put("age", 25);
        employee.put("address", "beijing");

        // Generic (not Specific) writer: the records here are GenericRecord instances.
        DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
        try (DataFileWriter<GenericRecord> fileWriter =
                new DataFileWriter<GenericRecord>(datumWriter)) {
            fileWriter.create(schema, new File(GENERIC_DATA_FILE));
            for (int i = 0; i < 5; i++) {
                fileWriter.append(employee);
            }
        }
    }

    /**
     * Recommended approach: reads {@link #GENERIC_DATA_FILE} using only the schema file (no
     * generated classes) and prints the {@code name} field of every record.
     *
     * @throws Exception if the schema cannot be parsed or the data file cannot be read
     */
    @Test
    public void readInSchema() throws Exception {
        Schema schema = new Schema.Parser().parse(new File(SCHEMA_FILE));

        // GenericDatumReader always yields GenericRecord. A SpecificDatumReader may instead
        // instantiate a generated class whose full name matches the schema when one is on
        // the classpath, which would make a GenericRecord cast fail.
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
        try (DataFileReader<GenericRecord> fileReader =
                new DataFileReader<GenericRecord>(new File(GENERIC_DATA_FILE), datumReader)) {
            while (fileReader.hasNext()) {
                GenericRecord rec = fileReader.next();
                System.out.println(rec.get("name"));
            }
        }
    }
}

猜你喜欢

转载自blog.csdn.net/Jorocco/article/details/80778538
今日推荐