1 Basic knowledge review
1.1 IO streams
IO streams in a program are blocking; they support reading data from a random position, but not modifying data at a random position.
1 Review: file copying
2 Random reads: supported (via skip)
3 Random writes: not supported
import java.io.FileInputStream;

/**
 * FileName: TestIo
 * Author: 多易教育-DOIT
 * Date: 2020/11/12 0012
 * Description: test random reads with IO streams
 * Homework: copy a file
 */
public class TestIo {
public static void main(String[] args) throws Exception {
// 1 Get an input stream for the file
FileInputStream fis = new FileInputStream("d://word.txt");
// Output streams have no skip-like method, so data cannot be written at a random position
// FileOutputStream fout = new FileOutputStream("");
// 2 Read data
// int read1 = fis.read(); // 'a' -> 97
// int read2 = fis.read(); // 98
// Skip the given number of bytes (1 KB = 1024 bytes)
fis.skip(2L);
int read3 = fis.read();
// 3 Print
// System.out.println(read1);
// System.out.println(read2);
System.out.println(read3); // 101
}
}
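As a counterpoint to the comment above that output streams have no skip-like method: the JDK's RandomAccessFile supports seeking before both reads and writes, so it can read and overwrite bytes at arbitrary positions. A minimal sketch, using a temporary file instead of the course's d://word.txt:

```java
import java.io.File;
import java.io.RandomAccessFile;

public class TestRandomAccess {
    public static void main(String[] args) throws Exception {
        // temporary file standing in for d://word.txt
        File f = File.createTempFile("word", ".txt");
        f.deleteOnExit();
        RandomAccessFile raf = new RandomAccessFile(f, "rw");
        raf.write("abcde".getBytes()); // bytes 97 98 99 100 101
        raf.seek(2);                    // jump to offset 2 before reading
        System.out.println(raf.read()); // 99 ('c')
        raf.seek(2);                    // jump back and overwrite in place
        raf.write('X');
        raf.seek(0);
        byte[] buf = new byte[5];
        raf.readFully(buf);
        System.out.println(new String(buf)); // abXde
        raf.close();
    }
}
```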
1.2 Serialization
To store in-memory object data on disk, or to transmit an object over the network, the object must be serialized.
Essence: serialization is the rule for converting an object to binary; deserialization is the rule for converting binary back to an object.
Java has its own serialization mechanism: implement the Serializable interface.
Persisting an object to disk is called passivation; deserializing object data on disk back into a Java object is called activation.
1.2.1 Java serialization
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

/**
 * FileName: TestSer
 * Author: 多易教育-DOIT
 * Date: 2020/11/12 0012
 * Description:
 * Question: can the User class avoid implementing the Serializable interface directly,
 * yet still store its data on disk, while storing less data than the Serializable approach?
 * That would save resources in network transmission.
 */
public class TestSer {
public static void main(String[] args) throws Exception {
// Persist an in-memory object to disk
// Object output stream
ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("d://user.user"));
User user = new User();
user.set(1,"王奔",20000);
// Any data stored on disk is binary
oos.writeObject(user); // ~110 bytes for: 1, 王奔, 20000
/**
 * With Java's Serializable interface, the data stored on disk includes
 * the package name, class name, field names, data types, ... which makes
 * deserialization via reflection convenient, but contains a lot of redundant data.
 */
oos.close();
// Deserialize the object data on disk back into an in-memory Java object
ObjectInputStream ois = new ObjectInputStream(new FileInputStream("d://user.user"));
User u = (User) ois.readObject();
System.out.println(u);
}
}
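The redundancy described in the comments above can be measured directly. The following sketch is not from the course materials (it defines a minimal stand-in User class inline); it compares the bytes produced by full Java serialization against writing the same three fields with DataOutputStream:

```java
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

public class SizeCompare {
    // minimal stand-in for the course's User class
    static class User implements Serializable {
        int id = 1;
        String name = "benB";
        double sal = 20000;
    }

    public static void main(String[] args) throws Exception {
        // Full Java serialization: includes class name, field names, types, ...
        ByteArrayOutputStream full = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(full);
        oos.writeObject(new User());
        oos.close();

        // Raw fields only: 4 bytes (int) + 2-byte length + 4 UTF bytes + 8 bytes (double) = 18
        ByteArrayOutputStream raw = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(raw);
        dos.writeInt(1);
        dos.writeUTF("benB");
        dos.writeDouble(20000);
        dos.close();

        System.out.println("Serializable: " + full.size() + " bytes");
        System.out.println("fields only:  " + raw.size() + " bytes");
    }
}
```

The exact Serializable byte count depends on the class, but it is always far larger than the 18 bytes of the raw fields, which is the motivation for sections 1.2.3 and 1.2.4 below.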
1.2.2 Converting an object to JSON (String)
https://mvnrepository.com/search — the Maven repository, where the JSON jar package can be downloaded (the code below uses JSON.toJSONString from Alibaba's fastjson)
import com.alibaba.fastjson.JSON;
import java.io.FileOutputStream;
import java.io.ObjectOutputStream;

/**
 * FileName: JsonToDisc
 * Author: 多易教育-DOIT
 * Date: 2020/11/12 0012
 * Description:
 * Convert a Java object into a JSON string ---> write it to disk
 */
public class JsonToDisc {
public static void main(String[] args) throws Exception {
User user = new User();
user.set(1,"benB",20000);
String str = JSON.toJSONString(user);
ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("d://user.json"));
oos.writeUTF(str);
oos.close();
}
}
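One caveat: ObjectOutputStream.writeUTF wraps the string in the object-stream protocol (a stream header plus a length prefix), so the resulting file is not plain JSON text. A character stream such as FileWriter stores the JSON as-is. A minimal sketch, with the JSON string hard-coded as a stand-in for JSON.toJSONString(user) so it runs without the fastjson jar:

```java
import java.io.File;
import java.io.FileWriter;
import java.nio.file.Files;

public class JsonToDiscPlain {
    public static void main(String[] args) throws Exception {
        // stand-in for JSON.toJSONString(user) from fastjson
        String str = "{\"id\":1,\"name\":\"benB\",\"sal\":20000}";
        File f = File.createTempFile("user", ".json");
        f.deleteOnExit();
        FileWriter fw = new FileWriter(f);
        fw.write(str); // plain text: no stream header, no length prefix
        fw.close();
        // reading the file back yields the JSON string unchanged
        System.out.println(Files.readString(f.toPath()));
    }
}
```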
1.2.3 Writing fields directly to disk (simple types)
import java.io.FileInputStream;
import java.io.ObjectInputStream;

/**
 * FileName: FieldsToDisc
 * Author: 多易教育-DOIT
 * Date: 2020/11/12 0012
 * Description:
 * flexible
 * convenient
 * small data size
 * note: read in the same order as written
 */
public class FieldsToDisc {
public static void main(String[] args) throws Exception {
/*User user = new User();
user.set(1,"benB",20000);
ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("d://user.f"));
// write out the fields
oos.writeInt(user.getId());
oos.writeUTF(user.getName());
oos.writeDouble(user.getSal());
oos.close();*/
ObjectInputStream ois = new ObjectInputStream(new FileInputStream("d://user.f"));
// read the data in the order it was written: Int, UTF, Double
int id = ois.readInt();
String name = ois.readUTF();
double sal = ois.readDouble();
User user = new User();
user.set(id,name,sal);
System.out.println(user);
}
}
1.2.4 Custom serialization rules
1 Define an interface with read and write methods
2 The Java class to be serialized implements the interface and overrides the read/write methods (the serialization rules)
3 To test, call the read and write methods to perform serialization and deserialization
Interface
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

/**
 * FileName: Writable
 * Author: 多易教育-DOIT
 * Date: 2020/11/12 0012
 * Description: interface
 * defines two methods: write and read
 * purpose: any class that later needs serialization implements this interface
 * and overrides the read/write methods (which specify the serialization and deserialization rules)
 */
public interface Writable {
public void write(ObjectOutputStream oos) throws Exception;
public void read(ObjectInputStream ois) throws Exception;
}
Implementation class
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

/**
 * FileName: Teacher
 * Author: 多易教育-DOIT
 * Date: 2020/11/12 0012
 * Description: to be serialized and deserialized:
 * implement the interface and override its methods
 */
public class Teacher implements Writable {
private int tid ;
private String name ;
private String gender ;
private double faceValue ;
public void set (int tid, String name, String gender, double faceValue) {
this.tid = tid;
this.name = name;
this.gender = gender;
this.faceValue = faceValue;
}
public int getTid() {
return tid;
}
public void setTid(int tid) {
this.tid = tid;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getGender() {
return gender;
}
public void setGender(String gender) {
this.gender = gender;
}
public double getFaceValue() {
return faceValue;
}
public void setFaceValue(double faceValue) {
this.faceValue = faceValue;
}
@Override
public String toString() {
return "Teacher{" +
"tid=" + tid +
", name='" + name + '\'' +
", gender='" + gender + '\'' +
", faceValue=" + faceValue +
'}';
}
/**
 * Serialization
 * @param oos
 * @throws Exception
 */
@Override
public void write(ObjectOutputStream oos) throws Exception {
oos.writeInt(this.tid);
oos.writeUTF(this.name);
oos.writeUTF(this.gender);
oos.writeDouble(this.faceValue);
oos.close();
}
/**
 * Deserialization
 * @param ois
 * @throws Exception
 */
@Override
public void read(ObjectInputStream ois) throws Exception {
this.tid = ois.readInt();
this.name = ois.readUTF() ;
this.gender = ois.readUTF() ;
this.faceValue = ois.readDouble() ;
ois.close();
}
}
Test
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

/**
 * FileName: TestMyWrite
 * Author: 多易教育-DOIT
 * Date: 2020/11/12 0012
 * Description:
 */
public class TestMyWrite {
public static void main(String[] args) throws Exception {
Teacher teacher = new Teacher();
// teacher.set(1,"wangben","M",99.99);
// write out: serialization
//teacher.write(new ObjectOutputStream(new FileOutputStream("d://teacher2.txt")));
// read back: deserialization
teacher.read(new ObjectInputStream(new FileInputStream("d://teacher2.txt")));
System.out.println(teacher);
}
}
1.3 Iterator
An iterator lets you consume data without knowing the underlying data structure or the number of elements.
For example: the company has its own data format, 1#zss:23@M, and provides an iterator: hasNext / next --> User
package com._51doit.cn.hdp.day01.iter;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Iterator;
/**
* FileName: MyIteratable
* Author: 多易教育-DOIT
* Date: 2020/11/12 0012
* Description:
*/
public class MyIteratable implements Iterator<User> {
BufferedReader br;
String line = null;
User user = new User();
{
try {
br = new BufferedReader(new FileReader("d://user.txt"));
} catch (Exception e) {
e.printStackTrace();
}
}
/**
 * next() is executed only when this method returns true
 *
 * @return
 */
@Override
public boolean hasNext() {
boolean flag = false;
try {
line = br.readLine();
if (line != null) {
flag = true;
} else {
flag = false;
}
} catch (Exception e) {
e.printStackTrace();
}
return flag;
}
@Override
public User next() {
// parse each line, fill the result into the User object, and return it
// e.g. 8#fengjie:53@F
String uid = line.split("#")[0];
String name = line.split("#")[1].split(":")[0];
String age = line.split("#")[1].split(":")[1].split("@")[0];
String gender = line.split("#")[1].split(":")[1].split("@")[1];
user.set(uid,name,age,gender);
return user;
}
}
package com._51doit.cn.hdp.day01.iter;
/**
* FileName: User
* Author: 多易教育-DOIT
* Date: 2020/11/12 0012
* Description:
*/
public class User {
private String uid ;
private String name ;
private String age ;
private String gender ;
public void set(String uid, String name, String age, String gender) {
this.uid = uid;
this.name = name;
this.age = age;
this.gender = gender;
}
public String getUid() {
return uid;
}
public void setUid(String uid) {
this.uid = uid;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getAge() {
return age;
}
public void setAge(String age) {
this.age = age;
}
public String getGender() {
return gender;
}
public void setGender(String gender) {
this.gender = gender;
}
@Override
public String toString() {
return "User{" +
"uid='" + uid + '\'' +
", name='" + name + '\'' +
", age='" + age + '\'' +
", gender='" + gender + '\'' +
'}';
}
}
/**
* FileName: Test1
* Author: 多易教育-DOIT
* Date: 2020/11/12 0012
* Description:
*/
public class Test1 {
public static void main(String[] args) {
MyIteratable mi = new MyIteratable();
while(mi.hasNext()){
// the returned data is always received by the same, reused object
System.out.println(mi.next());
}
}
}
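A possible variation on MyIteratable (not part of the course code): implementing Iterable as well lets the parser be used directly in a for-each loop. This sketch parses the same 1#zss:23@M format from an in-memory list instead of d://user.txt, and returns the fields as a String[] rather than reusing a single User object:

```java
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

// Iterable over lines like "1#zss:23@M" -> [uid, name, age, gender]
public class LineUsers implements Iterable<String[]> {
    private final List<String> lines;

    public LineUsers(List<String> lines) { this.lines = lines; }

    @Override
    public Iterator<String[]> iterator() {
        Iterator<String> it = lines.iterator();
        return new Iterator<String[]>() {
            @Override
            public boolean hasNext() { return it.hasNext(); }

            @Override
            public String[] next() {
                // 8#fengjie:53@F -> uid=8, name=fengjie, age=53, gender=F
                String line = it.next();
                String uid = line.split("#")[0];
                String rest = line.split("#")[1];
                String name = rest.split(":")[0];
                String tail = rest.split(":")[1];
                return new String[]{uid, name, tail.split("@")[0], tail.split("@")[1]};
            }
        };
    }

    public static void main(String[] args) {
        LineUsers users = new LineUsers(Arrays.asList("1#zss:23@M", "8#fengjie:53@F"));
        for (String[] u : users) { // for-each works because of Iterable
            System.out.println(String.join(",", u));
        }
    }
}
```

Returning a fresh array per element avoids the pitfall noted in Test1 above, where every call to next() refills and returns the same User object.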
Big data background
Introduction to Hadoop
Hadoop is a big data technology (framework).
The main big data problems: 1) storage of massive data 2) computation over massive data 3) resource allocation across many machines (storage resources, computing resources)
1 Massive data storage: HDFS (Hadoop Distributed File System)
2 Massive data computation: the MapReduce computation framework
3 Computing resource scheduling and task monitoring platform: Yarn
4 Toolkit: Commons
Features
1 High fault tolerance and high availability
2 Very easy to expand the cluster scale to increase storage and computing capacity
3 Inexpensive (runs on commodity hardware)
Introduction to HDFS
Distributed file system: Hadoop Distributed File System
A file system reads and writes data, uploads and deletes data, creates folders, moves, copies... and provides a virtual access directory, similar to Baidu Cloud Disk.
MySQL is also, at bottom, a system of files storing relational data.
The Linux and Windows operating systems each have their own file systems.
Distributed file system: stores data on different machines and provides operations on that data.
Basic operations:
HDFS installation
1 upload
[root@linux01 apps]# pwd
/opt/apps
2 Unzip
tar -zxvf hadoop-3.1.1.tar.gz
3 configuration
vi /opt/apps/hadoop-3.1.1/etc/hadoop/hadoop-env.sh
# variable is REQUIRED on ALL platforms except OS X!
export JAVA_HOME=/opt/apps/jdk1.8.0_141/
vi /opt/apps/hadoop-3.1.1/etc/hadoop/hdfs-site.xml
<!-- 集群的namenode的位置 datanode能通过这个地址注册-->
<property>
<name>dfs.namenode.rpc-address</name>
<value>linux01:8020</value>
</property>
<!-- namenode存储元数据的位置 -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/hdpdata/name</value>
</property>
<!-- datanode存储数据的位置 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/hdpdata/data</value>
</property>
<!-- secondary namenode机器的位置-->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>linux02:50090</value>
</property>
vi /opt/apps/hadoop-3.1.1/etc/hadoop/core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://linux01:8020</value>
</property>
4 Distribution
scp -r hadoop-3.1.1 linux02:$PWD
scp -r hadoop-3.1.1 linux03:$PWD
5 Initialization (bin)
Execute in the bin directory:
./hadoop namenode -format
This creates the metadata directory /opt/hdpdata/name
6 Start (sbin)
Execute in the sbin directory:
./hadoop-daemon.sh start namenode
jps should now show the NameNode process
Visit the web page at http://linux01:9870
Execute in the sbin directory on linux01, linux02, and linux03 respectively:
./hadoop-daemon.sh start datanode
7 Configure system environment variables
vi /etc/profile
export JAVA_HOME=/opt/apps/jdk1.8.0_141
export HADOOP_HOME=/opt/apps/hadoop-3.1.1
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile
8 One-key start and stop
Configure the names of the machines that should run a DataNode in etc/hadoop/workers
linux01
linux02
linux03
Declare the users in the start/stop scripts sbin/start-dfs.sh and sbin/stop-dfs.sh:
#!/usr/bin/env bash
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
start-dfs.sh
stop-dfs.sh
HDFS client
All shell commands start with hdfs dfs -
[root@linux01 bin]# hdfs dfs
Usage: hadoop fs [generic options]
[-appendToFile <localsrc> ... <dst>]
[-cat [-ignoreCrc] <src> ...]
[-checksum <src> ...]
[-chgrp [-R] GROUP PATH...]
[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
[-chown [-R] [OWNER][:[GROUP]] PATH...]
[-copyFromLocal [-f] [-p] [-l] [-d] [-t <thread count>] <localsrc> ... <dst>]
[-copyToLocal [-f] [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-count [-q] [-h] [-v] [-t [<storage type>]] [-u] [-x] [-e] <path> ...]
[-cp [-f] [-p | -p[topax]] [-d] <src> ... <dst>]
[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-df [-h] [<path> ...]]
[-du [-s] [-h] [-v] [-x] <path> ...]
[-expunge]
[-find <path> ... <expression> ...]
[-get [-f] [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-getfacl [-R] <path>]
[-getfattr [-R] {-n name | -d} [-e en] <path>]
[-getmerge [-nl] [-skip-empty-file] <src> <localdst>]
[-head <file>]
[-help [cmd ...]]
[-ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] [-e] [<path> ...]]
[-mkdir [-p] <path> ...]
[-moveFromLocal <localsrc> ... <dst>]
[-moveToLocal <src> <localdst>]
[-mv <src> ... <dst>]
[-put [-f] [-p] [-l] [-d] <localsrc> ... <dst>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]
[-rm [-f] [-r|-R] [-skipTrash] [-safely] <src> ...]
[-rmdir [--ignore-fail-on-non-empty] <dir> ...]
[-setfacl [-R] [{-b|-k} {-m|-x <acl_spec>} <path>]|[--set <acl_spec> <path>]]
[-setfattr {-n name [-v value] | -x name} <path>]
[-setrep [-R] [-w] <rep> <path> ...]
[-stat [format] <path> ...]
[-tail [-f] <file>]
[-test -[defsz] <path>]
[-text [-ignoreCrc] <src> ...]
[-touchz <path> ...]
[-truncate [-w] <length> <path> ...]
[-usage [cmd ...]]