Article Directory
1. Big data assignment 1
Job content:
1. Local operation mode
1) Create a wcinput folder in hadoop100
2) Create a name.txt file under the wcinput file
3) Edit the file, enter words in the file, words include your name
4) Execute the program and view the results; the output should print how many times each word occurs
2. Use scp to securely copy
1) Create new folders in hadoop100, hadoop102, and hadoop103 and the files of your own name 1.txt and your own name 2.txt
2) Copy the name 1.txt file from hadoop100 to the corresponding folder on hadoop102
3) On hadoop102, pull the name 2.txt file from hadoop100
4) On hadoop102, copy the name 1.txt and name 2.txt files from hadoop100 to the corresponding folder on hadoop103
1. Local operation mode part
2. Use scp to securely copy the part
2. Big data assignment 2
contents of homework:
-
rsync remote synchronization tool
1) Delete the hadoop-3.1.3 folder under /opt/module in hadoop102
2) Use rsync to send the hadoop-3.1.3 folder under /opt/module on hadoop100 to the same directory on hadoop102
- xsync cluster distribution script
- understanding script content
- scripting in hadoop
- Distribute the environment variable my_env to 102 and 103 servers, and realize the password-free function
- Cluster deployment
1) Modify the configuration file and start the cluster
2) Create name+student ID folders on the local machine and the cluster respectively
3) Create name.txt and upload it to the cluster
1. rsync remote synchronization tool part
2. Xsync cluster distribution script part
3. Cluster deployment part
3.1 Configure core-site.xml
3.2 Configure hdfs-site.xml
3.3 Configure mapred-site.xml
3.4 Configure yarn-site.xml
3. Big data assignment 3
1. Configure history server and logs
History server section:
2. Log section
3. Other
- Create a school folder and upload it to the cluster
- Create a major.txt (the text content is the name) and upload it to the folder of the cluster school
- Create a name.txt (the text content is the student ID) and append its content to the cluster's major.txt file
- Download the merged file from the cluster to the current directory of the local server
4. Big data assignment 4
Write a local wordcount case
1. Source code
- com.igeek.mapreduceDemo.wordcount.WordCountDriver
package com.igeek.mapreduceDemo.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the local WordCount job: wires the mapper/reducer, declares the
 * output key/value types, and submits the job.
 *
 * Generalized: when two command-line arguments are supplied they are used as
 * the input and output paths; with no args the original hard-coded local
 * paths are used, so existing usage is unchanged.
 */
public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Load configuration and create the Job instance
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Register the driver class so the correct jar is shipped
        job.setJarByClass(WordCountDriver.class);
        // 3. Attach the mapper and reducer implementations
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 4. Declare the map-phase output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // 5. Declare the final (reduce) output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 6. Resolve paths: prefer CLI args, fall back to the original local defaults
        String inputPath = args.length >= 2 ? args[0] : "f:\\Documentation\\Hello.txt";
        String outputPath = args.length >= 2 ? args[1] : "f:\\Documentation\\outHello1";
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // 7. Submit the job and block until it finishes
        boolean boo = job.waitForCompletion(true);
        System.out.println(boo);
    }
}
- com.igeek.mapreduceDemo.wordcount.WordCountMapper
package com.igeek.mapreduceDemo.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* KEYIN Map阶段得输入key类型 LongWritable
* VALUEIN Map阶段输入value类型 Text
* KEYIN map阶段输出key类型 Text
* VALUEIN map阶段输出得value类型 IntWritable
*
*/
/**
 * Map phase of WordCount.
 * KEYIN   LongWritable — byte offset of the current line
 * VALUEIN Text         — the line itself
 * KEYOUT  Text         — a single word
 * VALUEOUT IntWritable — the constant count 1 (aggregation happens in the reducer)
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    Text outk = new Text();
    // No aggregation here: every occurrence is emitted with a count of 1.
    IntWritable outV = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the incoming line on single spaces and emit (word, 1) per token.
        for (String token : value.toString().split(" ")) {
            outk.set(token);
            context.write(outk, outV);
        }
    }
}
- com.igeek.mapreduceDemo.wordcount.WordCountReducer
package com.igeek.mapreduceDemo.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
* KEYIN Reduce阶段得输入key类型 Text
* VALUEIN reduce阶段输入value类型 IntWritable
* KEYIN reduce阶段输出key类型 Text
* VALUEIN reduce阶段输出得value类型 IntWritable
*
*/
/**
 * Reduce phase of WordCount: sums the per-word counts emitted by the mapper.
 * KEYIN/KEYOUT Text — the word; VALUEIN/VALUEOUT IntWritable — its count.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable outv = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Accumulate the total number of occurrences of this word.
        int total = 0;
        for (IntWritable count : values) {
            total += count.get();
        }
        // Emit (word, total).
        outv.set(total);
        context.write(key, outv);
    }
}
- com.igeek.mapreduceDemo.wordcount2.WordCountDriver
package com.igeek.mapreduceDemo.wordcount2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Cluster WordCount driver: input and output paths are taken from the
 * command line (args[0] = input, args[1] = output).
 */
public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Build the job from a fresh Hadoop configuration.
        Configuration conf = new Configuration();
        Job wordCountJob = Job.getInstance(conf);
        // Ship the jar containing this driver class to the cluster.
        wordCountJob.setJarByClass(WordCountDriver.class);
        // Wire up the map and reduce implementations.
        wordCountJob.setMapperClass(WordCountMapper.class);
        wordCountJob.setReducerClass(WordCountReducer.class);
        // Map-phase output types.
        wordCountJob.setMapOutputKeyClass(Text.class);
        wordCountJob.setMapOutputValueClass(IntWritable.class);
        // Final (reduce) output types.
        wordCountJob.setOutputKeyClass(Text.class);
        wordCountJob.setOutputValueClass(IntWritable.class);
        // Paths come from the command line.
        FileInputFormat.setInputPaths(wordCountJob, new Path(args[0]));
        FileOutputFormat.setOutputPath(wordCountJob, new Path(args[1]));
        // Submit, wait for completion, and print the success flag.
        boolean boo = wordCountJob.waitForCompletion(true);
        System.out.println(boo);
    }
}
- com.igeek.mapreduceDemo.wordcount2.WordCountMapper
package com.igeek.mapreduceDemo.wordcount2;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* KEYIN Map阶段得输入key类型 LongWritable
* VALUEIN Map阶段输入value类型 Text
* KEYIN map阶段输出key类型 Text
* VALUEIN map阶段输出得value类型 IntWritable
*
*/
/**
 * Map phase of WordCount (cluster version).
 * KEYIN   LongWritable — byte offset of the current line
 * VALUEIN Text         — the line itself
 * KEYOUT  Text         — a single word
 * VALUEOUT IntWritable — the constant count 1 (aggregation happens in the reducer)
 *
 * Removed: the unused fields {@code i}/{@code age} (never assigned anywhere
 * in this class) and the debug {@code toString()} override that printed
 * them — leftover IDE-generated scaffolding with no callers in this listing.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    Text outk = new Text();
    // Counting only — each occurrence contributes 1; the reducer aggregates.
    IntWritable outV = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Convert the raw line to a String
        String line = value.toString();
        // 2. Split on single spaces
        String[] words = line.split(" ");
        // 3. Emit (word, 1) for every token
        for (String word : words) {
            outk.set(word);
            context.write(outk, outV);
        }
    }
}
- com.igeek.mapreduceDemo.wordcount2.WordCountReducer
package com.igeek.mapreduceDemo.wordcount2;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
* KEYIN Reduce阶段得输入key类型 Text
* VALUEIN reduce阶段输入value类型 IntWritable
* KEYIN reduce阶段输出key类型 Text
* VALUEIN reduce阶段输出得value类型 IntWritable
*
*/
/**
 * Reducer for the cluster WordCount job: collapses the (word, 1) pairs
 * produced by the map phase into (word, totalCount).
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable outv = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum every partial count recorded for this word.
        int occurrences = 0;
        for (IntWritable partial : values) {
            occurrences += partial.get();
        }
        // Write the aggregated pair.
        outv.set(occurrences);
        context.write(key, outv);
    }
}
- pom.xml added
<dependencies>
    <!-- BUG FIX: hadoop-client was 3.3.1 while hadoop-hdfs below is 3.1.3
         and the target cluster runs hadoop-3.1.3 — mixing two Hadoop
         releases on the classpath causes runtime incompatibilities.
         All Hadoop artifacts must share one version. -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
        <version>1.7.30</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <!-- Compile for Java 8, matching the Hadoop 3.1.x runtime. -->
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.6.1</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <!-- Build a fat jar (jar-with-dependencies) during `package`
             so the job can be submitted to the cluster directly. -->
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
2. Information screenshot
- The project is packaged and uploaded to the cluster
- Cluster test self-written wordcount
5. Big data assignment 5
Write a mobile phone number traffic statistics case
- Write a local serialization case to realize mobile phone number traffic statistics
- The project is packaged and uploaded to the cluster
- cluster test
1. Source code
1、 com.igeek.mapreduceDemo.flow.FlowBean
package com.igeek.mapreduceDemo.flow;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* 1.实现writable接口
* 2.重写序列化和反序列化方法
* 3.提供空参构造
* 4.tostring
*/
/**
 * Hadoop-serializable bean carrying per-phone traffic totals.
 *
 * Writable contract: write() and readFields() must read/write the fields in
 * the same order (upFlow, downFlow, sumFlow), and a public no-arg
 * constructor must exist for deserialization.
 */
public class FlowBean implements Writable {
    private long upFlow;   // upstream traffic
    private long downFlow; // downstream traffic
    private long sumFlow;  // total traffic (derived via setSumFlow())

    /** Required by Hadoop's deserialization machinery. */
    public FlowBean() {
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    /** Derives the total from the current up/down values. */
    public void setSumFlow() {
        this.sumFlow = this.downFlow + this.upFlow;
    }

    /** Serialization — field order mirrors readFields(). */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    /** Deserialization — field order mirrors write(). */
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }

    /** Tab-separated "up down sum", matching the job's text output format. */
    @Override
    public String toString() {
        return String.format("%d\t%d\t%d", upFlow, downFlow, sumFlow);
    }
}
2、 com.igeek.mapreduceDemo.flow.FlowMapper
package com.igeek.mapreduceDemo.flow;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the phone-traffic job: parses one tab-separated log line per
 * call and emits (phone number, FlowBean).
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    private Text outk = new Text();
    private FlowBean outv = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Convert the raw line to a String
        String line = value.toString();
        // Tab-split; the trailing columns are addressed from the end because
        // rows may have a variable number of middle columns.
        String[] splits = line.split("\t");
        String phone = splits[1];
        String up = splits[splits.length - 3];
        String down = splits[splits.length - 2];
        // Populate the output key/value
        outk.set(phone);
        outv.setUpFlow(Long.parseLong(up));
        outv.setDownFlow(Long.parseLong(down));
        // BUG FIX: was outv.getSumFlow(), which discards its result and
        // leaves sumFlow at its previous value (0 on first use) in the map
        // output; setSumFlow() actually computes up + down.
        outv.setSumFlow();
        // Emit (phone, traffic)
        context.write(outk, outv);
    }
}
3、 com.igeek.mapreduceDemo.flow.FlowReducer
package com.igeek.mapreduceDemo.flow;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for the phone-traffic job: sums upstream and downstream traffic
 * per phone number and emits a FlowBean carrying the derived grand total.
 */
public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
    private FlowBean outv = new FlowBean();

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Accumulate up/down traffic across all records for this phone.
        long upTotal = 0;
        long downTotal = 0;
        for (FlowBean bean : values) {
            upTotal += bean.getUpFlow();
            downTotal += bean.getDownFlow();
        }
        // Fill the reusable output bean and derive the total.
        outv.setUpFlow(upTotal);
        outv.setDownFlow(downTotal);
        outv.setSumFlow();
        // Emit (phone, totals).
        context.write(key, outv);
    }
}
4、 com.igeek.mapreduceDemo.flow.FlowDriver
package com.igeek.mapreduceDemo.flow;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the local phone-traffic job: wires FlowMapper/FlowReducer and
 * submits against hard-coded local paths.
 */
public class FlowDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowDriver.class);
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);
        // Map-phase output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        // Final output types — BUG FIX: the original called
        // setMapOutputValueClass a second time here, so the job's final
        // output value class was never declared; it must be
        // setOutputValueClass(FlowBean.class).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);
        FileInputFormat.setInputPaths(job, new Path("E:\\QQdown\\phone_data.txt"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\Documentation\\flowOutPut"));
        boolean b = job.waitForCompletion(true);
        System.out.println(b ? 0 : 1);
    }
}
5、com.igeek.mapreduceDemo.Phone_DataDriver.FlowMapper
package com.igeek.mapreduceDemo.Phone_DataDriver;
import com.igeek.mapreduceDemo.flow.FlowBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the packaged phone-traffic job: parses one tab-separated log
 * line per call and emits (phone number, FlowBean).
 *
 * NOTE(review): this listing was headed "FlowMapper" but contained a
 * byte-identical duplicate of the FlowReducer listed immediately below it.
 * PhoneDriver in this package calls job.setMapperClass(FlowMapper.class),
 * so a FlowMapper must exist here; reconstructed from
 * com.igeek.mapreduceDemo.flow.FlowMapper — confirm against the original
 * project sources.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    private Text outk = new Text();
    private FlowBean outv = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Tab-split; trailing columns addressed from the end because rows
        // may have a variable number of middle columns.
        String[] splits = value.toString().split("\t");
        String phone = splits[1];                  // phone number column
        String up = splits[splits.length - 3];     // upstream traffic
        String down = splits[splits.length - 2];   // downstream traffic
        // Populate and emit (phone, traffic)
        outk.set(phone);
        outv.setUpFlow(Long.parseLong(up));
        outv.setDownFlow(Long.parseLong(down));
        outv.setSumFlow();
        context.write(outk, outv);
    }
}
6、com.igeek.mapreduceDemo.Phone_DataDriver.FlowReducer
package com.igeek.mapreduceDemo.Phone_DataDriver;
import com.igeek.mapreduceDemo.flow.FlowBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for the packaged phone-traffic job: sums upstream and downstream
 * traffic per phone number and emits a FlowBean with the derived total.
 *
 * Removed: the unused int fields {@code phone}/{@code up}/{@code down}/
 * {@code sum} (never assigned anywhere in this class) and the debug
 * {@code toString()} override that printed them — leftover IDE-generated
 * scaffolding with no purpose here.
 */
public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
    private FlowBean outv = new FlowBean();

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Accumulate up/down traffic across all records for this phone.
        long totalUp = 0;
        long totalDown = 0;
        for (FlowBean flowBean : values) {
            totalUp += flowBean.getUpFlow();
            totalDown += flowBean.getDownFlow();
        }
        // Fill the reusable output bean and derive the total.
        outv.setUpFlow(totalUp);
        outv.setDownFlow(totalDown);
        outv.setSumFlow();
        // Emit (phone, totals).
        context.write(key, outv);
    }
}
7、com.igeek.mapreduceDemo.Phone_DataDriver.PhoneDriver
package com.igeek.mapreduceDemo.Phone_DataDriver;
import com.igeek.mapreduceDemo.flow.FlowBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Cluster driver for the phone-traffic job: paths from the command line
 * (args[0] = input, args[1] = output).
 */
public class PhoneDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Load configuration and create the Job instance
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Register the driver class so the correct jar is shipped
        job.setJarByClass(PhoneDriver.class);
        // 3. Attach the mapper and reducer implementations
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);
        // 4. Map-phase output types — BUG FIX: was IntWritable, but this
        //    job's mapper emits FlowBean values; the type mismatch fails
        //    at runtime during map output serialization.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        // 5. Final output types — BUG FIX: was IntWritable, but FlowReducer
        //    emits FlowBean values.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);
        // 6. Paths from the command line
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 7. Submit the job and print the success flag
        boolean boo = job.waitForCompletion(true);
        System.out.println(boo);
    }
}
2. Information screenshot