Cuente cuántas líneas del archivo de entrada comienzan con la misma primera palabra (es decir, para cada primera palabra distinta, el número de líneas que empiezan con ella).
(1) Datos de entrada
banzhang ni hao
xihuan hadoop banzhang
banzhang ni hao
xihuan hadoop banzhang
(2) Datos de resultados esperados
banzhang 2
xihuan 2
Clase Mapper
package KVText;
import java.io.IOException;
//banzhang ni hao
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that emits a count of 1 for every input record, keyed by the
 * record's incoming key.
 *
 * <p>The incoming key/value pair is whatever the job's input format
 * produces; the value is not used.
 */
public class KVTextMapper extends Mapper<Text, Text, Text, LongWritable> {

    // Constant count of 1 written for each record; the same instance is reused on every call.
    LongWritable v = new LongWritable(1);

    /**
     * Writes {@code (key, 1)} to the context for the given record.
     *
     * @param key     key of the current record; forwarded unchanged as the output key
     * @param value   value of the current record; ignored
     * @param context task context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit one occurrence for this key.
        context.write(key, v);
    }
}
Clase Reducer
package KVText;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that adds up all counts received for a key and writes a single
 * {@code (key, total)} pair.
 */
public class KVTextReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    // Output holder reused across reduce calls; its content is overwritten each time.
    private final LongWritable v = new LongWritable();

    /**
     * Sums the values grouped under {@code key} and emits the total.
     *
     * @param key     the key shared by all values in this group
     * @param values  the counts emitted for this key
     * @param context task context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate in a primitive long: a boxed Long would be unboxed and
        // re-boxed on every iteration of the loop.
        long sum = 0L;
        for (LongWritable value : values) {
            sum += value.get();
        }
        v.set(sum);

        // Emit the total for this key.
        context.write(key, v);
    }
}
Clase Driver
package KVText;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Job driver: wires {@link KVTextMapper} and {@link KVTextReducer} into a
 * MapReduce job that reads its input through {@link KeyValueTextInputFormat}.
 */
public class KVTextDriver {

    /**
     * Configuration property read by the key/value line record reader to
     * decide where each line is split into key and value. Written as a
     * literal so no additional import is needed.
     */
    private static final String KEY_VALUE_SEPARATOR_PROPERTY =
            "mapreduce.input.keyvaluelinerecordreader.key.value.separator";

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args {@code args[0]} is the input path and {@code args[1]} the output path;
     *             when fewer than two arguments are given, the built-in local paths are used
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be found while running the job
     * @throws InterruptedException   if waiting for job completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // Fall back to the local development paths only when the caller did not
        // supply them, so paths passed on the command line are honoured.
        // NOTE(review): "e:input2" has no slash after the drive letter, unlike
        // "e:/output8" - confirm whether "e:/input2" was intended.
        if (args == null || args.length < 2) {
            args = new String[] {
                "e:input2", "e:/output8"};
        }

        // Set the key/value separator to a single space. Without this the input
        // format splits on TAB, and the space-separated sample lines would become
        // the key in their entirety. This must be set before the Job is created
        // from the configuration.
        Configuration conf = new Configuration();
        conf.set(KEY_VALUE_SEPARATOR_PROPERTY, " ");

        // Obtain the job object.
        Job job = Job.getInstance(conf);

        // Locate the jar through the driver class.
        job.setJarByClass(KVTextDriver.class);

        // Associate the mapper and the reducer.
        job.setMapperClass(KVTextMapper.class);
        job.setReducerClass(KVTextReducer.class);

        // Key/value types of the map output.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Key/value types of the final output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Input path.
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // Input format: each line is delivered to the mapper as a key/value pair.
        job.setInputFormatClass(KeyValueTextInputFormat.class);

        // Output path.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and wait for it to finish.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}