Hadoop series, part fourteen — MapReduce input and output formats (SequenceFile)

A SequenceFile stores its data as key-value pairs.

By setting the job's input and output format classes, a MapReduce job can read and write its data as SequenceFiles. SequenceFiles are more convenient than plain text files because records keep their key-value structure and do not need to be re-parsed (e.g. split into words) when read back. The two MapReduce programs below use this pattern: the first writes its output as a SequenceFile, and the second reads that file as its input.
The first MapReduce
Code:

/**
 * Step one of an inverted-index build: for every word in every input file,
 * counts how many times the word occurs in that file. Output records are
 * ("word-filename", count), written as a SequenceFile so that step two can
 * read them back without re-parsing text.
 */
public class IndexStepOne {

	public static class IndexStepOneMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

		/**
		 * Emits ("word-filename", 1) for every word in the current line.
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// Ask the input split which file the line being processed belongs to.
			FileSplit inputSplit = (FileSplit) context.getInputSplit();
			String fileName = inputSplit.getPath().getName();

			// \\s+ tolerates runs of whitespace; split(" ") would emit empty
			// tokens for consecutive spaces and count them as "words".
			String[] words = value.toString().split("\\s+");
			for (String w : words) {
				if (w.isEmpty()) {
					continue; // leading whitespace yields one empty leading token
				}
				// Key is "word-filename", value is a count of 1 for this occurrence.
				context.write(new Text(w + "-" + fileName), new IntWritable(1));
			}
		}

	}

	public static class IndexStepOneReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

		/**
		 * Sums the per-occurrence 1s for one "word-filename" key into a total count.
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Reducer<Text, IntWritable, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {

			int count = 0;
			for (IntWritable value : values) {
				count += value.get();
			}

			context.write(key, new IntWritable(count));
		}

	}

	public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();

		Job job = Job.getInstance(conf);

		job.setJarByClass(IndexStepOne.class);

		job.setMapperClass(IndexStepOneMapper.class);
		job.setReducerClass(IndexStepOneReducer.class);

		job.setNumReduceTasks(3);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		// Write output as a SequenceFile (binary key-value records) instead of
		// the default TextOutputFormat, so step two can consume it directly.
		job.setOutputFormatClass(SequenceFileOutputFormat.class);

		// Paths may be supplied on the command line; fall back to the
		// original hard-coded locations for backward compatibility.
		String input = args.length > 0 ? args[0] : "F:\\mrdata\\index\\input";
		String output = args.length > 1 ? args[1] : "F:\\mrdata\\index\\out-seq-1";
		FileInputFormat.setInputPaths(job, new Path(input));
		FileOutputFormat.setOutputPath(job, new Path(output));

		// Propagate job success/failure to the shell instead of discarding it.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

The second MapReduce
Code:

`public class IndexStepTwo {

	public static class IndexStepTwoMapper extends Mapper<Text, IntWritable, Text, Text> {

		@Override
		protected void map(Text key, IntWritable value, Context context) throws IOException, InterruptedException {
			String[] split = key.toString().split("-");
			context.write(new Text(split[0]), new Text(split[1]+"-->"+value));
		}

	}

	public static class IndexStepTwoReducer extends Reducer<Text, Text, Text, Text> {

		// 一组数据:  <hello,a.txt-->4> <hello,b.txt-->4> <hello,c.txt-->4>
		@Override
		protected void reduce(Text key, Iterable<Text> values,Context context)
				throws IOException, InterruptedException {
			// stringbuffer是线程安全的,stringbuilder是非线程安全的,在不涉及线程安全的场景下,stringbuilder更快
			StringBuilder sb = new StringBuilder();
			
			for (Text value : values) {
				sb.append(value.toString()).append("\t");
			}
			
			context.write(key, new Text(sb.toString()));
			

		}

	}
	
	
	
	public static void main(String[] args) throws Exception{
		
		Configuration conf = new Configuration(); // 默认只加载core-default.xml core-site.xml
		
		Job job = Job.getInstance(conf);

		job.setJarByClass(IndexStepTwo.class);

		job.setMapperClass(IndexStepTwoMapper.class);
		job.setReducerClass(IndexStepTwoReducer.class);

		job.setNumReduceTasks(1);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		
		// job.setInputFormatClass(TextInputFormat.class); 默认的输入组件
		job.setInputFormatClass(SequenceFileInputFormat.class);
		
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		FileInputFormat.setInputPaths(job, new Path("F:\\mrdata\\index\\out1"));
		FileOutputFormat.setOutputPath(job, new Path("F:\\mrdata\\index\\out2"));

		job.waitForCompletion(true);
		
	}
	

}
Published 44 original articles · won praise 0 · Views 866

Guess you like

Origin blog.csdn.net/heartless_killer/article/details/102690643