The code is shown below:
AccessLogWritable Code:
package cn.dataClean.mr.mapper;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * Custom Writable type that carries one cleaned access-log record.
 *
 * @author Lenovo
 */
public class AccessLogWritable implements Writable {

    private String ip;
    private String time;
    private String day;
    private String traffic;
    private String type;
    private String id;

    // Hadoop instantiates Writables via reflection during deserialization,
    // so a no-argument constructor is required.
    public AccessLogWritable() {
    }

    public AccessLogWritable(String ip, String time, String day, String traffic, String type, String id) {
        this.ip = ip;
        this.time = time;
        this.day = day;
        this.traffic = traffic;
        this.type = type;
        this.id = id;
    }

    public String getIp() {
        return ip;
    }

    public void setIp(String ip) {
        this.ip = ip;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public String getTraffic() {
        return traffic;
    }

    public void setTraffic(String traffic) {
        this.traffic = traffic;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    /**
     * Called by Hadoop when deserializing; fields must be read in the
     * same order they were written.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.ip = in.readUTF();
        this.time = in.readUTF();
        this.day = in.readUTF();
        this.traffic = in.readUTF();
        this.type = in.readUTF();
        this.id = in.readUTF();
    }

    /**
     * Called by Hadoop when serializing.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(ip);
        out.writeUTF(time);
        out.writeUTF(day);
        out.writeUTF(traffic);
        out.writeUTF(type);
        out.writeUTF(id);
    }

    // TextOutputFormat calls toString() on the output value, so without
    // this override the result file would contain object identity strings.
    @Override
    public String toString() {
        return ip + "," + time + "," + day + "," + traffic + "," + type + "," + id;
    }
}
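To sanity-check the Writable contract, a record can be serialized and read back through plain java.io streams, since DataOutputStream and DataInputStream implement the same DataOutput/DataInput interfaces Hadoop uses. The following is a minimal sketch, not part of the original post; the class name WritableRoundTrip and the sample field values are illustrative:

package cn.dataClean.mr.mapper;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

// Round-trip check: write() followed by readFields() should reproduce the record.
public class WritableRoundTrip {
    public static void main(String[] args) throws Exception {
        AccessLogWritable original = new AccessLogWritable(
                "10.0.0.1", "2016-11-10 00:01:02", "10", "54", "video", "8840");

        // Serialize the record exactly as Hadoop would, via DataOutput.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance, exercising the no-arg constructor.
        AccessLogWritable copy = new AccessLogWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy); // expected: 10.0.0.1,2016-11-10 00:01:02,10,54,video,8840
    }
}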
AccessLogCleanMapper Code:
package cn.dataClean.mr.mapper;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper for the data-cleaning job: splits each raw log line into its
 * fields and normalizes the timestamp.
 *
 * @author Lenovo
 */
public class AccessLogCleanMapper extends Mapper<LongWritable, Text, Text, AccessLogWritable> {

    // Source and target timestamp patterns, used to convert the time format
    public static final SimpleDateFormat FORMAT =
            new SimpleDateFormat("d/MMM/yyyy:HH:mm:ss", Locale.ENGLISH); // original time format
    public static final SimpleDateFormat dateformat1 =
            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");                 // desired time format

    private String ip;
    private String time;
    private String day;
    private String traffic;
    private String type;
    private String id;

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");

        // Guard against malformed records (left disabled in the original post):
        // if (fields == null || fields.length < 6) {
        //     System.out.println("data error");
        //     return;
        // }

        // Populate the record's fields
        Date date = parseDateFormat(fields[1]);
        ip = fields[0];
        time = dateformat1.format(date);
        day = fields[2];
        traffic = fields[3];
        type = fields[4];
        id = fields[5];

        // Emit the cleaned record, keyed by IP
        context.write(new Text(ip), new AccessLogWritable(ip, time, day, traffic, type, id));
    }

    // Convert a timestamp string from the source format into a Date
    private static Date parseDateFormat(String string) {
        try {
            return FORMAT.parse(string);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
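The timestamp conversion is the only non-trivial step in the mapper. A standalone sketch shows the two SimpleDateFormat patterns at work; the class name DateFormatDemo and the sample timestamp are illustrative, not from the original post:

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

// Demonstrates the mapper's timestamp normalization in isolation.
public class DateFormatDemo {
    public static void main(String[] args) throws Exception {
        SimpleDateFormat source = new SimpleDateFormat("d/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);
        SimpleDateFormat target = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        // Parse an Apache-style timestamp, then re-format it.
        Date date = source.parse("10/Nov/2016:00:01:02");
        System.out.println(target.format(date)); // prints: 2016-11-10 00:01:02
    }
}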
AccessLogCleanReduce Code:
package cn.dataClean.mr.mapper;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class AccessLogCleanReduce extends Reducer<Text, AccessLogWritable, Text, AccessLogWritable> {

    String ip;
    String time;
    String day;
    String traffic;
    String type;
    String id;

    @Override
    protected void reduce(Text key, Iterable<AccessLogWritable> values, Context context)
            throws IOException, InterruptedException {
        // Every field is overwritten on each iteration, so after the loop
        // the fields hold the values of the last record seen for this key.
        for (AccessLogWritable value : values) {
            ip = value.getIp();
            time = value.getTime();
            day = value.getDay();
            traffic = value.getTraffic();
            type = value.getType();
            id = value.getId();
            System.out.println(time);
        }
        context.write(key, new AccessLogWritable(ip, time, day, traffic, type, id));
    }
}
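Note that this reducer effectively deduplicates by IP: when a key arrives with several values, only the last record survives, and the default TextOutputFormat writes the key, a tab, then the value's toString(). For example, with two hypothetical records sharing one IP (sample data, not from the original post):

reduce input for key 10.0.0.1:
    10.0.0.1,2016-11-10 00:01:02,10,54,video,8840
    10.0.0.1,2016-11-10 00:03:15,10,19,guide,3500

reduce output:
    10.0.0.1	10.0.0.1,2016-11-10 00:03:15,10,19,guide,3500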
AccessLogCleanJob Code:
package cn.dataClean.mr.mapper;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AccessLogCleanJob {

    public static void main(String[] args) throws Exception {
        // Validate command-line arguments (left disabled in the original
        // post, since the paths are hard-coded below):
        // if (args == null || args.length < 2) {
        //     System.err.println("Parameter Errors! Usage: <inputPath...> <outputPath>");
        //     System.exit(-1);
        // }

        Job job = Job.getInstance();
        job.setJobName("AccessLogCleanJob");
        job.setJarByClass(AccessLogCleanJob.class);

        // Map settings
        job.setMapperClass(AccessLogCleanMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(AccessLogWritable.class);

        // Reduce settings
        job.setReducerClass(AccessLogCleanReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(AccessLogWritable.class);

        // Input and output paths
        FileInputFormat.setInputPaths(job,
                new Path("hdfs://192.168.57.128:9000/MyMapReduce/AccessLogClean/result.txt"));
        FileOutputFormat.setOutputPath(job,
                new Path("hdfs://192.168.57.128:9000/MyMapReduce/AccessLogClean/Result"));

        // Submit the job and report the result
        boolean flag = job.waitForCompletion(true);
        System.out.println(flag);
        System.exit(flag ? 0 : 1);
    }
}
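To run the job against the hard-coded HDFS paths, the compiled classes are typically packaged into a jar and submitted with the standard launcher; the jar name here is illustrative:

hadoop jar AccessLogClean.jar cn.dataClean.mr.mapper.AccessLogCleanJob

Keep in mind that FileOutputFormat requires the output directory (.../Result above) not to exist yet; delete it between runs, or the job will fail at startup.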