大数据统计大量股票开盘和收盘的平均价
一、需要统计的文件:
二、单个文件内容:
三、程序:
程序来自小奶狗的博客
链接为:https://blog.csdn.net/pengyangyan/article/details/80115183
package com.test4;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that averages two price columns per stock.
 *
 * <p>The mapper takes the first whitespace-separated token of the first line it
 * sees as the stock code, skips the first two lines, and forwards columns 0, 1
 * and 4 of every line that has exactly seven fields. The reducer averages the
 * two forwarded price columns per stock code and labels them as the average
 * opening and closing price, rounded to two decimals.
 */
public class CodeX {

    /**
     * Configures and submits the job, then exits with 0 on success and 1 on failure.
     *
     * @param args generic Hadoop options followed by one or more input paths and,
     *             last, the output path
     * @throws IOException            if the job cannot be set up or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for job completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // "fs.default.name" is deprecated; "fs.defaultFS" is its current name.
        conf.set("fs.defaultFS", "hdfs://localhost:9000");

        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: CodeX <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "CodeX");
        job.setJarByClass(CodeX.class);
        job.setMapperClass(CodeX.Map.class);
        // Reduce must not be registered as a combiner: it emits formatted text
        // that the reducer cannot parse again, and an average of partial
        // averages would not equal the overall average.
        job.setReducerClass(CodeX.Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Every argument except the last is an input path; the last is the output.
        int outputIndex = otherArgs.length - 1;
        for (int i = 0; i < outputIndex; i++) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[outputIndex]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Emits one {@code col0+col1+col4} record per data row, keyed by the stock code.
     *
     * <p>The line counter is per mapper instance, so the stock code is taken from
     * the first line this instance receives.
     * NOTE(review): this presumably relies on each input file being handled as a
     * single split by one mapper instance - confirm for files larger than one
     * HDFS block.
     */
    public static class Map extends Mapper<Object, Text, Text, Text> {

        /** Number of leading lines that carry no price data. */
        private static final int HEADER_LINES = 2;

        /** Only lines with exactly this many whitespace-separated fields are data rows. */
        private static final int DATA_FIELD_COUNT = 7;

        private final Text record = new Text();
        private final Text stockKey = new Text();
        private int lineNumber = 0;

        public Map() {
        }

        /**
         * Processes one input line.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   the text of the line
         * @param context task context the record is written to
         * @throws IOException          if writing the record fails
         * @throws InterruptedException if the write is interrupted
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\\s+");
            this.lineNumber += 1;

            if (this.lineNumber == 1) {
                // The first token of the first line identifies the stock.
                this.stockKey.set("股票编码:" + fields[0]);
            }

            if (this.lineNumber > HEADER_LINES && fields.length == DATA_FIELD_COUNT) {
                // Column 0 is carried along but not used by the reducer (presumably
                // the trade date); columns 1 and 4 are the prices the reducer averages.
                this.record.set(fields[0] + "+" + fields[1] + "+" + fields[4]);
                context.write(this.stockKey, this.record);
            }
        }
    }

    /**
     * Averages the two price columns of all records that share a stock code and
     * writes them as a single line of text.
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        private static final String COUNTER_GROUP = "CodeX";
        private static final String MALFORMED_RECORDS = "MALFORMED_RECORDS";

        private final Text text = new Text();

        /**
         * Computes both averages for one stock.
         *
         * <p>Records that do not have three {@code +}-separated fields, or whose
         * price fields are not numeric, are skipped and counted in the
         * {@code CodeX/MALFORMED_RECORDS} counter instead of failing the job. If
         * no valid record remains, nothing is written for the key.
         *
         * @param key     the stock code key produced by the mapper
         * @param values  records of the form {@code col0+open+close}
         * @param context task context the result is written to
         * @throws IOException          if writing the result fails
         * @throws InterruptedException if the write is interrupted
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            double openSum = 0.0;
            double closeSum = 0.0;
            long count = 0;

            for (Text value : values) {
                String[] fields = value.toString().split("[+]");
                if (fields.length < 3) {
                    context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
                    continue;
                }
                double open;
                double close;
                try {
                    open = Double.parseDouble(fields[1]);
                    close = Double.parseDouble(fields[2]);
                } catch (NumberFormatException e) {
                    context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
                    continue;
                }
                // The x100 scaling is kept from the original so that results stay
                // numerically identical to earlier runs of this job.
                openSum += open * 100;
                closeSum += close * 100;
                count++;
            }

            if (count == 0) {
                // Nothing valid to average; avoid emitting NaN.
                return;
            }

            // 100.0 * count is evaluated in double, so it cannot overflow like int 100*n.
            double openPrise = roundToTwoDecimals(openSum / (100.0 * count));
            double closePrise = roundToTwoDecimals(closeSum / (100.0 * count));

            String result = "开盘平均价:" + Double.toString(openPrise)
                    + ", 收盘平均价:" + Double.toString(closePrise);
            this.text.set(result);
            context.write(key, this.text);
        }

        /** Rounds a value to two decimal places using {@link Math#round(double)}. */
        private static double roundToTwoDecimals(double value) {
            return (double) Math.round(value * 100) / 100;
        }
    }
}
四、运行程序后的统计结果:
在put文件时可能遇到的问题:
如果文件无法put到HDFS中,有可能是文件名中含有特殊符号(例如“#”)。这时只需将文件重命名即可;由于文件数量很多,可以用一条shell命令批量去除所有文件名中的干扰符号,这里我使用的命令是:rename 's/\#60/60/' *.txt
重命名前:
重命名后:
当你cat一个文件后会发现其中的中文是乱码的,这是因为文件是GBK编码,只需要转换为utf-8即可。
命令为:iconv -f gbk -t utf-8 1.txt > 2.txt