大数据统计大量股票开盘平均价和收盘平均价

大数据统计大量股票开盘和收盘的平均价

一、需要统计的文件:

二、单个文件内容:

三、程序:

程序来自小奶狗的博客

链接为:https://blog.csdn.net/pengyangyan/article/details/80115183

    package com.test4;  
    import java.io.IOException;  
    import java.util.Iterator;  
    import org.apache.hadoop.conf.Configuration;  
    import org.apache.hadoop.fs.Path;  
    import org.apache.hadoop.io.Text;  
    import org.apache.hadoop.mapreduce.Job;  
    import org.apache.hadoop.mapreduce.Mapper;  
    import org.apache.hadoop.mapreduce.Reducer;  
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  
    import org.apache.hadoop.util.GenericOptionsParser;  
      
      
    public class CodeX {  
      
        /** 
         * @param args 
         * @throws IOException  
         * @throws InterruptedException  
         * @throws ClassNotFoundException  
         */  
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {  
            // TODO Auto-generated method stub  
            Configuration conf = new Configuration();  
            conf.set("fs.default.name", "hdfs://localhost:9000");  
            String[] otherArgs = (new GenericOptionsParser(conf,args)).getRemainingArgs();  
            if(otherArgs.length<2){  
                System.err.println("Usage:CodeX<in><out>");  
                System.exit(2);  
            }  
            Job job = Job.getInstance(conf,"CodeX");  
            job.setJarByClass(CodeX.class);  
            job.setMapperClass(CodeX.Map.class);  
            System.out.println("Mapper over");  
    //      job.setCombinerClass(CodeX.Reduce.class);  
            job.setReducerClass(CodeX.Reduce.class);  
            System.out.println("Reduce over");  
            job.setOutputKeyClass(Text.class);  
            job.setOutputValueClass(Text.class);  
            System.out.println("all over");  
            for(int i = 0;i<otherArgs.length-1;i++){  
                FileInputFormat.addInputPath(job, new Path(otherArgs[i]));  
            }         
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length-1]));         
            System.exit(job.waitForCompletion(true)?0:1);  
      
        }  
        public static class Map extends Mapper<Object,Text,Text,Text>{  
              
            private Text text = new Text();   
            private Text keys = new Text();  
            private int no = 0;  
            public Map(){  
                  
            }  
            public void map(Object key,Text value,Context context)throws IOException,InterruptedException{  
                String line = value.toString();  
                this.no +=1;              
                System.out.println(this.no+line);             
                String[] lines = line.split("\\s+");  
                for(int i =0;i<lines.length;i++){  
                    System.out.print(lines[i]+" ~~");  
                }  
                if(this.no == 1){  
                      
                    this.keys.set("股票编码:"+lines[0]);              
                }  
                if(this.no > 2){  
                    if(lines.length == 7){  
                        this.text.set(lines[0]+"+"+lines[1]+"+"+lines[4]);   
                        System.out.println(this.no+"---->"+lines[0]+"+"+lines[1]+"+"+lines[4]);  
                        context.write(this.keys, this.text);  
                    }                 
                }         
                  
            }  
        }  
          
        public static class Reduce extends Reducer<Text,Text,Text,Text>{        
            private Text text = new Text();       
            public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{  
                    double sum1 = 0.0;  
                    double sum2 = 0.0;  
                    int n = 0;                
                    System.out.println("...................start"+key.toString());  
                    Iterator<Text> $it = values.iterator();  
                    while($it.hasNext()){                     
                        String record =$it.next().toString();  
                        System.out.println(n);                    
                        System.out.println("原始数据:"+record);  
                        n++;                      
                        System.out.println("第"+n+"次循环");  
                        String []result = record.split("[+]");  
                        System.out.println(Double.valueOf(result[1])+" "+Double.valueOf(result[2]));  
                        sum1 +=(Double.valueOf(result[1])*100);                   
                        sum2 +=(Double.valueOf(result[2])*100);                   
                        System.out.println(sum1/100+" "+sum2/100);                    
                    }  
                    System.out.println("最后的结果:"+sum1/100+" "+sum2/100);  
                    double openPrise = sum1/(100*n);  
                    double closePrise = sum2/(100*n);  
                    openPrise = (double)Math.round(openPrise*100)/100;  
                    closePrise = (double)Math.round(closePrise*100)/100;  
                    System.out.println("平均值:"+openPrise+" "+closePrise);  
                      
                    Double.toString(closePrise);  
                    String result ="开盘平均价:"+Double.toString(openPrise)+",   收盘平均价:"+Double.toString(closePrise);  
                    this.text.set(result);  
                    context.write(key, this.text);  
          
                  
            }  
        }  
      
    }  

四、运行程序后的统计结果:

在put文件时可能遇到的问题:

文件无法put到hdfs中,有可能是文件名里含有特殊符号(例如#)导致的,这时候只需要将文件重命名即可。由于文件数量很多,可以用一句shell命令批量去除所有文件名中的干扰符号,这里我输入的命令是:rename 's/\#60/60/' *.txt

重命名前:

重命名后:

当你cat一个文件后会发现其中的中文是乱码的,这是因为文件是GBK编码,只需要转换为utf-8即可。

命令为:iconv -f gbk -t utf-8 1.txt > 2.txt


猜你喜欢

转载自blog.csdn.net/weixin_42134141/article/details/80287614