Association Rules

Raw data
Order ID    Product
1    a
1    b
1    c
2    a
2    d
2    c
Single-product counts (computed over the full data set; the raw data above is only an excerpt)
Product    Occurrences
a    5
b    5
c    6
d    4
e    1
Product pairs (generated within a single order; the three pairs below come from order 1: a, b, c)
Pair    Count
a-b    1
a-c    1
b-c    1
Final result
Pair    Pair count    Product 1    Its count    Product 2    Its count
a-b    3    a    5    b    5
a-c    3    a    5    c    6
a-d    3    a    5    d    4
b-c    4    b    5    c    6
b-d    1    b    5    d    4
b-e    1    b    5    e    1
c-d    3    c    6    d    4
c-e    1    c    6    e    1
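
How the pair table is produced: within each order the products are collected into a sorted set, and every unordered pair is emitted once as "smaller-larger". Below is a minimal plain-Java sketch of just this step (the class and variable names are illustrative, not part of the original code); run on order 1's items a, b, c it prints exactly the three pairs listed under "Product pairs".

import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

public class PairDemo {
    public static void main(String[] args) {
        // Products of order 1 from the sample data; a TreeSet keeps them sorted,
        // so each pair is generated once and always as "smaller-larger".
        Set<String> items = new TreeSet<String>(Arrays.asList("a", "b", "c"));
        int i = 0;
        for (String s : items) {
            int j = 0;
            for (String s2 : items) {
                if (i < j) {
                    System.out.println(s + "-" + s2 + "\t1"); // a-b, a-c, b-c
                }
                j++;
            }
            i++;
        }
    }
}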
Algorithm implementation
package com.anyec.join;

import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;

public class RefProducts {
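    // Job 1 mapper ("single"): each input line is "orderId product";
    // emit (product, 1) so the reducer can count how often each product occurs.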
    public static class SingleMapper extends
            Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String ivalue = value.toString();
            String[] keyvalue = ivalue.split(" ");
            context.write(new Text(keyvalue[1]), new Text("1"));
        }

    }

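    // Job 1 reducer: sum the 1s per product and cache the total in Redis
    // (keyed by product name) so job 3 can join against it later.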
    public static class SingleReduce extends Reducer<Text, Text, Text, Text> {
        JedisPool pool;
        Jedis jedis;
        protected void setup(Context context) throws IOException,
                InterruptedException {
            pool = new JedisPool("nstorma");
            jedis = pool.getResource();
        }

        protected void cleanup(Context context) throws IOException,
                InterruptedException {
            pool.returnResource(jedis);
            pool.destroy();
        }

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (Text v : values) {
                count = count + Integer.parseInt(v.toString());
            }
            jedis.set(key.toString(), count+"");
            context.write(key, new Text(count + ""));
        }

    }

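    // Job 2 mapper ("double"): emit (orderId, product) so that all products
    // belonging to the same order arrive at one reduce call.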
    public static class DoubleMapper extends
            Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String ivalue = value.toString();
            String[] keyvalue = ivalue.split(" ");
            context.write(new Text(keyvalue[0]), new Text(keyvalue[1]));
        }

    }

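    // Job 2 reducer: collect the order's products in a sorted set and emit
    // every unordered pair once, as "smaller-larger" with value 1.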
    public static class DoubleReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            Set<String> set = new TreeSet<String>();
            for (Text t : values) {
                set.add(t.toString());
            }
            int i = 0;
            for (String s : set) {
                int j = 0;
                for (String s2 : set) {
                    if (i < j) {
                        context.write(new Text(s + "-" + s2), new Text("1"));
                    }
                    j++;
                }
                i++;
            }
        }
    }

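    // Job 3 mapper ("doubleRef"): re-read job 2's output and emit (pair, 1).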
    public static class DoubleRefMapper extends
            Mapper<LongWritable, Text, Text, Text> {
      
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String ivalue = value.toString();
            // Job 2's output is written by TextOutputFormat, whose default
            // key/value separator is a tab, so split on "\t" rather than spaces.
            String[] keyvalue = ivalue.split("\t");
            context.write(new Text(keyvalue[0]), new Text(keyvalue[1]));
        }

    }

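    // Job 3 reducer: sum the pair counts, then look up each product's single
    // count in Redis and append both to the output line.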
    public static class DoubleRefReduce extends Reducer<Text, Text, Text, Text> {
        JedisPool pool;
        Jedis jedis ;
        protected void setup(Context context) throws IOException,
                InterruptedException {
            pool = new JedisPool("nstorma");
            jedis = pool.getResource();
        }

        protected void cleanup(Context context) throws IOException,
                InterruptedException {
            pool.returnResource(jedis);
            pool.destroy();
        }
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (Text t : values) {
                count = count + Integer.parseInt(t.toString());
            }
            String[] iv=key.toString().split("-");
            int a=Integer.parseInt(jedis.get(iv[0]));
            int b=Integer.parseInt(jedis.get(iv[1]));
      
            context.write(key, new Text(count + "\t"+iv[0]+"\t"+a+"\t"+iv[1]+"\t"+b));
        }
    }

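    // Placeholder mapper/reducer for a possible fourth job; they are empty
    // and are not wired up in main().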
    public static class RefProductMapper extends
            Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

        }

    }

    public static class RefProductReduce extends
            Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {

        }

    }

    public static void main(String[] args) throws IOException,
            ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // System.setProperty("hadoop.home.dir","D:/soft/hadoop-2.2.0");
        // System.setProperty("HADOOP_HOME","D:/soft/hadoop-2.2.0");
        // System.setProperty("HADOOP_USER_NAME","hadoop");

        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        // if (otherArgs.length != 2) {
        // System.err
        // .println("Usage: com.anyec.join.RefProducts <in> <out>");
        // System.exit(2);
        // }
        String basePath = conf.get("fs.defaultFS");
        conf.set("mapreduce.framework.name", "local");// </value> //yarn
        // conf.set("mapreduce.framework.name", "yarn");
        // conf.set("yarn.resourcemanager.address","master");
        // conf.set("mapred.remote.os","Linux");
        conf.set(
                "mapreduce.application.classpath",
                "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*");

        if (basePath.endsWith("/"))
            basePath = basePath + "mr";
        else
            basePath = basePath + "/" + "mr";
        String inpath = basePath + "/in/ref/";
        // String inpath = "file:///E:/stock/dpdata";
        String outpath = basePath + "/out/ref/sigle";
        String outpath2 = basePath + "/out/ref/double";
        String outpath3 = basePath + "/out/ref/doubleref";
        String outpath4 = basePath + "/out/ref/doublecount";
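        // outpath4 belongs to the unimplemented fourth job (RefProductMapper/Reduce).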
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(outpath), true);
        fs.delete(new Path(outpath2), true);
        fs.delete(new Path(outpath3), true);
        fs.delete(new Path(outpath4), true);
        // HdfsClient.rm(conf, outpath);
        // if (f.exists()) {
        // com.anyec.common.DelOutPut.del(f);
        // }
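        // Job 1 ("single"): per-product occurrence counts, also stored in Redis.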
        Job job = new Job(conf, "single");
        job.setJarByClass(RefProducts.class);
        job.setUser("hadoop");
        job.setMapperClass(RefProducts.SingleMapper.class);
        job.setReducerClass(RefProducts.SingleReduce.class);
        // job.setNumReduceTasks(100);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // job.setOutputFormatClass(AlphabetOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(inpath));
        FileOutputFormat.setOutputPath(job, new Path(outpath));
        job.waitForCompletion(true);
        // System.exit(job.waitForCompletion(true) ? 0 : 1);
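        // Job 2 ("double"): product pairs generated within each order.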
        job = new Job(conf, "double");
        job.setJarByClass(RefProducts.class);
        job.setUser("hadoop");
        job.setMapperClass(RefProducts.DoubleMapper.class);
        job.setReducerClass(RefProducts.DoubleReduce.class);
        // job.setNumReduceTasks(100);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // job.setOutputFormatClass(AlphabetOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(inpath));
        FileOutputFormat.setOutputPath(job, new Path(outpath2));
        job.waitForCompletion(true);
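        // Job 3 ("doubleRef"): total count per pair, joined with the single counts from Redis.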
        job = new Job(conf, "doubleRef");
        job.setJarByClass(RefProducts.class);
        job.setUser("hadoop");
        job.setMapperClass(RefProducts.DoubleRefMapper.class);
        job.setReducerClass(RefProducts.DoubleRefReduce.class);
        // job.setNumReduceTasks(100);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // job.setOutputFormatClass(AlphabetOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(outpath2));
        FileOutputFormat.setOutputPath(job, new Path(outpath3));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

Reposted from qqggcc.iteye.com/blog/2028331