数据算法-hadoop6 移动平均

移动平均的思路很简单:先通过二次排序,让同一个 key(例如股票代码)的数据按时间升序到达 reducer,然后用固定大小的滑动窗口依次计算最近若干个值的平均值。
移动平均算法

// Simple moving average: a fixed-size array is used as a circular queue
// holding the most recent `period` values.
public class MovingAverage {

    /** Running total of the values currently held in the window. */
    private double sum = 0.0;
    /** Maximum number of values the window can hold. */
    private final int period;
    /** Backing storage; slots not yet written keep the array default of 0.0. */
    private final double[] window;
    /** Index of the next slot to write (wrapped once the window is full). */
    private int pointer = 0;
    /** Number of values stored so far, never more than {@code period}. */
    private int size = 0;

    /**
     * Creates an empty window.
     *
     * @param period window length; must be at least 1
     * @throws IllegalArgumentException if {@code period} is less than 1
     */
    public MovingAverage(int period) {
        if (period <= 0) {
            throw new IllegalArgumentException("period must be > 0");
        }
        this.period = period;
        this.window = new double[period];
    }

    /**
     * Adds a value to the window. Once the window is full, the oldest value
     * is overwritten and removed from the running sum.
     *
     * @param number the value to add
     */
    public void addNewNumber(double number) {
        // The new value is added before the evicted one is subtracted; this
        // order is kept so floating-point results do not change.
        sum += number;
        if (size == period) {
            // Window is full: wrap the write index and evict the oldest value.
            pointer %= period;
            sum -= window[pointer];
        } else {
            size++;
        }
        window[pointer++] = number;
    }

    /**
     * Renders the raw window contents followed by the current average.
     *
     * @return every window slot prefixed by a single space (unfilled slots
     *         print as 0.0), then {@code ':'}, then {@code sum / size}
     * @throws IllegalArgumentException if no value has been added yet
     */
    public String getMovingAverage() {
        if (size == 0) {
            throw new IllegalArgumentException("average is undefined");
        }
        StringBuilder rendered = new StringBuilder();
        for (double slot : window) {
            rendered.append(' ').append(slot);
        }
        return rendered.append(':').append(sum / size).toString();
    }
}

二次排序算法

/**
 * Composite map-output key made of a name and a timestamp.
 * Ordering is by name first, then by ascending timestamp.
 */
public class CompositeKey implements WritableComparable<CompositeKey> {

    private String name;
    private long timestamp;

    /** No-argument constructor; leaves {@code name} null and {@code timestamp} 0. */
    public CompositeKey() {
    }

    /**
     * @param name      the name component of the key
     * @param timestamp the timestamp component of the key
     */
    public CompositeKey(String name, long timestamp) {
        this.name = name;
        this.timestamp = timestamp;
    }

    /** Overwrites both components of this key in place. */
    public void set (String name, long timestamp) {
        this.name = name;
        this.timestamp = timestamp;
    }

    /** Reads the name (UTF) followed by the timestamp (long), mirroring {@link #write}. */
    public void readFields(DataInput in) throws IOException {
        name = in.readUTF();
        timestamp = in.readLong();
    }

    /** Writes the name (UTF) followed by the timestamp (long). */
    public void write(DataOutput out) throws IOException {
        out.writeUTF(name);
        out.writeLong(timestamp);
    }

    /**
     * Compares by name, breaking ties with the timestamp.
     *
     * @return negative, zero, or positive as this key sorts before, equal to,
     *         or after {@code other}
     */
    public int compareTo(CompositeKey other) {
        int byName = name.compareTo(other.name);
        if (byName != 0) {
            return byName;
        }
        return Long.compare(timestamp, other.timestamp);
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public long getTimestamp() {
        return timestamp;
    }

    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }

}
/**
 * A single (timestamp, value) observation. Natural ordering is by ascending
 * timestamp only; the value does not take part in comparisons.
 */
public class TimeSeriesData implements WritableComparable<TimeSeriesData> {
    private long timestamp;
    private double value;

    /** Overwrites both fields in place. */
    public void set(long timestamp, double value) {
        this.timestamp = timestamp;
        this.value = value;
    }

    /** Reads the timestamp (long) followed by the value (double), mirroring {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        timestamp = in.readLong();
        value = in.readDouble();
    }

    /** Writes the timestamp (long) followed by the value (double). */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(timestamp);
        out.writeDouble(value);
    }

    /**
     * @return negative, zero, or positive as this timestamp is earlier than,
     *         equal to, or later than {@code data}'s timestamp
     */
    @Override
    public int compareTo(TimeSeriesData data) {
        return Long.compare(timestamp, data.timestamp);
    }

    /** @return the pair formatted as {@code (timestamp,value)} */
    @Override
    public String toString() {
        return "(" + timestamp + "," + value + ")";
    }

    public long getTimestamp() {
        return timestamp;
    }

    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }

    public double getValue() {
        return value;
    }

    public void setValue(double value) {
        this.value = value;
    }

}
/**
 * Comparator for {@link CompositeKey}: orders by name, then by ascending
 * timestamp.
 *
 * NOTE(review): Hadoop's WritableComparator appears to route its byte-level
 * compare through compare(WritableComparable, WritableComparable) rather than
 * the (Object, Object) overload overridden here, so this method may not be the
 * one invoked during the shuffle sort. The resulting order would still match,
 * because CompositeKey.compareTo applies the same name-then-timestamp rule.
 * Confirm against the Hadoop version in use.
 */
public class CompositeKeyComparator extends WritableComparator {
    protected CompositeKeyComparator() {
        // "true" asks the base class to create key instances for comparison.
        super(CompositeKey.class, true);
    }

    /**
     * @param w1 first key; must be a {@link CompositeKey}
     * @param w2 second key; must be a {@link CompositeKey}
     * @return negative, zero, or positive as {@code w1} sorts before, equal
     *         to, or after {@code w2}
     */
    @Override
    public int compare(Object w1, Object w2) {
        CompositeKey left = (CompositeKey) w1;
        CompositeKey right = (CompositeKey) w2;

        int byName = left.getName().compareTo(right.getName());
        if (byName != 0) {
            return byName;
        }
        // Same name: fall back to chronological order.
        return Long.compare(left.getTimestamp(), right.getTimestamp());
    }

}
/**
 * Partitions records by the name part of the composite key only, so every
 * record with the same name gets the same partition number regardless of its
 * timestamp.
 */
public class NaturalKeyPartitioner extends Partitioner<CompositeKey, TimeSeriesData> {
    /**
     * @param key                composite key; only its name is hashed
     * @param value              ignored
     * @param numberOfPartitions number of partitions to spread keys over
     * @return the absolute value of {@code hash(name) % numberOfPartitions}
     */
    @Override
    public int getPartition(CompositeKey key, TimeSeriesData value,
            int numberOfPartitions) {
        // The remainder's magnitude is below numberOfPartitions, so narrowing
        // to int loses nothing.
        long remainder = hash(key.getName()) % numberOfPartitions;
        return Math.abs((int) remainder);
    }

    /**
     * 64-bit string hash adapted from String.hashCode(): a base-31 polynomial
     * over the characters, starting from a large seed constant.
     */
    static long hash(String str) {
        long acc = 1125899906842597L; // seed (described as prime by the original author)
        for (char ch : str.toCharArray()) {
            acc = 31 * acc + ch;
        }
        return acc;
    }
}
/**
 * Grouping comparator that looks only at the name part of a
 * {@link CompositeKey}; timestamps are ignored, so keys that share a name
 * compare as equal.
 */
public class NaturalKeyGroupingComparator extends WritableComparator {
    protected NaturalKeyGroupingComparator() {
        // "true" asks the base class to create key instances for comparison.
        super(CompositeKey.class, true);
    }

    /**
     * @param w1 first key; must be a {@link CompositeKey}
     * @param w2 second key; must be a {@link CompositeKey}
     * @return the result of comparing the two names as strings
     */
    @Override
    public int compare(WritableComparable w1, WritableComparable w2) {
        String leftName = ((CompositeKey) w1).getName();
        String rightName = ((CompositeKey) w2).getName();
        return leftName.compareTo(rightName);
    }
}

mapreduce

/**
 * Parses comma-separated lines of the form {@code name,date,value} and emits
 * a (name, timestamp) composite key with a (timestamp, value) payload.
 * Malformed records are skipped rather than failing the task.
 */
public class SortByMRF_MovingAverageMapper extends
        Mapper<LongWritable, Text, CompositeKey, TimeSeriesData> {
    // Reused across map() calls; context.write is handed the same instances each time.
    private final CompositeKey reducerKey = new CompositeKey();
    private final TimeSeriesData reducerValue = new TimeSeriesData();

    /**
     * Emits one output pair per well-formed input line.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one input line
     * @param context sink for the emitted pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String record = value.toString();
        if (record == null || record.isEmpty()) {
            return;
        }

        String[] tokens = record.split(",");
        if (tokens.length != 3) {
            // Not a name,date,value triple.
            return;
        }

        // A null date is treated as "unparseable" and the record is skipped.
        Date date = DateUtil.getDate(tokens[1]);
        if (date == null) {
            return;
        }

        final double observed;
        try {
            observed = Double.parseDouble(tokens[2]);
        } catch (NumberFormatException ignored) {
            // Skip a non-numeric value like any other malformed record,
            // instead of letting one bad line abort the whole task.
            return;
        }

        long timestamp = date.getTime();
        reducerKey.set(tokens[0], timestamp);
        reducerValue.set(timestamp, observed);
        context.write(reducerKey, reducerValue);
    }
}

二次排序完成后,在 reducer 中按时间顺序计算移动平均值

/**
 * Computes a moving average over the values of each reduce group and writes
 * one output line per value.
 */
public class SortByMRF_MovingAverageReducer extends
        Reducer<CompositeKey, TimeSeriesData, Text, Text> {
    /** Window length; 5 unless overridden through the job configuration. */
    int windowSize = 5;

    // Reused for every record written by this reducer instance.
    private final Text outputKey = new Text();
    private final Text outputValue = new Text();

    /**
     * Reads the window length from the job configuration property
     * {@code moving.average.window.size}, keeping the current value (5 by
     * default) when the property is not set.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        this.windowSize = context.getConfiguration()
                .getInt("moving.average.window.size", this.windowSize);
    }

    /**
     * Feeds each value of the group into a fresh {@link MovingAverage} and,
     * after every value, writes {@code name -> date,movingAverage}.
     *
     * @param key     composite key of the group; only its name is written out
     * @param values  the group's values, consumed in iteration order
     * @param context sink for the output records
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(CompositeKey key, Iterable<TimeSeriesData> values,
            Context context) throws IOException, InterruptedException {
        MovingAverage ma = new MovingAverage(this.windowSize);
        // The same name is written for every record of this call, so set it once.
        outputKey.set(key.getName());
        for (TimeSeriesData data : values) {
            ma.addNewNumber(data.getValue());
            String movingAverage = ma.getMovingAverage();
            String dateAsString = DateUtil.getDateAsString(data.getTimestamp());
            outputValue.set(dateAsString + "," + movingAverage);
            context.write(outputKey, outputValue);
        }
    }
}
/**
 * Driver that configures and runs the moving-average MapReduce job with
 * hard-coded local input and output paths.
 */
public class SortByMRF_MovingAverageTaskTest {
    private static Log log = null;
    static {
        // Configure log4j from the XML file before obtaining the logger.
        DOMConfigurator.configureAndWatch("./conf/log4j.xml");
        log = LogFactory.getLog("default");
    }

    /**
     * Job name constant. It is not referenced in this class; the job below is
     * created with the literal name "SecondarySort".
     */
    public static final String JOB_NAME = "RelationFilterMR";

    /**
     * Entry point.
     *
     * @param args command-line arguments; not used
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {

        Configuration conf1 = new Configuration();
        System.setProperty("hadoop.home.dir", "D:\\hadoop-2.5.2");

        Job job = Job.getInstance(conf1, "SecondarySort");
        configureProcessing(job);
        configureSecondarySort(job);

        // A single reduce task is used.
        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path("C:\\demo\\06\\input.txt"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\demo\\06\\out"));

        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            log.error("MR run failed");
            return;
        }
        log.info("MR run successfully");
    }

    /** Registers the mapper and reducer and their key/value types. */
    private static void configureProcessing(Job job) {
        job.setMapperClass(SortByMRF_MovingAverageMapper.class);
        job.setReducerClass(SortByMRF_MovingAverageReducer.class);

        job.setMapOutputKeyClass(CompositeKey.class);
        job.setMapOutputValueClass(TimeSeriesData.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
    }

    /** Registers the partitioner, grouping comparator and sort comparator. */
    private static void configureSecondarySort(Job job) {
        job.setPartitionerClass(NaturalKeyPartitioner.class);
        job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
        job.setSortComparatorClass(CompositeKeyComparator.class);
    }

}

输入

GOOG,2004-11-04,184.70
GOOG,2004-11-03,191.67
GOOG,2004-11-02,194.87
AAPL,2013-10-9,486.59
AAPL,2013-10-8,480.94
AAPL,2013-10-7,487.75
AAPL,2013-10-4,483.03
AAPL,2013-10-3,483.41
IBM,2013-09-30,185.18
IBM,2013-09-30,186.92
IBM,2013-09-30,190.22
IBM,2013-09-30,189.47
GOOG,2013-07-19,896.60
GOOG,2013-07-18,910.68
GOOG,2013-07-17,918.55

输出

AAPL    2013-10-03, 483.41 0.0 0.0 0.0 0.0:483.41
AAPL    2013-10-04, 483.41 483.03 0.0 0.0 0.0:483.22
AAPL    2013-10-07, 483.41 483.03 487.75 0.0 0.0:484.73
AAPL    2013-10-08, 483.41 483.03 487.75 480.94 0.0:483.7825
AAPL    2013-10-09, 483.41 483.03 487.75 480.94 486.59:484.34400000000005
GOOG    2004-11-02, 194.87 0.0 0.0 0.0 0.0:194.87
GOOG    2004-11-03, 194.87 191.67 0.0 0.0 0.0:193.26999999999998
GOOG    2004-11-04, 194.87 191.67 184.7 0.0 0.0:190.41333333333333
GOOG    2013-07-17, 194.87 191.67 184.7 918.55 0.0:372.4475
GOOG    2013-07-18, 194.87 191.67 184.7 918.55 910.68:480.09399999999994
GOOG    2013-07-19, 896.6 191.67 184.7 918.55 910.68:620.4399999999999
IBM 2013-09-30, 185.18 0.0 0.0 0.0 0.0:185.18
IBM 2013-09-30, 185.18 186.92 0.0 0.0 0.0:186.05
IBM 2013-09-30, 185.18 186.92 190.22 0.0 0.0:187.44000000000003
IBM 2013-09-30, 185.18 186.92 190.22 189.47 0.0:187.94750000000002

猜你喜欢

转载自blog.csdn.net/jshazhang/article/details/78366727
今日推荐