移动平均的思路很简单:先通过二次排序,使每个键(例如股票代码)对应的记录按时间有序;然后对这些按时间排好序的值,依次用滑动窗口计算平均值。
移动平均算法
/**
 * Simple moving average over the most recent {@code period} numbers.
 *
 * <p>A fixed-size array is used as a circular queue: once it is full, each new
 * number overwrites the oldest slot and the running sum is adjusted accordingly.
 */
public class MovingAverage {
    /** Running sum of the numbers currently held in the window. */
    private double sum = 0.0;
    /** Capacity of the window. */
    private final int period;
    /** Backing storage for the window; unused slots hold 0.0. */
    private final double[] window;
    /** Index one past the slot that was written last. */
    private int pointer = 0;
    /** How many slots have been filled so far (never exceeds {@code period}). */
    private int size = 0;

    /**
     * Creates an empty window.
     *
     * @param period window capacity; must be at least 1
     * @throws IllegalArgumentException if {@code period} is less than 1
     */
    public MovingAverage(int period) {
        if (period < 1) {
            throw new IllegalArgumentException("period must be > 0");
        }
        this.period = period;
        this.window = new double[period];
    }

    /**
     * Pushes a number into the window, evicting the oldest one when the window
     * is already full.
     *
     * @param number the value to add
     */
    public void addNewNumber(double number) {
        sum += number;
        boolean windowFull = size >= period;
        if (windowFull) {
            // Wrap around and drop the value that is about to be overwritten.
            pointer %= period;
            sum -= window[pointer];
        } else {
            size++;
        }
        window[pointer] = number;
        pointer++;
    }

    /**
     * Renders the raw window contents and the current average.
     *
     * @return every slot of the backing array, each preceded by a space, then
     *         {@code ':'} and the average of the numbers currently in the window
     * @throws IllegalArgumentException if no number has been added yet
     */
    public String getMovingAverage() {
        if (size == 0) {
            throw new IllegalArgumentException("average is undefined");
        }
        StringBuilder rendered = new StringBuilder();
        for (double slot : window) {
            rendered.append(' ').append(slot);
        }
        return rendered.append(':').append(sum / size).toString();
    }
}
二次排序算法
/**
 * Key made of a name and a timestamp.
 *
 * <p>Instances order by name first and by ascending timestamp second. On the
 * wire the name is written as a UTF string followed by the timestamp as a long.
 */
public class CompositeKey implements WritableComparable<CompositeKey> {
    private String name;
    private long timestamp;

    /** Creates a key with an unset name and a zero timestamp. */
    public CompositeKey() {
    }

    /**
     * @param name      the name component
     * @param timestamp the timestamp component
     */
    public CompositeKey(String name, long timestamp) {
        this.name = name;
        this.timestamp = timestamp;
    }

    /** Replaces both components in one call. */
    public void set (String name, long timestamp) {
        this.name = name;
        this.timestamp = timestamp;
    }

    /** Reads the name, then the timestamp, in the order {@link #write} emits them. */
    public void readFields(DataInput in) throws IOException {
        name = in.readUTF();
        timestamp = in.readLong();
    }

    /** Writes the name as UTF, then the timestamp as a long. */
    public void write(DataOutput out) throws IOException {
        out.writeUTF(name);
        out.writeLong(timestamp);
    }

    /**
     * Compares by name, breaking ties with the timestamp.
     *
     * @return a negative, zero or positive value as this key sorts before,
     *         equal to, or after {@code other}
     */
    public int compareTo(CompositeKey other) {
        int byName = name.compareTo(other.name);
        return byName != 0 ? byName : Long.compare(timestamp, other.timestamp);
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public long getTimestamp() {
        return timestamp;
    }

    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }
}
/**
 * One observation of a time series: a timestamp and the value recorded at it.
 *
 * <p>Instances order by ascending timestamp only; the value does not take part
 * in the comparison. Serialized as a long (timestamp) followed by a double (value).
 */
public class TimeSeriesData implements WritableComparable<TimeSeriesData> {
    private long timestamp;
    private double value;

    /** Replaces both fields in one call. */
    public void set(long timestamp, double value) {
        this.timestamp = timestamp;
        this.value = value;
    }

    /** Reads the timestamp, then the value, matching {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        timestamp = in.readLong();
        value = in.readDouble();
    }

    /** Writes the timestamp as a long, then the value as a double. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(timestamp);
        out.writeDouble(value);
    }

    /**
     * Orders by timestamp.
     *
     * @return -1, 0 or 1 as this timestamp is less than, equal to, or greater
     *         than the other one
     */
    @Override
    public int compareTo(TimeSeriesData data) {
        return Long.compare(timestamp, data.timestamp);
    }

    /** Formats the observation as {@code (timestamp,value)}. */
    public String toString() {
        return "(" + timestamp + "," + value + ")";
    }

    public long getTimestamp() {
        return timestamp;
    }

    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }

    public double getValue() {
        return value;
    }

    public void setValue(double value) {
        this.value = value;
    }
}
/**
 * Comparator over {@link CompositeKey}: name first, then ascending timestamp.
 *
 * <p>NOTE(review): this class overrides {@code compare(Object, Object)}, while
 * NaturalKeyGroupingComparator overrides
 * {@code compare(WritableComparable, WritableComparable)}. Confirm which overload
 * the framework's sort path dispatches to; if it is the WritableComparable one,
 * the ordering in effect comes from {@code CompositeKey.compareTo} rather than
 * from this method.
 */
public class CompositeKeyComparator extends WritableComparator {
    protected CompositeKeyComparator() {
        super(CompositeKey.class, true);
    }

    /**
     * @param w1 left operand; must be a {@link CompositeKey}
     * @param w2 right operand; must be a {@link CompositeKey}
     * @return the name comparison when the names differ, otherwise -1, 0 or 1
     *         according to the timestamps
     */
    @Override
    public int compare(Object w1, Object w2) {
        CompositeKey left = (CompositeKey) w1;
        CompositeKey right = (CompositeKey) w2;
        int byName = left.getName().compareTo(right.getName());
        if (byName != 0) {
            return byName;
        }
        // Same name: fall back to chronological order.
        return Long.compare(left.getTimestamp(), right.getTimestamp());
    }
}
/**
 * Partitions records by the name part of the {@link CompositeKey} only, so that
 * every record sharing a name gets the same partition number regardless of its
 * timestamp.
 */
public class NaturalKeyPartitioner extends Partitioner<CompositeKey, TimeSeriesData> {
    /**
     * @param key                key whose name selects the partition
     * @param value              not used
     * @param numberOfPartitions total number of partitions
     * @return the absolute value of {@code hash(name) % numberOfPartitions}
     */
    @Override
    public int getPartition(CompositeKey key, TimeSeriesData value,
            int numberOfPartitions) {
        long remainder = hash(key.getName()) % numberOfPartitions;
        // The remainder's magnitude is below numberOfPartitions, so it fits an int.
        return Math.abs((int) remainder);
    }

    /**
     * 64-bit polynomial string hash, adapted from String.hashCode(): starts from
     * a fixed seed and folds in each character with multiplier 31.
     */
    static long hash(String str) {
        long h = 1125899906842597L;
        for (char c : str.toCharArray()) {
            h = 31 * h + c;
        }
        return h;
    }
}
/**
 * Comparator that looks only at the name of a {@link CompositeKey} and ignores
 * its timestamp, so keys that share a name compare as equal.
 */
public class NaturalKeyGroupingComparator extends WritableComparator {
    protected NaturalKeyGroupingComparator() {
        super(CompositeKey.class, true);
    }

    /**
     * @param w1 left operand; must be a {@link CompositeKey}
     * @param w2 right operand; must be a {@link CompositeKey}
     * @return the result of comparing the two names
     */
    @Override
    public int compare(WritableComparable w1, WritableComparable w2) {
        String leftName = ((CompositeKey) w1).getName();
        String rightName = ((CompositeKey) w2).getName();
        return leftName.compareTo(rightName);
    }
}
mapreduce
/**
 * Parses comma-separated lines of the form {@code name,date,value} and emits
 * a {@code CompositeKey(name, timestamp)} with a
 * {@code TimeSeriesData(timestamp, value)} for each well-formed line.
 *
 * <p>Malformed lines are skipped rather than failing the task: empty lines,
 * lines that do not split into exactly three fields, lines whose date yields
 * {@code null} from {@code DateUtil.getDate}, and lines whose value is not a
 * parseable number.
 */
public class SortByMRF_MovingAverageMapper extends
        Mapper<LongWritable, Text, CompositeKey, TimeSeriesData> {
    // Reused across map() calls; both are overwritten before every write.
    private final CompositeKey reducerKey = new CompositeKey();
    private final TimeSeriesData reducerValue = new TimeSeriesData();

    /**
     * @param key     not used
     * @param value   one input line
     * @param context sink for the emitted pair
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String record = value.toString();
        if ((record == null) || (record.length() == 0)) {
            return;
        }
        String[] tokens = record.split(",");
        if (tokens.length != 3) {
            return;
        }
        Date date = DateUtil.getDate(tokens[1]);
        if (date == null) {
            // A null date is treated as an unparseable record.
            return;
        }
        final double observed;
        try {
            observed = Double.parseDouble(tokens[2]);
        } catch (NumberFormatException ignored) {
            // Skip a bad value the same way a bad date or field count is
            // skipped, instead of letting one bad line abort the whole task.
            return;
        }
        long timestamp = date.getTime();
        reducerKey.set(tokens[0], timestamp);
        reducerValue.set(timestamp, observed);
        context.write(reducerKey, reducerValue);
    }
}
二次排序结束后,在 Reducer 中计算移动平均值
/**
 * For each group of values, feeds them one by one into a {@link MovingAverage}
 * and writes one output record per value.
 *
 * <p>The output key is the name of the incoming key; the output value is the
 * value's date string, a comma, and the string returned by
 * {@code MovingAverage.getMovingAverage()} after that value was added.
 */
public class SortByMRF_MovingAverageReducer extends
        Reducer<CompositeKey, TimeSeriesData, Text, Text> {
    /** Number of values the moving-average window holds. */
    int windowSize = 5;

    /**
     * @param key     key of the current group; only its name is used
     * @param values  values of the group, consumed in iteration order
     * @param context sink for the output records
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(CompositeKey key, Iterable<TimeSeriesData> values,
            Context context) throws IOException, InterruptedException {
        // One pair of Text holders per reduce() call, reused for every record.
        final Text nameOut = new Text();
        final Text lineOut = new Text();
        // A fresh window per group, so averages never span two groups.
        final MovingAverage average = new MovingAverage(this.windowSize);
        for (TimeSeriesData point : values) {
            average.addNewNumber(point.getValue());
            final String windowAndAverage = average.getMovingAverage();
            final String day = DateUtil.getDateAsString(point.getTimestamp());
            lineOut.set(day + "," + windowAndAverage);
            nameOut.set(key.getName());
            context.write(nameOut, lineOut);
        }
    }
}
/**
 * Driver that configures and runs the moving-average job: the mapper and reducer
 * above, partitioning and grouping by name, and sorting by name then timestamp.
 */
public class SortByMRF_MovingAverageTaskTest {
    private static Log log = null;
    static {
        // log4j is configured before the logger is looked up.
        DOMConfigurator.configureAndWatch("./conf/log4j.xml");
        log = LogFactory.getLog("default");
    }

    /**
     * Name constant. Not referenced in this class; the job itself is created
     * under the name "SecondarySort".
     */
    public static final String JOB_NAME = "RelationFilterMR";

    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT_PATH = "C:\\demo\\06\\input.txt";
    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "C:\\demo\\06\\out";

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path; the built-in
     *             defaults are used for whichever is missing
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf1 = new Configuration();
        System.setProperty("hadoop.home.dir", "D:\\hadoop-2.5.2");
        // conf1.set("mapreduce.app-submission.cross-platform", "true");
        Job job = Job.getInstance(conf1, "SecondarySort");
        // Lets the framework locate the jar holding the job classes when the
        // job is not run in-process.
        job.setJarByClass(SortByMRF_MovingAverageTaskTest.class);
        job.setMapperClass(SortByMRF_MovingAverageMapper.class);
        job.setReducerClass(SortByMRF_MovingAverageReducer.class);
        job.setMapOutputKeyClass(CompositeKey.class);
        job.setMapOutputValueClass(TimeSeriesData.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setPartitionerClass(NaturalKeyPartitioner.class);
        job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
        job.setSortComparatorClass(CompositeKeyComparator.class);
        // Number of reduce tasks.
        job.setNumReduceTasks(1);
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        if (job.waitForCompletion(true)) {
            log.info("MR run successfully");
        } else {
            log.error("MR run failed");
            // Report the failure to the calling process instead of exiting with 0.
            System.exit(1);
        }
    }
}
输入
GOOG,2004-11-04,184.70
GOOG,2004-11-03,191.67
GOOG,2004-11-02,194.87
AAPL,2013-10-9,486.59
AAPL,2013-10-8,480.94
AAPL,2013-10-7,487.75
AAPL,2013-10-4,483.03
AAPL,2013-10-3,483.41
IBM,2013-09-30,185.18
IBM,2013-09-30,186.92
IBM,2013-09-30,190.22
IBM,2013-09-30,189.47
GOOG,2013-07-19,896.60
GOOG,2013-07-18,910.68
GOOG,2013-07-17,918.55
输出
AAPL 2013-10-03, 483.41 0.0 0.0 0.0 0.0:483.41
AAPL 2013-10-04, 483.41 483.03 0.0 0.0 0.0:483.22
AAPL 2013-10-07, 483.41 483.03 487.75 0.0 0.0:484.73
AAPL 2013-10-08, 483.41 483.03 487.75 480.94 0.0:483.7825
AAPL 2013-10-09, 483.41 483.03 487.75 480.94 486.59:484.34400000000005
GOOG 2004-11-02, 194.87 0.0 0.0 0.0 0.0:194.87
GOOG 2004-11-03, 194.87 191.67 0.0 0.0 0.0:193.26999999999998
GOOG 2004-11-04, 194.87 191.67 184.7 0.0 0.0:190.41333333333333
GOOG 2013-07-17, 194.87 191.67 184.7 918.55 0.0:372.4475
GOOG 2013-07-18, 194.87 191.67 184.7 918.55 910.68:480.09399999999994
GOOG 2013-07-19, 896.6 191.67 184.7 918.55 910.68:620.4399999999999
IBM 2013-09-30, 185.18 0.0 0.0 0.0 0.0:185.18
IBM 2013-09-30, 185.18 186.92 0.0 0.0 0.0:186.05
IBM 2013-09-30, 185.18 186.92 190.22 0.0 0.0:187.44000000000003
IBM 2013-09-30, 185.18 186.92 190.22 189.47 0.0:187.94750000000002