多项式回归模型（Office Prices）

题目：https://www.hackerrank.com/challenges/predicting-office-space-price

分析：还是上次的房价预测题目，指明要用多项式回归拟合。在多元多项式拟合时候，目标函数表示如下

对其目标函数求偏导得到

很容易写出代码。

代码：

#coding:utf-8

import math

class Data:
	def __init__(self):
		self.x = []
		self.y = 0.0

def makeMatrix(row, col, fill = 0.0):
	mat = []
	for i in range(row):
		mat.append([fill] * col)
	return mat

def WX(d, w, b):
	res = 0.0
	for k in range(len(d.x)):
		for j in range(b + 1):
			res += w[k][j] * math.pow(d.x[k], j)
	return res

def Gradient(d, w, f, b, alpha):
	for k in range(f):
		for j in range(b + 1):
			t1, t2 = 0.0, 0.0
			for i in range(len(d)):
				t1 += (WX(d[i], w, b) - d[i].y) * math.pow(d[i].x[k], j)
			w[k][j] -= alpha * t1

def getValues(d, w, b):
	res = 0.0
	for i in range(len(d)):
		tmp = WX(d[i], w, b)
		res += 0.5 * (d[i].y - tmp) * (d[i].y - tmp)
	return res

def Iterator(d, w, f, b):
	alpha = 0.003
	delta = 0.5
	oldVal = getValues(d, w, b)
	Gradient(d, w, f, b, alpha)
	newVal = getValues(d, w, b)
	while abs(oldVal - newVal) > delta:
		oldVal = newVal
		Gradient(d, w, f, b, alpha)
		newVal = getValues(d, w, b)

def main():
	while True:
		try:
			F, N = map(int, raw_input().split())
			d = []
			b = 5
			w = makeMatrix(F, b + 1)
			for i in range(0, N):
				t = Data()
				t.x = map(float, raw_input().split())
				t.y = t.x.pop()
				d.append(t)
			Iterator(d, w, F, b)
			N = int(raw_input())
			for i in range(0, N):
				t = Data()
				t.x = map(float, raw_input().split())
				print '%.2f'% WX(t, w, b)
		except EOFError:
			break

if __name__ == '__main__':
	main()

不过，上述代码得到的结果偏差比较大，需要重新考虑。除了上述方式外，还有一种特征组合方法效果不错。

代码：

#include <iostream>
#include <string.h>
#include <fstream>
#include <stdio.h>
#include <math.h>
#include <vector>
 
#define Vector vector
using namespace std;
 
struct Data
{
    Vector<double> x;
    double y;
};

double WX(const Data& d, const Vector<double>& w)
{
    double ans = 0;
    for(int i = 0; i < w.size(); i++)
        ans += w[i] * d.x[i];
    return ans;
}

void Gradient(const Vector<Data>& d, Vector<double> &w, double alpha)
{
    for(int i = 0; i < w.size(); i++)
    {
        double tmp = 0;
        for(int j = 0; j < d.size(); j++)
			tmp += alpha * d[j].x[i] * (WX(d[j], w) - d[j].y);
        w[i] -= tmp;
    }
}

double getValues(const Vector<Data>& d, Vector<double> w)
{
	double res = 0;
	for(int i = 0; i < d.size(); i++)
	{
		double tmp = WX(d[i], w);
		res += fabs(d[i].y - tmp);
	}
	return res;
}

void Iterator(const Vector<Data>& d, Vector<double> &w)
{
	double alpha = 0.3 / d.size();
	double delta = 0.5;
	double oldVal = getValues(d, w);  
	Gradient(d, w, alpha);  
	double newVal = getValues(d, w); 
	while(fabs(oldVal - newVal) > delta)
	{
		oldVal = newVal;
		Gradient(d, w, alpha);
		newVal = getValues(d, w);
	}
}

Vector<double> getFeatures(Vector<double> x)
{
	Vector<double> res;
	int n = x.size();
	for(int i = 0; i < n; i++)
		for(int j = i; j < n; j++)
			for(int k = j; k < n; k++)
				res.push_back(x[i] * x[j] * x[k]);
	return res;
}
 
int main()
{
	int F, N;
    Vector<double> w;
    Vector<Data> d;
    while(scanf("%d %d", &F, &N) != EOF)
	{
		d.clear();
		w.clear();
		int features = 0;
        for(int i = 0; i < N; i++)
		{
			Data t;
			double _x, _y;
			t.x.push_back(1);
			for(int j = 1; j <= F; j++)
			{
				scanf("%lf", &_x);
				t.x.push_back(_x);
			}
			t.x = getFeatures(t.x);
			features = t.x.size();
			scanf("%lf", &_y);
			t.y = _y;
			d.push_back(t);
		}
		for(int i = 0; i < features; i++)
			w.push_back(0);
		Iterator(d, w);
		d.clear();
		scanf("%d", &N);
		for(int i = 0; i < N; i++)
		{
			Data t;
			double _x;
			t.x.push_back(1);
			for(int j = 1; j <= F; j++)
			{
				scanf("%lf", &_x);
				t.x.push_back(_x);
			}
			t.x = getFeatures(t.x);
			printf("%.2lf\n", WX(t, w));
		}
	}
    return 0;
}

另外利用Python的机器学习开源库sklearn很方便处理。具体可以参考如下链接。

题解：http://blog.guozengxin.cn/2015/01/08/hackerrank-predicting-office-space-price/

sklearn官网：http://scikit-learn.org/stable/

sklearn源代码：https://github.com/scikit-learn/scikit-learn/

多项式回归模型（Office Prices）

猜你喜欢