Artificial Intelligence Training Platform

Some of the simpler, more experimental exercises are omitted here.

Genetic algorithm for the traveling salesman problem (TSP)

#include "stdio.h"
#include "stdlib.h"
#include "time.h"
#define cityNum 10				
#define popSize 10
#define croRate 0.85				
#define mutRate 0.1				
#define MAX 999					

// Chromosome structure
struct Chrom
{
	int cityArr[cityNum];		// tour: a permutation of city indices
	char name;					// chromosome label
	float adapt;				// fitness: reciprocal of the tour length
	int dis;					// total tour length
};
struct Chrom genes[popSize];	// current population
struct Chrom genesNew[popSize]; // next population built by selection
struct Chrom temp;				// scratch chromosome


char names[cityNum] = {'A','B','C','D','E','F','G','H','I','J'};		// city labels

int distance[cityNum][cityNum] = {
	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
	{ 1, 0, 1, 2, 3, 4, 5, 6, 7, 8 },
	{ 2, 1, 0, 1, 2, 3, 4, 5, 6, 7 },
	{ 3, 2, 1, 0, 1, 2, 3, 4, 5, 6 },
	{ 4, 3, 2, 1, 0, 1, 2, 3, 4, 5 },
	{ 5, 4, 3, 2, 1, 0, 1, 2, 3, 4 },
	{ 6, 5, 4, 3, 2, 1, 0, 1, 2, 3 },
	{ 7, 6, 5, 4, 3, 2, 1, 0, 1, 2 },
	{ 8, 7, 6, 5, 4, 3, 2, 1, 0, 1 },
	{ 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }};	// symmetric city-distance matrix

// Initialize the population with random permutations of the cities
void initGroup()
{
	int i,j,k;
	int t = 0;
	int flag = 0;
	srand(time(NULL));
	for(i = 0; i < popSize; i ++)
	{
		
	    temp.name = names[i];
		temp.adapt = 0.0f;
		temp.dis = 0;
		
		// draw random cities until a full permutation is built
		for(j = 0; j < cityNum;)
		{
			t = rand()%cityNum;	// candidate city
			flag = 1;			// assume it is not a duplicate
			for(k = 0; k < j; k ++)
			{
				if(genes[i].cityArr[k] == t)
				{
					flag = 0;
					break;
				}
			}
			if(flag)
			{
				temp.cityArr[j] = t;
				genes[i] = temp;
				j++;
			}
		}
	}
}

// Compute each chromosome's tour length and its fitness (1 / length)
void popFitness()
{
	int i,n1,n2;
	for(i = 0; i < popSize; i ++)
	{
		genes[i].dis = 0;
		for(int j = 1;j < cityNum; j ++)
		{
			n1 = genes[i].cityArr[j-1];
			n2 = genes[i].cityArr[j];
			genes[i].dis += distance[n1][n2];
		}
		genes[i].dis += distance[genes[i].cityArr[0]][genes[i].cityArr[cityNum-1]];
		genes[i].adapt = (float)1/genes[i].dis;	
	}
}

// Return the index of the fittest chromosome
int chooseBest()
{
	int choose = 0;
	float best = 0.0f;
	best = genes[0].adapt;
	for(int i = 0; i < popSize; i ++)
	{
		if(genes[i].adapt > best)	// higher fitness means a shorter tour
		{
			best = genes[i].adapt;
			choose = i;
		}
	}
	return choose;
}

// Roulette-wheel selection: copy chromosomes into genesNew with
// probability proportional to fitness
void select()
{
	float biggestSum = 0.0f;	// total fitness of the population
	float adapt_pro[popSize];
	float pick = 0.0f;
	int i;
	for(i = 0; i < popSize; i ++)
	{
		 biggestSum += genes[i].adapt; 
	}
	for(i = 0; i < popSize; i ++)
	{
		 adapt_pro[i] = genes[i].adapt / biggestSum; 
	}
	
    for(i = 0;i < popSize; i ++)
    {
        pick = (float)rand()/RAND_MAX;	// spin the wheel
	    /********** Begin **********/
        float sum = 0;					// cumulative selection probability
        for(int j=0;j<popSize;j++){
            if(pick<=sum+adapt_pro[j]){
                genesNew[i]=genes[j];
                break;
            }
            sum += adapt_pro[j];
        }
        
        
	    /********** End **********/
    }
    for(i = 0;i < popSize; i++)
    {
	    genes[i] = genesNew[i];
    }
}

// Crossover: swap a random segment between adjacent chromosomes, then
// repair the duplicated cities so both children remain valid tours
void cross()
{
    float pick;
    int choice1,choice2;
    int pos1,pos2;				// segment boundaries
    int temp;
    int conflict1[cityNum];		// positions of duplicated cities in child 1
    int conflict2[cityNum];		// positions of duplicated cities in child 2
    int num1;
    int num2;
    int index1,index2;
    int move = 0;
    while(move < popSize-1)
    {
        pick = (float)rand()/RAND_MAX; 
        if(pick > croRate)		
        {
            move += 2;
            continue;			
        }
        choice1 = move;			
        choice2 = move+1;		
        pos1 = rand()%cityNum;	// segment bounds, kept inside the tour
        pos2 = rand()%cityNum;
        while(pos1 > cityNum - 2 || pos1 < 1)
        {
            pos1 = rand()%cityNum;
        }
        while(pos2 > cityNum - 2 || pos2 < 1)
        {
            pos2 = rand()%cityNum;
        }

        if(pos1 > pos2)
        {
            temp = pos1;
            pos1 = pos2;
            pos2 = temp; 
        }

        for(int j = pos1;j <= pos2; j++)
        {
            temp = genes[choice1].cityArr[j];
            genes[choice1].cityArr[j] = genes[choice2].cityArr[j];
            genes[choice2].cityArr[j] = temp;
        }

        num1 = 0;
        num2 = 0;

        if(pos1 > 0 && pos2 < cityNum - 1)
        {
            /********** Begin **********/
            // record positions before the segment whose city now duplicates
            // one inside the swapped segment
            for(int j=0;j<pos1;j++)
            {
                for(int k=pos1;k<=pos2;k++)
                {
                    if(genes[choice1].cityArr[j]==genes[choice1].cityArr[k])
                        conflict1[num1++]=j;
                    if(genes[choice2].cityArr[j]==genes[choice2].cityArr[k])
                        conflict2[num2++]=j;
                }
            }
            
	        /********** End **********/
            

            for(int j = pos2 + 1;j < cityNum;j++)	// same duplicate check after the segment
            {
                for(int k = pos1; k <= pos2; k ++)
                {
                    /********** Begin **********/
                    if(genes[choice1].cityArr[j]==genes[choice1].cityArr[k])
                        conflict1[num1++]=j;
                    if(genes[choice2].cityArr[j]==genes[choice2].cityArr[k])
                        conflict2[num2++]=j;
                    
                    
                    /********** End **********/
                }
            }
        }
        if((num1 == num2) && num1 > 0)	// swap the conflicting cities pairwise to restore valid tours
        {
            for(int j = 0;j < num1; j ++)
            {
                index1 = conflict1[j];
                index2 = conflict2[j];
                temp = genes[choice1].cityArr[index1]; 
                genes[choice1].cityArr[index1] = genes[choice2].cityArr[index2];
                genes[choice2].cityArr[index2] = temp;
            }
        }
        move += 2;
    }
}

// Mutation: swap two random cities; keep the swap only if it shortens the tour
void mutation()
{
	double pick;
    int pos1,pos2,temp;
    for(int i = 0;i < popSize; i ++)
    {
        pick = (float)rand()/RAND_MAX;
        if(pick > mutRate)
		{
            continue;
		}
        pos1 = rand()%cityNum;	// two random positions within the tour
        pos2 = rand()%cityNum;

        int a = genes[i].dis;	// tour length before the swap
        temp = genes[i].cityArr[pos1];
        genes[i].cityArr[pos1] = genes[i].cityArr[pos2];
        genes[i].cityArr[pos2] = temp;

		popFitness();			// re-evaluate; revert if the tour got longer
		if(genes[i].dis > a)
		{
			temp = genes[i].cityArr[pos1];
			genes[i].cityArr[pos1] = genes[i].cityArr[pos2];
			genes[i].cityArr[pos2] = temp;
		}
    }
}
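
The exercise ships without a driver. A minimal sketch of one (the generation count and output format are assumptions, not part of the original) runs the GA loop and prints the best tour found:

// Minimal driver sketch (assumes 1000 generations; not part of the original exercise)
int main()
{
	initGroup();
	popFitness();
	for(int gen = 0; gen < 1000; gen++)
	{
		select();
		cross();
		mutation();
		popFitness();
	}
	int best = chooseBest();
	printf("best tour (length %d): ", genes[best].dis);
	for(int i = 0; i < cityNum; i++)
		printf("%c ", names[genes[best].cityArr[i]]);
	printf("\n");
	return 0;
}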

A* search for the Romania route-finding problem

#include<iostream>
#include<vector>
#include<cstring>
#include<stack>
#include<algorithm>
#include<cmath>
#define A 0
#define B 1
#define C 2
#define D 3
#define E 4
#define F 5
#define G 6
#define H 7
#define I 8
#define L 9
#define M 10
#define N 11
#define O 12
#define P 13
#define R 14
#define S 15
#define T 16
#define U 17
#define V 18
#define Z 19

using namespace std;

// straight-line-distance heuristic to Bucharest (B), indexed by city
int h[20] =
{ 366,0,160,242,161,
178,77,151,226,244,
241,234,380,98,193,
253,329,80,199,374 };

struct node
{
    int g;		// path cost from the start
    int h;		// heuristic estimate to the goal
    int f;		// g + h
    int name;	// city index
    node(int name, int g, int h)
    {
        this->name = name;
        this->g = g;
        this->h = h;
        this->f = g + h;
    };
    bool operator <(const node &a)const
    {
        return f < a.f;
    }
};


class Graph
{
public:
    Graph()
    {
        memset(graph, -1, sizeof(graph));
    }
    int getEdge(int from, int to)
    {
        return graph[from][to];
    }
    void addEdge(int from, int to, int cost)
    {
        if (from >= 20 || from < 0 || to >= 20 || to < 0)
            return;
        graph[from][to] = cost;
    }
    
	void init(){
        addEdge(O, Z, 71);
        addEdge(Z, O, 71);

        addEdge(O, S, 151);
        addEdge(S, O, 151);

        addEdge(Z, A, 75);
        addEdge(A, Z, 75);

        addEdge(A, S, 140);
        addEdge(S, A, 140);

        addEdge(A, T, 118);
        addEdge(T, A, 118);

        addEdge(T, L, 111);
        addEdge(L, T, 111);

        addEdge(L, M, 70);
        addEdge(M, L, 70);

        addEdge(M, D, 75);
        addEdge(D, M, 75);

        addEdge(D, C, 120);
        addEdge(C, D, 120);

        addEdge(C, R, 146);
        addEdge(R, C, 146);

        addEdge(S, R, 80);
        addEdge(R, S, 80);

        addEdge(S, F, 99);
        addEdge(F, S, 99);

        addEdge(F, B, 211);
        addEdge(B, F, 211);

        addEdge(P, C, 138);
        addEdge(C, P, 138);

        addEdge(R, P, 97);
        addEdge(P, R, 97);

        addEdge(P, B, 101);
        addEdge(B, P, 101);

        addEdge(B, G, 90);
        addEdge(G, B, 90);

        addEdge(B, U, 85);
        addEdge(U, B, 85);

        addEdge(U, H, 98);
        addEdge(H, U, 98);

        addEdge(H, E, 86);
        addEdge(E, H, 86);

        addEdge(U, V, 142);
        addEdge(V, U, 142);

        addEdge(I, V, 92);
        addEdge(V, I, 92);

        addEdge(I, N, 87);
        addEdge(N, I, 87);
	}

private:
    int graph[20][20];
};

bool list[20];			// whether each city is currently in the open list
vector<node> openList;	// frontier, kept sorted by f
bool closeList[20];		// whether each city has been expanded
stack<int> road;		// reconstructed path
int parent[20];			// parent city on the best known path

void A_star(int goal,node &src,Graph &graph)
{
    openList.push_back(src);
    sort(openList.begin(), openList.end());
    
    while (!openList.empty())
    {
        /********** Begin **********/
		node cur = openList[0];				// smallest f is at the front
        if(cur.name==goal) return;			// goal reached
        openList.erase(openList.begin());
        closeList[cur.name] = true;
        list[cur.name] = false;
        for(int i=0;i<20;i++){
            if(graph.getEdge(cur.name, i)!=-1 && !closeList[i]){
                int cost = cur.g + graph.getEdge(cur.name, i);
                if(list[i]){
                   // already open: relax g if this path is cheaper
                    for(int j=0;j<openList.size();j++){
                        if(openList[j].name==i){
                            if(openList[j].g>cost){
                               openList[j].g = cost;
                               openList[j].f = openList[j].h + cost;
                               parent[i] = cur.name;
                           }
                           break;
                       }
                   }
               }
               else{
                   node newNode(i, cost, h[i]);
                   openList.push_back(newNode);
                   list[i] = true;
                   parent[i] = cur.name;
               }
            }
        }
        sort(openList.begin(), openList.end());
		/********** End **********/  
    }
}

void print_result(Graph &graph)
{
    int p = openList[0].name;
    int lastNodeNum;
    road.push(p);
    while (parent[p] != -1)
    {
        road.push(parent[p]);
        p = parent[p];
    }
    lastNodeNum = road.top();
    int cost = 0;
    cout << "solution: ";
    while (!road.empty())
    {
        cout << road.top() << "-> ";
        if (road.top() != lastNodeNum)
        {
            cost += graph.getEdge(lastNodeNum, road.top());
            lastNodeNum = road.top();
        }
        road.pop();
    }
    cout << "end" << endl;
    cout << "cost:" << cost;
}
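
No main() is included in the exercise. A minimal sketch (start A and goal B are assumptions) initializes the globals; note that parent must be set to -1 everywhere, or print_result's termination test never fires:

// Minimal driver sketch (assumed start A, goal B; not part of the original exercise)
int main()
{
    Graph graph;
    graph.init();
    memset(parent, -1, sizeof(parent));
    node src(A, 0, h[A]);
    list[A] = true;
    A_star(B, src, graph);
    print_result(graph);
    return 0;
}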

Alpha-beta pruning: finding the optimal move in a game tree

# -*- coding:utf-8 -*-

import copy     # mind the difference between deep and shallow copies!

class GameNode:
    '''Game-tree node.
    Members:
    name - string  node name
    val - int      node value
    children - list[GameNode]  child nodes
    '''
    def __init__(self, name='', val=0):
        self.name = name        # char
        self.val = val          # int
        self.children = []      # list of nodes

class GameTree:
    '''Game tree.
    Members:
    root - GameNode  root of the game tree
    Methods:
    buildTree - build the game tree
    '''
    def __init__(self):
        self.root = None                # GameNode: root of the game tree

    def buildTree(self, data_list, root):
        '''Build the game tree recursively.
        Parameters:
        data_list - list[] like ['A', ['B', ('E', 3), ('F', 12)], ['C', ('H', 2)], ['D', ('K', 14)]]
        root - GameNode
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        if self.root==None:
            self.root = root
        for i in range(1, len(data_list)):
            if type(data_list[i])==list:
                # internal node: the first element is its name, the rest are children
                root.children.append(GameNode(data_list[i][0]))
                self.buildTree(data_list[i], root.children[i-1])
            else:
                # leaf: a (name, value) tuple
                root.children.append(GameNode(data_list[i][0], data_list[i][1]))

        #********** End **********#


class AlphaBeta:
    '''Alpha-beta search over a game tree.
    Members:
    game_tree - GameTree  the game tree
    Methods:
    minmax_with_alphabeta - minimax with alpha-beta pruning; computes the best move
    max_value - compute the max value
    min_value - compute the min value
    get_value - return a node's value
    isTerminal - test whether a node is terminal
    '''
    def __init__(self, game_tree):
        self.game_tree = game_tree      # GameTree: the game tree

    def minmax_with_alphabeta(self, node):
        '''Minimax with alpha-beta pruning; computes the best move.
        Parameters:
        node - GameNode  game-tree node
        Returns:
        clf - GameNode  the node of the best move
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        clf = self.max_value(node,-0x3f3f3f3f,0x3f3f3f3f)
        # the best move is the child whose backed-up value equals the root value
        for child in node.children:
            if clf==child.val:
                return child
        #********** End **********#


    def max_value(self, node, alpha, beta):
        '''Compute the max value.
        Parameters:
        node - GameNode  game-tree node
        alpha - int  lower bound of the pruning window
        beta - int  upper bound of the pruning window
        Returns:
        clf - int  the largest evaluation among the children
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        # a MAX node raises the lower bound alpha
        clf = -0x3f3f3f3f
        if self.isTerminal(node):
            return self.get_value(node)
        for child in node.children:
            clf = max(self.min_value(child, alpha, beta), clf)
            alpha = max(clf, alpha)
            if alpha>=beta:
                return alpha    # beta cutoff: the MIN parent will never pick this branch
        node.val = alpha
        return alpha
        #********** End **********#


    def min_value(self, node, alpha, beta):
        '''Compute the min value.
        Parameters:
        node - GameNode  game-tree node
        alpha - int  lower bound of the pruning window
        beta - int  upper bound of the pruning window
        Returns:
        clf - int  the smallest evaluation among the children
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        # a MIN node lowers the upper bound beta
        clf = 0x3f3f3f3f
        if self.isTerminal(node):
            return self.get_value(node)
        for child in node.children:
            clf = min(self.max_value(child,alpha,beta), clf)
            beta = min(beta, clf)
            if alpha>=beta:
                return beta     # alpha cutoff: the MAX parent will never pick this branch
        node.val = beta
        return beta

        #********** End **********#


    def get_value(self, node):
        '''Return a node's value.
        Parameters:
        node - GameNode  game-tree node
        Returns:
        clf - int  the node's value, i.e. node.val
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        return node.val

        #********** End **********#


    def isTerminal(self, node):
        '''Test whether a node is terminal (has no children).
        Parameters:
        node - GameNode  game-tree node
        Returns:
        clf - bool  True if the node is terminal, False otherwise
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        return len(node.children)==0

        #********** End **********#
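
A quick usage sketch (the tree literal follows the list format documented in buildTree; the names and values here are only illustrative):

# Illustrative usage (assumed example tree, not part of the original exercise)
data = ['A', ['B', ('E', 3), ('F', 12)], ['C', ('H', 2)], ['D', ('K', 14)]]
tree = GameTree()
tree.buildTree(data, GameNode(data[0]))
best = AlphaBeta(tree).minmax_with_alphabeta(tree.root)
print(best.name, best.val)  # prints: D 14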

Naive Bayes Classification

import numpy as np


class NaiveBayesClassifier(object):
    def __init__(self):
        '''
        self.label_prob holds the prior probability of each class in the data.
        For example, {0:0.333, 1:0.667} means class 0 appears with probability 0.333
        and class 1 with probability 0.667.
        '''
        self.label_prob = {}
        '''
        self.condition_prob holds, for each class, the probability of each feature value.
        For example, with training features [[2, 1, 1],
                                             [1, 2, 2],
                                             [2, 2, 2],
                                             [2, 1, 2],
                                             [1, 2, 3]]
        and labels [1, 0, 1, 0, 1]:
        given label 0, column 0 is 1 with probability 0.5 and 2 with probability 0.5;
        given label 0, column 1 is 1 with probability 0.5 and 2 with probability 0.5;
        given label 0, column 2 is 1 with probability 0, 2 with probability 1, and 3 with probability 0;
        given label 1, column 0 is 1 with probability 0.333 and 2 with probability 0.667;
        given label 1, column 1 is 1 with probability 0.333 and 2 with probability 0.667;
        given label 1, column 2 is 1, 2, or 3, each with probability 0.333;
        so self.condition_prob holds:
        {
            0:{
                0:{
                    1:0.5
                    2:0.5
                }
                1:{
                    1:0.5
                    2:0.5
                }
                2:{
                    1:0
                    2:1
                    3:0
                }
            }
            1:
            {
                0:{
                    1:0.333
                    2:0.667
                }
                1:{
                    1:0.333
                    2:0.667
                }
                2:{
                    1:0.333
                    2:0.333
                    3:0.333
                }
            }
        }
        '''
        self.condition_prob = {}
    def fit(self, feature, label):
        '''
        Train the model; store the probabilities in self.label_prob and self.condition_prob.
        :param feature: ndarray of all training-set features
        :param label: ndarray of all training-set labels
        :return: nothing
        '''
        #********* Begin *********#
        # class priors
        cnt = 0
        num = 0
        for item in label:
            num+=1
            if item == 1:
                cnt+=1
        self.label_prob[0] = (num-cnt)/num
        self.label_prob[1] = cnt/num
        # conditional probabilities
        self.condition_prob[0] = {}
        self.condition_prob[1] = {}
        # initialize a value-count dict for every feature column
        for item in self.condition_prob:
            for feat in range(len(feature[0])):
                self.condition_prob[item][feat] = {}
        # count each feature value per class
        i=0         # sample index
        for data in feature:
            j=0     # feature index
            for feat in data:
                if(self.condition_prob[0][j].get(feat)==None):
                    self.condition_prob[0][j][feat] = 0
                if(self.condition_prob[1][j].get(feat)==None):
                    self.condition_prob[1][j][feat] = 0
                if label[i]==0:
                    self.condition_prob[0][j][feat] += 1
                else:
                    self.condition_prob[1][j][feat] += 1
                j+=1
            i+=1
        # normalize the counts into conditional probabilities (divide by the class counts)
        for feat in range(len(feature[0])):
            for item in self.condition_prob[0][feat]:
                self.condition_prob[0][feat][item] /= (num-cnt)
            for item in self.condition_prob[1][feat]:
                self.condition_prob[1][feat][item] /= cnt
        #********* End *********#


    def predict(self, feature):
        '''
        Predict labels for the given data.
        :param feature: ndarray of all test-set features
        :return: list of predicted labels
        '''
        # ********* Begin *********#
        res = []
        for item in feature:
            P_good = self.label_prob[1]
            P_bad = self.label_prob[0]
            feat_idx = 0
            for feat in item:
                P_good *= self.condition_prob[1][feat_idx][feat]
                P_bad *= self.condition_prob[0][feat_idx][feat]
                feat_idx+=1
            if P_good>P_bad:
                res.append(1)
            else:
                res.append(0)
        return res
        #********* End *********#
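
A quick check against the example in the class docstring (the data below is taken from that example):

import numpy as np

# Illustrative usage with the docstring's example data
feature = np.array([[2, 1, 1], [1, 2, 2], [2, 2, 2], [2, 1, 2], [1, 2, 3]])
label = np.array([1, 0, 1, 0, 1])
clf = NaiveBayesClassifier()
clf.fit(feature, label)
print(clf.predict(np.array([[2, 1, 1]])))   # expected: [1]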

Naive Bayes with Laplace smoothing

import numpy as np

class NaiveBayesClassifier(object):
    def __init__(self):
        '''
        self.label_prob holds the prior probability of each class in the data.
        For example, {0:0.333, 1:0.667} means class 0 appears with probability 0.333
        and class 1 with probability 0.667.
        '''
        self.label_prob = {}
        '''
        self.condition_prob holds, for each class, the probability of each feature value.
        For example, with training features [[2, 1, 1],
                                             [1, 2, 2],
                                             [2, 2, 2],
                                             [2, 1, 2],
                                             [1, 2, 3]]
        and labels [1, 0, 1, 0, 1]:
        given label 0, column 0 is 1 with probability 0.5 and 2 with probability 0.5;
        given label 0, column 1 is 1 with probability 0.5 and 2 with probability 0.5;
        given label 0, column 2 is 1 with probability 0, 2 with probability 1, and 3 with probability 0;
        given label 1, column 0 is 1 with probability 0.333 and 2 with probability 0.667;
        given label 1, column 1 is 1 with probability 0.333 and 2 with probability 0.667;
        given label 1, column 2 is 1, 2, or 3, each with probability 0.333;
        so self.condition_prob holds:
        {
            0:{
                0:{
                    1:0.5
                    2:0.5
                }
                1:{
                    1:0.5
                    2:0.5
                }
                2:{
                    1:0
                    2:1
                    3:0
                }
            }
            1:
            {
                0:{
                    1:0.333
                    2:0.667
                }
                1:{
                    1:0.333
                    2:0.667
                }
                2:{
                    1:0.333
                    2:0.333
                    3:0.333
                }
            }
        }
        '''
        self.condition_prob = {}

    def fit(self, feature, label):
        '''
        Train the model; store the probabilities in self.label_prob and self.condition_prob.
        :param feature: ndarray of all training-set features
        :param label: ndarray of all training-set labels
        :return: nothing
        '''

        #********* Begin *********#
        # class priors with Laplace smoothing
        cnt = 0
        num = 0
        for item in label:
            num+=1
            if item == 1:
                cnt+=1
        types = 2   # number of classes
        self.label_prob[0] = (num - cnt + 1) / (num + types)
        self.label_prob[1] = (cnt + 1) / (num + types)
        # conditional probabilities with Laplace smoothing
        self.condition_prob[0] = {}
        self.condition_prob[1] = {}
        # initialize a value-count dict for every feature column
        for item in self.condition_prob:
            for feat in range(len(feature[0])):
                self.condition_prob[item][feat] = {}
        # count each feature value per class
        i=0         # sample index
        for data in feature:
            j=0     # feature index
            for feat in data:
                if(self.condition_prob[0][j].get(feat)==None):
                    self.condition_prob[0][j][feat] = 1     # Laplace smoothing: counts start at 1
                if(self.condition_prob[1][j].get(feat)==None):
                    self.condition_prob[1][j][feat] = 1     # Laplace smoothing: counts start at 1
                if label[i]==0:
                    self.condition_prob[0][j][feat] += 1
                else:
                    self.condition_prob[1][j][feat] += 1
                j+=1
            i+=1
        # normalize: divide by the class count plus the number of distinct values of the feature
        for feat in range(len(feature[0])):
            for item in self.condition_prob[0][feat]:
                self.condition_prob[0][feat][item] /= (num-cnt)+len(self.condition_prob[0][feat])
            for item in self.condition_prob[1][feat]:
                self.condition_prob[1][feat][item] /= cnt+len(self.condition_prob[1][feat])
        #********* End *********#


    def predict(self, feature):
        '''
        Predict labels for the given data.
        :param feature: ndarray of all test-set features
        :return: ndarray of predicted labels
        '''

        result = []
        # predict every test sample
        for i, f in enumerate(feature):
            # probability of each candidate class
            prob = np.zeros(len(self.label_prob.keys()))
            ii = 0
            for label, label_prob in self.label_prob.items():
                # accumulate the class prior times the feature likelihoods
                prob[ii] = label_prob
                for j in range(len(feature[0])):
                    prob[ii] *= self.condition_prob[label][j][f[j]]
                ii += 1
            # take the most probable class
            result.append(list(self.label_prob.keys())[np.argmax(prob)])
        return np.array(result)
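
With the docstring's five samples (three of class 1, two of class 0), the smoothed priors become (2+1)/(5+2) ≈ 0.429 and (3+1)/(5+2) ≈ 0.571, and no conditional probability can be exactly zero. A quick sketch:

import numpy as np

# Illustrative usage with the docstring's example data
feature = np.array([[2, 1, 1], [1, 2, 2], [2, 2, 2], [2, 1, 2], [1, 2, 3]])
label = np.array([1, 0, 1, 0, 1])
clf = NaiveBayesClassifier()
clf.fit(feature, label)
print(clf.label_prob)                       # {0: 0.428..., 1: 0.571...}
print(clf.predict(np.array([[2, 1, 1]])))   # expected: [1]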



Decision tree for classification prediction

# -*- coding: UTF-8 -*-

import math

class TreeNode:
    '''Decision-tree node.
    Members:
    row - int  number of rows in the data table, initially 13
    col - int  number of columns in the data table, initially 12
    data - list[[]]  2-D data table; the initial table is in testDecisionTree.py
                    row 0: [column 0: example (sample name); columns 1-10: feature names; column 11: WillWait (target class)]
                    rows 1-12: [sample name, attribute values, target class]
        data = [
        ['example', 'Alt', 'Bar', 'Fri', 'Hun', 'Pat',  'Price', 'Rain', 'Res', 'Type',   'Est',   'WillWait'],
        ['x1',      'Yes', 'No',  'No',  'Yes', 'Some', '$$$',   'No',   'Yes', 'French', '0-10',  'y1=Yes'   ],
        ['x2',      'Yes', 'No',  'No',  'Yes', 'Full', '$',     'No',   'No',  'Thai',   '30-60', 'y2=No'    ],
            ........            .....       .....       .........           ............
        ['x12',     'Yes', 'Yes', 'Yes', 'Yes', 'Full', '$',     'No',   'No',  'Burger', '30-60', 'y12=Yes'  ] ]
    targ - string  classification result, Yes or No
    name - string  node name: the chosen feature's name
    attr - list[string]  the attribute values of that feature
    child - list[TreeNode]  one subtree per attribute value, aligned with attr
    '''
    def __init__(self, row, col, data):
        self.row = row
        self.col = col
        self.data = data
        self.targ = ''          # target result
        self.name = ''          # attribute name
        self.attr = []          # attribute value list
        self.child = []         # attribute - TreeNode List

class DecisionTree:
    '''Decision tree.
    Members:
    root - TreeNode  root of the decision tree
    Methods:
    build - build the decision tree
    predict - predict a sample's class label
    _parse_data_ - find the feature with the largest information gain
    _calc_all_gain_ - entropy of the whole sample
    _calc_attr_gain_ - conditional entropy of one feature
    _calc_bool_gain_ - helper: entropy of a binary random variable
    _get_targ_ - class label of a leaf node
    _is_leaf_ - test whether a node is a leaf
    '''
    def __init__(self, row, col, data):
        self.root = TreeNode(row, col, data)

    def build(self, root):
        '''Build the decision tree recursively.
        Parameters:
        root - TreeNode  initially the decision-tree root
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        if self._is_leaf_(root):
            root.targ = self._get_targ_(root)
            return
        # pick the feature with the largest information gain
        root.name, root.attr = self._parse_data_(root.row,root.col,root.data)
        x = [i for i in range(root.col) if root.data[0][i]==root.name][0]
        for attr in root.attr:
            # build the sub-table of rows matching this attribute value,
            # with the chosen feature column removed
            row = 0
            col = root.col - 1
            data = []
            for i in range(root.row):
                if i!=0 and root.data[i][x]!=attr:
                    continue
                vec = []
                for j in range(root.col):
                    if j==x:
                        continue
                    vec.append(root.data[i][j])
                data.append(vec)
                row+=1
            node = TreeNode(row,col,data)
            root.child.append(node)
        for node in root.child:
            self.build(node)

        #********** End **********#

    def predict(self, root, x):
        '''Predict a class label.
        Parameters:
        root - TreeNode  decision-tree root
        x - [[]]  test data, e.g.:
           [ ['example', 'Alt', 'Bar', 'Fri', 'Hun', 'Pat', 'Price', 'Rain', 'Res', 'Type',  'Est'],
             ['x1',      'Yes', 'No',  'No',  'Yes', 'Some', '$$$',  'No',   'Yes', 'French','0-10'] ]
        Returns:
        clf - string  class label, Yes or No
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        if self._is_leaf_(root):
            return root.targ
        idx = x[0].index(root.name)
        for idattr,attr in enumerate(root.attr):
            if attr==x[1][idx]:
                return self.predict(root.child[idattr],x)

        #********** End **********#

    def _parse_data_(self, row, col, data):
        '''Parse the data: find the feature with the largest information gain.
        Parameters:
        row - int  number of rows
        col - int  number of columns
        data - list[[]]  2-D data table, e.g.:
                row 0: [column 0: example (sample name); columns 1-10: feature names; column 11: WillWait (target class)]
                rows 1-12: [sample name, attribute values, target class]
        data = [
        ['example', 'Alt', 'Bar', 'Fri', 'Hun', 'Pat',  'Price', 'Rain', 'Res', 'Type',   'Est',   'WillWait'],
        ['x1',      'Yes', 'No',  'No',  'Yes', 'Some', '$$$',   'No',   'Yes', 'French', '0-10',  'y1=Yes'   ],
        ['x2',      'Yes', 'No',  'No',  'Yes', 'Full', '$',     'No',   'No',  'Thai',   '30-60', 'y2=No'    ],
            ........            .....       .....       .........           ............
        ['x12',     'Yes', 'Yes', 'Yes', 'Yes', 'Full', '$',     'No',   'No',  'Burger', '30-60', 'y12=Yes'  ] ]
        Returns:
        clf - string, list[]  the highest-gain feature's name and its list of attribute values
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        maxGain = -float('inf')
        maxName = ''
        maxAttr = []
        maxIdx = -1
        # entropy of the whole sample
        gains = self._calc_all_gain_(row-1,[x[-1] for x in data[1:]])
        for i in range(1,col-1,1):
            tmp = []
            for j in range(1,row,1):
                tmp.append([data[j][i],data[j][-1]])
            # information gain = total entropy - conditional entropy of feature i
            tmpGain = self._calc_attr_gain_(row-1,tmp)
            if (gains-tmpGain) > maxGain:
                maxGain = gains - tmpGain
                maxName = data[0][i]
                maxIdx = i

        # collect the distinct values of the winning feature
        for i in range(1,row,1):
            if data[i][maxIdx] not in maxAttr:
                maxAttr.append(data[i][maxIdx])
        return maxName,maxAttr
        #********** End **********#

    def _calc_all_gain_(self, row, data):
        '''Entropy of the whole sample.
        Parameters:
        row - int  number of rows
        data - list[]  1-D list of target classes, e.g.:
                data = ['y1=Yes', 'y2=No', ........, 'y12=Yes']
        Returns:
        clf - float  entropy
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        dict_ = {'yes':0.0,'no':0.0}
        for i in range(row):
            if data[i][-1]=='s':    # 'y?=Yes' ends in 's', 'y?=No' ends in 'o'
                dict_['yes'] += 1.0
            else:
                dict_['no'] += 1.0
        sum = 0.0
        for _key in dict_:
            sum +=(1.0*dict_[_key]/float(row))*math.log(1.0*dict_[_key]/float(row),2)
        return -sum
        #********** End **********#

    def _calc_attr_gain_(self, row, data):
        '''Conditional entropy of one feature.
        Parameters:
        row - int  number of rows
        data - list[[]]  2-D list (2 columns): [[attribute value, target class]], e.g.:
                  [ ['0-10',  'y1=Yes'   ],
                    ['30-60', 'y2=No'    ],
                      ........
                    ['30-60', 'y12=Yes'  ] ]
        Returns:
        clf - float  conditional entropy
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        dict_ = {}
        for i in range(row):
            if data[i][0] not in dict_:
                dict_[data[i][0]] = [0.0,0.0]
            if data[i][1][-1] =='s':
                dict_[data[i][0]][0] += 1.0
            else:
                dict_[data[i][0]][1] += 1.0
        sum = 0.0
        for _key in dict_:
            # weight each value's binary entropy by its share of the rows
            p = 1.0*dict_[_key][0] / (dict_[_key][0] + dict_[_key][1])
            sum += (1.0*(dict_[_key][0]+dict_[_key][1])/float(row)) * self._calc_bool_gain_(p)
        return sum
        #********** End **********#

    def _calc_bool_gain_(self, p):
        '''Helper: entropy of a binary random variable.
        Parameters:
        p - float  the variable's probability, in [0, 1]
        Returns:
        clf - float  entropy
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        if p==1 or p==0:
            return 0.0
        return -(p*math.log(p,2)+(1-p)*math.log((1-p),2))

        #********** End **********#

    def _get_targ_(self, node):
        '''Class label of a leaf node, by majority vote.
        Parameters:
        node - TreeNode  decision-tree node
        Returns:
        clf - string  class label, Yes or No
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        yes = 0
        no = 0
        for i in range(1,node.row,1):
            if node.data[i][-1][-1] == 's':
                yes +=1 
            else:
                no +=1
        if yes>no:
            return 'Yes'
        else:
            return 'No'

        #********** End **********#

    def _is_leaf_(self, node):
        '''Test whether a node is a leaf.
        Parameters:
        node - TreeNode  decision-tree node
        Returns:
        clf - bool  True for a leaf, False otherwise
        '''
        # Fill in the code to complete this task
        #********** Begin **********#
        if node.col ==2:
            return True     # only the sample-name and target columns remain
        targ = node.data[-1][-1][-1]    # otherwise a leaf iff every row has the same class
        for i in range(node.row):
            if i==0:
                continue
            if node.data[i][-1][-1] != targ:
                return False
        return True

        #********** End **********#
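
A usage sketch with a tiny assumed table (two features only; the real exercise loads the 12-row restaurant data from testDecisionTree.py):

# Illustrative usage with assumed data, not the exercise's real table
data = [
    ['example', 'Rain', 'Hun', 'WillWait'],
    ['x1',      'No',   'Yes', 'y1=Yes'],
    ['x2',      'Yes',  'Yes', 'y2=No'],
    ['x3',      'No',   'No',  'y3=Yes'],
    ['x4',      'Yes',  'No',  'y4=No'],
]
tree = DecisionTree(5, 4, data)
tree.build(tree.root)
test = [['example', 'Rain', 'Hun'], ['x5', 'No', 'Yes']]
print(tree.predict(tree.root, test))    # expected: Yes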

Neurons and perceptrons

#encoding=utf8

import numpy as np
# Perceptron implementation
class Perceptron(object):
    def __init__(self, learning_rate = 0.01, max_iter = 200):
        self.lr = learning_rate
        self.max_iter = max_iter
    def fit(self, data, label):
        '''
        input:data(ndarray): training features
              label(ndarray): training labels
        output:w(ndarray): learned weights
               b(ndarray): learned bias
        '''
        # Perceptron training: w is the weight vector, b the bias
        self.w = np.random.randn(data.shape[1])
        self.b = np.random.rand(1)
        #********* Begin *********#
        # sweep the data up to max_iter epochs, updating on misclassified samples
        for _ in range(self.max_iter):
            errors = 0
            for i in range(len(label)):
                if label[i]*(np.matmul(self.w,data[i])+self.b)<=0:
                    self.w = self.w + self.lr*label[i]*data[i]
                    self.b = self.b + self.lr*label[i]
                    errors += 1
            if errors == 0:
                break
        #********* End *********#
        return None
    def predict(self, data):
        '''
        input:data(ndarray): test features
        '''
        # Perceptron prediction: return 1 for the positive class, -1 for the negative
        #********* Begin *********#
        predicted = np.matmul(data,self.w)+self.b
        for i in range(len(predicted)):
            if predicted[i]>=0:
                predicted[i] = 1
            else:
                predicted[i] = -1
        return predicted
        #********* End *********#
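
A quick sketch on a linearly separable toy set (the data is assumed):

import numpy as np

# Illustrative usage on a separable toy problem (assumed data)
data = np.array([[2.0, 2.0], [1.5, 2.5], [-1.0, -2.0], [-2.0, -1.5]])
label = np.array([1, 1, -1, -1])
p = Perceptron(learning_rate=0.1, max_iter=100)
p.fit(data, label)
print(p.predict(np.array([[3.0, 1.0], [-3.0, -1.0]])))  # expected: [ 1. -1.]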

Backpropagation algorithm

#encoding=utf8
import numpy as np
from math import sqrt

# BP neural-network training
def bp_train(feature,label,n_hidden,maxcycle,alpha,n_output):
    '''
    Train a BP neural network.
    input:feature(mat): features
          label(mat): labels
          n_hidden(int): number of hidden-layer nodes
          maxcycle(int): maximum number of iterations
          alpha(float): learning rate
          n_output(int): number of output-layer nodes
    output:w0(mat): weights between the input and hidden layers
           b0(mat): biases between the input and hidden layers
           w1(mat): weights between the hidden and output layers
           b1(mat): biases between the hidden and output layers
    '''
    m,n = np.shape(feature)
    # initialization: uniform in +/- 4*sqrt(6)/sqrt(fan_in + fan_out)
    w0 = np.mat(np.random.rand(n,n_hidden))
    w0 = w0*(8.0*sqrt(6)/sqrt(n+n_hidden))-\
         np.mat(np.ones((n,n_hidden)))*\
         (4.0*sqrt(6)/sqrt(n+n_hidden))
    b0 = np.mat(np.random.rand(1,n_hidden))
    b0 = b0*(8.0*sqrt(6)/sqrt(n+n_hidden))-\
         np.mat(np.ones((1,n_hidden)))*\
         (4.0*sqrt(6)/sqrt(n+n_hidden))
    w1 = np.mat(np.random.rand(n_hidden,n_output))
    w1 = w1*(8.0*sqrt(6)/sqrt(n_hidden+n_output))-\
         np.mat(np.ones((n_hidden,n_output)))*\
         (4.0*sqrt(6)/sqrt(n_hidden+n_output))
    b1 = np.mat(np.random.rand(1,n_output))
    b1 = b1*(8.0*sqrt(6)/sqrt(n_hidden+n_output))-\
         np.mat(np.ones((1,n_output)))*\
         (4.0*sqrt(6)/sqrt(n_hidden+n_output))

    # training
    i = 0
    while i <= maxcycle:
        #********* Begin *********#
        # forward pass
        # hidden-layer input
        hidden_input = hidden_in(feature,w0,b0)
        # hidden-layer output
        hidden_output = hidden_out(hidden_input)
        # output-layer input
        output_in = predict_in(hidden_output,w1,b1)
        # output-layer output
        output_out = predict_out(output_in)
        # backward pass
        # residual between the hidden and output layers
        delta_output = -np.multiply((label-output_out),partial_sig(output_in))
        # residual between the input and hidden layers
        delta_hidden = np.multiply((delta_output*w1.T),partial_sig(hidden_input))
        # update the weights and biases
        w1 = w1-alpha*(hidden_output.T*delta_output)
        b1 = b1-alpha*np.sum(delta_output,axis=0)*(1.0/m)
        w0 = w0-alpha*(feature.T*delta_hidden)
        b0 = b0-alpha*np.sum(delta_hidden,axis=0)*(1.0/m)
        #********* End *********#
        i += 1
    return w0,w1,b0,b1

# hidden-layer input: feature * w0 + b0
def hidden_in(feature,w0,b0):
    m = np.shape(feature)[0]
    hidden_in = feature*w0
    for i in range(m):
        hidden_in[i,] += b0
    return hidden_in

# hidden-layer output: sigmoid activation
def hidden_out(hidden_in):
    hidden_output = sig(hidden_in)
    return hidden_output
    
# output-layer input: hidden_out * w1 + b1
def predict_in(hidden_out,w1,b1):
    m = np.shape(hidden_out)[0]
    predict_in = hidden_out*w1
    for i in range(m):
        predict_in[i,] +=b1
    return predict_in

# output-layer output: sigmoid activation
def predict_out(predict_in):
    result = sig(predict_in)
    return result

# sigmoid
def sig(x):
    return 1.0/(1+np.exp(-x))

# elementwise derivative of the sigmoid
def partial_sig(x):
    m,n = np.shape(x)
    out = np.mat(np.zeros((m,n)))
    for i in range(m):
        for j in range(n):
            out[i,j] = sig(x[i,j])*(1-sig(x[i,j]))
    return out
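
A smoke-test sketch (the XOR data and hyperparameters below are assumptions, not part of the exercise):

# Illustrative smoke test: learn XOR (assumed data and hyperparameters)
if __name__ == '__main__':
    feature = np.mat([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
    label = np.mat([[0.], [1.], [1.], [0.]])
    w0, w1, b0, b1 = bp_train(feature, label, n_hidden=8,
                              maxcycle=5000, alpha=0.1, n_output=1)
    pred = predict_out(predict_in(hidden_out(hidden_in(feature, w0, b0)), w1, b1))
    print(np.round(pred, 2))    # should approach [[0], [1], [1], [0]]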
        

Perceptron practice

#encoding=utf8
import os
from sklearn.linear_model import Perceptron
import pandas as pd
if os.path.exists('./step2/result.csv'):
    os.remove('./step2/result.csv')

#********* Begin *********#
# load the training features, labels, and test features
train_data = pd.read_csv('./step2/train_data.csv')
train_label = pd.read_csv('./step2/train_label.csv')
train_label = train_label['target']
test_data = pd.read_csv('./step2/test_data.csv')
# train a perceptron and write the predictions out
clf = Perceptron(eta0=0.8,max_iter=1000)
clf.fit(train_data, train_label)
result = clf.predict(test_data)
write_res = pd.DataFrame({'result':result})
write_res.to_csv('./step2/result.csv', index = False)
#********* End *********#

Cats vs. Dogs image classification

This exercise often fails for platform reasons rather than code errors; resubmitting a few times may be enough to pass.


from keras.layers import Dense, Activation, Flatten, Dropout, Conv2D, MaxPooling2D
import keras
import os
import numpy as np
import cv2
# fix the random seed for reproducibility
np.random.seed(1447)
IMAGE_HEIGHT = 128
IMAGE_WIDTH = 128
def get_train_data(data_path):
    '''
    Read and preprocess the training data.
    :return: the processed images and the matching one-hot labels
    '''
    images = []
    onehot = np.zeros((500, 2))
    #********* Begin *********#
    for i, img_name in enumerate(os.listdir(data_path)):
        # file names contain 'cat' or 'dog'; encode the class one-hot
        if 'cat' in img_name:
            onehot[i, 0] = 1
        else:
            onehot[i, 1] = 1
        img = cv2.imread(os.path.join(data_path, img_name))
        # cv2.resize expects (width, height); both are 128 here
        img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
        img = img / 255.0                   # scale pixels to [0, 1]
        images.append(img)
    #********* End *********#
    return np.array(images), onehot
def build_model():
    '''
    Build the CNN model.
    :return: the constructed model
    '''
    model = keras.Sequential()
    #********* Begin *********#
    model.add(Conv2D(32, kernel_size=3, activation='relu', input_shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3]))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(32, kernel_size=3, activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(96, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    #********* End *********#
    return model
def fit_and_predict(model, train_images, onehot, test_images):
    '''
    Train the model and predict on the test images.
    :param model: the model to train
    :param train_images: training-set images
    :param onehot: one-hot labels of the training set
    :param test_images: test-set images
    :return: predicted class indices
    '''
    #********* Begin *********#
    # compile with cross-entropy loss and a small Adam learning rate
    model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(lr=0.0001), metrics=['accuracy'])
    #********* End *********#
    model.fit(train_images, onehot, epochs=20, batch_size=32, verbose=0)
    result = model.predict(test_images, batch_size=10)
    predict_idx = np.argmax(result, axis=1)
    return predict_idx
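
A driver sketch (the data directories are assumptions; the platform supplies its own paths):

# Illustrative driver; './data/train' and './data/test' are assumed paths
if __name__ == '__main__':
    train_images, onehot = get_train_data('./data/train')
    test_images, _ = get_train_data('./data/test')
    model = build_model()
    predict_idx = fit_and_predict(model, train_images, onehot, test_images)
    print(predict_idx)   # 0 = cat, 1 = dog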


Source: blog.csdn.net/Aaron503/article/details/132625554