Intel daal4py demo运行过程

daal4py安装：

# Fetch the daal4py sources and work from the repository root.
git clone https://github.com/IntelPython/daal4py.git
cd daal4py
# Create a conda environment named DAAL4PY with Python 3.6 and the listed
# build/runtime packages, resolved from the intel, intel/label/test and
# conda-forge channels.
conda create -n DAAL4PY -c intel -c intel/label/test -c conda-forge python=3.6 mpich cnc tbb-devel daal daal-include cython jinja2 numpy
# Activate the new environment so that $CONDA_PREFIX refers to it.
source activate DAAL4PY
# Point the CnC, TBB and DAAL root variables at the environment prefix
# (presumably read by setup.py to locate headers/libraries -- verify against the daal4py build docs).
export CNCROOT=$CONDA_PREFIX
export TBBROOT=$CONDA_PREFIX
export DAALROOT=$CONDA_PREFIX
# Build the extension module, then install the package into the environment.
python setup.py build_ext
python setup.py install
# Run the demos that follow

source deactivate DAAL4PY # leave the environment

注意：安装过程较慢，请耐心等待。

随机森林：

#*******************************************************************************
# Copyright 2014-2018 Intel Corporation
# All Rights Reserved.
#
# This software is licensed under the Apache License, Version 2.0 (the
# "License"), the following terms apply:
#
# You may not use this file except in compliance with the License.  You may
# obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
#*******************************************************************************

# daal4py Decision Forest Classification example for shared memory systems

import daal4py as d4p
import numpy as np

# let's try to use pandas' fast csv reader
# Prefer pandas' fast CSV reader; fall back to numpy when pandas is missing.
# Only ImportError triggers the fallback, so unrelated failures (and
# KeyboardInterrupt/SystemExit) are no longer silently swallowed.
try:
    import pandas
except ImportError:
    def read_csv(f, c):
        """Read columns ``c`` of the comma-separated file ``f`` with numpy.

        Returns a 2-D float32 array (``ndmin=2`` keeps a single column 2-D).
        """
        return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32)
else:
    def read_csv(f, c):
        """Read columns ``c`` of the header-less, comma-separated file ``f``.

        Returns the selected columns as a float32 numpy array.
        """
        return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32).values


def main():
    """Train a decision forest classifier on the sample data and predict.

    Returns:
        A ``(train_result, predict_result, plabels)`` tuple: the training
        result, the prediction result for the test file, and the labels
        read from the test file.
    """
    train_path = "./data/batch/df_classification_train.csv"
    test_path = "./data/batch/df_classification_test.csv"

    n_classes = 5
    feature_cols = range(3)    # three features per observation
    label_cols = range(3, 4)   # the column right after the features

    # Set up the training algorithm for a 5-class problem.
    forest_trainer = d4p.decision_forest_classification_training(
        n_classes,
        nTrees=10,
        minObservationsInLeafNode=8,
        featuresPerNode=3,
        engine=d4p.engines_mt19937(seed=777),
        varImportance='MDI',
        bootstrap=True,
        resultsToCompute='computeOutOfBagError',
    )

    # Load the training set and fit the forest.
    # Depending on the parameters, the training result provides model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance.
    train_features = read_csv(train_path, feature_cols)
    train_labels = read_csv(train_path, label_cols)
    fitted = forest_trainer.compute(train_features, train_labels)

    # Load the test set (same number of features) and predict with the
    # model obtained from training.
    forest_predictor = d4p.decision_forest_classification_prediction(n_classes)
    test_features = read_csv(test_path, feature_cols)
    test_labels = read_csv(test_path, label_cols)
    predicted = forest_predictor.compute(test_features, fitted.model)

    # One predicted value per test observation.
    assert predicted.prediction.shape == (test_features.shape[0], 1)

    return (fitted, predicted, test_labels)


if __name__ == "__main__":
    # Run the example, then report the training diagnostics and compare the
    # first ten predictions against the labels from the test file.
    train_result, predict_result, plabels = main()
    print("\nVariable importance results:\n", train_result.variableImportance)
    print("\nOOB error:\n", train_result.outOfBagError)
    print(
        "\nDecision forest prediction results (first 10 rows):\n",
        predict_result.prediction[:10],
    )
    print("\nGround truth (first 10 rows):\n", plabels[:10])
    print('All looks good!')

demo示例数据：

0.00125126,0.563585,8,2,
0.193304,0.808741,12,1,
0.585009,0.479873,6,1,
0.350291,0.895962,13,4,
0.82284,0.746605,11,2,
0.174108,0.858943,12,0,
0.710501,0.513535,10,2,
0.303995,0.0149846,1,2,
0.0914029,0.364452,4,0,
0.147313,0.165899,0,4,
0.988525,0.445692,7,2,
0.119083,0.00466933,0,2,
0.0089114,0.37788,4,2,
0.531663,0.571184,10,3,
0.601764,0.607166,10,4,
0.166234,0.663045,8,4,
0.450789,0.352123,5,3,
0.0570391,0.607685,8,4,
0.783319,0.802606,15,3,
0.519883,0.30195,6,2,
0.875973,0.726676,11,1,
0.955901,0.925718,15,3,
0.539354,0.142338,2,3,
0.462081,0.235328,1,2,
0.862239,0.209601,3,1,
0.779656,0.843654,15,3,
0.996796,0.999695,15,2,
0.611499,0.392438,6,0,
0.266213,0.297281,5,2,
0.840144,0.0237434,3,1,
0.375866,0.0926237,1,0,
0.677206,0.0562151,2,3,
0.00878933,0.91879,12,2,
0.275887,0.272897,5,2,
0.587909,0.691183,10,4,
0.837611,0.726493,11,1,
0.484939,0.205359,1,2,
0.743736,0.468459,6,2,
0.457961,0.949156,13,3,
0.744438,0.10828,2,2,
0.599048,0.385235,6,0,
0.735008,0.608966,10,2,
0.572405,0.361339,6,0,
0.151555,0.225105,0,3,
0.425153,0.802881,13,3,

计算均值方差等统计特征：

#*******************************************************************************

# Copyright 2014-2018 Intel Corporation

# All Rights Reserved.

#

# This software is licensed under the Apache License, Version 2.0 (the

# "License"), the following terms apply:

#

# You may not use this file except in compliance with the License.  You may

# obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

#

# See the License for the specific language governing permissions and

# limitations under the License.

#*******************************************************************************



# daal4py low order moments example for shared memory systems



import daal4py as d4p

import numpy as np



# let's try to use pandas' fast csv reader

# Prefer pandas' fast CSV reader; fall back to numpy when pandas is missing.
# Only ImportError triggers the fallback, so unrelated failures (and
# KeyboardInterrupt/SystemExit) are no longer silently swallowed.
try:
    import pandas
except ImportError:
    def read_csv(f, c):
        """Read columns ``c`` of the comma-separated file ``f`` with numpy.

        Returns a 2-D array (``ndmin=2`` keeps a single column 2-D) using
        numpy's default float dtype.
        """
        return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2)
else:
    def read_csv(f, c):
        """Read columns ``c`` of the header-less, comma-separated file ``f``.

        Returns the selected columns as a float64 numpy array.
        """
        return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float64).values





def main():
    """Compute low order moments for the sample data set.

    Reads the first ten columns of the sample CSV file, runs
    ``d4p.low_order_moments`` on them and checks that every reported
    statistic is a single row with one entry per input column.

    Returns:
        The result object produced by the algorithm's ``compute`` call.
    """
    csv_path = "./data/batch/covcormoments_dense.csv"
    samples = read_csv(csv_path, range(10))

    moments_algo = d4p.low_order_moments()
    moments = moments_algo.compute(samples)

    # Each statistic below is expected as a (1, n_columns) array.
    expected_shape = (1, samples.shape[1])
    statistic_names = (
        "minimum",
        "maximum",
        "sum",
        "sumSquares",
        "sumSquaresCentered",
        "mean",
        "secondOrderRawMoment",
        "variance",
        "standardDeviation",
        "variation",
    )
    for statistic in statistic_names:
        assert getattr(moments, statistic).shape == expected_shape

    return moments





if __name__ == "__main__":
    res = main()

    # (heading, result attribute) pairs, printed in this order.
    report = (
        ("\nMinimum:\n", "minimum"),
        ("\nMaximum:\n", "maximum"),
        ("\nSum:\n", "sum"),
        ("\nSum of squares:\n", "sumSquares"),
        ("\nSum of squared difference from the means:\n", "sumSquaresCentered"),
        ("\nMean:\n", "mean"),
        ("\nSecond order raw moment:\n", "secondOrderRawMoment"),
        ("\nVariance:\n", "variance"),
        ("\nStandard deviation:\n", "standardDeviation"),
        ("\nVariation:\n", "variation"),
    )
    for heading, attribute in report:
        print(heading, getattr(res, attribute))

    print('All looks good!')

Intel daal4py demo运行过程

猜你喜欢