.net随笔-vb.net 机器学习ml.net情绪分析(3)

版权声明:本博客所有文章版权归博主刘兴所有,转载请注明来源 https://blog.csdn.net/AI_LX/article/details/89186549

(11)评估模型,保存模型

Imports System
Imports System.Collections
Imports System.IO
Imports System.Linq
Imports Microsoft.Data.DataView
Imports Microsoft.ML
Imports Microsoft.ML.Data
Imports Microsoft.ML.Trainers
Imports Microsoft.ML.Transforms.Text

''' <summary>
''' Console entry point for the sentiment-analysis sample: loads the labelled
''' text file, trains a FastTree binary classifier on featurized text,
''' evaluates it on the held-out split, and saves the trained model to disk.
''' </summary>
Module Program
    ' Input data set and output model both live in the "data" folder under
    ' the current working directory.
    Private ReadOnly _dataPath As String = Path.Combine(Environment.CurrentDirectory, "data", "yelp_labelled.txt")
    Private ReadOnly _modelPath As String = Path.Combine(Environment.CurrentDirectory, "data", "model.zip")

    ''' <summary>Full path of the labelled text file used for training and testing.</summary>
    Public ReadOnly Property DataPath As String
        Get
            Return _dataPath
        End Get
    End Property

    ''' <summary>Full path of the file the trained model is written to.</summary>
    Public ReadOnly Property ModelPath As String
        Get
            Return _modelPath
        End Get
    End Property

    ''' <summary>
    ''' Runs the whole workflow: load and split the data, train, then evaluate
    ''' (evaluation also saves the model).
    ''' </summary>
    ''' <param name="args">Command-line arguments; not used.</param>
    Sub Main(args As String())
        ' Create the ML.NET context shared by every step below.
        Dim context As New MLContext
        Dim split As TrainCatalogBase.TrainTestData = LoadData(context)
        Dim model As ITransformer = BuildAndTrainModel(context, split.TrainSet)
        Evaluate(context, model, split.TestSet)
    End Sub

    ''' <summary>
    ''' Builds the training pipeline and fits it to the training split.
    ''' </summary>
    ''' <param name="mlContext">The ML.NET context used to create the pipeline components.</param>
    ''' <param name="splitTrainSet">The training portion of the data set.</param>
    ''' <returns>The fitted transformer chain (featurizer plus trained classifier).</returns>
    Public Function BuildAndTrainModel(mlContext As MLContext, splitTrainSet As IDataView) As ITransformer
        ' Step 1: turn the SentimentText column into the numeric vector column
        ' named Features.
        Dim featurizer = mlContext.Transforms.Text.FeaturizeText(
            outputColumnName:=DefaultColumnNames.Features,
            inputColumnName:=NameOf(SentimentData.SentimentText))

        ' Step 2: append the FastTree trainer. No label column is passed here,
        ' so the trainer uses its default label column.
        Dim trainer = mlContext.BinaryClassification.Trainers.FastTree(
            numLeaves:=50,
            numTrees:=50,
            minDatapointsInLeaves:=20)

        Dim pipeline = featurizer.Append(trainer)
        Return pipeline.Fit(splitTrainSet)
    End Function

    ''' <summary>
    ''' Loads the labelled text file and splits it into training and test sets.
    ''' </summary>
    ''' <param name="mlContext">The ML.NET context used to load and split the data.</param>
    ''' <returns>The train/test split, with 20% of the rows held out for testing.</returns>
    Public Function LoadData(mlContext As MLContext) As TrainCatalogBase.TrainTestData
        ' Load the file (no header row) using the SentimentData schema.
        Dim dataView As IDataView = mlContext.Data.LoadFromTextFile(Of SentimentData)(_dataPath, hasHeader:=False)

        ' Hold out 20% of the rows as the test set.
        Return mlContext.BinaryClassification.TrainTestSplit(dataView, testFraction:=0.2)
    End Function

    ''' <summary>
    ''' Scores the test split with the trained model, prints accuracy, AUC and
    ''' F1 score to the console, then saves the model to <see cref="ModelPath"/>.
    ''' </summary>
    ''' <param name="mlContext">The ML.NET context used for evaluation and saving.</param>
    ''' <param name="model">The trained model to evaluate.</param>
    ''' <param name="splitTestSet">The held-out test portion of the data set.</param>
    Public Sub Evaluate(mlContext As MLContext, model As ITransformer, splitTestSet As IDataView)
        Console.WriteLine("===用测试数据评估模型正确率===")

        ' Run the model over the test rows to get predictions.
        Dim predictions As IDataView = model.Transform(splitTestSet)

        ' Compute the quality metrics. The label column is named via
        ' DefaultColumnNames.Label (rather than the literal "label") so that
        ' evaluation uses the same column as the trainer, which was built with
        ' its default label column.
        ' NOTE(review): assumes column lookup is case-sensitive and that
        ' SentimentData exposes its label under the default name - confirm.
        Dim metrics As CalibratedBinaryClassificationMetrics = mlContext.BinaryClassification.Evaluate(predictions, DefaultColumnNames.Label)

        ' Print the evaluation metrics.
        Console.WriteLine("正确率:" & metrics.Accuracy)
        Console.WriteLine("AUC:" & metrics.Auc)
        Console.WriteLine("F1Score:" & metrics.F1Score)

        SaveModelAsFile(mlContext, model)
    End Sub

    ''' <summary>
    ''' Writes the trained model to <see cref="ModelPath"/>, overwriting any
    ''' existing file, and reports the location on the console.
    ''' </summary>
    ''' <param name="mlContext">The ML.NET context whose model catalog performs the save.</param>
    ''' <param name="model">The trained model to save.</param>
    Private Sub SaveModelAsFile(mlContext As MLContext, model As ITransformer)
        ' The generated "Throw New NotImplementedException()" stub was removed:
        ' it made the save logic below unreachable.
        Using fs As New FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write)
            mlContext.Model.Save(model, fs)
            Console.WriteLine("模型存入" & ModelPath)
        End Using
    End Sub
End Module

猜你喜欢

转载自blog.csdn.net/AI_LX/article/details/89186549