我经常参加培训讲座,并发现最受欢迎的讲座之一是"ML.NET 和 AutoML 的介绍"。ML.NET 是一个代码库,可用于创建经典(非神经网络)机器学习预测模型。AutoML 是一个命令行工具的非正式名称,该工具可自动为您生成 ML.NET 代码。
以下是我使用的两个数据文件、ML.NET 演示程序的源代码,以及 AutoML 的 shell 命令。目标是根据年龄、工作类型、年收入和工作满意度来预测一个人的性别。
文件:employees_norm_train.tsv
文件:employees_norm_test.tsv
文件:GenderMLdotNETProgram.cs
using System;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
namespace GenderMLdotNET
{
/// <summary>
/// Console demo: trains an ML.NET L-BFGS logistic regression model that
/// predicts the "isMale" label from the "age", "job", "income" and "satisfac"
/// columns, reports accuracy on the training data, and makes one prediction.
/// </summary>
class GenderMLdotNETProgram
{
  /// <summary>
  /// Runs the demo end to end: load the training file, build the data
  /// pipeline, train, evaluate on the training data, and predict for one
  /// hand-built input row.
  /// </summary>
  /// <param name="args">Command-line arguments (not used).</param>
  static void Main(string[] args)
  {
    Console.WriteLine("\nBegin ML.NET gender demo \n");
    MLContext mlc = new MLContext(seed: 1);  // fixed seed

    // 1. load data and create data pipeline
    Console.WriteLine("\nLoading norm data into memory \n");
    string trainDataPath =
      "..\\..\\..\\Data\\employees_norm_train.tsv";

    // The type argument cannot be inferred from the arguments; ModelInput
    // supplies the column names and LoadColumn indexes for the loader.
    IDataView trainData =
      mlc.Data.LoadFromTextFile<ModelInput>
      (trainDataPath, '\t', hasHeader: true);

    // One-hot encode the two string columns in place (output name == input name).
    var a = mlc.Transforms.Categorical.OneHotEncoding(new[]
      { new InputOutputColumnPair("job", "job") });
    var b = mlc.Transforms.Categorical.OneHotEncoding(new[]
      { new InputOutputColumnPair("satisfac", "satisfac") });

    // Gather all predictors into the single "Features" column the trainer reads.
    var c = mlc.Transforms.Concatenate("Features", new[]
      { "age", "job", "income", "satisfac" });
    var dataPipe = a.Append(b).Append(c);

    // 2. train model
    Console.WriteLine("Creating logistic regression model");
    var options =
      new LbfgsLogisticRegressionBinaryTrainer.Options()
      {
        LabelColumnName = "isMale",
        FeatureColumnName = "Features",
        MaximumNumberOfIterations = 100,
        OptimizationTolerance = 1e-8f
      };
    var trainer =
      mlc.BinaryClassification.Trainers.
      LbfgsLogisticRegression(options);
    var trainPipe = dataPipe.Append(trainer);

    Console.WriteLine("Starting training");
    ITransformer model = trainPipe.Fit(trainData);
    Console.WriteLine("Training complete");

    // 3. evaluate model
    // Accuracy is measured on the same data the model was trained on.
    IDataView predictions = model.Transform(trainData);
    var metrics = mlc.BinaryClassification.
      EvaluateNonCalibrated(predictions, "isMale", "Score");
    Console.Write("Model accuracy on training data = ");
    Console.WriteLine(metrics.Accuracy.ToString("F4") + "\n");

    // 4. use model
    // Numeric inputs are given in the file's normalized form: 0.32 stands
    // for age 32 and 0.4900 for $49K, as echoed in the message below.
    ModelInput X = new ModelInput();
    X.Age = 0.32f; X.Job = "mgmt"; X.Income = 0.4900f;
    X.Satisfac = "medium";

    // Both type arguments are required: input row type and output row type.
    var pe = mlc.Model.CreatePredictionEngine<ModelInput, ModelOutput>(model);
    var Y = pe.Predict(X);

    Console.Write("Set age = 32, job = mgmt, income = $49K, ");
    Console.WriteLine("satisfac = medium");
    Console.Write("Predicted isMale : ");
    Console.WriteLine(Y.PredictedLabel);

    Console.WriteLine("\nEnd ML.NET demo ");
    Console.ReadLine();  // keep the console window open until Enter is pressed
  } // Main
} // Program
/// <summary>
/// Row type that receives the model's output when a single prediction is
/// made through a prediction engine.
/// </summary>
class ModelOutput
{
  // The names below must match the scored data's column names exactly,
  // including case. The program's evaluation step addresses the score column
  // as "Score", so the same capitalized spelling is used here.
  // NOTE(review): a name that does not match may be skipped silently by the
  // prediction engine, leaving the property at its default value - confirm
  // against the ML.NET version in use.

  /// <summary>Predicted class; true means the model predicts isMale.</summary>
  [ColumnName("PredictedLabel")]
  public bool PredictedLabel { get; set; }

  /// <summary>Raw (non-calibrated) score produced by the trainer.</summary>
  [ColumnName("Score")]
  public float Score { get; set; }
}
/// <summary>
/// One row of the employee data. Each property is tied to a column of the
/// tab-separated file by index (LoadColumn) and is exposed to the pipeline
/// under a lower-case column name (ColumnName).
/// </summary>
class ModelInput
{
  /// <summary>Column 0, exposed as "isMale".</summary>
  [LoadColumn(0)]
  [ColumnName("isMale")]
  public bool IsMale { get; set; }

  /// <summary>Column 1, exposed as "age".</summary>
  [LoadColumn(1)]
  [ColumnName("age")]
  public float Age { get; set; }

  /// <summary>Column 2, exposed as "job".</summary>
  [LoadColumn(2)]
  [ColumnName("job")]
  public string Job { get; set; }

  /// <summary>Column 3, exposed as "income".</summary>
  [LoadColumn(3)]
  [ColumnName("income")]
  public float Income { get; set; }

  /// <summary>Column 4, exposed as "satisfac".</summary>
  [LoadColumn(4)]
  [ColumnName("satisfac")]
  public string Satisfac { get; set; }
}
} // ns
AutoML命令:
REM Runs the ML.NET CLI (mlnet) auto-train command for a binary-classification
REM task: trains on employees_norm_train.tsv, passes employees_norm_test.tsv as
REM the test dataset, uses the isMale column as the label, and names the
REM result PredictGenderAutoML. The exploration time of 60 is presumably in
REM seconds - confirm against the installed mlnet version. The dataset paths
REM are relative, so run this from the directory that contains the Data folder.
mlnet auto-train ^
--task binary-classification ^
--dataset ".\Data\employees_norm_train.tsv" ^
--test-dataset ".\Data\employees_norm_test.tsv" ^
--label-column-name isMale ^
--max-exploration-time 60 ^
--name PredictGenderAutoML