[Practice] Data Mining (DM) Course Check-in Lab 2: Naive Bayes Classifier

  • I. Objectives
  • II. Experiment Content
  • 1. (Difficulty 1) Implement training of a naive Bayes classifier. Assume the data involve only continuous attributes.
  • 2. (Difficulty 2) Implement training and classification with a naive Bayes classifier.
  • 3. (Difficulty 2) Implement training of a naive Bayes classifier.
  • 4. (Difficulty 3) Implement training and classification with a naive Bayes classifier.





I. Objectives

(1) Understand how the naive Bayes classifier works (the decision rule is summarized below).
(2) Implement a naive Bayes classifier in code.
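
For reference, the decision rule implemented in the tasks below scores each class by its prior probability times the product of the per-attribute class-conditional probabilities; for a continuous attribute, the class-conditional density is modeled as a Gaussian estimated from the training data:

$$\hat{y}=\arg\max_{c} \; P(c)\prod_{j=1}^{d} p(x_j \mid c), \qquad p(x_j \mid c)=\frac{1}{\sqrt{2\pi}\,\sigma_{jc}}\exp\!\left(-\frac{(x_j-\mu_{jc})^2}{2\sigma_{jc}^2}\right)$$

where $\mu_{jc}$ and $\sigma_{jc}^2$ are the per-class mean and variance stored in Params{c,j} by the training functions, and for a categorical attribute $p(x_j \mid c)$ is the relative frequency of value $x_j$ within class $c$.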

II. Experiment Content

1. (Difficulty 1) Implement training of a naive Bayes classifier. Assume the data involve only continuous attributes.

The test code is as follows:

load('trainingData.mat');
load('testingData.mat');
[Params,prior,AllLabels] = NaiveBayesTrain1(trAttr, trLabels);

(Paste the working code here; the core code must include comments.)

function [Params,prior,AllLabels] = NaiveBayesTrain1(trAttr, trLabels)
%NaiveBayesTrain1 handles continuous attributes only; categorical attributes are not supported.

AllLabels = unique(trLabels);
numClass = length(AllLabels);    % number of distinct class labels

for i =1:numClass
    ind=find(trLabels==AllLabels(i));       % indices of the training samples in class i
    Ind4Class{i,1}=ind;
    prior(i)=length(ind)/length(trLabels);  % prior probability of class i
end

numAttr=size(trAttr,2);
Params{numClass,numAttr}=[];  % preallocate the parameter cell array

for AttrInd =1:numAttr
    AttrVals=trAttr(:,AttrInd);
    for i=1:numClass
        Params{i,AttrInd}=GetParams4ContiAttr(AttrVals,Ind4Class{i,1});
    end
end
end

function Params1=GetParams4ContiAttr(AttrVals,ind)
AttrVals4ThisClass=AttrVals(ind);
Params1(1)=mean(AttrVals4ThisClass);   % class-conditional mean
Params1(2)=var(AttrVals4ThisClass);    % class-conditional variance

end
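
After running the test code above, Params is a numClass-by-numAttr cell array in which Params{i,j}(1) and Params{i,j}(2) hold the mean and variance of attribute j within the i-th class, prior(i) is the estimated prior of that class, and AllLabels lists the class labels in the same row order. A quick illustrative check (the indices are chosen arbitrarily):

% Illustrative inspection of the training output (indices chosen arbitrarily)
mu_11  = Params{1,1}(1);   % mean of attribute 1 within class AllLabels(1)
var_11 = Params{1,1}(2);   % variance of attribute 1 within class AllLabels(1)
fprintf('class %g, attribute 1: mean=%.4f, var=%.4f, prior=%.4f\n', ...
        AllLabels(1), mu_11, var_11, prior(1));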


2. (Difficulty 2) Implement training and classification with a naive Bayes classifier.

Assume the data involve only continuous attributes.

The test code is as follows:

load('trainingData.mat');
load('testingData.mat');
[Labels]=bys_classify1(trAttr, trLabels,tstAttr);
acc=sum(Labels ==tstLabels)/length(Labels);

(Paste the working code here; the core code must include comments.)

% load('trainingData.mat');
% load('testingData.mat');
% [Labels]=bys_classify1(trAttr, trLabels,tstAttr);
% acc=sum(Labels ==tstLabels)/length(Labels);
 
%bys_classify1 trains a naive Bayes classifier and classifies the test samples. The data involve only continuous attributes.
function [Labels]=bys_classify1(trAttr,trLabels,tstAttr)
[Params,prior,AllLabels]=NaiveBayesTrain(trAttr,trLabels);
N=size(tstAttr,1);
Labels=zeros(N,1);
for i=1:N
    tstAttrSample=tstAttr(i,:);
    post=NaiveBayesPredict(Params,prior,tstAttrSample);  % unnormalized posterior for each class
    [maxValue,index]=max(post);                           % the class with the largest posterior wins
    Labels(i)=AllLabels(index);
end
end
 
function post=NaiveBayesPredict(Params,prior,tstAttr)  %% classification
numClass=length(prior);       % number of classes
numAttr=length(tstAttr);      % number of attributes
post=zeros(1,numClass);
for i=1:numClass
    p_AttrCond=1;             % product of per-attribute class-conditional densities
    for AttrInd=1:numAttr
        AttrVals=tstAttr(AttrInd);
        mu=Params{i,AttrInd}(1);         % class-conditional mean
        variance=Params{i,AttrInd}(2);   % class-conditional variance
        sig=sqrt(variance);
        % Gaussian density of this attribute value under class i
        p=(1/(sqrt(2*pi)*sig))*exp(-(AttrVals-mu)^2/(2*sig^2));
        p_AttrCond=p_AttrCond*p;
    end
    post(i)=p_AttrCond*prior(i);         % unnormalized posterior
end
end
 
 
function [Params,prior,AllLabels]=NaiveBayesTrain(trAttr,trLabels)  %% training
AllLabels=unique(trLabels);
numClass=length(AllLabels);
for i=1:numClass
    ind=find(trLabels==AllLabels(i));
    Ind4Class{i,1}=ind;
    prior(i)=length(ind)/length(trLabels);
end
 
numAttr=size(trAttr,2);
Params{numClass,numAttr}=[];
 
for AttrInd=1:numAttr
    AttrVals=trAttr(:,AttrInd);
    for i=1:numClass
       Params{i,AttrInd}=GetParams4ContiAttr(AttrVals,Ind4Class{i,1});
    end
end
end
 
 
function Params1=GetParams4ContiAttr(AttrVals,ind)
AttrVals4ThisClass=AttrVals(ind);
Params1(1)=mean(AttrVals4ThisClass);
Params1(2)=var(AttrVals4ThisClass);
end
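
If the course .mat files are not at hand, bys_classify1 can be sanity-checked on synthetic data. The snippet below is only an illustrative scaffold (not part of the assignment's test code): it draws two well-separated Gaussian classes, so the reported accuracy should be close to 1.

% Synthetic sanity check for bys_classify1 (illustrative only, not assignment test code)
rng(1);                                    % reproducible random numbers
N = 200;
trAttr    = [randn(N,2)+2; randn(N,2)-2];  % two well-separated Gaussian clouds
trLabels  = [ones(N,1); 2*ones(N,1)];
tstAttr   = [randn(50,2)+2; randn(50,2)-2];
tstLabels = [ones(50,1); 2*ones(50,1)];
Labels = bys_classify1(trAttr, trLabels, tstAttr);
acc = sum(Labels == tstLabels)/length(Labels);
fprintf('accuracy on synthetic data: %.2f\n', acc);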


3. (Difficulty 2) Implement training of a naive Bayes classifier.

Assume the data involve both continuous and categorical attributes.

The test code is as follows:

refund=[1 2 2 1 2 2 1 2 2 2]';
mar=[1 2 1 2 3 2 3 1 2 1]';
inc=[125,100,70,120,95,60,220,85,75,90]';
trAttr =[refund,mar,inc];
trLabels=[1 1 1 1 2 1 1 2 1 2]';
tstAttr=[2,2,120];
[type,Params,prior,AllLabels] = NaiveBayesTrain2(trAttr, trLabels);

(Paste the working code here; the core code must include comments.)

% refund=[1 2 2 1 2 2 1 2 2 2]';
% mar=[1 2 1 2 3 2 3 1 2 1]';
% inc=[125,100,70,120,95,60,220,85,75,90]';
% trAttr =[refund,mar,inc];
% trLabels=[1 1 1 1 2 1 1 2 1 2]';
% tstAttr=[2,2,120];
% [type,Params,prior,AllLabels] = NaiveBayesTrain2(trAttr, trLabels);
 
function [type,Params,prior,AllLabels]=NaiveBayesTrain2(trAttr,trLabels)
AllLabels=unique(trLabels);
numClass=length(AllLabels);  % number of distinct class labels
 
for i=1:numClass
    ind=find(trLabels == AllLabels(i));
    Ind4Class{i,1}=ind;
    prior(i)=length(ind)/length(trLabels);
end
 
numAttr=size(trAttr,2);
Params{numClass,numAttr}=[];  % preallocate the parameter cell array
 
for AttrInd=1:numAttr
    AttrVals=trAttr(:,AttrInd);
    numAttrValsClass=length(unique(AttrVals));  % number of distinct values of this attribute
    maxAttrVals=max(unique(AttrVals));          % largest value taken by this attribute
    
    if(isempty(find(rem(AttrVals,1)~=0)) && numAttrValsClass<10 && maxAttrVals ==numAttrValsClass)
        % Treat the attribute as categorical when all values are integers, there are
        % fewer than 10 distinct values, and the K distinct values are exactly 1,2,...,K.
        % Categorical attribute: type = 0.
        type(AttrInd)=0;
        for i=1:numClass
            Params{i,AttrInd}=GetParams4CategoricalAttr(AttrVals,Ind4Class{i,1});
        end
        
    else
        type(AttrInd)=1;   % continuous attribute: type = 1
        
        for i=1:numClass
            Params{i,AttrInd}=GetParams4ContiAttr(AttrVals,Ind4Class{i,1});
        end
    end
    
end
end
 
 
function Params1=GetParams4ContiAttr(AttrVals,ind)
 
AttrVals4ThisClass=AttrVals(ind);
Params1(1)=mean(AttrVals4ThisClass);   % class-conditional mean
Params1(2)=var(AttrVals4ThisClass);    % class-conditional variance
 
end
 
function Params1=GetParams4CategoricalAttr(AttrVals,ind)
Params1=[];
AllAttrVals=unique(AttrVals);
numAttrValsClass=length(AllAttrVals);
AttrVals4ThisClass=AttrVals(ind);
nj=length(ind);
 
for i=1:numAttrValsClass
    nij=length(find(AttrVals4ThisClass==AllAttrVals(i)));  % count of value i within this class
    Params1(i)=nij/nj;                                      % conditional probability P(value i | class)
end
 
end
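
For the toy data in the test code above, the first two attributes (refund and mar) satisfy the categorical test, so type comes back as [0 0 1]. For a categorical attribute j, Params{i,j}(v) stores the conditional probability P(attribute j = v | class i); for the continuous attribute inc, Params{i,3} stores its mean and variance within class i. An illustrative check:

% Illustrative inspection of the mixed-attribute training output
[type,Params,prior,AllLabels] = NaiveBayesTrain2(trAttr, trLabels);
disp(type)         % expected: 0 0 1   (refund and mar categorical, inc continuous)
disp(Params{1,1})  % [P(refund=1|class AllLabels(1)), P(refund=2|class AllLabels(1))]
disp(Params{1,3})  % [mean, variance] of inc within class AllLabels(1)
disp(prior)        % class prior probabilities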


4. (Difficulty 3) Implement training and classification with a naive Bayes classifier.

(Paste the working code here; the core code must include comments.)

function [Labels]=bys_classify(trAttr,trLabels,tstAttr)
[type,Params,prior,AllLabels]=NaiveBayesTrain2(trAttr,trLabels);
N=size(tstAttr,1);
Labels=zeros(N,1);
 
for i=1:N
    tstAttrSample=tstAttr(i,:);
    post=NaiveBayesPredict(type,Params,prior,tstAttrSample);  % unnormalized posterior for each class
    [maxValue,index]=max(post);                                % the class with the largest posterior wins
    Labels(i)=AllLabels(index);
end
end
 
 
 
function post=NaiveBayesPredict(type,Params,prior,tstAttr)
numClass=length(prior);   % number of classes
numAttr=length(tstAttr);
post=[];
for i=1:numClass
    p_AttrCond=1;
    for AttrInd=1:numAttr
        AttrVals=tstAttr(AttrInd);
        if(type(AttrInd)==0)
            % Categorical attribute (type = 0): the value itself indexes Params,
            % so categorical values must be consecutive integers 1,2,...,K.
            p=Params{i,AttrInd}(AttrVals);
            p_AttrCond=p_AttrCond*p;
        else
            % Continuous attribute (type = 1): Gaussian class-conditional density.
            mu=Params{i,AttrInd}(1);
            variance=Params{i,AttrInd}(2);
            sig=sqrt(variance);
            p=(1/(sqrt(2*pi)*sig))*exp(-(AttrVals-mu)^2/(2*sig^2));
            p_AttrCond=p_AttrCond*p;
        end
    end
    
    post(i)=p_AttrCond*prior(i);
    
end
 
end
 
 
function [type,Params,prior,AllLabels]=NaiveBayesTrain2(trAttr,trLabels)
AllLabels=unique(trLabels);
numClass=length(AllLabels);  % number of distinct class labels
 
for i=1:numClass
    ind=find(trLabels == AllLabels(i));
    Ind4Class{i,1}=ind;
    prior(i)=length(ind)/length(trLabels);
end
 
numAttr=size(trAttr,2);
Params{numClass,numAttr}=[];  % preallocate the parameter cell array
 
for AttrInd=1:numAttr
    AttrVals=trAttr(:,AttrInd);
    numAttrValsClass=length(unique(AttrVals));  % number of distinct values of this attribute
    maxAttrVals=max(unique(AttrVals));          % largest value taken by this attribute
    
    if(isempty(find(rem(AttrVals,1)~=0)) && numAttrValsClass<10 && maxAttrVals ==numAttrValsClass)
        % Treat the attribute as categorical when all values are integers, there are
        % fewer than 10 distinct values, and the K distinct values are exactly 1,2,...,K.
        % Categorical attribute: type = 0.
        type(AttrInd)=0;
        for i=1:numClass
            Params{i,AttrInd}=GetParams4CategoricalAttr(AttrVals,Ind4Class{i,1});
        end
        
    else
        type(AttrInd)=1;   % continuous attribute: type = 1
        
        for i=1:numClass
            Params{i,AttrInd}=GetParams4ContiAttr(AttrVals,Ind4Class{i,1});
        end
    end
    
end
end
 
function Params1=GetParams4ContiAttr(AttrVals,ind)
 
AttrVals4ThisClass=AttrVals(ind);
Params1(1)=mean(AttrVals4ThisClass);   % class-conditional mean
Params1(2)=var(AttrVals4ThisClass);    % class-conditional variance
 
end
 
function Params1=GetParams4CategoricalAttr(AttrVals,ind)
Params1=[];
AllAttrVals=unique(AttrVals);
numAttrValsClass=length(AllAttrVals);
AttrVals4ThisClass=AttrVals(ind);
nj=length(ind);
 
for i=1:numAttrValsClass
    nij=length(find(AttrVals4ThisClass==AllAttrVals(i)));  % count of value i within this class
    Params1(i)=nij/nj;                                      % conditional probability P(value i | class)
end
 
end
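
The assignment does not list test code for this task, so a minimal driver (my own sketch, reusing the toy data from task 3) is shown below; it trains on the ten records and classifies the single test record tstAttr = [2,2,120].

% Minimal driver for task 4, reusing the toy data from task 3 (illustrative)
refund=[1 2 2 1 2 2 1 2 2 2]';
mar=[1 2 1 2 3 2 3 1 2 1]';
inc=[125,100,70,120,95,60,220,85,75,90]';
trAttr=[refund,mar,inc];
trLabels=[1 1 1 1 2 1 1 2 1 2]';
tstAttr=[2,2,120];                       % one test record: refund=2, mar=2, inc=120
Labels=bys_classify(trAttr,trLabels,tstAttr);
fprintf('predicted class for the test record: %d\n', Labels);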


Ending!