Python 实现朴素 Bayes 算法-白红宇

Python 实现朴素 Bayes 算法

阅读量：7081 次

发布时间：2019-06-28

本文共 2315 字，大约阅读时间需要 7 分钟。

import numpy as np

from sklearn.utils import check_arrays

from sklearn.cross_validation import train_test_split

from sklearn import datasets

import os

# Run the shell's `clear` command before the script prints anything.
os.system("clear")

##########################

class myBayes:
    """Naive Bayes classifier for discrete (categorical) feature values.

    Every distinct value in a feature column is treated as its own category;
    continuous features should be binned before being passed in.

    Probabilities are estimated with add-one (Laplace) smoothing:

        P(label)                 = (count(label) + 1) / (n_samples + n_labels)
        P(x_j = value | label)   = (count(value, label) + 1)
                                   / (count(label) + n_distinct_values_j)

    A (value, label) pair that never occurred during ``fit`` has a count of
    zero and therefore gets ``1 / (count(label) + n_distinct_values_j)``.
    """

    def __init__(self):
        # Px[j][(value, label)] -> smoothed P(feature j == value | label)
        self.Px = {}
        # Py[label] -> smoothed prior P(label)
        self.Py = {}
        # Number of feature columns seen by fit().
        self.nx = 0
        # Not used by this class; kept so existing attribute access still works.
        self.lx = 0
        # Value returned by the most recent successful predict() call.
        self.result = None
        # _unseen[j][label] -> smoothed probability for a zero-count pair.
        self._unseen = {}

    def fit(self, X, y):
        """Estimate the smoothed priors and per-feature conditionals.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like holding one hashable label per sample.
        """
        X = np.asarray(X)
        _, n_col = X.shape
        # Work on plain Python scalars so dictionary keys built here compare
        # and hash the same way as the ones built in predict().
        labels = np.asarray(y).tolist()
        n_samples = len(labels)

        # Single pass instead of one list.count() call per label.
        class_count = {}
        for label in labels:
            class_count[label] = class_count.get(label, 0) + 1
        k = len(class_count)

        Py = dict(
            (label, (cnt + 1.0) / (n_samples + k))
            for label, cnt in class_count.items()
        )

        Px = {}
        unseen = {}
        for j in range(n_col):
            column = X[:, j].tolist()
            s = len(set(column))

            # zip() is consumed directly, so this works on Python 2 and 3.
            pair_count = {}
            for pair in zip(column, labels):
                pair_count[pair] = pair_count.get(pair, 0) + 1

            Px[j] = dict(
                (pair, (cnt + 1.0) / (class_count[pair[1]] + s))
                for pair, cnt in pair_count.items()
            )
            unseen[j] = dict(
                (label, 1.0 / (cnt + s)) for label, cnt in class_count.items()
            )

        self.Py = Py
        self.Px = Px
        self.nx = n_col
        self._unseen = unseen

    def _predict_row(self, row):
        """Return the label with the highest log-posterior for one sample."""
        best_label = None
        best_score = None
        for label, prior in self.Py.items():
            # Sum of logs instead of a product of probabilities, so many
            # features cannot underflow to 0.0.
            score = np.log(prior)
            for j, value in enumerate(row):
                p = self.Px[j].get((value, label))
                if p is None:
                    p = self._unseen[j][label]
                score += np.log(p)
            # Strict '>' keeps the first label on an exact tie, rather than
            # letting equal scores overwrite each other.
            if best_score is None or score > best_score:
                best_label, best_score = label, score
        return best_label

    def predict(self, test_X):
        """Predict labels for one sample or for a batch of samples.

        Args:
            test_X: a 1-D sequence with ``nx`` values (one sample) or a 2-D
                array-like of shape (n_samples, nx).

        Returns:
            A 0-d ``numpy`` array holding the label for a 1-D input, a 1-D
            ``numpy`` array of labels for a 2-D input, or ``None`` when the
            model has not been fitted or the input does not have ``nx``
            features (or is neither 1-D nor 2-D).
        """
        tX = np.array(test_X)
        ts = tX.shape

        if not self.Py:
            # Nothing has been fitted yet, so there is no class to choose.
            return None

        if len(ts) == 1:
            if ts[0] != self.nx:
                return None
            self.result = np.array(self._predict_row(tX.tolist()))
            return self.result

        if len(ts) == 2:
            if ts[1] != self.nx:
                return None
            self.result = np.array(
                [self._predict_row(row) for row in tX.tolist()]
            )
            return self.result

        return None

#x1=[1,1,1,1,1,2,2,2,2,2,3,3,3,3,3]

#x2=['s','m','m','s','s','s','m','m','l','l','l','m','m','l','l']

#y=[0,0,1,1,0,0,0,1,1,1,1,1,1,1,0]

#x2dict={'s':1,'m':2,'l':3}

#X=np.array(zip(x1,[x2dict[x] for x in x2]))

#y=np.array(y)

#print X,y

# Demo: train the classifier on the iris data set and report accuracy on a
# held-out split.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# 20% of the samples are held out for testing. No random_state is passed, so
# the split (and therefore the accuracy) differs from run to run.
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.2)

clf = myBayes()
clf.fit(trainX, trainy)
predicted = clf.predict(testX)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(testy)
print(predicted)
# Fraction of test samples whose predicted label matches the true label.
print(np.mean(testy == predicted))

###########################################

输出结果：

yuanzhen@yuanzhen-ThinkPad-X121e:~/P_script$ python mybayes.py

[2 0 2 0 2 0 2 1 2 0 0 0 1 2 2 1 2 0 2 1 2 2 2 1 1 2 1 1 0 2]

[2 0 2 0 2 0 2 1 2 0 0 2 2 2 2 1 2 0 2 1 2 1 2 2 1 2 1 1 0 2]

0.866666666667

[0 1 1 0 1 2 1 1 0 0 0 1 1 0 2 1 2 0 1 2 0 2 0 2 2 2 2 2 0 0]

[0 1 2 0 1 2 1 1 0 0 0 1 2 0 1 1 1 0 1 2 2 2 0 2 2 2 2 1 2 0]

0.766666666667

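结果显示预测并不稳定：train_test_split 每次运行都会随机划分训练集与测试集；同时 iris 的特征是连续值，这里却把每个取值当作离散类别处理，测试样本中的取值可能从未在训练集中出现，因此准确率在不同运行之间波动较大。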

转载于:https://my.oschina.net/lCQ3FC3/blog/841532

你可能感兴趣的文章

SQL Server2008 忘记sa密码解决办法

查看>>

[LeetCode]24. Swap Nodes in Pairs两两交换链表中的节点

查看>>

linux修改系统时间

查看>>

什么是p12证书？ios p12证书怎么获取？

查看>>

如何在Python上实现用文本进度条体现π的计算过程

查看>>

Samba服务器搭建，匿名访问，用户密码访问

面向对象二次整理(基础,属性引用,方法引用.绑定方法)

查看>>

Quick-cocos2d-x3.3 Study （十）--------- 添加动画