| 网站首页 | 业界新闻 | 小组 | 威客 | 人才 | 下载频道 | 博客 | 代码贴 | 在线编程 | 编程论坛
欢迎加入我们,一同切磋技术
用户名:   
 
密 码:  
共有 1679 人关注过本帖
标题:ID3算法python实现的问题
只看楼主 加入收藏
大黄鸡
Rank: 1
等 级:新手上路
帖 子:1
专家分:0
注 册:2018-4-26
收藏
 问题点数:0 回复次数:0 
ID3算法python实现的问题
程序代码:
from numpy import *
import math
import copy
import pickle as pp
class ID3DTree(object):
    def __init__(self):
        self.tree = {}
        self.dataSet = []
        self.labels = []

    def loadDataSet(self,path,labels):
        recordlist = []
        fp = open(path,"r")
        content = fp.read()
        fp.close()
        rowlist = content.splitlines()
        recordlist = [row.split("\t") for row in rowlist if row.strip()]
        self.dataSet = recordlist
        self.labels = labels

    def train(self):
        labels = copy.deepcopy(self.labels)
        self.tree = self.buildTree(self.dataSet,labels)

    def buildTree(self,dataSet,labels):
        catelist = [data[-1] for data in dataSet]
        if catelist.count(catelist[0]) == len(catelist):
            return catelist[0]
        if len(dataSet[0]) == 1:
            return self.maxCate(catelist)
        besfFeat = self.getBestFeat(dataSet)
        bestFeatLabel = labels[besfFeat]
        tree = {bestFeatLabel:{}}
        del(labels[besfFeat])
        uniqueVals = set([data[bestFeat] for data in dataSet])
        for value in uniqueVals:
            subLabels = labels[:]
            splitDataSet = self.splitDataSet(dataSet,besfFeat,value)
            subTree = self.buildTree(splitDataSet,subLabels)
            tree[bestFeatLabel][value] = subTree
        return tree

    def maxCate(self,catelist):
        items = dict([(catelist.count(i),i) for i in catelist])
        return items[max(items.keys())]

    def getBestFeat(self,dataSet):
        numFeatures = len(dataSet[0]) - 1
        baseEntropy = (dataSet)
        bestInfoGain = 0.0;
        besfFeature = -1
        for i in range(numFeatures):
            uniqueVals = set([data[i] for data in dataSet])
            newEntropy = 0.0
            for value in uniqueVals:
                subDataSet = self.splitDataSet(dataSet,i,value)
                prob = len(subDataSet)/float(len(dataSet))
                newEntropy += prob * (subDataSet)
            infoGain = baseEntropy - newEntropy
            if (infoGain > bestInfoGain):
                bestInfoGain = infoGain
                besfFeature = id
        return besfFeature

    def computeEntropy(self,dataSet):
        datalen = float(len(dataSet))
        catelist = [data[-1] for data in dataSet]
        items = dict([(i,catelist.count(i)) for i in catelist])
        infoEntropy = 0.0
        for key in items:
            prob = float(items[key])/datalen
            infoEntropy -= prob * math.log(prob,2)
        return infoEntropy

    def splitDataSet(self,dataSet,axis,value):
        rtnList = []
        for featVec in dataSet:
            if featVec[axis] == value:
                rFeatVec = featVec[:axis]
                rFeatVec.extend(featVec[axis+1:])
                rtnList.append(rFeatVec)
        return rtnList
图片附件: 游客没有浏览图片的权限,请 登录注册

这个是哪里错误了,怎么解决啊?
搜索更多相关主题的帖子: def dataSet data for return 
2018-04-26 16:20
快速回复:ID3算法python实现的问题
数据加载中...
 
   



关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.020400 second(s), 11 queries.
Copyright©2004-2024, BCCN.NET, All Rights Reserved