Python计算信息熵代码

#coding=utf-8
import pandas as pd
import numpy as np
import math

# Load the input table from the CSV file in the current working directory.
data = pd.read_csv('useriddayu1.csv')
# NOTE(review): `.loc[()]` with an empty tuple presumably selects every row;
# the shapes recorded below are identical for `data` and `df` -- confirm
# whether a plain reference or an explicit copy was intended.
df=data.loc[()]
data.shape  # recorded result: (34250, 8); bare expression, no effect when run as a script
df.shape # recorded result: (34250, 8)

# Debugging aid (disabled): restrict the run to the first few rows.
#df=df.head()
# Group the rows by the 'userid' column; the groups are iterated further down
# to compute per-user statistics.
grouped=df.groupby(df['userid'])

def getIndexSize(df):
    """Return the number of distinct 'userid' groups in *df*.

    The value is used to pre-size the result table with one row per user.

    Args:
        df: A DataFrame that contains a 'userid' column.

    Returns:
        The number of groups produced by grouping *df* on 'userid'
        (0 for an empty frame).

    Raises:
        KeyError: If *df* has no 'userid' column.
    """
    # `ngroups` reports the group count directly, so there is no need to
    # iterate and materialise every group's sub-frame just to count them.
    return df.groupby(df['userid']).ngroups

# Result table: one row per distinct userid, with the same columns as the
# input. Every non-'userid' cell will hold the Shannon entropy (natural log,
# i.e. in nats) of that column's value distribution within the user's rows.
results = pd.DataFrame(index=range(getIndexSize(df)), columns=df.columns)

row = 0
for user_id, user_rows in grouped:
    group_size = len(user_rows)
    for col in df.columns:
        print("===", col)
        if col == 'userid':
            # Single-step label assignment. The previous chained form
            # `results.iloc[row][col] = ...` writes into a temporary object
            # and is silently lost under pandas Copy-on-Write.
            results.at[row, 'userid'] = user_id
            continue
        # Frequency of each distinct value of this column within the group.
        # NOTE(review): value_counts() skips NaN by default while group_size
        # counts every row, so the probabilities do not sum to 1 when the
        # column contains missing values -- confirm the intended handling.
        counts = user_rows[col].value_counts()
        entropy = 0.0
        for occurrences in counts:
            p = float(occurrences / group_size)
            entropy -= p * math.log(p)
        print("sum:  ", entropy)
        results.at[row, col] = entropy
    row += 1

print("wowowowowowowow")
# Write the per-user entropies, keeping the input's column order.
results.to_csv('10.csv', columns=df.columns, index=False)

猜你喜欢

转载自blog.csdn.net/youxinyuchu/article/details/83821612
今日推荐