# 利用 Python 读取 Stata 文件，并防止中文乱码。其中 load_large_dta 用于读取 Stata 文件，deconde_str 用于解码中文字符串。
import pandas as pd
import numpy as np
import os
from pyecharts import Geo, Map
def load_large_dta(fname, chunksize=100 * 1000):
    """Load a Stata ``.dta`` file into a single DataFrame, chunk by chunk.

    The file is read incrementally with ``pd.read_stata(..., iterator=True)``.
    The chunks are collected in a list and concatenated once at the end,
    which avoids re-copying the accumulated frame for every chunk and does
    not rely on ``DataFrame.append`` (removed in pandas 2.0).

    Args:
        fname: Path (or file-like object) accepted by ``pd.read_stata``.
        chunksize: Number of rows requested per chunk.

    Returns:
        A DataFrame holding every row that was read, with a fresh
        ``0..n-1`` index. An empty DataFrame is returned when no rows
        were read.

    Notes:
        A ``KeyboardInterrupt`` during loading is treated as "stop early":
        the rows read so far are returned instead of raising.
    """
    chunks = []
    # The context manager guarantees the underlying file handle is closed,
    # including when loading is interrupted.
    with pd.read_stata(fname, iterator=True) as reader:
        try:
            chunk = reader.get_chunk(chunksize)
            while len(chunk) > 0:
                chunks.append(chunk)
                chunk = reader.get_chunk(chunksize)
                # Progress marker, flushed so it shows up immediately.
                print('.', flush=True)
        except (StopIteration, KeyboardInterrupt):
            # StopIteration marks the end of the file; KeyboardInterrupt is
            # a deliberate best-effort early exit that keeps partial data.
            pass

    if chunks:
        df = pd.concat(chunks, ignore_index=True)
    else:
        df = pd.DataFrame()
    print('\nloaded {} rows'.format(len(df)))
    return df
def deconde_str(string):
    """Undo mojibake in text read from a ``.dta`` file.

    The input is re-encoded to bytes as Latin-1 and those bytes are then
    decoded as UTF-8.

    Args:
        string: Text whose characters are UTF-8 bytes misread as Latin-1.

    Returns:
        The text obtained by decoding the recovered bytes as UTF-8.

    Raises:
        UnicodeEncodeError: If ``string`` contains characters outside Latin-1.
        UnicodeDecodeError: If the recovered bytes are not valid UTF-8.
    """
    raw_bytes = string.encode('latin-1')
    return raw_bytes.decode('utf-8')
# Example usage: read the 2002 data file from a local path with load_large_dta.
df_2002_path = "C:/Users/Administrator/Desktop/企业经纬度数据/final_data_2002.dta"
df_2002 = load_large_dta(df_2002_path)
# 解码字符串示例：