Python——序列化模块

1. 序列化与反序列化

序列化：数据类型-->字符串
反序列化：字符串-->数据类型

2. 序列化模块

模块	重要程度	简单介绍
json	*****	通用的序列化格式(各种编程语言都用)
pickle	****	几乎所有的python数据类型都可以通过pickle转化为bytes(字节串)
shelve	***	使用句柄直接操作，很方便

（1）json　

　　优点：通用的序列化格式
　　缺点：只有很少一部分数据类型可以通过json转化为字符串

　　json —— 数字、字符串、布尔值、None、列表、字典、元组(被当作列表处理)

　　<1> 直接操作内存中的数据类型——dumps loads

import json

# Start from an ordinary dict that lives in memory.
dic = {'k1': 'v1'}
print(f"{type(dic)} {dic}")  # <class 'dict'> {'k1': 'v1'}

# dumps: Python object -> JSON text (a str).
# JSON only permits double-quoted strings, which is why the text
# reads {"k1": "v1"} rather than {'k1': 'v1'}.
str_d = json.dumps(dic)
print(f"{type(str_d)} {str_d}")  # <class 'str'> {"k1": "v1"}

# loads: JSON text (a str) -> Python object.
dic_d = json.loads(str_d)
print(f"{type(dic_d)} {dic_d}")  # <class 'dict'> {'k1': 'v1'}

View Code

　　 <2> 和文件相关的操作——dump load

# dump的参数ensure_ascii

import json

# Note: JSON object keys are always strings, so the int keys 1 and 2
# come back as '1' and '2' after a round trip.
dic = {1: '中国', 2: 'b'}

# Default (ensure_ascii=True): non-ASCII characters are written as
# \uXXXX escapes.
with open('file', 'w', encoding='utf-8') as f:
    json.dump(dic, f)  # file content: {"1": "\u4e2d\u56fd", "2": "b"}

# ensure_ascii=False keeps the original characters in the file.
# The file is reopened in 'w' mode so it is truncated first: dumping
# twice through the same handle would leave two concatenated JSON
# documents in the file, and json.load() would then fail with
# "Extra data".
with open('file', 'w', encoding='utf-8') as f:
    json.dump(dic, f, ensure_ascii=False)  # file content: {"1": "中国", "2": "b"}

# load: parse the single JSON document stored in the file.
with open('file', 'r', encoding='utf-8') as f:
    res = json.load(f)
print(type(res), res)  # <class 'dict'> {'1': '中国', '2': 'b'}

View Code

# 一次性写进去dump，再一次性读出来load

import json

dic = {1: 'a', 2: 'b'}

# dump: serialize the whole object into the file in one call.
# The with-statement closes the handle even if dump() raises.
with open('file', 'w', encoding='utf-8') as f:
    json.dump(dic, f)  # file content: {"1": "a", "2": "b"}

# load: read the whole document back in one call, using the same
# encoding the file was written with.
with open('file', 'r', encoding='utf-8') as f:
    res = json.load(f)
print(type(res), res)  # <class 'dict'> {'1': 'a', '2': 'b'}

View Code

# 一次写进去一行dumps，再一行一行读出来loads

import json

l = [{'k1': 111}, {'k2': 1111}, {'k3': 11}]

# Write one JSON document per line with dumps().
with open('f', 'w', encoding='utf-8') as f:
    for dic in l:
        f.write(json.dumps(dic) + '\n')

# Read the file back line by line with loads().
# json.loads() tolerates the trailing newline; lines that are empty
# after stripping whitespace are skipped instead of raising.
with open('f', encoding='utf-8') as f:
    l = [json.loads(line) for line in f if line.strip()]
print(l)  # [{'k1': 111}, {'k2': 1111}, {'k3': 11}]

View Code

（2）pickle

　　优点：几乎所有的python数据类型都可以通过pickle转化为bytes(字节串)
　　缺点：pickle序列化的内容只有python能理解；且部分反序列化依赖python代码

　　<1> 直接操作内存中的数据类型——dumps loads

import pickle

dic = {'k1': 'v1'}
print(f"{type(dic)} {dic}")  # <class 'dict'> {'k1': 'v1'}

# dumps: Python object -> bytes.
# The sample below is the author's output; the exact bytes vary with
# the pickle protocol in use.
str_dic = pickle.dumps(dic)
print(f"{type(str_dic)} {str_dic}")  # <class 'bytes'> b'\x80\x03}q\x00X\x02\x00\x00\x00k1q\x01X\x02\x00\x00\x00v1q\x02s.'

# loads: bytes -> Python object.
dic_d = pickle.loads(str_dic)
print(f"{type(dic_d)} {dic_d}")  # <class 'dict'> {'k1': 'v1'}

View Code

　　<2> 和文件相关的操作——dump load 【文件操作形式要加b，因为是bytes类型的】

import pickle
import time

# localtime() converts to the machine's local timezone, so the hour
# (and possibly the day) printed here differs between machines.
struct_time = time.localtime(1000000000)
print(struct_time)  # author's output (UTC+8): time.struct_time(tm_year=2001, tm_mon=9, tm_mday=9, tm_hour=9, tm_min=46, tm_sec=40, tm_wday=6, tm_yday=252, tm_isdst=0)

# dump: pickle produces bytes, so the file must be opened in binary mode.
# The with-statement closes the handle even if dump() raises.
with open('pickle_file', 'wb') as f:
    pickle.dump(struct_time, f)

# load: read the object back. Only unpickle data from a trusted source;
# here the file was written by this script a moment ago.
with open('pickle_file', 'rb') as f:
    struct_time2 = pickle.load(f)
print(struct_time2.tm_year)  # 2001

View Code

　　<3> pickle 可以分步dump和load，json不可以

import pickle
import time

# localtime() converts to the machine's local timezone; the sample
# output below is the author's (UTC+8).
struct_time1 = time.localtime(1000000000)
struct_time2 = time.localtime(1000000000)
print(struct_time1)  # time.struct_time(tm_year=2001, tm_mon=9, tm_mday=9, tm_hour=9, tm_min=46, tm_sec=40, tm_wday=6, tm_yday=252, tm_isdst=0)
print(struct_time2)  # time.struct_time(tm_year=2001, tm_mon=9, tm_mday=9, tm_hour=9, tm_min=46, tm_sec=40, tm_wday=6, tm_yday=252, tm_isdst=0)

# Several objects can be dumped one after another into the same file.
with open('pickle_file', 'wb') as f:
    pickle.dump(struct_time1, f)
    pickle.dump(struct_time2, f)

# Each load() call returns the next pickled object, in write order.
# Only unpickle data from a trusted source.
with open('pickle_file', 'rb') as f:
    struct_time1 = pickle.load(f)
    struct_time2 = pickle.load(f)
print(struct_time1.tm_year)  # 2001
print(struct_time2.tm_year)  # 2001

View Code

（3）shelve

　　优点：序列化句柄，使用句柄直接操作，很方便
　　缺点：新模块，有小问题，不够完善

import shelve

# Store a value by assigning to a key on the shelf handle.
# The with-statement closes the shelf even if the assignment raises.
with shelve.open('shelve_file') as f:
    f['key'] = {'int': 10, 'float': 9.5, 'string': 'Sample data'}

# Read it back by key; a missing key raises KeyError.
with shelve.open('shelve_file') as f:
    existing = f['key']
print(existing)  # {'int': 10, 'float': 9.5, 'string': 'Sample data'}

View Code

　　<1> 不支持多个应用同一时间往同一个DB进行写操作

import shelve

# When only reading, open the shelf with the read-only flag 'r'.
# The with-statement closes the shelf even if the key lookup raises.
with shelve.open('shelve_file', 'r') as f:
    existing = f['key']
print(existing)  # {'int': 10, 'float': 9.5, 'string': 'Sample data'}

View Code

　　<2> 小问题：只读的时候可以修改

import shelve

# Demonstrates assigning to a shelf that was opened with flag 'r'.
# NOTE(review): whether the assignment below is accepted appears to depend
# on the underlying dbm backend and Python version; some may reject it
# with an error instead -- confirm on the target platform.
f = shelve.open('shelve_file','r')  # open with the read-only flag
existing = f['key']
print(existing)     # author's sample output: 100 (presumably stored by an earlier run -- TODO confirm)
# An earlier revision of this demo assigned 100 here; it now assigns 111.
f['key']= 111
f.close()

# Reopen (again with flag 'r') and read the value back.
f = shelve.open('shelve_file','r')  # open with the read-only flag
existing2 = f['key']
f.close()
print(existing2)     # author's sample output: 111

View Code

　　<3> writeback参数
　　　　优点：减少出错概率，且让对象的持久化更加透明
　　　　缺点：增加额外的内存消耗和时间浪费
　　　　默认情况下(writeback=False)，对取出的可变对象做的原地修改不会被写回文件，如下f1
　　　　需要设置writeback=True，这类修改才会在sync/close时被保存，如下f2

import shelve

# Seed the shelf with a known value.
with shelve.open('shelve_file') as f:
    f['key'] = {'int': 10, 'float': 9.5, 'string': 'Sample data'}

# Without writeback: f1['key'] hands back a fresh copy on every access,
# so mutating that copy in place is never written to the shelf.
with shelve.open('shelve_file') as f1:
    print(f1['key'])  # {'int': 10, 'float': 9.5, 'string': 'Sample data'}
    f1['key']['new_value'] = 'this was not here before'
    print(f1['key'])  # {'int': 10, 'float': 9.5, 'string': 'Sample data'}

# With writeback=True: accessed entries are cached and written back when
# the shelf is closed, so the in-place mutation is persisted.
with shelve.open('shelve_file', writeback=True) as f2:
    print(f2['key'])  # {'int': 10, 'float': 9.5, 'string': 'Sample data'}
    f2['key']['new_value'] = 'this was not here before'
    print(f2['key'])  # {'int': 10, 'float': 9.5, 'string': 'Sample data', 'new_value': 'this was not here before'}

View Code