3.31课堂

今日内容：
六 json&pickle模块
七 shelve模块（了解）
八 xml模块（了解）

九 configparser模块

十 hashlib模块

subprocess模块.py

课外知识：用pycharm装模块的步骤

六 json&pickle模块
# 1、什么是序列化&反序列化
# 内存中的数据类型---->序列化---->特定的格式（json格式或者pickle格式）
# 内存中的数据类型<----反序列化<----特定的格式（json格式或者pickle格式）

# 土办法：
# {'aaa':111}--->序列化str({'aaa':111})----->"{'aaa':111}"
# {'aaa':111}<---反序列化eval("{'aaa':111}")<-----"{'aaa':111}"

# 2、为何要序列化
# 序列化得到结果=>特定的格式的内容有两种用途
# 1、可用于存储=》用于存档
# 2、传输给其他平台使用=》跨平台数据交互
# python                    java
# 列表 ---> 特定的格式 ---> 数组

# 强调：
# 针对用途1的特定格式：可以是一种专用的格式=》pickle只有python可以识别
# 针对用途2的特定格式：应该是一种通用、能够被所有语言识别的格式=》json

# 3、如何序列化与反序列化
# 示范1
# import json
# # 序列化
# json_res=json.dumps([1,'aaa',True,False])
# # print(json_res,type(json_res)) # "[1, "aaa", true, false]"

# # 反序列化
# l=json.loads(json_res)
# print(l,type(l))

# 示范2：
import json

# 序列化的结果写入文件的复杂方法
# json_res=json.dumps([1,'aaa',True,False])
# # print(json_res,type(json_res)) # "[1, "aaa", true, false]"
# with open('test.json',mode='wt',encoding='utf-8') as f:
# f.write(json_res)

# 将序列化的结果写入文件的简单方法
# with open('test.json',mode='wt',encoding='utf-8') as f:
# json.dump([1,'aaa',True,False],f)

# 从文件读取json格式的字符串进行反序列化操作的复杂方法
# with open('test.json',mode='rt',encoding='utf-8') as f:
# json_res=f.read()
# l=json.loads(json_res)
# print(l,type(l))

# 从文件读取json格式的字符串进行反序列化操作的简单方法
# with open('test.json',mode='rt',encoding='utf-8') as f:
# l=json.load(f)
# print(l,type(l))

pickle下也有load和dump方法

json 格式中的字符串必须使用双引号，不支持单引号的形式

# json验证: json格式兼容的是所有语言通用的数据类型，不能识别某一语言的所独有的类型
# json.dumps({1,2,3,4,5})

# json强调：一定要搞清楚json格式，不要与python混淆
# l=json.loads('[1, "aaa", true, false]')
# l=json.loads("[1,1.3,true,'aaa', true, false]")
# print(l[0])

# 了解
# l = json.loads(b'[1, "aaa", true, false]')
# print(l, type(l))

# with open('test.json',mode='rb') as f:
# l=json.load(f)

# res=json.dumps({'name':'哈哈哈'})
# print(res,type(res))

# res=json.loads('{"name": "\u54c8\u54c8\u54c8"}')
# print(res,type(res))

# 在python解释器2.7与3.6之后都可以json.loads(bytes类型)，但唯独3.5不可以
>>> import json
>>> json.loads(b'{"a":111}')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/linhaifeng/anaconda3/lib/python3.5/json/__init__.py", line 312, in loads
s.__class__.__name__))
TypeError: the JSON object must be str, not 'bytes'

# 4、猴子补丁
# 比如用别人的模块，但是模块中的一些功能想要换成自己的，这个时候就需要打个补丁上去，也就是猴子补丁
# 在入口处打猴子补丁（bin下的start.py 文件）
# import json
# import ujson

# def monkey_patch_json():
# json.__name__ = 'ujson' # 这步做不做无关紧要
# 打补丁就是替换下面这两个函数：ujson 的 dumps/loads 比 json 自带的运行更快
# json.dumps = ujson.dumps
# json.loads = ujson.loads
#
# monkey_patch_json() # 在入口文件处运行

# import ujson as json # 这个操作不行

# 后续代码中的应用
# json.dumps()
# json.dumps()
# json.dumps()
# json.dumps()

# 5.pickle模块
import pickle
# 序列化
# res=pickle.dumps({1,2,3,4,5})
# print(res,type(res))

# 反序列化
# s=pickle.loads(res)
# print(s,type(s))

# coding:utf-8
import pickle

# Part 1: make a pickle written by Python 3 loadable from Python 2.
# Python 2 does not support pickle protocols above 2, while Python 3
# defaults to a higher one, so the dump must pin protocol=2 explicitly.
with open('a.pkl', mode='wb') as f:
    pickle.dump('你好啊', f, protocol=2)

# Part 2: load the value back. Because the file was written with
# protocol=2, this same load call also works when run under Python 2.
with open('a.pkl', mode='rb') as f:
    res = pickle.load(f)
    print(res)

xml模块（了解）

xml的格式如下，就是通过<>节点来区别数据结构的:

<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>

xml协议在各个语言里都是支持的，在python中可以用以下模块操作xml：
# print(root.iter('year')) #全文搜索
# print(root.find('country')) #在root的子节点找，只找一个
# print(root.findall('country')) #在root的子节点找，找所有

import xml.etree.ElementTree as ET

# Parse the document and print the tag of its root element.
tree = ET.parse("xmltest.xml")
root = tree.getroot()
print(root.tag)

# Walk the whole document: each direct child of the root, then each of
# that child's own children.
for child in root:
    # child.attrib['name'] raises KeyError for a child without a "name"
    # attribute.
    print('========>', child.tag, child.attrib, child.attrib['name'])
    for i in child:
        print(i.tag, i.attrib, i.text)

# Visit only the <year> elements, wherever they sit below the root.
for node in root.iter('year'):
    print(node.tag, node.text)
#---------------------------------------

import xml.etree.ElementTree as ET

tree = ET.parse("xmltest.xml")
root = tree.getroot()

# Modify: increment every <year> by one and stamp it with two attributes.
for node in root.iter('year'):
    new_year = int(node.text) + 1
    node.text = str(new_year)
    node.set('updated', 'yes')
    node.set('version', '1.0')
# Write the result once, after all <year> nodes have been updated.
tree.write('test.xml')

# Delete: drop every <country> whose <rank> is above 50. findall() returns
# a list, so removing children from root while looping over it is safe.
for country in root.findall('country'):
    rank = int(country.find('rank').text)
    if rank > 50:
        root.remove(country)

tree.write('output.xml')

shelve模块（了解）
shelve模块比pickle模块简单，只有一个open函数，返回类似字典的对象，可读可写;key必须为字符串，而值可以是python所支持的数据类型

import shelve

# shelve.open() returns a persistent dict-like object. The with-block
# closes the shelf even when the lookup below raises.
with shelve.open(r'sheve.txt') as f:
    # f['stu1_info']={'name':'egon','age':18,'hobby':['piao','smoking','drinking']}
    # f['stu2_info']={'name':'gangdan','age':53}
    # f['school_info']={'website':'http://www.pypy.org','city':'beijing'}

    # Raises KeyError unless 'stu1_info' was stored by an earlier run
    # (see the commented-out writes above).
    print(f['stu1_info']['hobby'])

configparser模块
该模块用来加载某种格式配置文件的配置项，比settings用着方便

import configparser

# Load the INI-style file 'test.ini'; the commented-out calls below show
# the different ways of querying the parsed configuration.
config=configparser.ConfigParser()
config.read('test.ini')

# 1. List the section names
# print(config.sections())

# 2. List every option name of one section
# print(config.options('section1'))

# 3. Get the items of one section
# print(config.items('section1'))

# 4. Read a single option: get() as-is, getint()/getboolean()/getfloat() converted
# res=config.get('section1','user')
# print(res,type(res))

# res=config.getint('section1','age')
# print(res,type(res))

# res=config.getboolean('section1','is_admin')
# print(res,type(res))

# res=config.getfloat('section1','salary')
# print(res,type(res))

配置文件如下：

# 注释1
; 注释2

[section1]
k1 = v1
k2:v2
user=egon
age=18
is_admin=true
salary=31

[section2]
k1 = v1

读取

import configparser

config = configparser.ConfigParser()
# Read explicitly as UTF-8 instead of relying on the platform default
# encoding; the sample config in these notes contains non-ASCII comments.
config.read('a.cfg', encoding='utf-8')

# All section titles
res = config.sections()  # ['section1', 'section2']
print(res)

# Keys of every key=value pair under section1
options = config.options('section1')
print(options)  # ['k1', 'k2', 'user', 'age', 'is_admin', 'salary']

# (key, value) pairs of every key=value pair under section1
item_list = config.items('section1')
print(item_list)  # [('k1', 'v1'), ('k2', 'v2'), ('user', 'egon'), ('age', '18'), ('is_admin', 'true'), ('salary', '31')]

# Value of "user" under section1 => str
val = config.get('section1', 'user')
print(val)  # egon

# Value of "age" under section1 => int
val1 = config.getint('section1', 'age')
print(val1)  # 18

# Value of "is_admin" under section1 => bool
val2 = config.getboolean('section1', 'is_admin')
print(val2)  # True

# Value of "salary" under section1 => float
val3 = config.getfloat('section1', 'salary')
print(val3)  # 31.0

改写

import configparser

config = configparser.ConfigParser()
config.read('a.cfg', encoding='utf-8')

# Remove the whole section2
config.remove_section('section2')

# Remove options k1 and k2 from section1
config.remove_option('section1', 'k1')
config.remove_option('section1', 'k2')

# Check whether a section exists
print(config.has_section('section1'))

# Check whether section1 has a "user" option
print(config.has_option('section1', 'user'))

# Add a new section
config.add_section('egon')

# Add name=egon and age=18 under section egon. Option values must be
# strings: config.set('egon', 'age', 18) raises TypeError.
config.set('egon', 'name', 'egon')
config.set('egon', 'age', '18')

# Finally write the modified configuration back. The with-block closes the
# file, and the encoding matches the one used for reading.
with open('a.cfg', 'w', encoding='utf-8') as f:
    config.write(f)

基于上述方法添加一个ini文档
import configparser

# Build an INI document from scratch with the mapping-style API and save
# it as example.ini.
config = configparser.ConfigParser()
# Values in DEFAULT act as fallbacks for every other section.
config["DEFAULT"] = {
    'ServerAliveInterval': '45',
    'Compression': 'yes',
    'CompressionLevel': '9',
}

config['bitbucket.org'] = {}
config['bitbucket.org']['User'] = 'hg'
config['topsecret.server.com'] = {}
topsecret = config['topsecret.server.com']
topsecret['Host Port'] = '50022'  # mutates the parser
topsecret['ForwardX11'] = 'no'  # same here
config['DEFAULT']['ForwardX11'] = 'yes'
with open('example.ini', 'w', encoding='utf-8') as configfile:
    config.write(configfile)

hashlib模块
# 1、什么是哈希hash
# hash一类算法，该算法接受传入的内容，经过运算得到一串hash值
# hash值的特点：
#I 只要传入的内容一样，得到的hash值必然一样
#II 不能由hash值反解成内容
#III 不管传入的内容有多大，只要使用的hash算法不变，得到的hash值长度是固定的

# 2、hash的用途
# 用途1：特点II用于密码密文传输与验证
# 用途2：特点I、III用于文件完整性校验

# 3、如何用
# import hashlib
#
# m=hashlib.md5()
# m.update('hello'.encode('utf-8'))
# m.update('world'.encode('utf-8'))
# res=m.hexdigest() # 'helloworld'
# print(res)
#
# m1=hashlib.md5('he'.encode('utf-8'))
# m1.update('llo'.encode('utf-8'))
# m1.update('w'.encode('utf-8'))
# m1.update('orld'.encode('utf-8'))
# res=m1.hexdigest()# 'helloworld'
# print(res)

# 模拟撞库
# cryptograph='aee949757a2e698417463d47acac93df'
# import hashlib
#
# # 制作密码字典
# passwds=[
# 'alex3714',
# 'alex1313',
# 'alex94139413',
# 'alex123456',
# '123456alex',
# 'a123lex',
# ]
#
# dic={}
# for p in passwds:
# res=hashlib.md5(p.encode('utf-8'))
# dic[p]=res.hexdigest()
#
# # 模拟撞库得到密码
# for k,v in dic.items():
# if v == cryptograph:
# print('撞库成功，明文密码是：%s' %k)
# break

# 提升撞库的成本=>密码加盐
import hashlib

# Salted digest: the password is wrapped in a prefix and a suffix salt
# before hashing. Feeding the concatenated bytes in one update() call
# yields the same digest as feeding the three pieces one after another.
m = hashlib.md5()
m.update(b''.join(part.encode('utf-8') for part in ('天王', 'alex3714', '盖地虎')))
print(m.hexdigest())

# m.update(文件所有的内容)
# m.hexdigest()
#
# f=open('a.txt',mode='rb')
# f.seek()
# f.read(2000) # 巨琳
# m1.update(文件的一行)
#
# m1.hexdigest()

subprocess模块：用来执行系统命令

import subprocess

# Run several shell commands and capture stdout and stderr separately.
obj = subprocess.Popen(
    'echo 123 ; ls / ; ls /root',
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)

# communicate() drains both pipes and waits for the child to exit. Reading
# only one pipe can deadlock once the child fills the other pipe's buffer.
out_res, err_res = obj.communicate()

# print(obj)
# print(out_res.decode('utf-8'))

print(err_res.decode('utf-8'))

import subprocess

'''
sh-3.2# ls /Users/egon/Desktop |grep txt$
mysql.txt
tt.txt
事物.txt
'''

# Two-process pipeline: the stdout of `ls` becomes the stdin of `grep`.
res1 = subprocess.Popen('ls /Users/jieli/Desktop', shell=True, stdout=subprocess.PIPE)
res = subprocess.Popen('grep txt$', shell=True, stdin=res1.stdout,
                       stdout=subprocess.PIPE)
# Close the parent's copy of the pipe so `ls` can receive SIGPIPE if grep
# exits first.
res1.stdout.close()

print(res.stdout.read().decode('utf-8'))


# Equivalent to the above. The advantage of the two-process form is that
# one data stream can feed another, e.g. output obtained by a crawler can
# be handed to grep.
res1 = subprocess.Popen('ls /Users/jieli/Desktop |grep txt$', shell=True, stdout=subprocess.PIPE)
print(res1.stdout.read().decode('utf-8'))


# On Windows:
# dir | findstr 'test*'
# dir | findstr 'txt$'
import subprocess
res1 = subprocess.Popen(r'dir C:\Users\Administrator\PycharmProjects\test\函数备课', shell=True, stdout=subprocess.PIPE)
res = subprocess.Popen('findstr test*', shell=True, stdin=res1.stdout,
                       stdout=subprocess.PIPE)
res1.stdout.close()

猜你喜欢