第八章 常用模块

常用模块

一、re模块

import re

# Regex patterns that contain backslash classes (\w, \d, \s, ...) are written
# as raw strings so Python does not try to interpret them as string escapes.
# The subject strings stay ordinary strings because their "\n" and "\t" are
# meant to be a real newline and a real tab.

# 1. \w: matches one character that is a letter, a digit, or an underscore
res = re.findall(r"\w", "aBc123_ *()-=")
print(res)  # ['a', 'B', 'c', '1', '2', '3', '_']

res = re.findall("abc", "abc ab123 aa1 bc12abc3")
print(res)  # ['abc', 'abc']

# 2. \W: matches one character that is NOT a letter, digit, or underscore
res = re.findall(r"\W", "aBc123_ *()-=")
print(res)


# 3. \d: matches one character that is a digit
res = re.findall(r"\d", "aBc123_ *()-=")
print(res)

# 4. \D: matches one character that is not a digit
res = re.findall(r"\D", "aBc12.3_ *()-=")
print(res)

# 5. \s: matches one whitespace character
res = re.findall(r"\s", "aBc12.3_ *()\n\t-=")
print(res)

# 6. \S: matches one non-whitespace character
res = re.findall(r"\S", "aBc12.3_ *()\n\t-=")
print(res)

# 7. \n: matches only the newline character
# (the pattern below is a Python string holding a real newline, matched literally)
res = re.findall("\n", "aBc12.3_ *()\n\t-=")
print(res)

# 8. \t: matches only the tab character
# (the pattern below is a Python string holding a real tab, matched literally)
res = re.findall("\t", "aBc12.3_ *()\n\t-=")
print(res)

# 9. ^: anchors the match at the start of the string
# The active line is the un-anchored baseline; the commented variants add "^".
res = re.findall("abc", "abc ab123 aa1 bc12abc3")
# res = re.findall("^abc", "abc ab123 aa1 bc12abc3")
# res = re.findall("^abc", "a1c ab123 aa1 bc12abc3")
print(res)

# 10. $: anchors the match at the end of the string
# res = re.findall("abc$", "a1c ab123 aa1 bc12abc3")
res = re.findall("abc$", "a1c ab123 aa1 bc12abc")
print(res)

# ^ and $ together: the whole string must match
res = re.findall("^egon$", "egon")
print(res)

# 11. .: matches any single character except a newline
res = re.findall("a.c", "abc a1c a2c a c a+c aaaaaaac")
print(res)
# ['abc', 'a1c', 'a2c', 'a c', 'a+c', 'aac']

# With re.DOTALL the dot also matches a newline
res = re.findall("a.c", "abc a1c a2c a c a+c aaaaaaac a\nc", re.DOTALL)
print(res)

# 12. []: matches one character taken from the set listed inside the brackets
# Only the last assignment in this group is printed; the earlier ones are
# alternative patterns to experiment with.
res = re.findall("a[b1]c", "abc a1c a2c a c a+c aaaaaaac")
res = re.findall("a[0-9]c", "abc a1c a2c a c a+c aaaaaaac")
res = re.findall(r"a\dc", "abc a1c a2c a c a+c aaaaaaac a\nc")

res = re.findall("a[a-z]c", "abc aAc aBc a1c a2c a c a+c aaaaaaac")
res = re.findall("a[A-Z]c", "abc aAc aBc a1c a2c a c a+c aaaaaaac")

# A leading ^ inside the brackets negates the set
res = re.findall("a[^0-9]c", "abc a1c a2c a c a+c aaaaaaac a\nc")


# "-" is placed first in the set so it is a literal dash rather than a range
res = re.findall("a[-+*/]c", "a*c a+c a-c a/c")
print(res)


# The symbols below cannot be used on their own: each one follows another
# item and states how many times the item on its left may repeat.

# 13. ?: the item on the left occurs 0 or 1 times
res = re.findall("ab?", 'a ab abb abbb abbbb bbbb')
print(res)
# ['a', 'ab', 'ab', 'ab', 'ab']

# 14. *: the item on the left occurs 0 or more times
res = re.findall("ab*", 'a ab abb abbb abbbb bbba')
print(res)
# ['a', 'ab', 'abb', 'abbb', 'abbbb', 'a']

# 15. +: the item on the left occurs 1 or more times
res = re.findall("ab+", 'a ab abb abbb abbbb bbba')
print(res)
# ['ab', 'abb', 'abbb', 'abbbb']

# 16. {n,m}: the item on the left occurs between n and m times
res = re.findall("ab{2,5}", 'a ab abb abbb abbbb abbbbbb bbba')
print(res)
# ['abb', 'abbb', 'abbbb', 'abbbbb']

# {n}: exactly n times
res = re.findall("ab{2}", 'a ab abb abbb abbbb abbbbbb bbba')
print(res)

# Equivalent spellings of ?, * and + using braces (results are not printed)
res = re.findall("ab{0,1}", 'a ab abb abbb abbbb abbbbbb bbba')
res = re.findall("ab{0,}", 'a ab abb abbb abbbb abbbbbb bbba')
res = re.findall("ab{1,}", 'a ab abb abbb abbbb abbbbbb bbba')

# 17. .*: matches any number of characters, greedily (as many as possible)
res = re.findall("a.*c", '123 a13+dadsc45788lkdc 123213')
print(res)

# Appending ? makes the quantifier non-greedy: .*? matches as few as possible
res = re.findall("a.*?c", '123 a13+dadsc45788lkdc 123213')
print(res)

res = re.findall("a.*c", 'a123c a13+dadsc45788lkdc 123213')
print(res)
res = re.findall("a.*?c", 'a123c a13+dadsc45788lkdc 123213')
print(res)

# 18. Groups: the first pattern has no group, the second wraps the URL in a
# capturing group; both are run against the same HTML snippet.
html = '<a href="https://www.baidu.com">"点击我啊"</a><a href="https://www.sina.com.cn">"点击我啊"</a>'
for href_pattern in ('href=".*?"', 'href="(.*?)"'):
    print(re.findall(href_pattern, html))


# 19. |: alternation ("or"); (?:...) groups the alternatives without capturing
sentence = "Too many companies have gone bankrupt, and the next one is my company"
res = re.findall("compan(?:ies|y)", sentence)
print(res)


# Functions of the re module

# findall: returns every non-overlapping match as a list
res = re.findall("a.c", 'abc a1c a2c')
print(res)


# search: scans the string and returns a match object for the first match
res = re.search("a.c", 'asadf123c a1c a2c')
print(res)
print(res.group())


# search returns None when nothing matches
res = re.search("adddd", 'asadf123c a1c a2c')
print(res)

res = re.search("a.c", '1 abc a1c a2c')
print(res)

# match: only matches at the start of the string,
# comparable to re.search("^a.c", '1 abc a1c a2c')
res = re.match("a.c", '1 abc a1c a2c')
print(res)


# compile: builds a reusable pattern object
pattern = re.compile('href="(.*?)"')

res = pattern.findall('<a href="https://www.baidu.com">"点击我啊"</a><a href="https://www.sina.com.cn">"点击我啊"</a>')
print(res)


# Named group (?P<name>...) and backreference (?P=name): the closing tag must
# repeat the opening tag's name. Raw strings keep the \w classes intact.
print(re.findall(r"<\w+>\w+</\w+>", "<h1>hello</h2>"))
print(re.findall(r"<(?P<tag_name>\w+)>\w+</(?P=tag_name)>", "<h1>hello</h1>"))
print(re.search(r"<(?P<tag_name>\w+)>\w+</(?P=tag_name)>", "<h1>hello</h2>"))


# Zero-width (lookahead) assertions: (?=...) checks the text that follows
# without consuming it, so the character still has to be matched afterwards.
lookahead_subject = "egonN123"
consumes_n = "egon(?=100|N)(?=N)N123"  # the N is matched after the lookaheads
skips_n = "egon(?=100|N)(?=N)123"      # the N is asserted but never consumed

res = re.findall(consumes_n, lookahead_subject)
print(res)


for lookahead_pattern in (consumes_n, skips_n):
    print(re.findall(lookahead_pattern, lookahead_subject))


# Password rule built from lookaheads: at least one uppercase letter, one
# lowercase letter, one digit and one of !@#%&, and six or more characters
# drawn only from those sets.
pwd_pattern = re.compile(
    "(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[!@#%&])^([a-zA-Z0-9!@#%&]){6,}$"
)

res = pwd_pattern.search("12Aa#")
print(res)


# \b word boundaries with a non-greedy \S*?; the result is printed twice,
# as in the original demo.
text = 'site sea sue sweet see case sse ssee loses'
for _ in range(2):
    print(re.findall(r'\bs\S*?e\b', text))

二、time模块

(一)time简介

# 在Python中,通常有这几种方式来表示时间:
#
# 时间戳(timestamp):通常来说,时间戳表示的是从1970年1月1日00:00:00开始按秒计算的偏移量。我们运行“type(time.time())”,返回的是float类型。
# 格式化的时间字符串(Format String)
# 结构化的时间(struct_time):struct_time元组共有9个元素:(年,月,日,时,分,秒,一周中的第几天,一年中的第几天,夏令时标志)

(二) 具体使用

import time

# 1. Timestamp: used for time arithmetic
print(time.time())

# 2. Formatted string: used for display
for display_format in ("%Y-%m-%d %H:%M:%S %p", "%Y-%m-%d %X"):
    print(time.strftime(display_format))

# 3. Structured time: gives access to a single part of the time
res = time.localtime()
print(res)
res = time.gmtime()
# The gmtime() result is printed twice, followed by two of its fields
print(res, res, res.tm_mday, res.tm_yday, sep="\n")

# 4. Conversions
# timestamp -> structured time -> formatted string
struct_t = time.localtime(13123123)
print(struct_t)
res = time.strftime("%Y-%m-%d %H:%M:%S %p", struct_t)
print(res)

# formatted string -> structured time -> timestamp
s = "1970-06-03 11:11:11"
struct_t = time.strptime(s, "%Y-%m-%d %H:%M:%S")
res = time.mktime(struct_t)
print(res)

import datetime

print(datetime.datetime.now())
# Three days and three hours before now, written as a subtraction
offset = datetime.timedelta(days=3, hours=3)
res = datetime.datetime.now() - offset
print(res)

(三) 格式化字符串的时间格式

%a    Locale’s abbreviated weekday name.     
%A    Locale’s full weekday name.     
%b    Locale’s abbreviated month name.     
%B    Locale’s full month name.     
%c    Locale’s appropriate date and time representation.     
%d    Day of the month as a decimal number [01,31].     
%H    Hour (24-hour clock) as a decimal number [00,23].     
%I    Hour (12-hour clock) as a decimal number [01,12].     
%j    Day of the year as a decimal number [001,366].     
%m    Month as a decimal number [01,12].     
%M    Minute as a decimal number [00,59].     
%p    Locale’s equivalent of either AM or PM.    (1)
%S    Second as a decimal number [00,61].    (2)
%U    Week number of the year (Sunday as the first day of the week) as a decimal number [00,53]. All days in a new year preceding the first Sunday are considered to be in week 0.    (3)
%w    Weekday as a decimal number [0(Sunday),6].     
%W    Week number of the year (Monday as the first day of the week) as a decimal number [00,53]. All days in a new year preceding the first Monday are considered to be in week 0.    (3)
%x    Locale’s appropriate date representation.     
%X    Locale’s appropriate time representation.     
%y    Year without century as a decimal number [00,99].     
%Y    Year with century as a decimal number.     
%z    Time zone offset indicating a positive or negative time difference from UTC/GMT of the form +HHMM or -HHMM, where H represents decimal hour digits and M represents decimal minute digits [-23:59, +23:59].     
%Z    Time zone name (no characters if no time zone exists).     
%%    A literal '%' character.

(四) 时间戳转换关系图

                  strftime               localtime
格式化的字符串时间<-------------结构化的时间<------------时间戳
(format string)<-----------(struct_time)<--------(Timestamp)


                  strptime                mktime
格式化的字符串时间------------->结构化的时间------------>时间戳
(format string)----------->(struct_time)------------>(Timestamp)

三、random模块

import random

# One draw from each helper, evaluated in the order listed, then printed
# one per line.
draws = [
    random.random(),
    random.uniform(1, 3),
    random.randint(1, 3),
    random.randrange(1, 3),
    random.choice([1, "a", "b"]),
    random.sample([1, "a", "b"], 2),
]
for draw in draws:
    print(draw)


# shuffle reorders the list in place
item = [10, "a", 4, 111]
random.shuffle(item)
print(item)

应用

def make_code(n=6):
    """Return a random verification code made of ``n`` characters.

    For every position a digit (0-9) and an uppercase letter (code points
    65-90, i.e. A-Z) are both drawn, and one of the two is chosen at random.
    """
    picked = []
    for _ in range(n):
        digit = str(random.randint(0, 9))
        letter = chr(random.randint(65, 90))
        picked.append(random.choice([digit, letter]))
    return ''.join(picked)


print(make_code())

四、os模块

import os
# Entries of the current working directory
print(os.listdir('.'))

file_path = r"C:/a/b/c/c.txt"

# os.system runs a shell command; its return value is printed below.
# NOTE(review): "tasklisst" looks like a misspelling of the Windows
# "tasklist" command — confirm whether the failing command is intentional.
res = os.system("tasklisst")
print("===>", res)

# Add a key to the environment mapping, then print the whole mapping
os.environ['k1'] = '111111'

print(os.environ)

# Path of this script, split into its parts
print(__file__)
res = os.path.split(__file__)
print(res)

for part_of in (os.path.dirname, os.path.basename):
    print(part_of(__file__))

# Existence and type checks against sample Windows paths
day_dir = r'D:\weekend_s7\day08'
day_script = r'D:\weekend_s7\day08\04 os模块.py'
path_checks = (
    (os.path.exists, day_dir),
    (os.path.exists, day_script),
    (os.path.isfile, day_script),
    (os.path.isdir, day_dir),
)
for check, target in path_checks:
    print(check(target))

# Joining path components
# Windows variant: res = os.path.join('C:\\','a',"D:\\",'b','c.txt')
res = os.path.join('/', 'a', '/b', 'c.txt')
print(res)

# normcase / normpath applied to sample paths
print(os.path.normcase(res))

print(os.path.normcase('c:/windows\\system32\\'))

print(os.path.normpath('c://windows\\System32\\../Temp/'))


# Approach 1: the directory two levels above this file
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
print(BASE_DIR)

# Approach 2 (for reference): append ".." twice to this file's path,
# e.g. D:\weekend_s7\day08\04 os模块.py
res = os.path.join(__file__, "..", "..")
# print(os.path.normpath(res))


# Size of this script in bytes
print(os.path.getsize(__file__))

五、sys模块

'''
1 sys.argv           命令行参数List,第一个元素是程序本身路径
2 sys.exit(n)        退出程序,正常退出时exit(0)
3 sys.version        获取Python解释程序的版本信息
4 sys.maxsize        Py_ssize_t 能表示的最大值(Python 2 中的 sys.maxint 在 Python 3 中已移除,整数不再有上限)
5 sys.path           返回模块的搜索路径,初始化时使用PYTHONPATH环境变量的值
6 sys.platform       返回操作系统平台名称
'''

六、shutil模块

打包压缩

import tarfile

# Pack two scripts into an uncompressed tar archive under new member names.
# The with-statement closes the archive even when add() raises, for example
# because one of the source files is missing.
with tarfile.open('aaa.tar', 'w') as t:
    t.add("01 re模块.py", arcname="a.py")
    t.add("02 时间模块.py", arcname="b.py")


# Unpack the archive into the bak/ directory.
# NOTE(review): on Python 3.12+ consider passing filter="data" to extractall
# when the archive comes from an untrusted source.
with tarfile.open('aaa.tar', 'r') as t:
    t.extractall(r'bak/')

七、hashlib模块

一、 hash是一种算法,该算法用于校验文本然后得到一串hash值,hash值有三个特点:

# 1、只要使用的hash算法固定,传入的内容一样,得到的hash值一定一样
# 2、不能通过hash值反解出原内容
# 3、只要使用的hash算法固定,hash值的长度就固定了,不会随着传入内容的增多而变长

二、hash算法的两种用途:

# 1+2=》传输密文密码
# 1+3=》文件完整性校验

三、使用

import hashlib

# Feeding the same bytes in different-sized pieces gives the same digest.
m1 = hashlib.md5()
for piece in (b"hello", b"world"):
    m1.update(piece)
print(m1.hexdigest())

m2 = hashlib.md5()
for piece in (b"h", b"ell", b"ow", b"or", b"ld"):  # together: b"helloworld"
    m2.update(piece)
print(m2.hexdigest())


# Hash a password with extra text fed in before and after it
m = hashlib.md5()
salted_parts = (
    "西北连天一片云".encode('utf-8'),
    b"egon123",
    "乌鸦落在凤凰群".encode('utf-8'),
)
for piece in salted_parts:
    m.update(piece)
print(m.hexdigest())

# File integrity check: hash the file's bytes without loading it all at once.
m = hashlib.md5()
with open(r'01 re模块.py', mode='rb') as f:
    # Fixed-size reads keep memory bounded even for a binary file that has no
    # newline bytes, where iterating "by line" would return one huge chunk.
    for chunk in iter(lambda: f.read(65536), b''):
        m.update(chunk)
# The digest is computed after the file has been closed
hash_value = m.hexdigest()
print(hash_value)
import hmac

# Keyed MD5 digest: b'hello' is the key, b'world' is the message
h = hmac.new(b'hello', msg=b'world', digestmod='md5')
print(h.hexdigest())

作者:吴常文
出处:https://blog.csdn.net/qq_41405475
本文版权归作者和CSDN共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。

猜你喜欢

转载自blog.csdn.net/qq_41405475/article/details/114898375