python简单文本匹配

当发现有匹配时返回当前的匹配行以及最后检查过的文本

from collections import deque


def serach(lines, pattern, history=5):
    """Yield each line containing ``pattern`` with the lines seen before it.

    Args:
        lines: Iterable of strings (for example an open text file).
        pattern: Substring to look for in each line.
        history: Maximum number of preceding lines to keep as context.

    Yields:
        ``(line, prelines)`` tuples, where ``line`` contains ``pattern`` and
        ``prelines`` is a ``deque`` holding up to ``history`` of the lines
        checked immediately before it (oldest first).

    Note:
        The same ``deque`` object is yielded every time and keeps changing as
        iteration continues; copy it (``list(prelines)``) to keep a snapshot.
    """
    prelines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            yield line, prelines
        # Record every checked line, matching or not, so the context really is
        # "the last lines seen" rather than only earlier matches.
        prelines.append(line)


if __name__ == '__main__':
    # Demo: scan python.txt for "python", keeping up to 5 lines of context.
    counter = 0
    with open(r"python.txt") as handle:
        for matched, context in serach(handle, "python", 5):
            # NOTE(review): this prints the matching line once per context
            # entry (not the context entry itself), followed by a running
            # count -- confirm that this is the intended output.
            for _ in context:
                counter += 1
                print(matched, end="")
                print(counter)

添加简单队列：deque

找出集合中最大或者最小的N个元素：heapq，可指定长度

import heapq
# Sample data for picking the N largest / smallest elements.
nums = [1, 3, 324, 24, 123, -1, 45, 234, 65, 53]
# Print the three largest values, then the three smallest.
for pick in (heapq.nlargest, heapq.nsmallest):
    print(pick(3, nums))

数组的堆化：heapify 将数组原地转换为堆（heap[0] 为最小元素，但整体并非完全有序）：

import heapq
nums = [3, 4, 5, 1, 2, 7, 0]
# Work on a copy so the original list keeps its order.
heap = nums[:]
# heapify rearranges the list in place into heap order: heap[0] is the
# smallest element, but the list as a whole is not fully sorted.
heapq.heapify(heap)
print(heap)

数组排序2：sort 会改变原数组，sorted 不改变原数组（返回一个新列表）

nums = [3, 4, 5, 1, 2, 7, 0]
# list.sort() orders the list in place, so `nums` itself changes.
list.sort(nums)
print(nums)

nums = [3, 4, 5, 1, 2, 7, 0]
# sorted() builds a new sorted list and leaves `nums` as it was.
nums_2 = sorted(nums)
# Show the sorted copy first, then the untouched original.
for shown in (nums_2, nums):
    print(shown)

自定义优先级排序：

import heapq
class PriorityQueue:
    """Priority queue on top of ``heapq``: highest priority pops first.

    Entries are stored as ``(-priority, insertion_index, item)``. Negating
    the priority turns heapq's min-heap into "largest priority first"; the
    insertion index breaks ties so equal priorities pop in insertion order
    and the items themselves are never compared.
    """

    def __init__(self):
        self._queue = []   # heap of (-priority, index, item) tuples
        self._index = 0    # running insertion counter used as tie-breaker

    def push(self, item, priority):
        """Add ``item`` to the queue with the given ``priority``."""
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        entry = heapq.heappop(self._queue)
        return entry[-1]

class Item:
    """Minimal named object used to demonstrate the priority queue."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Render as ITEM(<repr of name>), e.g. ITEM('foo').
        template = "ITEM({!r})"
        return template.format(self.name)

if __name__ == '__main__':
    # Demo: push three items with different priorities, then pop once.
    queue = PriorityQueue()
    for label, priority in (("foo", 2), ("bar", 1), ("foo", 4)):
        queue.push(Item(label), priority)
    print(queue.pop())

自定义字符串分割：re.split() 更灵活

import re

line = "asdf ghhh; afed ,fejk, foo"
# Delimiter: one of ';', ',' or a whitespace character, followed by optional
# whitespace and then optional commas.
delimiter = re.compile(r"[;,\s]\s*,*")
print(delimiter.split(line))

找出以特定字符开头的文件：

import os
# Names of the entries in the current working directory.
filenames = os.listdir(".")
print(filenames)
# Keep only the names that begin with "p".
print(list(filter(lambda entry: entry.startswith("p"), filenames)))

choices = ["http", "https"]
url = "http:www.baidu.com"
# str.startswith accepts a tuple of prefixes (not a list), hence the tuple().
prefixes = tuple(choices)
print(url if url.startswith(prefixes) else "no")

不区分大小写查找文本：

import re
text = "UPPER PYTHON,lower python,Mixed python"
# IGNORECASE makes the literal "python" match regardless of letter case.
matcher = re.compile("python", re.IGNORECASE)
print(matcher.findall(text))

replace替换文本：

s = "--======hello world--- \n"
# First drop every "-" ...
s2 = str.replace(s, "-", "")
# ... then drop every "=" from the intermediate result and print it.
without_equals = s2.replace("=", "")
print(without_equals)

读取txt文本文件：

# Print python.txt line by line with surrounding whitespace stripped.
with open(r"python.txt") as f:
    # map() is lazy, so lines are still stripped one at a time while reading.
    for line in map(str.strip, f):
        print(line)

文本左右对齐和添加特定字符：

text = "hello world"
width = 20
# Right-justify, left-justify and centre within a 20-character field.
print("=", text.rjust(width))
print(text.ljust(width), "===")
print(text.center(width, "*"))
# Format-spec alignment: ^ centres, < left-aligns, > right-aligns; the
# character before the alignment symbol is the fill character.
print(format(text, "=^20s"))
# Custom alignment of several values with str.format.
print('{:>10s} {:>10s}'.format("hello", "world"))

保留两位小数：

x = 1.2645
# ".2f" renders the float with exactly two digits after the decimal point.
two_decimals = format(x, ".2f")
print(two_decimals)

字符串合并：

part = ["wo", "shi", "bobo"]
# The separator string can be anything (" ", ",", "-", ...).
separator = " "
x = separator.join(part)
print(x)

自定义字符串连接：format

a = "wo shi"
b = "bobo"
# Two ways to build the same string: "+" concatenation and str.format.
plus_joined = a + " " + b
print(plus_joined)
print("{} {}".format(a, b))

利用迭代器在将数据转换成字符串的时候完成连接操作：

data = ["acme", 50, 91]
# Convert each element to str on the fly while joining them together.
print("".join(map(str, data)))

random的使用：

从序列中随机挑选元素：

values = [2, 3, 4, 1, 5, 2, 1, 78]
import random

# One randomly chosen element.
print(random.choice(values))
# Three elements sampled without replacement.
print(random.sample(values, 3))
# Shuffle the list in place.
random.shuffle(values)
print(values)

# Random integer between 0 and 100, both ends included.
print(random.randint(0, 100))

# Random float in the range [0.0, 1.0).
print(random.random())

基本时间换算：datetime 模块的 timedelta

from datetime import timedelta
a = timedelta(days=2, hours=6)
b = timedelta(hours=4)
# Adding two timedeltas gives 2 days 10 hours in total.
c = a + b
print(c.days)     # whole days: 2
print(c.seconds)  # remaining seconds within the last day: 10 h = 36000

python简单文本匹配

猜你喜欢