python cookbook 0806

1、怎样在一个序列上面保持元素顺序的同时消除重复的值？
提示：集合（set）中没有重复的值，可以利用这一特性来消除重复；但集合本身不保证元素顺序，所以如果还要保持原有顺序，就需要多做几步处理
对于hashable类型：

def dedupe(items):
    """Yield each distinct element of ``items`` once, in first-seen order.

    Elements are remembered in a set, so they must be hashable.
    """
    already_yielded = set()
    for item in items:
        # Guard clause: skip anything that has been produced before.
        if item in already_yielded:
            continue
        yield item
        already_yielded.add(item)

# Demo: repeated values are dropped while the first-seen order is kept.
a = [1, 5, 2, 1, 9, 1, 5, 10]
print(list(dedupe(a)))
# [1, 5, 2, 9, 10]

对于不可哈希类型(比如字典)：

def dedupe(items, key=None):
    """Yield items from ``items`` in order, skipping duplicates.

    ``key`` is an optional one-argument function that maps an item to the
    hashable value used for the duplicate check.  When ``key`` is None the
    item itself is used, so it must be hashable.
    """
    seen = set()
    for item in items:
        marker = key(item) if key is not None else item
        # Guard clause: an already-recorded marker means a duplicate item.
        if marker in seen:
            continue
        yield item
        seen.add(marker)
# Dicts are unhashable, so a key function supplies a hashable value to compare.
a = [{'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}]
# Deduplicate on the (x, y) pair.
print(list(dedupe(a, key=lambda d: (d['x'],d['y'])))) 
#[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]
# Deduplicate on x alone: only the first dict for each x value is kept.
print(list(dedupe(a, key=lambda d: d['x'])))
#[{'x': 1, 'y': 2}, {'x': 2, 'y': 4}]

至于什么是hashable（可哈希）类型：简单来说，就是在生命周期内哈希值不变、可以作为字典的键或集合元素的对象，例如数字、字符串、（元素均可哈希的）元组；而列表、字典、集合这类可变容器是不可哈希的

yield
这个东西一直困扰了我很久，看了一些文章解释一下
参考文章：详细版通俗版
yield是一个类似return的关键字：函数执行到yield时会返回yield后面的值并暂停，下一次迭代时从暂停处继续执行。含有yield的函数是生成器函数，调用它会得到一个生成器（一种迭代器），可以进行迭代操作（如for循环）

def func(n):
    """Generate ``i * 2`` for every ``i`` in ``range(n)``, one value per step."""
    for index in range(n):
        doubled = index * 2
        # Execution pauses here until the caller asks for the next value.
        yield doubled

# Each loop step resumes func after its last yield and receives the next value.
for i in func(5):
    print(i)
#0 2 4 6 8  (printed one value per line)

2、切片操作
可以用内置的slice()创建一个切片对象

# A slice object can be stored in a variable and used wherever [start:stop] is.
items = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
a = slice(2, 6)
print(items[a])
#[2,3,4,5]

# The optional third argument is the step.
items = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
a = slice(2, 8, 2)  # step of 2
print(items[a])
#[2, 4, 6]

# The three components are exposed as read-only attributes.
print(a.start)  #2
print(a.stop)   #8
print(a.step)   #2

3、怎样找出一个序列中出现次数最多的元素呢
collections.Counter就是专门为这类问题设计的

from collections import Counter

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]
# Counter maps each distinct word to the number of times it occurs.
word_counts = Counter(words)
print(word_counts)
# Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1})

top_three = word_counts.most_common(3)  # most_common(n) returns the n most frequent items
print(top_three)
# [('eyes', 8), ('the', 5), ('look', 4)]

print(word_counts['the'])  # a count can be looked up directly by key
# 5


#增加计数的方法
1)简单粗暴法
# Increment the counts by hand; a Counter reports 0 for a missing key, so
# += also works for words that have not been counted yet.
newwords = ['why','are','you','not','looking','in','my','eyes']
for word in newwords:
    word_counts[word] += 1

2)update法
# Counter.update adds the counts from the iterable to the existing counts
# (an alternative to the manual loop, not something to run in addition to it).
word_counts.update(newwords)

3)数学方法
# Counters support arithmetic, combining two tallies key by key.
a = Counter(words)
# Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2,
# "you're": 1, "don't": 1, 'under': 1, 'not': 1})
b = Counter(newwords)
# Counter({'eyes': 1, 'looking': 1, 'are': 1, 'in': 1, 'not': 1, 'you': 1,
# 'my': 1, 'why': 1})
c = a + b  # add the counts of matching keys
# Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2,
# 'around': 2, "you're": 1, "don't": 1, 'in': 1, 'why': 1,
# 'looking': 1, 'are': 1, 'under': 1, 'you': 1})
d = a - b  # subtract counts; keys whose result is zero or negative are dropped
# Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2,
# "you're": 1, "don't": 1, 'under': 1})

4、通过某个关键字排序一个字典列表
sorted()有一个关键字参数key=，可以传入一个函数，按该函数对每个元素的返回值进行排序（顺序还是倒序由reverse=参数控制）；通过使用 operator 模块的 itemgetter 函数，可以非常容易地排序这样的数据结构

# Sample records: a list of dicts that all share the same keys.
rows = [
{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
from operator import itemgetter
# itemgetter('fname') builds a callable that returns row['fname'] for a row.
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))
print(rows_by_fname)
#[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
#{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
#{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
#{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
print(rows_by_uid)
#[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
#{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
#{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
#{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]

rows_by_lfname = sorted(rows, key=itemgetter('lname','fname'))# several keys may be given; rows are then compared by the resulting tuple
print(rows_by_lfname)
#[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
#{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
#{'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
#{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]

也可以用lambda代替
# The same two sorts, written with lambda key functions instead of itemgetter.
rows_by_fname = sorted(rows, key=lambda r: r['fname'])
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'],r['fname']))
不过用itemgetter()会运行的稍微快点

这个格式对min()和max()同样适用
# With key=, min()/max() return the whole row that has the smallest/largest uid.
min(rows, key=itemgetter('uid'))    #{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}
max(rows, key=itemgetter('uid'))    #{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}

猜你喜欢