python practice question 2

Exercise 1: Count word frequencies in a file, then optimize the code.

Required material: an ordinary English text document.

Straightforward implementation:

def makekey(s: str):
    """Split *s* into lowercase words, treating punctuation as separators.

    The input is lowercased, every punctuation character listed below is
    replaced by a space, and the result is split on whitespace.

    Args:
        s: The text to tokenize.

    Returns:
        A list of lowercase words; empty when *s* holds no word characters.
    """
    # Raw string so the backslash is a literal separator, not an escape.
    separators = r"""!'"#./\()[],*-"""
    # One C-level pass replaces the per-character Python loop.
    to_space = str.maketrans(separators, ' ' * len(separators))
    return s.lower().translate(to_space).split()
# The first approach above (makekey) is inefficient: it rebuilds the whole string character by character.
#
# The second approach below (makekey1) slices the words out directly, but it can still be optimized:
def makekey1(s: str):
    """Split *s* into lowercase words by slicing between punctuation marks.

    Only the punctuation characters listed below act as separators;
    whitespace is NOT one, so callers are expected to pass text that has
    already been split on whitespace.

    Args:
        s: The text to tokenize.

    Returns:
        A list of lowercase words; empty when *s* holds no word characters.
    """
    chars = set(r"""!'"#./\()[],*-""")
    key = s.lower()
    ret = []
    start = 0  # Index at which the word currently being scanned begins.

    for i, c in enumerate(key):
        if c not in chars:
            continue
        # start == i means the separator directly follows another separator
        # (or opens the string), so there is no word to emit.
        if start < i:
            ret.append(key[start:i])
        start = i + 1  # Skip past the separator itself.

    # Emit the trailing word when the text does not end with a separator.
    if start < len(key):
        ret.append(key[start:])

    return ret
#------------------------------------------#

# Tally how often each word occurs in sample.txt.
d = {}
with open('sample.txt', encoding='utf-8') as f:
    for line in f:
        for token in line.split():
            for word in makekey1(token):
                d[word] = d.get(word, 0) + 1

# Print the ten most frequent words, ranked from 1.
ranking = sorted(d.items(), key=lambda item: item[1], reverse=True)
for i, (k, v) in enumerate(ranking[:10], 1):
    print(i, k, v)

Results:

1 path 138
2 the 136
3 is 60
4 a 59
5 out of 49
6 if 43
7 and 40
8 to 34
9 on 33
10 of 33

Process finished with exit code 0

The following is the optimization of the code:

# TODO: ===== The code below optimizes makekey1 from the code above =====

# TODO: Drop the first approach (makekey) above.
# TODO: Optimize the second approach (makekey1).
# TODO: The excluded characters can be supplied in two ways; use whichever you prefer.
# Alternative: keep them in a module-level constant and use it as the default:
# CHARS = set("""!'"#./\\()[],*- \r\n\t""")
# def _makekey2(key:str,chars=CHARS):
def _makekey2(key: str, chars=set("""!'"#./\()[],*- \r\n\t""")): #If you use it multiple times, you can use the above plan.
    start = 0

    for i,c in enumerate(key):
        if c in chars:
            if start == i: #If it is next to a special character, start must be equal to i.
                start += 1 #Add 1 and continue
                continue
            # ret.append((key[start:i]))
            yield key[start:i]
            start = i+1 #Adding 1 is to skip this unnecessary special character c.
    else:
        if start < len(key): #Less than, indicating that there are valid characters, and it has been until the end.
            # ret.append(key[start:])
            yield key[start:]


#todo : [The following is case-insensitive]:
def wordcount(filename: str, encoding='utf-8', ignorewords=frozenset()) -> dict:
    """Count case-insensitive word occurrences in a text file.

    Each line is tokenized with ``_makekey2`` and every word is lowercased
    before counting.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to open the file.
        ignorewords: Words to leave out of the result. They are compared
            against the lowercased words, so entries should be lowercase.

    Returns:
        A dict mapping each counted word to its number of occurrences.

    Raises:
        OSError: If *filename* cannot be opened.
    """
    d = {}
    # Open the file the caller asked for rather than a hard-coded name.
    with open(filename, encoding=encoding) as f:
        for line in f:
            for word in map(str.lower, _makekey2(line)):  # Case-insensitive.
                if word not in ignorewords:
                    d[word] = d.get(word, 0) + 1
    return d

# todo : [The following is case sensitive]:
# def wordcount(filename:str,encoding='utf-8',ignorewords=set())->dict:
#     """This function performs word statistics
#
#     """
#     d= {}
#     with open('sample.txt',encoding=encoding) as f:
#         for line in f:
#             for word in _makekey2(line): #Case sensitive.
#                 d[word] = d.get(word,0) + 1
#     return d

# top10
def top(d: dict, n: int = 10):
    """Yield the *n* entries of *d* with the largest values.

    Entries are produced as ``(key, value)`` pairs in descending order of
    value; entries with equal values keep the order they have in *d*.
    Nothing is yielded when *n* is zero or negative.
    """
    by_value_desc = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
    emitted = 0
    for name, count in by_value_desc:
        if emitted >= n:
            return
        # The caller decides what to do with each pair; nothing is printed here.
        yield name, count
        emitted += 1

# Print the ten most frequent words in sample.txt, leaving out 'the' and 'is'.
# The argument names the actual data file, 'sample.txt'.
for k, v in top(wordcount('sample.txt', ignorewords={'the', 'is'})):
    print(k, v)

Results:

path 138
a 59
the 49
if 43
and 40
to 34
on 33
of 33
return 30
windows 25

Process finished with exit code 0


Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325505402&siteId=291194637