Python Sets: Flipped-Classroom Exercises

Table of contents

Level 1: Count the number of words in a novel

Level 2: Count the number of unique words in a novel

Level 3: Remove duplicate elements from a list while preserving the order of first appearance

Level 4: Output the number of words that appear in both novels

Level 5: Output the number of distinct words that appear in either novel

Level 6: Count the number of words that appear only in the first novel

Level 7: Count the number of words that appear in exactly one of the two novels

Level 1: Count the number of words in a novel

Task: Write a small program that counts the number of words in an English novel.

Test input: The Old Man and the Sea.txt

Expected output: 27154

import string


def file_to_str(file):
    """Read the UTF-8 text file at path ``file`` and return its contents in lowercase."""
    with open(file, encoding='utf-8') as src:  # open() defaults to read/text mode
        return src.read().lower()


def file_to_lst(txt):
    """Split ``txt`` into a list of words.

    Every character in ``string.punctuation`` is replaced by a space and the
    result is split on runs of whitespace. Digits are not punctuation, so
    tokens made of digits are kept.

    Args:
        txt: The text to tokenize.

    Returns:
        A list of the tokens in their order of appearance.
    """
    # A single translate() pass maps every punctuation character to a space,
    # instead of rescanning the whole text once per symbol.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return txt.translate(to_space).split()


if __name__ == '__main__':
    path = '/data/bigfiles/'   # directory that holds the novels
    filename = input()         # file name read from standard input
    # Read the novel, tokenize it, and report how many words it contains.
    words_lst = file_to_lst(file_to_str(path + filename))
    print(len(words_lst))

Level 2: Count the number of unique words in a novel

Task: Write a small program that counts the number of unique words in an English novel.

Test input: The Old Man and the Sea.txt

Expected output: 2557

import string


def file_to_str(file):
    """Return the lowercase contents of the UTF-8 text file at path ``file``."""
    with open(file, encoding='utf-8') as src:  # open() defaults to read/text mode
        content = src.read()
    return content.lower()

def file_to_lst(txt):
    """Split the string ``txt`` into a list of words.

    Every character in ``string.punctuation`` is replaced by a space and the
    result is split on runs of whitespace. Digits are not punctuation, so
    tokens made of digits are kept.

    Args:
        txt: The text to tokenize (already read from the file by the caller).

    Returns:
        A list of the tokens in their order of appearance.
    """
    # A single translate() pass maps every punctuation character to a space,
    # instead of rescanning the whole text once per symbol.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return txt.translate(to_space).split()


def no_repeat(words_ls):
    """Return the distinct words of ``words_ls`` as a set.

    The result is an unordered set, so the original order of appearance is
    not preserved; the caller only needs its size.
    """
    return set(words_ls)

if __name__ == '__main__':
    path = '/data/bigfiles/'   # directory that holds the novels
    filename = input()         # file name read from standard input
    # Read and tokenize the novel, then report how many distinct words it has.
    words_lst = file_to_lst(file_to_str(path + filename))
    print(len(no_repeat(words_lst)))

Level 3: Remove duplicate elements from a list while preserving the order of first appearance

Task: Write a small program that removes repeated words from a novel and outputs the first n unique words in their order of first appearance.

Test input (two lines: the file name, then n):
The Old Man and the Sea.txt
10

Expected output: ['a', 'distributed', 'proofreaders', 'canada', 'ebook', 'this', 'is', 'made', 'available', 'at']

import string

def file_to_str(file):
    """Read the UTF-8 text file at path ``file`` and return it as one lowercase string."""
    with open(file, encoding='utf-8') as src:  # open() defaults to read/text mode
        return src.read().lower()


def file_to_lst(txt):
    """Split ``txt`` into a list of words.

    Every character in ``string.punctuation`` is replaced by a space and the
    result is split on runs of whitespace. Digits are not punctuation, so
    tokens made of digits are kept.

    Args:
        txt: The text to tokenize.

    Returns:
        A list of the tokens in their order of appearance.
    """
    # A single translate() pass maps every punctuation character to a space,
    # instead of rescanning the whole text once per symbol.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return txt.translate(to_space).split()


def no_repeat(words_ls):
    """Remove duplicate words while keeping the order of first appearance.

    Args:
        words_ls: List of words, possibly containing repeats.

    Returns:
        A new list holding each distinct word once, ordered by where the word
        first occurs in ``words_ls``.
    """
    # dict keys are unique and keep insertion order, so this deduplicates in
    # one linear pass instead of calling words_ls.index() for every word.
    return list(dict.fromkeys(words_ls))
   

if __name__ == '__main__':
    path = '/data/bigfiles/'   # directory that holds the novels
    filename = input()         # first input line: the file name
    n = int(input())           # second input line: how many unique words to show
    words_lst = file_to_lst(file_to_str(path + filename))
    unique_words = no_repeat(words_lst)
    print(unique_words[:n])    # the first n distinct words, in order of appearance

Level 4: Output the number of words that appear in both novels

Task: Write a small program that counts the number of distinct words that appear in both of two novels.

Test input (two lines, one file name per line):
The Old Man and the Sea.txt
The Torrents of Spring.txt

Expected output: 1127

import string


def file_to_set(file, path='/data/bigfiles/'):
    """Return the set of distinct lowercase words in a text file.

    The file at ``path + file`` is read as UTF-8 and lowercased; every
    character in ``string.punctuation`` is replaced by a space and the result
    is split on whitespace.

    Args:
        file: File name, appended directly to ``path``.
        path: Directory prefix including its trailing separator. Defaults to
            ``'/data/bigfiles/'``.

    Returns:
        A set containing each distinct word of the file once.
    """
    with open(path + file, 'r', encoding='utf-8') as fr:
        txt = fr.read().lower()
    # One translate() pass maps every punctuation character to a space.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    # Return a set, as the function name states, so set operators apply directly.
    return set(txt.translate(to_space).split())

def words_both(file1, file2):
    """Return the set of words found in both files; each word is counted once.

    ``file1`` and ``file2`` are file names handed to ``file_to_set``.
    """
    first_words = set(file_to_set(file1))
    second_words = set(file_to_set(file2))
    return first_words.intersection(second_words)


if __name__ == '__main__':
    filename1 = input()  # first input line: file name of the first novel
    filename2 = input()  # second input line: file name of the second novel
    shared_words = words_both(filename1, filename2)
    print(len(shared_words))

Level 5: Output the number of distinct words that appear in either novel

Task: Write a small program that counts the number of distinct words that appear in at least one of two novels.

Test input (two lines, one file name per line):
The Old Man and the Sea.txt
The Torrents of Spring.txt

Expected output: 4723

import string


def file_to_set(file, path='/data/bigfiles/'):
    """Return the set of distinct lowercase words in a text file.

    The file at ``path + file`` is read as UTF-8 and lowercased; every
    character in ``string.punctuation`` is replaced by a space and the result
    is split on whitespace.

    Args:
        file: File name, appended directly to ``path``.
        path: Directory prefix including its trailing separator. Defaults to
            ``'/data/bigfiles/'``.

    Returns:
        A set containing each distinct word of the file once.
    """
    with open(path + file, 'r', encoding='utf-8') as fr:
        txt = fr.read().lower()
    # One translate() pass maps every punctuation character to a space.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    # Return a set, as the function name states, so set operators apply directly.
    return set(txt.translate(to_space).split())


def words_all(file1, file2):
    """Return the set of every word found in either file; each word is counted once.

    ``file1`` and ``file2`` are file names handed to ``file_to_set``.
    """
    first_words = set(file_to_set(file1))
    second_words = set(file_to_set(file2))
    return first_words.union(second_words)

if __name__ == '__main__':
    filename1 = input()  # first input line: file name of the first novel
    filename2 = input()  # second input line: file name of the second novel
    combined_words = words_all(filename1, filename2)
    print(len(combined_words))

Level 6: Count the number of words that appear only in the first novel

Task: Write a small program that counts the number of distinct words that appear in the first novel but not in the second.

Test input (two lines, one file name per line):
The Old Man and the Sea.txt
The Torrents of Spring.txt

Expected output: 1429

import string


def file_to_set(file, path='/data/bigfiles/'):
    """Return the set of distinct lowercase words in a text file.

    The file at ``path + file`` is read as UTF-8 and lowercased; every
    character in ``string.punctuation`` is replaced by a space and the result
    is split on whitespace.

    Args:
        file: File name, appended directly to ``path``.
        path: Directory prefix including its trailing separator. Defaults to
            ``'/data/bigfiles/'``.

    Returns:
        A set containing each distinct word of the file once.
    """
    with open(path + file, 'r', encoding='utf-8') as fr:
        txt = fr.read().lower()
    # One translate() pass maps every punctuation character to a space.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    # Return a set, as the function name states, so set operators apply directly.
    return set(txt.translate(to_space).split())


def only_in_first(file1, file2):
    """Return the set of words found in the first file but not in the second.

    ``file1`` and ``file2`` are file names handed to ``file_to_set``; each
    word is counted once.
    """
    first_words = set(file_to_set(file1))
    second_words = set(file_to_set(file2))
    return first_words.difference(second_words)

if __name__ == '__main__':
    filename1 = input()  # first input line: file name of the first novel
    filename2 = input()  # second input line: file name of the second novel
    first_only_words = only_in_first(filename1, filename2)
    print(len(first_only_words))

Level 7: Count the number of words that appear in exactly one of the two novels

Task: Write a small program that counts the number of distinct words that appear in one of the two novels but not in both.

Test input (two lines, one file name per line):
The Old Man and the Sea.txt
The Torrents of Spring.txt

Expected output: 3596

import string


def file_to_set(file, path='/data/bigfiles/'):
    """Return the set of distinct lowercase words in a text file.

    The file at ``path + file`` is read as UTF-8 and lowercased; every
    character in ``string.punctuation`` is replaced by a space and the result
    is split on whitespace.

    Args:
        file: File name, appended directly to ``path``.
        path: Directory prefix including its trailing separator. Defaults to
            ``'/data/bigfiles/'``.

    Returns:
        A set containing each distinct word of the file once.
    """
    with open(path + file, 'r', encoding='utf-8') as fr:
        txt = fr.read().lower()
    # One translate() pass maps every punctuation character to a space.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    # Return a set, as the function name states, so set operators apply directly.
    return set(txt.translate(to_space).split())


def only_in_one(file1, file2):
    """Return the set of words found in exactly one of the two files.

    ``file1`` and ``file2`` are file names handed to ``file_to_set``; words
    present in both files are excluded and each word is counted once.
    """
    first_words = set(file_to_set(file1))
    second_words = set(file_to_set(file2))
    return first_words.symmetric_difference(second_words)


if __name__ == '__main__':
    filename1 = input()  # first input line: file name of the first novel
    filename2 = input()  # second input line: file name of the second novel
    exclusive_words = only_in_one(filename1, filename2)
    print(len(exclusive_words))