按要求完成任务

实现要求:
现有 TtrainCACD.txt 文件,记录了2000人的照片地址信息,共约16万行,每行格式如下:
'/home/d201/cropcacd/52_Christopher_Meloni_0001.jpg 41 41'
其中第一部分是照片地址,第二部分是身份号(即某人是第41号),第三部分是年龄。
现要求重新排序:每两行数据组成一对,每对中的两行必须来自同一个人(身份号相同),最后将所有数据对随机打乱后写入新的 txt 文件。注:如果某人的行数为奇数,导致有一行无法配对,则将该行删除。

代码实现:

from random import shuffle
import time

class Guoxuezhang():
    """Regroup a photo listing into randomly ordered same-person line pairs.

    Each input line is expected to hold whitespace-separated fields, the
    second of which is the person id (e.g. ``'<path> <person_id> <age>'``).
    """

    def __init__(self, tips):
        # Stored as given; no method of this class reads it.
        self.tips = tips

    def get_new_list(self, txt_path):
        """Read the listing and return shuffled pairs of same-person lines.

        Lines are grouped by person id in a single pass. If a person has an
        odd number of lines, the first one is dropped. The remaining lines
        are split into a first and a second half, and the i-th line of the
        first half is paired with the i-th line of the second half.

        Blank lines are ignored, and every returned line ends with a
        newline so the pairs can be written out verbatim.

        Args:
            txt_path: Path of the listing file to read.

        Returns:
            A list of ``(line_a, line_b)`` tuples in random order; both
            lines of a tuple carry the same person id.

        Raises:
            IndexError: If a non-blank line has fewer than two fields.
        """
        # The context manager closes the file even if reading fails.
        with open(txt_path) as f:
            lines = f.readlines()
        print(len(lines))

        # Put lines with the same person id together in one pass instead of
        # rescanning the whole file once per person.
        groups = {}
        for line in lines:
            fields = line.split()
            if not fields:
                # Blank line: carries no record.
                continue
            if not line.endswith('\n'):
                # A final line without a newline would otherwise be fused
                # with whatever line is written after it.
                line += '\n'
            groups.setdefault(fields[1], []).append(line)

        person_all = []
        for person_id, person_single in groups.items():
            if len(person_single) == 1:
                print('the person_id is : ', person_id)
            # An odd count cannot be fully paired: drop the first line.
            if len(person_single) % 2 != 0:
                del person_single[0]
            # Split into two halves and pair them element by element,
            # e.g. [1, 2, 3, 4] -> [(1, 3), (2, 4)].
            half_length = len(person_single) // 2
            person_all.extend(
                zip(person_single[:half_length], person_single[half_length:])
            )

        # Randomize the order of the pairs (not the lines inside a pair).
        shuffle(person_all)
        return person_all

    def get_new_txt(self, person_all_shuffle, new_txt_path):
        """Write every line of every pair to ``new_txt_path``, in order.

        Args:
            person_all_shuffle: Iterable of pairs (iterables of line
                strings); lines are written as-is, so each should already
                end with a newline.
            new_txt_path: Path of the output file; it is overwritten.
        """
        with open(new_txt_path, 'w') as f_new:
            for pair in person_all_shuffle:
                f_new.writelines(pair)


if __name__ == '__main__':
    # Source listing and destination file for the shuffled pairs.
    txt_path, new_txt_path = (
        '/home/jianghusanren/Pictures/TtrainCACD.txt',
        '/home/jianghusanren/Pictures/TtrainCACD_new.txt',
    )
    tips = 'The process has done!'

    # Time the whole regroup-and-write run.
    start_time = time.time()
    G_xz = Guoxuezhang(tips)
    person_all_shuffle = G_xz.get_new_list(txt_path)
    G_xz.get_new_txt(person_all_shuffle, new_txt_path)
    end_time = time.time()

    # Elapsed wall-clock time in seconds.
    print(end_time - start_time)

结果如下图:
结果如图

发布了36 篇原创文章 · 获赞 1 · 访问量 2151

猜你喜欢

转载自blog.csdn.net/jianghusanren3/article/details/96993241