Python code to read an Excel (.xlsx) file and save it as a JSON file

The planning requirements call for statistics on the data in all of the xlsx files, summarized into a single file for easy reading.
This is where Python's advantages over C# show. Of course, C# is still very capable; it is simply a matter of choosing the right tool for each task.
Idea: read all of the xlsx files and convert their data into a unified data structure, then use the properties of that data collection to merge them and export a JSON file.
The following is the Python code for reading an xlsx file. I am recording it here so that I can find it again later.

import json
import base64
import xlrd


class ReadExcel:
    """Convenience wrapper around an xlrd workbook with a "current sheet".

    The instance keeps one selected sheet (``self.sheet``) together with its
    cached dimensions (``self.row_num`` / ``self.col_num``). Most getters read
    from that selected sheet; the ``get_file_*`` methods walk every sheet and
    leave the last one selected.

    NOTE(review): xlrd 2.0 and later read only legacy ``.xls`` files; opening
    an ``.xlsx`` workbook needs an older xlrd release -- confirm which version
    this project pins.
    """

    def __init__(self, file_path):
        """Open the workbook at ``file_path`` and select its first sheet.

        Raises:
            Exception: whatever ``xlrd.open_workbook`` raised. The path is
                printed first, then the original error is re-raised.
        """
        try:
            self.book = xlrd.open_workbook(file_path)
        except Exception:
            # Report the path, then let the real error propagate. Continuing
            # without ``self.book`` would only fail on the next line with an
            # unrelated AttributeError that hides the actual cause.
            print('No File %s' % file_path)
            raise
        self.sheet_names = self.book.sheet_names()
        self.sheet_num = self.book.nsheets
        self._select_sheet(self.book.sheet_by_index(0))

    def _select_sheet(self, sheet):
        """Make ``sheet`` the current sheet and cache its row/column counts."""
        self.sheet = sheet
        self.row_num = sheet.nrows
        self.col_num = sheet.ncols

    def read_sheet_name(self, sheet_name):
        """Select the sheet called ``sheet_name``.

        If the lookup fails a message is printed and the previously selected
        sheet stays current.
        """
        try:
            sheet = self.book.sheet_by_name(sheet_name)
        except Exception:
            print("No Sheet %s" % sheet_name)
            return
        self._select_sheet(sheet)

    def read_sheet_index(self, sheet_index):
        """Select the sheet at position ``sheet_index``.

        If the lookup fails a message is printed and the previously selected
        sheet stays current.
        """
        try:
            sheet = self.book.sheet_by_index(sheet_index)
        except Exception:
            print("No Sheet Index %s" % sheet_index)
            return
        self._select_sheet(sheet)

    def get_sheet_names(self):
        """Return the workbook's sheet names as read at construction time."""
        return self.sheet_names

    def get_cell_value(self, row, col):
        """Return the value of the cell at (``row``, ``col``) of the current sheet."""
        return self.sheet.cell_value(row, col)

    def get_row_data(self, row):
        """Return the values of row ``row`` of the current sheet."""
        return self.sheet.row_values(row)

    def get_col_data(self, col):
        """Return the values of column ``col`` of the current sheet."""
        return self.sheet.col_values(col)

    def get_sheet_rows_data(self):
        """Return every row of the current sheet as a list of row-value lists."""
        return [self.sheet.row_values(row) for row in range(self.row_num)]

    def get_sheet_cols_data(self):
        """Return every column of the current sheet as a list of column-value lists."""
        return [self.sheet.col_values(col) for col in range(self.col_num)]

    def get_file_rows_data(self):
        """Return the rows of every sheet, one list per sheet.

        Each per-sheet list ends with an extra ``['SheetName', name]`` entry
        identifying the sheet. The last sheet is left selected afterwards.
        """
        data = []
        for name in self.sheet_names:
            self.read_sheet_name(name)
            sheet_data = self.get_sheet_rows_data()
            sheet_data.append(['SheetName', name])
            data.append(sheet_data)
        return data

    def get_file_cols_data(self):
        """Return the columns of every sheet, one list per sheet.

        The last sheet is left selected afterwards.
        """
        data = []
        for sheet_index in range(self.sheet_num):
            self.read_sheet_index(sheet_index)
            data.append(self.get_sheet_cols_data())
        return data

    def get_choose_rows_data(self, start_index, end_index):
        """Return a column slice of every row except the first.

        Row 0 is skipped; for each remaining row the values in columns
        ``start_index`` up to ``end_index`` are returned, with the bounds
        passed straight to the sheet's ``row_values``.
        """
        return [
            self.sheet.row_values(row, start_index, end_index)
            for row in range(1, self.row_num)
        ]

    def get_keys_rows_data(self, keys):
        """Return, for every row except the first, the cells under the given headers.

        Row 0 is treated as the header row. Columns are emitted in the order
        of ``keys``; a key that matches several header cells contributes every
        matching column, and a key that matches none contributes nothing (so
        the result rows may be shorter than ``keys``).
        """
        headers = self.sheet.row_values(0)
        key_indexes = []
        for key in keys:
            key_indexes.extend(
                index for index, header in enumerate(headers) if header == key
            )
        return [
            [self.sheet.cell_value(row, col) for col in key_indexes]
            for row in range(1, self.row_num)
        ]


if __name__ == '__main__':
    # Load every row of the first sheet of the name-list workbook.
    xlsx = ReadExcel(R"D:\WorkFiles\项目工程\新疆援疆\第九批照片\第九批名单.xlsx")
    all_rows = xlsx.get_sheet_rows_data()
    # Drop the first two rows and the final row.
    # NOTE(review): presumably title/header rows and a trailing footer row --
    # confirm against the workbook layout.
    body_rows = all_rows[2:-1]
    # Keep only the rows whose second column is non-empty.
    li = [row for row in body_rows if row[1] != ""]
    print(li)
    # Write the JSON file. The path is a raw string so its backslashes are
    # never parsed as escape sequences; the resulting path is unchanged.
    with open(R'D:\Program Files (x86)\DeskTop\Demo\HotfixDemo\wecatdata.json', 'w', encoding='utf-8') as f_obj:
        json.dump(li, f_obj, ensure_ascii=False)

Guess you like

Source: blog.csdn.net/qq_41088607/article/details/124966183