import xlrd
file = 'TEST.xlsx'


def read_excel(path=None, sheet_name='test1'):
    """Print an overview and the contents of one worksheet of a workbook.

    Output, in order: the list of sheet names, the selected sheet's name and
    its row/column counts, every row from index 1 on, every column from
    index 1 on, and finally the value of cell (row 1, column 0) read three
    different ways.

    Args:
        path: Workbook to open. When ``None`` (the default) the module-level
            ``file`` is used, looked up at call time.
        sheet_name: Name of the worksheet to dump. Defaults to ``'test1'``.

    Returns:
        None. Everything is written to standard output.

    Errors raised by xlrd (unreadable file, unknown sheet name, ...) are not
    caught here and propagate to the caller.

    NOTE(review): xlrd 2.0 and later only read the legacy ``.xls`` format;
    confirm the installed xlrd version can open the default ``.xlsx`` file.
    """
    workbook = xlrd.open_workbook(filename=file if path is None else path)
    print(workbook.sheet_names())  # names of all sheets in the workbook

    sheet = workbook.sheet_by_name(sheet_name)  # select the sheet by name
    print(sheet.name, sheet.nrows, sheet.ncols)

    # Rows: iteration starts at index 1, so row 0 is not printed.
    for row_index in range(1, sheet.nrows):
        print(sheet.row_values(row_index))

    # Columns: iteration also starts at index 1, so column 0 is not printed.
    # NOTE(review): confirm that skipping the first column is intentional.
    for col_index in range(1, sheet.ncols):
        print(sheet.col_values(col_index))

    # Three equivalent ways of reading a single cell. Guarded so that a sheet
    # with fewer than two rows no longer fails with IndexError.
    if sheet.nrows > 1 and sheet.ncols > 0:
        print(sheet.cell(1, 0).value)
        print(sheet.cell_value(1, 0))
        print(sheet.row(1)[0].value)
# Reading a PDF with Python (python读取pdf)
import pdfplumber
path = 'TEST.pdf'

# Dump every page of the PDF: first the page text, then each extracted table
# (the whole table, a separator, its rows one per line, and a closing
# separator).
#
# The document is opened in a ``with`` block so that it is closed even when
# one of the extraction calls raises; previously close() was only reached on
# the success path.
#
# NOTE(review): the loop nesting was reconstructed from a flattened source
# line; the closing separator is assumed to be printed once per table, not
# once per row -- confirm.
with pdfplumber.open(path) as pdf:
    for page in pdf.pages:
        # All text on the current page, including text inside tables.
        print(page.extract_text())
        for table in page.extract_tables():
            print(table)
            print('-----------------------')
            for row in table:
                print(row)
            print('-----------------------')