先安装三个库:
pip install pdfplumber
pip install openpyxl
pip install tqdm
功能实现代码:
#coding:utf-8
#pdf转excel
import pdfplumber
from openpyxl import Workbook
from tqdm import tqdm
# Folder that holds both the input PDF and the generated workbook.
data_folder = 'F:/pdfdata/'
# PDF whose tables are extracted.
file_name = data_folder + 'pdf文件.pdf'
# Output workbook. openpyxl writes the Office Open XML format, so the file
# needs an .xlsx extension; with an .xls name Excel complains that the file
# format does not match the extension.
data_name = data_folder + '文件.xlsx'
def analysis_table(pdf_file_path):
    """Copy the table found on each page of a PDF into one Excel worksheet.

    Every page of the PDF is scanned with ``page.extract_table()`` and each
    extracted row is appended, in page order, to the active sheet of a new
    workbook. The workbook is then saved to the module-level ``data_name``
    path.

    Pages on which no table is found are skipped instead of aborting the
    whole run, so tables on later pages are still exported. A page whose
    extraction raises is reported and skipped as well (best effort).

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        None. The result is written to ``data_name``.
    """
    # Create the target workbook and use its default sheet.
    workbook = Workbook()
    sheet = workbook.active

    # Open the PDF; the context manager closes it when done.
    with pdfplumber.open(pdf_file_path) as pdf:
        # Walk every page, showing a progress bar.
        for page in tqdm(pdf.pages):
            try:
                table = page.extract_table()
            except Exception as exc:
                # Best effort: report the failing page without corrupting the
                # progress bar, then keep going with the remaining pages.
                tqdm.write('Skipping {}: {}'.format(page, exc))
                continue

            # extract_table() yields nothing when the page has no table.
            if not table:
                continue

            # Append all rows, including any header rows repeated per page.
            for row in table:
                sheet.append(row)

    workbook.save(filename=data_name)
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    analysis_table(file_name)
还不快抢沙发