代码很简陋,是小白借助 AI 做的。
代码如下:
import os
import pdfplumber
import subprocess
from openpyxl import Workbook, load_workbook
# openpyxl raises IllegalCharacterError for code points below 0x20 other than
# tab, line feed and carriage return, so those are removed from extracted text
# before it is written to a cell.
_EXCEL_ILLEGAL_CHARS = dict.fromkeys(
    cp for cp in range(0x20) if cp not in (0x09, 0x0A, 0x0D)
)


def _extract_pdf_text(path):
    """Return the text of all pages of the PDF at *path*, joined by newlines.

    Pages without extractable text contribute an empty string. Control
    characters that cannot be stored in a worksheet cell are dropped.
    """
    with pdfplumber.open(path) as pdf:
        text = "\n".join(
            (page.extract_text() or "").strip()
            for page in pdf.pages
        )
    return text.translate(_EXCEL_ILLEGAL_CHARS)


def _formula_cells(row):
    """Return the formulas for columns B..F of worksheet row *row*.

    Column G holds the raw extracted text. The formulas derive:
      B - the part of F before the first "票"
      C - the part of F after its first space
      D - the 6th line of G (line breaks padded to 100 spaces, then MID)
      E - the 10th line of G (same technique)
      F - the text of G between "¥" and "电子客票号", line breaks removed
    Each formula falls back to an empty string through IFERROR.
    """
    return [
        f'=IFERROR(TRIM(LEFT(F{row},FIND("票",F{row})-1)),"")',
        f'=IFERROR(TRIM(RIGHT(F{row},LEN(F{row})-FIND(" ",F{row}))),"")',
        f'=IFERROR(TRIM(MID(SUBSTITUTE(G{row},CHAR(10),REPT(" ",100)),(6-1)*100+1,100)),"")',
        f'=IFERROR(TRIM(MID(SUBSTITUTE(G{row},CHAR(10),REPT(" ",100)),(10-1)*100+1,100)),"")',
        f'=IFERROR(SUBSTITUTE(MID(G{row},FIND("¥",G{row})+1,FIND("电子客票号",G{row})-FIND("¥",G{row})-1),CHAR(10),""),"")',
    ]


def _reveal_output(output_excel):
    """Best-effort: select the workbook in Explorer, then open it."""
    # Both steps are conveniences; any failure is reported and otherwise
    # ignored so that it never masks the successfully written workbook.
    try:
        abs_path = os.path.abspath(output_excel)
        subprocess.Popen(f'explorer /select,"{abs_path}"')
        print("📂 文件位置已在资源管理器中打开。")
    except Exception as e:
        print(f"⚠️ 无法打开资源管理器:{e}")
    try:
        os.startfile(output_excel)
        print("📄 正在打开 Excel 文件...")
    except Exception as e:
        print(f"⚠️ 无法自动打开 Excel 文件:{e}")


def process_pdfs_with_formulas(folder='.', output_excel='高铁票带公式.xlsx'):
    """Extract text from every PDF in *folder* into an Excel workbook.

    The workbook gets one sheet with a header row and one row per PDF that
    could be read: column A is the file name, column G the extracted text and
    columns B..F are Excel formulas that pick fields out of that text (see
    ``_formula_cells``). After saving, the function tries to select the file
    in Explorer and to open it.

    Args:
        folder: Directory scanned (non-recursively) for ``*.pdf`` files.
        output_excel: Path of the workbook to write; used as given, so a
            relative path is resolved against the current working directory,
            not against *folder*.

    Returns:
        None. If *folder* contains no PDF file, nothing is written.

    Raises:
        PermissionError: The workbook could not be written (for example
            because it is still open in Excel); a hint is printed first.
    """
    print("📂 正在扫描文件夹,查找 PDF 文件...")
    pdf_files = sorted(
        fn for fn in os.listdir(folder) if fn.lower().endswith('.pdf')
    )
    if not pdf_files:
        print("⚠️ 没有找到 PDF 文件,程序结束。")
        return

    all_rows = []
    total = len(pdf_files)
    for i, fn in enumerate(pdf_files, start=1):
        print(f"🔍 [{i}/{total}] 正在处理:{fn}")
        try:
            txt = _extract_pdf_text(os.path.join(folder, fn))
        except Exception as e:
            # Per-file boundary: one unreadable PDF must not stop the batch.
            print(f"❌ 无法处理 [{fn}]: {e}")
            continue
        all_rows.append((fn, txt))

    print("✅ PDF 文本提取完成,正在生成 Excel 文件...")
    wb = Workbook()
    ws = wb.active
    ws.title = "提取结果"

    print("📊 正在插入公式和列格式...")
    # The final A..G layout is written in one pass; no intermediate
    # save / reload / insert_cols round trip is needed.
    ws.append(['文件名', '金额', '姓名', '车次', '发车时间', '提取文本', '提取文本'])
    for row_idx, (fn, txt) in enumerate(all_rows, start=2):
        ws.append([fn, *_formula_cells(row_idx), txt])

    try:
        wb.save(output_excel)
    except PermissionError:
        # Typical cause: the workbook from a previous run is still open.
        print(f"❌ 无法写入 {output_excel},请先关闭已打开的同名 Excel 文件后重试。")
        raise
    print(f"✅ 已生成 Excel 文件:{output_excel}")

    _reveal_output(output_excel)
# Script entry point: process the PDFs in the current working directory and
# write the workbook next to them.
if __name__ == "__main__":
    process_pdfs_with_formulas('.', '高铁票带公式.xlsx')