提取Word表格数据到Excel表实用工具（逆向邮件合并） ...

作者：pythonfun 发布时间：2025-9-13 14:37:35

一、软件名称：Word表格数据批量提取写入Excel表工具
二、软件功能：
1. 根据template.docx中的占位符锁定word文件表格数据所在坐标，批量提取Files文件夹下其它同一版本word文件数据到Excel表。
2. 可以把多个Word文件的数据批量提取并存入Excel表，一个文件对应一行数据，并可显示执行状态。
3. 提取对象主要是Word表格中用占位符标记的数据。
三、使用方法
1. 模板中用 {{标记名}} 标注要提取的单元格，点击导入模板，右侧显示要提取数据信息
2. 点击“提取文件标记信息”按钮，软件按从上到下、从左到右的顺序提取数据，并遍历 Files 下所有 .docx 文件（.doc 需先转为 .docx），在相同（表序, 行, 列）处取值写入 汇总.xlsx
四、注意事项
1. 首先要设定Word文件表格模板，要提取的数据要放入双花括号中。
2. 每个Word文件的表格列数、行数一致、版式相同，以便能够提取数据。
3. 更换Word文件时，注意修改模板文件。
五、软件截图

六、代码展示
[Python] 纯文本查看复制代码
# -*- coding: utf-8 -*-
"""
基于模板标记的 Word 表格批量提取工具 (tkinter)
- 模板中用 {{标记名}} 标注要提取的单元格
- 解析模板所有表格，按从上到下、从左到右顺序记录 (表序, 行, 列, 标记名)
- 遍历 ./Files 下所有 .docx 文件（.doc 需先转为 .docx），在相同(表序, 行, 列) 处取值写入 ./汇总.xlsx
- UI 左：导入模板 / 提取文件标记信息 / 退出程序；右：Text 显示坐标与标记
"""
import os
import re
import traceback
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
# Third-party dependencies. Every missing package is collected before
# aborting so the user gets one complete `pip install` command instead of
# discovering the gaps one failure at a time.
missing = []
try:
    from docx import Document
except ImportError:
    missing.append("python-docx")
try:
    from openpyxl import Workbook
except ImportError:
    missing.append("openpyxl")
if missing:
    raise SystemExit("缺少依赖：{}\n请先执行：pip install {}".format(", ".join(missing), " ".join(missing)))

# Directory scanned for .docx inputs and the workbook the results are written
# to; both are resolved against the current working directory.
FILES_DIR = os.path.join(os.getcwd(), "Files")
OUTPUT_XLSX = os.path.join(os.getcwd(), "汇总.xlsx")
# Matches a {{name}} placeholder; group 1 captures the marker name (non-greedy).
MARK_PATTERN = re.compile(r"\{\{(.+?)\}\}")
# ----------------- Word 表解析：合并单元格锚点模型 -----------------
def _get_tcPr_prop(cell, name):
try:
      tcPr = cell._tc.tcPr
      if tcPr is None:
         return None
      return getattr(tcPr, name)
except Exception:
      return None
def _to_int(val, default=1):
try:
      return int(str(val).strip())
except Exception:
      return default
class GridCell:
    """One slot of the logical grid built from a Word table.

    Attributes are stored in ``__slots__`` (no per-instance ``__dict__``):

    * ``anchor``  -- ``(row, col)`` of the cell that owns this slot, or ``None``.
    * ``visible`` -- ``True`` only for the owning (anchor) slot itself.
    * ``text``    -- cell text; empty for non-anchor slots.
    * ``rowspan`` / ``colspan`` -- span counts recorded for the slot.
    """

    __slots__ = ("anchor", "visible", "text", "rowspan", "colspan")

    def __init__(self, anchor=None, visible=False, text="", rowspan=1, colspan=1):
        self.anchor = anchor
        self.visible = visible
        self.text = text
        self.rowspan = rowspan
        self.colspan = colspan

    def __repr__(self):
        # Debug aid only; mirrors the constructor signature.
        return (
            f"GridCell(anchor={self.anchor!r}, visible={self.visible!r}, "
            f"text={self.text!r}, rowspan={self.rowspan!r}, "
            f"colspan={self.colspan!r})"
        )
def build_table_grid(table):
"""将 python-docx 的表解析为仅锚点可见的网格，处理 gridSpan / vMerge(continue/restart)"""
# 列数（考虑横向合并）
row_col_counts = []
for row in table.rows:
      cnt = 0
      for cell in row.cells:
         gs = _get_tcPr_prop(cell, "gridSpan")
         val = getattr(gs, "val", None) if (gs is not None) else None
         colspan = _to_int(val, 1) if (val is not None) else 1
         cnt += max(1, colspan)
      row_col_counts.append(cnt)
n_rows = len(table.rows)
n_cols = max(row_col_counts) if row_col_counts else 0
grid = [[None for _ in range(n_cols)] for _ in range(n_rows)]
anchors = {}
def occupy(ar, ac, rs, cs, text):
      for rr in range(ar, ar + rs):
         for cc in range(ac, ac + cs):
            grid[rr][cc] = GridCell(anchor=(ar, ac),
                                    visible=(rr == ar and cc == ac),
                                    text=(text if rr == ar and cc == ac else ""),
                                    rowspan=(rs if rr == ar and cc == ac else 1),
                                    colspan=(cs if rr == ar and cc == ac else 1))
for r, row in enumerate(table.rows):
      c = 0
      for cell in row.cells:
         # 跳到空位
         while c = n_cols:
            break
         gs = _get_tcPr_prop(cell, "gridSpan")
         colspan = _to_int(getattr(gs, "val", 1), 1) if (gs is not None) else 1
         colspan = max(1, colspan)
         vm = _get_tcPr_prop(cell, "vMerge")
         vm_val = None
         if vm is not None:
            try:
                  vm_val = (str(vm.val).strip().lower() if vm.val is not None else None)
            except Exception:
                  vm_val = None
         # 兼容  与 w:val="continue"
         is_continue = (vm is not None) and (vm_val is None or vm_val == "continue")
         text = cell.text.replace("\n", " ").strip()
         if not is_continue:
            # 普通/起点：创建锚点
            occupy(r, c, 1, colspan, text)
            anchors[(r, c)] = {"rowspan": 1, "colspan": colspan}
         else:
            # 继续：向上找锚点并扩展
            rr = r - 1
            anchor_found = None
            while rr >= 0 and anchor_found is None:
                  gc = grid[rr][c]
                  if gc is not None:
                     anchor_found = gc.anchor
                     break
                  rr -= 1
            if anchor_found is None:
                  occupy(r, c, 1, colspan, text)
                  anchors[(r, c)] = {"rowspan": 1, "colspan": colspan}
            else:
                  ar, ac = anchor_found
                  meta = anchors.get((ar, ac))
                  if meta:
                     meta["rowspan"] += 1
                     for cc in range(ac, ac + meta["colspan"]):
                        grid[r][cc] = GridCell(anchor=(ar, ac), visible=False)
                  else:
                     occupy(r, c, 1, colspan, text)
                     anchors[(r, c)] = {"rowspan": 1, "colspan": colspan}
         c += colspan
         while c {{ {m['name']} }}")
      self.text.insert("end", "\n".join(lines))
def on_extract_all(self):
      if not self.marks:
         messagebox.showinfo("提示", "请先导入包含 {{标记}} 的模板。")
         return
      if not os.path.isdir(FILES_DIR):
         messagebox.showwarning("提示", f"未找到目录：{FILES_DIR}")
         return
      docx_paths = [os.path.join(FILES_DIR, f) for f in os.listdir(FILES_DIR) if f.lower().endswith(".docx")]
      if not docx_paths:
         messagebox.showinfo("提示", "Files 目录中没有 .docx 文件。")
         return
      self.set_status("正在遍历 Files 提取数据...")
      # 准备列头
      #headers = ["文件名"] + [m["name"] for m in self.marks]
      rows_out = []
      for p in docx_paths:
         try:
            doc = Document(p)
         except Exception as e:
            fname = os.path.splitext(os.path.basename(p))[0]
            rows_out.append([fname] + [""]*len(self.marks))
            continue
         values = []
         for m in self.marks:
            ti, r, c = m["table"], m["row"], m["col"]
            if ti >= len(doc.tables):
                  values.append("")
                  continue
            grid, n_rows, n_cols = build_table_grid(doc.tables[ti])
            if not (0 >> 在这里加过滤行
         rows_out = [row for row in rows_out if any(cell for cell in row[1:])]
         wb = Workbook()
         ws = wb.active
         ws.title = "提取结果"
         #ws.append(headers)
         for row in rows_out:
            ws.append(row)
         wb.save(OUTPUT_XLSX)
         self.set_status(f"已写入：{OUTPUT_XLSX}")
         messagebox.showinfo("完成", f"提取完成，已写入：\n{OUTPUT_XLSX}")
      except Exception:
         messagebox.showerror("写入失败", "保存 Excel 时出错，请查看控制台日志。")
         traceback.print_exc()
         self.set_status("写入失败")
if __name__ == "__main__":
    # Script entry point: build the UI and run the Tk event loop.
    try:
        app = App()
        app.mainloop()
    except SystemExit as e:
        # Print the exit message instead of letting SystemExit propagate.
        print(str(e))
    except Exception:
        # Top-level boundary: dump the traceback to the console.
        traceback.print_exc()
七、源码、样例下载

提取word表格数据写入到Excel pythonfun作品.zip
(75.52 KB, 下载次数: 191)
2025-8-10 12:19 上传
点击文件名下载附件

标记, 模板

提取Word表格数据到Excel表实用工具（逆向邮件合并）

相关帖子

浏览过的版块

热门主题

最近收BA的人很多交易了要立刻取消BA 教训

刚看了一个视频，让我又清醒了一下

小小农民新开中转站，欢迎来踩

港版安卓机是满血的国际版安卓机吗？

我 ThreeJSON 又回来了： V 友们批评得对！

继之前 5.4 的 “收口”之后， 5.6 Sol 好

折腾 homelab 挺长时间了建了一个群想不

codex 打开风扇狂转怎么办

Vibe 的一个中文起名小工具

你们明天要去看周星驰的电影么？

热门板块

公告

网站帮助 - Yoo趣儿

我们的愿景

在 Yoo趣儿投放广告

Yoo趣儿网站用户应遵守规则