

作用:比较两个excel文件,提取“数据文件2”中的数据到“数据文件1”
两个数据文件的行顺序和列顺序可以不同,会自动识别,但是两个数据文件的需要提取数据的列名必须存在且相同。
上全部代码:
[Python] 纯文本查看 复制代码import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import openpyxl
from openpyxl.utils import get_column_letter
class ExcelDataExtractorApp:
def __init__(self, root):
self.root = root
self.root.title("Excel数据提取工具")
self.root.geometry("800x700")
# 初始化变量
self.file1_path = tk.StringVar()
self.file2_path = tk.StringVar()
self.compare_headers = []
self.extract_headers = []
# 创建界面
self.create_widgets()
def create_widgets(self):
# 文件选择部分
file_frame = ttk.LabelFrame(self.root, text="文件选择", padding=10)
file_frame.pack(fill="x", padx=10, pady=5)
ttk.Label(file_frame, text="数据文件1:").grid(row=0, column=0, sticky="w")
ttk.Entry(file_frame, textvariable=self.file1_path, width=50).grid(row=0, column=1, padx=5)
ttk.Button(file_frame, text="浏览...", command=lambda: self.select_file(self.file1_path)).grid(row=0, column=2)
ttk.Label(file_frame, text="数据文件2:").grid(row=1, column=0, sticky="w")
ttk.Entry(file_frame, textvariable=self.file2_path, width=50).grid(row=1, column=1, padx=5)
ttk.Button(file_frame, text="浏览...", command=lambda: self.select_file(self.file2_path)).grid(row=1, column=2)
# 比较列设置部分
compare_frame = ttk.LabelFrame(self.root, text="比较列设置", padding=10)
compare_frame.pack(fill="x", padx=10, pady=5)
self.compare_listbox = tk.Listbox(compare_frame, height=5)
self.compare_listbox.pack(side="left", fill="both", expand=True, padx=(0, 5))
compare_scrollbar = ttk.Scrollbar(compare_frame, orient="vertical", command=self.compare_listbox.yview)
compare_scrollbar.pack(side="right", fill="y")
self.compare_listbox.config(yscrollcommand=compare_scrollbar.set)
compare_entry_frame = ttk.Frame(compare_frame)
compare_entry_frame.pack(fill="x", pady=5)
self.new_compare_header = tk.StringVar()
ttk.Entry(compare_entry_frame, textvariable=self.new_compare_header, width=30).pack(side="left", padx=5)
ttk.Button(compare_entry_frame, text="添加", command=self.add_compare_header).pack(side="left", padx=5)
ttk.Button(compare_entry_frame, text="删除", command=lambda: self.remove_header(self.compare_listbox, self.compare_headers)).pack(side="left", padx=5)
# 提取列设置部分
extract_frame = ttk.LabelFrame(self.root, text="提取列设置", padding=10)
extract_frame.pack(fill="both", expand=True, padx=10, pady=5)
self.extract_listbox = tk.Listbox(extract_frame, height=8)
self.extract_listbox.pack(side="left", fill="both", expand=True, padx=(0, 5))
extract_scrollbar = ttk.Scrollbar(extract_frame, orient="vertical", command=self.extract_listbox.yview)
extract_scrollbar.pack(side="right", fill="y")
self.extract_listbox.config(yscrollcommand=extract_scrollbar.set)
extract_entry_frame = ttk.Frame(extract_frame)
extract_entry_frame.pack(fill="x", pady=5)
self.new_extract_header = tk.StringVar()
ttk.Entry(extract_entry_frame, textvariable=self.new_extract_header, width=30).pack(side="left", padx=5)
ttk.Button(extract_entry_frame, text="添加", command=self.add_extract_header).pack(side="left", padx=5)
ttk.Button(extract_entry_frame, text="删除", command=lambda: self.remove_header(self.extract_listbox, self.extract_headers)).pack(side="left", padx=5)
# 操作按钮部分
button_frame = ttk.Frame(self.root)
button_frame.pack(fill="x", padx=10, pady=10)
ttk.Button(button_frame, text="开始提取", command=self.process_files).pack(side="left", padx=5)
ttk.Button(button_frame, text="清除所有", command=self.clear_all).pack(side="left", padx=5)
ttk.Button(button_frame, text="退出", command=self.root.quit).pack(side="right", padx=5)
# 状态栏
self.status_var = tk.StringVar()
self.status_var.set("就绪")
ttk.Label(self.root, textvariable=self.status_var, relief="sunken", anchor="w").pack(fill="x", padx=10, pady=(0, 10))
def select_file(self, path_var):
filename = filedialog.askopenfilename(filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")])
if filename:
path_var.set(filename)
def add_compare_header(self):
header = self.new_compare_header.get().strip()
if header and header not in self.compare_headers:
self.compare_headers.append(header)
self.compare_listbox.insert("end", header)
self.new_compare_header.set("")
def add_extract_header(self):
header = self.new_extract_header.get().strip()
if header and header not in self.extract_headers:
self.extract_headers.append(header)
self.extract_listbox.insert("end", header)
self.new_extract_header.set("")
def remove_header(self, listbox, headers_list):
selection = listbox.curselection()
if selection:
index = selection[0]
del headers_list[index]
listbox.delete(index)
def clear_all(self):
self.file1_path.set("")
self.file2_path.set("")
self.new_compare_header.set("")
self.new_extract_header.set("")
self.compare_headers = []
self.extract_headers = []
self.compare_listbox.delete(0, "end")
self.extract_listbox.delete(0, "end")
self.status_var.set("已清除所有设置")
def process_files(self):
# 验证输入
if not self.file1_path.get() or not self.file2_path.get():
messagebox.showerror("错误", "请选择两个Excel文件")
return
if not self.compare_headers:
messagebox.showerror("错误", "至少需要指定一个比较列")
return
if not self.extract_headers:
messagebox.showerror("错误", "请指定要提取的列")
return
try:
self.status_var.set("正在处理...")
self.root.update()
# 加载工作簿
wb1 = openpyxl.load_workbook(self.file1_path.get())
wb2 = openpyxl.load_workbook(self.file2_path.get())
# 假设操作第一个工作表
ws1 = wb1.active
ws2 = wb2.active
# 构建文件1的标题到列号的映射
header_to_col1 = {}
for col in range(1, ws1.max_column + 1):
header = ws1.cell(row=1, column=col).value
if header:
header_to_col1[header] = col
# 构建文件2的标题到列号的映射
header_to_col2 = {}
for col in range(1, ws2.max_column + 1):
header = ws2.cell(row=1, column=col).value
if header:
header_to_col2[header] = col
# 检查比较列是否存在
for header in self.compare_headers:
if header not in header_to_col1 or header not in header_to_col2:
messagebox.showerror("错误", f"'{header}'列在其中一个文件中不存在")
return
# 检查提取列是否存在
extract_cols = []
for header in self.extract_headers:
if header not in header_to_col2:
messagebox.showerror("错误", f"'{header}'列在文件2中不存在")
return
extract_cols.append((header, header_to_col2[header]))
# 构建文件2的索引:比较列的值 -> 行数据
file2_index = {}
for row in range(2, ws2.max_row + 1):
key_parts = []
for header in self.compare_headers:
col = header_to_col2[header]
key_parts.append(str(ws2.cell(row=row, column=col).value))
key = tuple(key_parts)
# 存储该行的提取列数据
row_data = {}
for header, col in extract_cols:
row_data[header] = ws2.cell(row=row, column=col).value
file2_index[key] = row_data
# 在文件1中查找匹配并提取数据
matches = 0
for row in range(2, ws1.max_row + 1):
key_parts = []
for header in self.compare_headers:
col = header_to_col1[header]
key_parts.append(str(ws1.cell(row=row, column=col).value))
key = tuple(key_parts)
if key in file2_index:
matches += 1
# 提取数据到文件1
row_data = file2_index[key]
for header, value in row_data.items():
# 如果文件1中没有该列,则添加到最后一列
if header not in header_to_col1:
new_col = ws1.max_column + 1
ws1.cell(row=1, column=new_col, value=header)
header_to_col1[header] = new_col
col = header_to_col1[header]
ws1.cell(row=row, column=col, value=value)
# 保存文件1
output_path = self.file1_path.get().replace(".xlsx", "_提取结果.xlsx")
wb1.save(output_path)
messagebox.showinfo("完成", f"处理完成!共找到 {matches} 条匹配记录。\n结果已保存到: {output_path}")
self.status_var.set(f"处理完成,共找到 {matches} 条匹配记录")
except Exception as e:
messagebox.showerror("错误", f"处理过程中发生错误:\n{str(e)}")
self.status_var.set("处理失败")
if __name__ == "__main__":
root = tk.Tk()
app = ExcelDataExtractorApp(root)
root.mainloop()