"""Contract data processing tool.

Part 1 (``DataProcessor``) contains the business logic that converts exported
CSV contract records into the general / detail / OM sheets and merges them
with previously saved workbooks.

Part 2 (``ContractApp``) is the Tkinter front end used to pick files, preview
the merged result, make quick edits and write the workbooks back to disk.
"""
import os
import re
import traceback
import tkinter as tk
from datetime import datetime  # kept: may be used by code outside this view
from tkinter import ttk, filedialog, messagebox, simpledialog

import numpy as np  # kept: may be used by code outside this view
import pandas as pd

# Sheet names, in the order they are previewed and written.
SHEET_ORDER = ['外贸', '外贸明细', '内贸', '内贸明细', 'OM合同']

# CSV column that holds the free-text contract subject (one item per line).
SUBJECT_COLUMN = '合同标的(品名/型号/数量/单价/总价)'


# ==========================================
# Part 1: core business logic
# ==========================================
class DataProcessor:
    """Parse CSV contract exports and merge them into the target sheets."""

    def __init__(self):
        # ==================== header definitions ====================
        self.cols_asd_foreign_general = [
            "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", "销售员",
            "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地",
            "厂家", "型号/货号", "合同标的", "数量", "单位", "币种", "折扣率",
            "合同额", "总合同额", "外购", "已收款", "未收款", "收款日期",
            "最晚发货期", "付款方式", "发货港", "目的港", "发货日期", "买方单位",
            "买方信息\n联系人、电话、邮箱", "收货人信息",
            "转为美元净合同额", "转为美元总合同额",
        ]
        self.cols_nonasd_foreign_general = [
            "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", "销售员",
            "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地",
            "厂家", "型号/货号", "合同标的", "数量", "单位", "币种", "折扣率",
            "合同额", "总合同额", "外购", "已收款", "未收款", "收款日期",
            "最晚发货期", "付款方式", "发货港", "目的港", "发货日期", "买方单位",
            "买方信息\n联系人、电话、邮箱", "收货人信息", "合同币种/美元",
            "转为美元净合同额", "转为美元总合同额",
        ]
        self.cols_domestic_general = [
            "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", "销售员",
            "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地",
            "买方单位", "买方信息\n联系人、电话、邮箱", "厂家", "型号", "合同标的",
            "数量", "单位", "折扣率(%)", "合同额", "合同总额", "外购", "付款方式",
            "最晚发货期", "已收款", "未收款", "收款日期",
            "转为美元净合同额", "转为美元总合同额",
        ]
        self.cols_foreign_detail = [
            "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", "数量",
            "单位", "币种", "报价单价", "报价总价", "销售单价", "销售总价",
            "折扣率", "外购", "合同币种/美元", "外购转美元", "报价总价美元",
            "净合同额美元",
        ]
        self.cols_domestic_detail = [
            "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", "数量",
            "单位", "外币币种", "外币报价单价", "报价RMB单价", "报价RMB总价",
            "售价RMB单价", "售价RMB总价", "折扣率(%)", "外购", "计算汇率",
            "外购转美元", "报价总价美元", "净合同额美元",
        ]
        self.cols_om = [
            "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", "销售员",
            "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地",
            "买方单位", "买方信息\n联系人、电话、邮箱", "合同标的", "合同总额",
            "已收款", "未收款", "收款日期",
        ]

        # Helper sets that drive apply_formatting_to_all().
        self.money_cols = {
            "合同额", "总合同额", "合同总额", "外购", "已收款", "未收款",
            "净合同额美元", "外购转美元", "报价总价美元", "外币报价单价",
            "报价RMB单价", "报价RMB总价", "售价RMB单价", "售价RMB总价",
            "外购产品金额", "转为美元净合同额", "转为美元总合同额", "报价单价",
            "报价总价", "销售单价", "销售总价",
        }
        # "计算汇率" and "合同币种/美元" are deliberately NOT listed here, so
        # exchange rates are never run through the percentage formatter.
        self.percent_cols = {"折扣率", "折扣率(%)"}
        self.date_cols = {"合同签订日期", "签订日期", "收款日期", "最晚发货期", "发货日期"}

        # Old header -> current header, consulted only for headers that are
        # not already a standard column. The original literal listed the key
        # "折扣率(%)" twice; only the later (effective) mapping is kept.
        self.legacy_map = {
            "外币币种": "币种",
            "汇率": "计算汇率",
            "折扣率(%)": "折扣率(%)",
            "合同": "合同额",
        }

        # Whitespace-insensitive header key -> canonical header text.
        self.standard_col_map = {}
        all_lists = [
            self.cols_asd_foreign_general, self.cols_nonasd_foreign_general,
            self.cols_domestic_general, self.cols_foreign_detail,
            self.cols_domestic_detail, self.cols_om,
        ]
        for lst in all_lists:
            for col in lst:
                self.standard_col_map[self.clean_header_key(col)] = col

        # Non-fatal problems (missing/unreadable files) from the most recent
        # load_multiple_csvs() call; the GUI shows them to the user.
        self.last_load_warnings = []

    # --- utility methods ---
    def clean_header_key(self, text):
        """Return *text* with all whitespace removed (non-strings are str()-ed)."""
        if not isinstance(text, str):
            return str(text)
        return re.sub(r'[\s\n\r]+', '', text)

    @staticmethod
    def _blank_if_missing(value):
        """Return "" for None / float NaN, otherwise *value* unchanged."""
        if value is None:
            return ""
        if isinstance(value, float) and value != value:  # NaN != NaN
            return ""
        return value

    def safe_float(self, val):
        """Convert *val* to float, returning 0.0 for blank/missing/invalid input.

        Strings may contain thousands separators and the currency signs
        '¥' / '$'. A non-string NaN yields 0.0 (the original checked NaN only
        after an early return for floats, so NaN leaked through).
        """
        try:
            if isinstance(val, str):
                val = val.replace(',', '').replace('¥', '').replace('$', '').strip()
                if val == '':
                    return 0.0
                return float(val)
            if pd.isna(val):
                return 0.0
            return float(val)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def format_money_str(self, val):
        """Format *val* with two decimals; blank/NaN input gives ""."""
        if pd.isna(val) or str(val).strip() == "":
            return ""
        try:
            return "{:.2f}".format(self.safe_float(val))
        except Exception:
            return str(val)

    def format_percent_str(self, val):
        """Format a fraction as 'NN.NN%'; text already containing '%' is kept."""
        if pd.isna(val) or str(val).strip() == "":
            return ""
        try:
            s_val = str(val).strip()
            if '%' in s_val:
                return s_val
            return "{:.2f}%".format(self.safe_float(val) * 100)
        except Exception:
            return str(val)

    def format_date_str(self, val):
        """Format *val* as YYYY-MM-DD; unparseable text is returned stripped."""
        if pd.isna(val) or str(val).strip() == "":
            return ""
        try:
            s_val = str(val).strip()
            # Already in YYYY-MM-DD shape: return untouched.
            if len(s_val) == 10 and s_val[4] == '-' and s_val[7] == '-':
                return s_val
            dt = pd.to_datetime(val, errors='coerce')
            if pd.isnull(dt):
                return s_val
            return dt.strftime('%Y-%m-%d')
        except Exception:
            return str(val)

    @staticmethod
    def _read_csv_any_encoding(path):
        """Read one CSV, trying utf-8, gbk, gb18030, then lossy gb18030."""
        for enc in ('utf-8', 'gbk', 'gb18030'):
            try:
                return pd.read_csv(path, encoding=enc)
            except UnicodeDecodeError:
                continue
        return pd.read_csv(path, encoding='gb18030', encoding_errors='replace')

    def load_multiple_csvs(self, file_paths):
        """Load and concatenate one or more CSV exports.

        Args:
            file_paths: a ';'-separated string of paths or an iterable of paths.

        Returns:
            ``(DataFrame, (general_factory_col, detail_factory_col))`` on
            success, or ``(None, error_message)`` when nothing could be loaded.
            Non-fatal per-file problems are stored in ``last_load_warnings``.
        """
        self.last_load_warnings = []
        if isinstance(file_paths, str):
            paths = [p.strip() for p in file_paths.split(';') if p.strip()]
        else:
            paths = list(file_paths)
        if not paths:
            return None, "未选择文件"

        all_dfs = []
        error_msgs = []
        col_factory_general = '厂家'

        for path in paths:
            if not os.path.exists(path):
                # Previously skipped silently; now reported.
                error_msgs.append(f"文件不存在: {path}")
                continue
            try:
                df = self._read_csv_any_encoding(path)
            except Exception:
                error_msgs.append(f"无法读取: {os.path.basename(path)}")
                continue
            # A file without the contract-type column used to raise KeyError.
            if '合同类型' not in df.columns:
                df['合同类型'] = ''
            df['合同类型'] = df['合同类型'].fillna('').astype(str)
            all_dfs.append(df)

        if not all_dfs:
            return None, "\n".join(error_msgs) if error_msgs else "没有有效的数据文件"

        try:
            final_df = pd.concat(all_dfs, ignore_index=True)
        except Exception as e:
            return None, f"文件合并失败: {str(e)}"

        # Files lacking the contract-type column contribute NaN after concat.
        final_df['合同类型'] = final_df['合同类型'].fillna('').astype(str)
        # pandas names a second "厂家" header "厂家.1"; that one is the
        # per-product (detail) manufacturer column.
        col_factory_detail = '厂家.1' if '厂家.1' in final_df.columns else '厂家'
        if col_factory_general not in final_df.columns:
            final_df[col_factory_general] = ''
        final_df[col_factory_general] = final_df[col_factory_general].fillna('').astype(str)

        if '合同订单编号' in final_df.columns:
            final_df['原始_合同订单编号'] = (
                final_df['合同订单编号'].fillna('').astype(str).str.strip()
            )
            # Clean_ID is the first whitespace-separated token of the order no.
            final_df['Clean_ID'] = final_df['原始_合同订单编号'].apply(
                lambda x: x.split()[0] if x else "")

        self.last_load_warnings = error_msgs
        return final_df, (col_factory_general, col_factory_detail)

    def parse_buyer_info(self, text):
        """Split multi-line buyer text into first line (name) and the rest."""
        info = {'name': '', 'contact_full': ''}
        if not isinstance(text, str) or not text.strip():
            return info
        lines = [l.strip() for l in text.split('\n') if l.strip()]
        if not lines:
            return info
        info['name'] = lines[0]
        info['contact_full'] = " ".join(lines[1:])
        return info

    def parse_single_line_subject(self, text):
        """Parse one contract-subject line.

        Three shapes are recognised, in order: a ``Model:`` line, a
        '/'-separated ``name/model/qty[unit]/price`` line, and free text (the
        last number found is taken as the price).

        Returns:
            dict with keys name, model, qty, unit, price (strings) and
            sort_price (float).
        """
        res = {'name': '', 'model': '', 'qty': '', 'unit': '', 'price': '', 'sort_price': 0.0}
        if not isinstance(text, str) or not text.strip():
            return res
        text = text.strip()

        # "Model:" format.
        if re.match(r'^Model[::]', text, re.IGNORECASE):
            m = re.match(
                r'Model[::]\s*(.+?)\s+([a-zA-Z\u4e00-\u9fa5]+)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)',
                text, re.IGNORECASE)
            if m:
                res['name'] = m.group(1).strip()
                res['model'] = m.group(1).strip()
                res['unit'] = m.group(2)
                res['qty'] = m.group(3)
                res['price'] = m.group(4)
                res['sort_price'] = self.safe_float(m.group(4))
                return res
            # Same layout without the unit token.
            m2 = re.match(
                r'Model[::]\s*(.+?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)',
                text, re.IGNORECASE)
            if m2:
                res['name'] = m2.group(1).strip()
                res['model'] = m2.group(1).strip()
                res['qty'] = m2.group(2)
                res['price'] = m2.group(3)
                res['sort_price'] = self.safe_float(m2.group(3))
                return res

        if '/' in text:
            parts = [p.strip() for p in text.split('/')]
            res['name'] = parts[0]
            if len(parts) >= 2:
                res['model'] = parts[1]
            if len(parts) >= 3:
                m_qty = re.match(r'^(\d+(\.\d+)?)\s*([\u4e00-\u9fa5a-zA-Z]+)?$', parts[2])
                if m_qty:
                    res['qty'] = m_qty.group(1)
                    res['unit'] = m_qty.group(3) if m_qty.group(3) else ""
                else:
                    res['qty'] = parts[2]
            if len(parts) >= 4:
                res['price'] = parts[3]
                res['sort_price'] = self.safe_float(parts[3])
            return res

        name_match = re.search(r'(?:中文品名|中文名称|名称|Name)[::]\s*(.*?)(?:\n|$)', text, re.IGNORECASE)
        if name_match:
            res['name'] = name_match.group(1).strip()
        else:
            res['name'] = text.split('\n')[0]
        nums = re.findall(r'\d+(?:\.\d+)?', text.replace(',', ''))
        if nums:
            res['sort_price'] = self.safe_float(nums[-1])
            res['price'] = nums[-1]
        return res

    # ==========================================
    # Data preparation functions
    # ==========================================
    def _expand_contract_items(self, csv_df, field_map, item_fields):
        """Build one row per (CSV record, subject line).

        Args:
            csv_df: source records.
            field_map: output key -> CSV column copied from the record
                ('' when the column is absent).
            item_fields: output key -> key of the parsed subject item.
        """
        rows = []
        for record in csv_df.to_dict('records'):
            base = {key: record.get(src, '') for key, src in field_map.items()}
            # NaN subjects used to be parsed as the literal text "nan".
            subject_raw = str(self._blank_if_missing(record.get(SUBJECT_COLUMN, '')))
            lines = [line.strip() for line in subject_raw.split('\n') if line.strip()]
            items = [self.parse_single_line_subject(line) for line in lines]
            if not items:
                items = [self.parse_single_line_subject("")]
            for item in items:
                row = dict(base)
                for out_key, item_key in item_fields.items():
                    row[out_key] = item[item_key]
                rows.append(row)
        return pd.DataFrame(rows)

    @staticmethod
    def _keep_highest_priced_item(df_expanded):
        """Keep, per Clean_ID, the row with the largest ``_sort_price``."""
        ordered = df_expanded.sort_values(
            by=['Clean_ID', '_sort_price'], ascending=[True, False])
        return ordered.drop_duplicates(subset=['Clean_ID'], keep='first').copy()

    @staticmethod
    def _split_contract_id(raw_ids):
        """Split "<number> <rest>" into two string Series (blank when absent).

        Unlike ``str.split(expand=True)`` this cannot produce a frame with
        zero columns when every value is blank.
        """
        tokens = raw_ids.fillna('').astype(str).str.split(n=1)
        return tokens.str[0].fillna(''), tokens.str[1].fillna('')

    def prepare_new_data_general(self, csv_df, trade_type, target_cols, col_factory):
        """Build the general-sheet rows (one per contract) from CSV records.

        Returns a frame with *target_cols* plus the helper column
        ``_sort_price``; an empty *csv_df* yields an empty frame with
        *target_cols* only.
        """
        if csv_df.empty:
            return pd.DataFrame(columns=target_cols)

        field_map = {
            'Clean_ID': 'Clean_ID',
            '原始_合同订单编号': '原始_合同订单编号',
            '收款账户': '收款账户',
            '签约日期': '签约日期',
            '负责人': '负责人',
            '客户名称': '客户名称',
            '联系人姓名': '联系人姓名',
            '合同买方_Raw': '合同买方(名称/联系人/电话/邮箱)',
            '进口代理_Raw': '进口代理(名称/USCI/地址/联系人/电话/邮箱)',
            '厂家_Val': col_factory,
            '货币': '货币(选完产品再改)',
            '发货地': '发货地',
            '目的港': '目的港',
            '折扣率': '折扣率',
            '合同总额': '合同总额',
            '收款状态': '收款状态',
            '外购产品金额': '外购产品金额',
            '最新收款日期': '最新收款日期',
            '最晚发货期': '最晚发货期',
            '付款比例及期限': '付款比例及期限',
            '合同币种/美元': '合同币种/美元',
        }
        item_fields = {
            '_item_name': 'name', '_item_model': 'model', '_item_qty': 'qty',
            '_item_unit': 'unit', '_item_price': 'price', '_sort_price': 'sort_price',
        }
        df_expanded = self._expand_contract_items(csv_df, field_map, item_fields)
        if df_expanded.empty:
            return pd.DataFrame(columns=target_cols)

        # Sort and de-duplicate: keep the highest-priced item per contract.
        df_unique = self._keep_highest_priced_item(df_expanded)
        result = pd.DataFrame(index=df_unique.index)

        contract_no, contract_suffix = self._split_contract_id(df_unique['原始_合同订单编号'])
        result['合同编号'] = contract_no
        contract_no_col = '外贸合同号' if trade_type == '外贸' else '内贸合同号'
        result[contract_no_col] = contract_suffix

        result['签署公司'] = df_unique['收款账户']
        result['合同签订日期'] = df_unique['签约日期'].apply(self.format_date_str)
        if '签订日期' in target_cols:
            result['签订日期'] = result['合同签订日期']
        result['销售员'] = df_unique['负责人']
        result['最终用户单位'] = df_unique['客户名称']
        end_user_col = next((c for c in target_cols if "最终用户信息" in c), None)
        if end_user_col is not None:
            result[end_user_col] = df_unique['联系人姓名']

        # Foreign trade prefers the import agent as buyer when one is given.
        buyer_infos = []
        for agent_raw, buyer_raw in zip(df_unique['进口代理_Raw'], df_unique['合同买方_Raw']):
            use_agent = trade_type == '外贸' and str(agent_raw) not in ['', 'nan']
            buyer_infos.append(self.parse_buyer_info(str(agent_raw if use_agent else buyer_raw)))
        result['买方单位'] = [x['name'] for x in buyer_infos]
        buyer_col = next((c for c in target_cols if "买方信息" in c), None)
        if buyer_col is not None:
            result[buyer_col] = [x['contact_full'] for x in buyer_infos]
        if '收货人信息' in target_cols:
            result['收货人信息'] = result['买方单位']

        result['厂家'] = df_unique['厂家_Val']
        if '币种' in target_cols:
            result['币种'] = df_unique['货币']
        if '发货港' in target_cols:
            result['发货港'] = df_unique['发货地']
        if '目的港' in target_cols:
            result['目的港'] = df_unique['目的港']

        discount_col = '折扣率' if '折扣率' in target_cols else '折扣率(%)'
        result[discount_col] = df_unique['折扣率'].apply(self.format_percent_str)

        result['合同标的'] = df_unique['_item_name']
        if '型号/货号' in target_cols:
            result['型号/货号'] = df_unique['_item_model']
        if '型号' in target_cols:
            result['型号'] = df_unique['_item_model']
        result['数量'] = df_unique['_item_qty']
        result['单位'] = df_unique['_item_unit']
        if '合同额' in target_cols:
            result['合同额'] = df_unique['_item_price'].apply(self.format_money_str)

        total_col_name = '总合同额' if '总合同额' in target_cols else '合同总额'
        result[total_col_name] = df_unique['合同总额'].apply(self.format_money_str)

        result['收款情况'] = df_unique['收款状态'].fillna('').astype(str).str.strip()
        is_received = result['收款情况'].str.contains('已收')
        result['已收款'] = ""
        result['未收款'] = ""
        # Contracts marked as received: received = total, outstanding = 0.
        result.loc[is_received, '已收款'] = result.loc[is_received, total_col_name]
        result.loc[is_received, '未收款'] = "0.00"

        result['外购'] = df_unique['外购产品金额'].apply(self.format_money_str)
        result['收款日期'] = df_unique['最新收款日期'].apply(self.format_date_str)
        if '最晚发货期' in target_cols:
            result['最晚发货期'] = df_unique['最晚发货期'].apply(self.format_date_str)
        if '付款方式' in target_cols:
            result['付款方式'] = df_unique['付款比例及期限']
        if '发货日期' in target_cols:
            result['发货日期'] = ""

        # The exchange rate is taken as a raw value and is not formatted
        # later; it is stringified to avoid mixed-dtype warnings.
        if '合同币种/美元' in target_cols:
            result['合同币种/美元'] = df_unique['合同币种/美元'].fillna("").astype(str)

        result['_sort_price'] = df_unique['_sort_price']
        for col in target_cols:
            if col not in result.columns:
                result[col] = ""
        return result[list(target_cols) + ['_sort_price']]

    def prepare_new_data_detail(self, csv_df, trade_type, target_cols, col_factory):
        """Build the detail-sheet rows (one per CSV record)."""
        if csv_df.empty:
            return pd.DataFrame(columns=target_cols)
        new_rows = csv_df.apply(
            lambda r: self.process_row_detail_single(r, col_factory, trade_type), axis=1)
        if isinstance(new_rows, pd.Series):
            new_rows = pd.DataFrame([new_rows])
        for col in target_cols:
            if col not in new_rows.columns:
                new_rows[col] = ""
        return new_rows[target_cols]

    def process_row_detail_single(self, row, col_factory, trade_type):
        """Convert one CSV record into one detail-sheet row (a ``pd.Series``).

        The column set is the foreign detail header when *trade_type* is
        '外贸', otherwise the domestic detail header.
        """
        if trade_type == '外贸':
            target_cols = self.cols_foreign_detail
        else:
            target_cols = self.cols_domestic_detail
        new_row = {col: "" for col in target_cols}

        # A NaN manufacturer used to become the literal text "nan".
        detail_manuf_val = str(self._blank_if_missing(row.get(col_factory, '')))
        raw_order_no = str(row.get('原始_合同订单编号', row.get('合同订单编号', ''))).strip()
        parts_no = raw_order_no.split()
        new_row['合同编号'] = parts_no[0] if parts_no else raw_order_no
        new_row['销售员'] = row.get('负责人', '')
        new_row['厂家'] = detail_manuf_val
        new_row['货号'] = row.get('产品编码', '')
        if trade_type == '外贸':
            new_row['币种'] = row.get('原币种', '')
        else:
            new_row['外币币种'] = row.get('原币种', '')

        target_raw = str(self._blank_if_missing(row.get(SUBJECT_COLUMN, '')))
        if '/' in target_raw:
            new_row['合同标的'] = target_raw.split('/')[0].strip()
        else:
            new_row['合同标的'] = target_raw.split('\n')[0].strip()

        csv_qty = str(row.get('数量', '')).strip()
        if csv_qty and csv_qty.lower() != 'nan':
            new_row['数量'] = csv_qty

        if '外购' in detail_manuf_val:
            # Outsourced product: the subtotal is the outsourced amount
            # (blank when the subtotal cell is empty).
            new_row['外购'] = self.format_money_str(row.get('产品小计', 0))
            remark = str(row.get('备注', '')).strip()
            if not remark or remark.lower() == 'nan':
                outsourced = str(row.get('外购产品明细', '')).strip()
                new_row['产品描述'] = outsourced if outsourced.lower() != 'nan' else ""
            else:
                new_row['产品描述'] = remark
        else:
            new_row['外购'] = ""
            new_row['产品描述'] = row.get('产品名称', '')

        # Each pair below fills whichever of the foreign/domestic column
        # names exists in this row.
        if '外币报价单价' in new_row:
            new_row['外币报价单价'] = self.format_money_str(row.get('美元报价', ''))
        if '报价单价' in new_row:
            new_row['报价单价'] = self.format_money_str(row.get('美元报价', ''))
        if '报价RMB总价' in new_row:
            new_row['报价RMB总价'] = self.format_money_str(row.get('产品小计', ''))
        if '报价总价' in new_row:
            new_row['报价总价'] = self.format_money_str(row.get('产品小计', ''))

        # The exchange rate is not formatted, only converted to text.
        rate_val = str(row.get('汇率', '')).strip()
        if rate_val.lower() == 'nan':
            rate_val = ""
        if '计算汇率' in new_row:
            new_row['计算汇率'] = rate_val
        if '合同币种/美元' in new_row:
            new_row['合同币种/美元'] = rate_val

        discount_col = '折扣率' if '折扣率' in new_row else '折扣率(%)'
        if discount_col in new_row:
            new_row[discount_col] = self.format_percent_str(row.get('折扣率', ''))

        if '售价RMB单价' in new_row:
            new_row['售价RMB单价'] = self.format_money_str(row.get('销售单价', ''))
        if '销售单价' in new_row:
            new_row['销售单价'] = self.format_money_str(row.get('销售单价', ''))
        if '售价RMB总价' in new_row:
            new_row['售价RMB总价'] = self.format_money_str(row.get('销售总价', ''))
        if '销售总价' in new_row:
            new_row['销售总价'] = self.format_money_str(row.get('销售总价', ''))

        new_row['外购转美元'] = self.format_money_str(row.get('外购转美元', ''))
        new_row['报价总价美元'] = self.format_money_str(row.get('报价总价美元', ''))
        new_row['净合同额美元'] = self.format_money_str(row.get('净合同额美元', ''))
        if '报价RMB单价' in new_row:
            new_row['报价RMB单价'] = self.format_money_str(row.get('报价RMB单价', ''))
        return pd.Series(new_row)

    def prepare_new_data_om(self, csv_df, target_cols):
        """Build the OM-contract rows (one per contract) from CSV records.

        Returns a frame with *target_cols* plus ``_sort_price``.
        """
        if csv_df.empty:
            return pd.DataFrame(columns=target_cols)

        field_map = {
            'Clean_ID': 'Clean_ID',
            '原始_合同订单编号': '原始_合同订单编号',
            '收款账户': '收款账户',
            '签约日期': '签约日期',
            '负责人': '负责人',
            '客户名称': '客户名称',
            '联系人姓名': '联系人姓名',
            '合同买方_Raw': '合同买方(名称/联系人/电话/邮箱)',
            '合同总额': '合同总额',
            '收款状态': '收款状态',
            '最新收款日期': '最新收款日期',
        }
        item_fields = {'_item_name': 'name', '_sort_price': 'sort_price'}
        df_expanded = self._expand_contract_items(csv_df, field_map, item_fields)
        if df_expanded.empty:
            return pd.DataFrame(columns=target_cols)

        df_unique = self._keep_highest_priced_item(df_expanded)
        result = pd.DataFrame(index=df_unique.index)

        contract_no, contract_suffix = self._split_contract_id(df_unique['原始_合同订单编号'])
        result['合同编号'] = contract_no
        result['内贸合同号'] = contract_suffix
        result['合同总额'] = df_unique['合同总额'].apply(self.format_money_str)
        result['收款情况'] = df_unique['收款状态'].fillna('').astype(str).str.strip()
        is_received = result['收款情况'].str.contains('已收')
        result['已收款'] = ""
        result['未收款'] = ""
        result.loc[is_received, '已收款'] = result.loc[is_received, '合同总额']
        result.loc[is_received, '未收款'] = "0.00"
        result['签署公司'] = df_unique['收款账户']
        result['签订日期'] = df_unique['签约日期'].apply(self.format_date_str)
        result['销售员'] = df_unique['负责人']
        result['最终用户单位'] = df_unique['客户名称']
        if '最终用户信息\n联系人、电话、邮箱' in target_cols:
            result['最终用户信息\n联系人、电话、邮箱'] = df_unique['联系人姓名']
        buyer_infos = df_unique['合同买方_Raw'].astype(str).apply(self.parse_buyer_info)
        result['买方单位'] = [x['name'] for x in buyer_infos]
        if '买方信息\n联系人、电话、邮箱' in target_cols:
            result['买方信息\n联系人、电话、邮箱'] = [x['contact_full'] for x in buyer_infos]
        result['收款日期'] = df_unique['最新收款日期'].apply(self.format_date_str)
        result['合同标的'] = df_unique['_item_name']
        result['_sort_price'] = df_unique['_sort_price']
        # Columns with no CSV source (e.g. "最终用户所在地") must exist before
        # the final selection; without this the selection raised KeyError.
        for col in target_cols:
            if col not in result.columns:
                result[col] = ""
        return result[list(target_cols) + ['_sort_price']]

    # ==========================================
    # Merge: overwrite protection + no dtype warnings
    # ==========================================
    def _merge_sheet(self, old_df, new_df, unique_col, target_columns):
        """Merge freshly prepared rows into a previously saved sheet.

        When *unique_col* is unique on both sides, shared rows are updated
        field by field (a blank new value never overwrites an old value) and
        tagged ``_status='modified'``; unseen rows are appended with
        ``_status='new'``. Otherwise only rows whose key is absent from the
        old sheet are appended. The inputs are not mutated.
        """
        target_columns = list(target_columns)
        if new_df.empty:
            if old_df is None or old_df.empty:
                return pd.DataFrame(columns=target_columns + ['_status'])
            old_df = old_df.copy()
            old_df['_status'] = ''
            return old_df

        new_df = new_df.copy()
        for col in target_columns:
            if col not in new_df.columns:
                new_df[col] = ""
        if '_sort_price' not in new_df.columns:
            new_df['_sort_price'] = 0.0

        if old_df is None or old_df.empty:
            new_df['_status'] = 'new'
            return new_df

        old_df = old_df.copy()
        if unique_col not in old_df.columns:
            old_df[unique_col] = ""
        if '_status' not in old_df.columns:
            old_df['_status'] = ''
        if '_sort_price' not in old_df.columns:
            old_df['_sort_price'] = float('nan')

        # Cast every target column of the old data to object so that writing
        # text into a numeric column does not warn or fail; create target
        # columns the old sheet lacks (reading them used to raise KeyError).
        for col in target_columns:
            if col in old_df.columns:
                old_df[col] = old_df[col].astype(object)
            else:
                old_df[col] = ""

        is_unique_index = (
            not old_df[unique_col].duplicated().any()
            and not new_df[unique_col].duplicated().any()
        )

        if not is_unique_index:
            # Detail-style sheet (key not unique): incremental append only.
            new_ids = set(new_df[unique_col]) - set(old_df[unique_col])
            rows_to_add = new_df[new_df[unique_col].isin(new_ids)].copy()
            rows_to_add['_status'] = 'new'
            return pd.concat([old_df, rows_to_add], ignore_index=True)

        # General-style sheet (unique key): field-level update.
        old_df = old_df.set_index(unique_col, drop=False)
        new_df = new_df.set_index(unique_col, drop=False)

        # 1. Rows that exist only in the new data.
        rows_new = new_df.loc[new_df.index.difference(old_df.index)].copy()
        rows_new['_status'] = 'new'

        # 2. Shared rows: compare column by column.
        common_ids = new_df.index.intersection(old_df.index)
        if not common_ids.empty:
            for col in target_columns:
                new_vals = new_df.loc[common_ids, col].astype(str).str.strip()
                old_vals = old_df.loc[common_ids, col].fillna("").astype(str).str.strip()
                # Overwrite only when the new value is non-blank AND differs.
                valid_new_mask = (new_vals != "") & (new_vals != "nan") & (new_vals != "None")
                diff_mask = valid_new_mask & (new_vals != old_vals)
                ids_to_update = diff_mask[diff_mask].index
                if not ids_to_update.empty:
                    old_df.loc[ids_to_update, col] = new_df.loc[ids_to_update, col]
                    old_df.loc[ids_to_update, '_status'] = 'modified'
            old_df.loc[common_ids, '_sort_price'] = new_df.loc[common_ids, '_sort_price']

        old_df = old_df.reset_index(drop=True)
        rows_new = rows_new.reset_index(drop=True)
        return pd.concat([old_df, rows_new], ignore_index=True)

    def merge_datasets(self, old_dfs, csv_df, is_asd):
        """Merge CSV records into the five sheets of one workbook.

        Args:
            old_dfs: sheet name -> previously saved DataFrame (may be empty).
            csv_df: frame returned by ``load_multiple_csvs``.
            is_asd: True keeps rows whose "厂家" contains 'ASD'
                (case-insensitive); False keeps all other rows.

        Returns:
            dict sheet name -> merged DataFrame including ``_status``.
        """
        col_gen = '厂家'
        col_det = '厂家.1' if '厂家.1' in csv_df.columns else '厂家'

        is_asd_row = csv_df[col_gen].str.contains('ASD', case=False, na=False)
        df_subset = csv_df[is_asd_row] if is_asd else csv_df[~is_asd_row]

        csv_foreign = df_subset[df_subset['合同类型'] == '外贸'].copy()
        csv_domestic = df_subset[df_subset['合同类型'] == '内贸'].copy()
        # Anything that is neither foreign nor domestic trade is an OM contract.
        csv_om = df_subset[~df_subset['合同类型'].isin(['外贸', '内贸'])].copy()

        target_cols_foreign = (
            self.cols_asd_foreign_general if is_asd else self.cols_nonasd_foreign_general
        )

        def old_sheet(name, columns):
            return old_dfs.get(name, pd.DataFrame(columns=columns))

        result_dfs = {}

        new_gen_df = self.prepare_new_data_general(csv_foreign, '外贸', target_cols_foreign, col_gen)
        result_dfs['外贸'] = self._merge_sheet(
            old_sheet('外贸', target_cols_foreign), new_gen_df, '合同编号', target_cols_foreign)

        new_det_df = self.prepare_new_data_detail(csv_foreign, '外贸', self.cols_foreign_detail, col_det)
        result_dfs['外贸明细'] = self._merge_sheet(
            old_sheet('外贸明细', self.cols_foreign_detail), new_det_df, '合同编号',
            self.cols_foreign_detail)

        new_dom_df = self.prepare_new_data_general(csv_domestic, '内贸', self.cols_domestic_general, col_gen)
        result_dfs['内贸'] = self._merge_sheet(
            old_sheet('内贸', self.cols_domestic_general), new_dom_df, '合同编号',
            self.cols_domestic_general)

        new_dom_det_df = self.prepare_new_data_detail(csv_domestic, '内贸', self.cols_domestic_detail, col_det)
        result_dfs['内贸明细'] = self._merge_sheet(
            old_sheet('内贸明细', self.cols_domestic_detail), new_dom_det_df, '合同编号',
            self.cols_domestic_detail)

        new_om_df = self.prepare_new_data_om(csv_om, self.cols_om)
        result_dfs['OM合同'] = self._merge_sheet(
            old_sheet('OM合同', self.cols_om), new_om_df, '合同编号', self.cols_om)

        return result_dfs

    def apply_formatting_to_all(self, data_dict):
        """Apply money/percent/date formatting in place to every sheet."""
        formatters = (
            (self.money_cols, self.format_money_str),
            (self.percent_cols, self.format_percent_str),
            (self.date_cols, self.format_date_str),
        )
        for df in data_dict.values():
            if df.empty:
                continue
            for columns, formatter in formatters:
                for col in columns:
                    if col in df.columns:
                        df[col] = df[col].apply(formatter)
        return data_dict


# ==========================================
# Part 2: GUI
# ==========================================
class ContractApp:
    """Tkinter window: choose files, preview merged sheets, save workbooks."""

    def __init__(self, root):
        self.root = root
        self.root.title("合同数据处理系统 V4.6 (布局与类型警告修复版)")
        self.root.geometry("1300x850")
        # The window is resizable but has a minimum size.
        self.root.minsize(1000, 700)

        self.colors = {
            'bg': '#F0F2F5', 'panel': '#FFFFFF', 'primary': '#1890FF',
            'primary_hover': '#40A9FF', 'success': '#52C41A',
            'success_hover': '#73D13D', 'text_main': '#262626',
            'text_sub': '#8C8C8C', 'border': '#D9D9D9',
            'tag_new': '#FFFBE6', 'tag_mod': '#E6F7FF',
        }
        self.setup_styles()

        self.processor = DataProcessor()
        self.csv_paths = tk.StringVar()
        self.asd_path = tk.StringVar()
        self.non_asd_path = tk.StringVar()
        self.status_var = tk.StringVar(value="准备就绪")
        self.final_data = {}
        # Treeview widget path -> (source DataFrame, {item id: row label});
        # lets quick edits be written back to the data that gets saved.
        self._tree_sources = {}

        self.create_widgets()

    def setup_styles(self):
        """Configure the ttk theme, fonts and colours."""
        self.style = ttk.Style()
        self.style.theme_use('clam')
        self.style.configure("TFrame", background=self.colors['bg'])
        self.style.configure("Panel.TFrame", background=self.colors['panel'], relief="flat")
        self.style.configure("TLabel", background=self.colors['panel'],
                             foreground=self.colors['text_main'],
                             font=("Microsoft YaHei UI", 10))
        self.style.configure("Header.TLabel", font=("Microsoft YaHei UI", 18, "bold"),
                             background=self.colors['bg'],
                             foreground=self.colors['text_main'])
        self.style.configure("SubHeader.TLabel", font=("Microsoft YaHei UI", 12, "bold"),
                             background=self.colors['panel'],
                             foreground=self.colors['text_main'])
        self.style.configure("Status.TLabel", background=self.colors['bg'],
                             foreground=self.colors['text_sub'],
                             font=("Microsoft YaHei UI", 9))
        self.style.configure("TButton", font=("Microsoft YaHei UI", 10), borderwidth=0, padding=8)
        self.style.map("TButton", background=[('active', '#E0E0E0')],
                       relief=[('pressed', 'sunken')])
        self.style.configure("Primary.TButton", background=self.colors['primary'],
                             foreground='white')
        self.style.map("Primary.TButton", background=[('active', self.colors['primary_hover'])])
        self.style.configure("Success.TButton", background=self.colors['success'],
                             foreground='white')
        self.style.map("Success.TButton", background=[('active', self.colors['success_hover'])])
        self.style.configure("TEntry", fieldbackground="white", padding=5)
        self.style.configure("Treeview", background="white",
                             foreground=self.colors['text_main'], rowheight=30,
                             font=("Microsoft YaHei UI", 9), fieldbackground="white")
        self.style.configure("Treeview.Heading", font=("Microsoft YaHei UI", 10, "bold"),
                             background="#FAFAFA", foreground=self.colors['text_main'],
                             relief="flat")
        self.style.map("Treeview", background=[('selected', self.colors['primary_hover'])],
                       foreground=[('selected', 'white')])

    def create_widgets(self):
        """Build the header, bottom bar, input panel and preview notebook."""
        # 1. Header (packed at the top).
        header_frame = ttk.Frame(self.root)
        header_frame.pack(side="top", fill="x", padx=25, pady=(25, 10))
        ttk.Label(header_frame, text="🚀 合同数据智能处理系统", style="Header.TLabel").pack(side="left")

        # 2. Bottom button bar: packed before the expanding preview area so
        #    that it always stays visible.
        bottom_bar = ttk.Frame(self.root, style="Panel.TFrame", padding=15)
        bottom_bar.pack(side="bottom", fill="x", padx=25, pady=(0, 25))
        legend_frame = ttk.Frame(bottom_bar, style="Panel.TFrame")
        legend_frame.pack(side="left")
        self.create_legend(legend_frame, "● 新增数据", self.colors['tag_new'], "#D48806")
        self.create_legend(legend_frame, "● 发生变更", self.colors['tag_mod'], self.colors['primary'])
        ttk.Label(bottom_bar, textvariable=self.status_var,
                  style="Status.TLabel").pack(side="left", padx=20)
        ttk.Button(bottom_bar, text="💾 确认无误,保存写入", style="Success.TButton",
                   command=self.save_files).pack(side="right")

        # 3. Input panel (below the header).
        input_panel = ttk.Frame(self.root, style="Panel.TFrame", padding=25)
        input_panel.pack(side="top", fill="x", padx=25, pady=5)
        ttk.Label(input_panel, text="文件配置与导入", style="SubHeader.TLabel").grid(
            row=0, column=0, columnspan=3, sticky="w", pady=(0, 20))
        self.create_file_row(input_panel, "📂 导入 CSV 源文件 (支持多选):", self.csv_paths, 1,
                             is_multiple=True)
        self.create_file_row(input_panel, "📘 旧 ASD Excel 文件:", self.asd_path, 2)
        self.create_file_row(input_panel, "📗 旧 非ASD Excel 文件:", self.non_asd_path, 3)
        btn_frame = ttk.Frame(input_panel, style="Panel.TFrame")
        btn_frame.grid(row=4, column=0, columnspan=3, pady=(20, 0), sticky="e")
        ttk.Button(btn_frame, text="▶ 开始极速处理 (仅预览)", style="Primary.TButton",
                   command=self.process_files).pack(side="right")

        # 4. Preview area: fills all remaining space.
        self.notebook = ttk.Notebook(self.root)
        self.notebook.pack(side="top", fill="both", expand=True, padx=25, pady=15)

    def create_file_row(self, parent, label_text, var, row_idx, is_multiple=False):
        """Add a label / entry / browse-button row bound to *var*."""
        lbl = ttk.Label(parent, text=label_text, width=28)
        lbl.grid(row=row_idx, column=0, sticky="w", pady=8)
        entry = ttk.Entry(parent, textvariable=var, font=("Microsoft YaHei UI", 9))
        entry.grid(row=row_idx, column=1, sticky="ew", padx=10, pady=8)
        btn = ttk.Button(parent, text="浏览...", width=8,
                         command=lambda: self.browse_file(var, is_multiple))
        btn.grid(row=row_idx, column=2, padx=5)
        parent.columnconfigure(1, weight=1)

    def create_legend(self, parent, text, bg_color, fg_color):
        """Add one coloured legend chip to *parent*."""
        f = tk.Frame(parent, bg=bg_color, padx=10, pady=4)
        f.pack(side="left", padx=5)
        tk.Label(f, text=text, bg=bg_color, fg=fg_color,
                 font=("Microsoft YaHei UI", 9, "bold")).pack()

    def browse_file(self, variable, is_multiple=False):
        """Open a file dialog and store the selection in *variable*."""
        if is_multiple:
            files = filedialog.askopenfilenames(filetypes=[("CSV Files", "*.csv")])
            if files:
                variable.set("; ".join(files))
        else:
            # A tuple of patterns is portable; the ';'-joined form only works
            # on Windows.
            f = filedialog.askopenfilename(
                filetypes=[("Excel/CSV Files", ("*.csv", "*.xlsx"))])
            if f:
                variable.set(f)

    def load_excel_safe(self, path):
        """Read every sheet of an old workbook, normalising its headers.

        Returns:
            dict sheet name -> DataFrame; {} when *path* is blank, missing or
            unreadable (a warning dialog is shown in the last case).
        """
        if not path or not os.path.exists(path):
            return {}
        try:
            dfs = pd.read_excel(path, sheet_name=None)
            clean_dfs = {}
            for sheet_name, sheet in dfs.items():
                new_columns = []
                for col in sheet.columns:
                    clean_col = self.processor.clean_header_key(str(col))
                    if clean_col in self.processor.standard_col_map:
                        new_columns.append(self.processor.standard_col_map[clean_col])
                    elif col in self.processor.legacy_map:
                        new_columns.append(self.processor.legacy_map[col])
                    else:
                        new_columns.append(col)
                sheet.columns = new_columns
                # Keep the first of any duplicated header; copy so the
                # assignment below does not write into a slice.
                sheet = sheet.loc[:, ~sheet.columns.duplicated()].copy()
                if '合同编号' in sheet.columns:
                    sheet['合同编号'] = sheet['合同编号'].astype(str)
                clean_dfs[sheet_name.strip()] = sheet
            return clean_dfs
        except Exception as e:
            messagebox.showwarning("读取错误", f"读取旧文件失败: {path}\n错误: {str(e)}")
            return {}

    def process_files(self):
        """Load the CSVs, merge with the old workbooks and show the preview."""
        csv_paths_str = self.csv_paths.get()
        if not csv_paths_str:
            messagebox.showerror("提示", "请先选择 CSV 源文件!")
            return
        self.status_var.set("⏳ 正在读取多个数据源...")
        self.root.update()
        try:
            csv_df, headers_or_msg = self.processor.load_multiple_csvs(csv_paths_str)
            if csv_df is None:
                self.status_var.set("❌ 发生错误")
                messagebox.showerror("读取错误", headers_or_msg)
                return
            # Some files may have been skipped even though others loaded.
            load_warnings = getattr(self.processor, 'last_load_warnings', [])
            if load_warnings:
                messagebox.showwarning("读取错误", "\n".join(load_warnings))

            self.status_var.set("🚀 正在极速合并数据...")
            self.root.update()
            self.final_data = {}

            asd_old = self.load_excel_safe(self.asd_path.get())
            self.final_data['ASD'] = self.processor.merge_datasets(asd_old, csv_df, True)
            non_old = self.load_excel_safe(self.non_asd_path.get())
            self.final_data['NonASD'] = self.processor.merge_datasets(non_old, csv_df, False)

            self.final_data['ASD'] = self.processor.apply_formatting_to_all(self.final_data['ASD'])
            self.final_data['NonASD'] = self.processor.apply_formatting_to_all(self.final_data['NonASD'])

            self.refresh_preview()
            self.status_var.set("✅ 预览已生成。确认无误后请点击右下角保存!")
            messagebox.showinfo("完成", "数据预览已生成!\n\n注意:此时尚未写入文件。\n请在下方检查数据,确认无误后点击 [保存] 按钮。")
        except Exception as e:
            self.status_var.set("❌ 发生错误")
            traceback.print_exc()
            messagebox.showerror("运行错误", str(e))

    @staticmethod
    def _sorted_by_contract(df):
        """Return *df* sorted by contract number, then by price descending."""
        if '合同编号' not in df.columns:
            return df
        sort_cols = ['合同编号']
        asc_order = [True]
        if '_sort_price' in df.columns:
            sort_cols.append('_sort_price')
            asc_order.append(False)
        return df.sort_values(by=sort_cols, ascending=asc_order)

    def _standard_columns(self, file_type, sheet_name):
        """Return the header list shown for *sheet_name* of *file_type*."""
        if sheet_name == '外贸':
            if file_type == 'ASD':
                return self.processor.cols_asd_foreign_general
            return self.processor.cols_nonasd_foreign_general
        by_sheet = {
            '内贸': self.processor.cols_domestic_general,
            'OM合同': self.processor.cols_om,
            '外贸明细': self.processor.cols_foreign_detail,
            '内贸明细': self.processor.cols_domestic_detail,
        }
        return by_sheet.get(sheet_name, [])

    def refresh_preview(self):
        """Rebuild the preview tabs from ``self.final_data``."""
        for tab in self.notebook.tabs():
            self.notebook.forget(tab)
        self._tree_sources = {}

        for file_type in ['ASD', 'NonASD']:
            if file_type not in self.final_data:
                continue
            data_dict = self.final_data[file_type]
            main_frame = ttk.Frame(self.notebook, style="Panel.TFrame", padding=10)
            self.notebook.add(main_frame, text=f"  {file_type} 预览  ")
            inner_notebook = ttk.Notebook(main_frame)
            inner_notebook.pack(fill="both", expand=True)

            for sheet_name in SHEET_ORDER:
                if sheet_name not in data_dict:
                    continue
                source_df = data_dict[sheet_name]
                view_df = source_df
                if not source_df.empty and '合同编号' in source_df.columns:
                    # Stringify in the source frame so sorting never compares
                    # mixed types (the save step sorts the same column).
                    source_df['合同编号'] = source_df['合同编号'].astype(str)
                    view_df = self._sorted_by_contract(source_df)
                    if '明细' in sheet_name:
                        # Display copy only: show the subject once per contract.
                        mask = view_df.duplicated(subset=['合同编号'], keep='first')
                        view_df.loc[mask, '合同标的'] = ""
                standard_cols = self._standard_columns(file_type, sheet_name)
                self.create_treeview(inner_notebook, view_df, sheet_name, standard_cols,
                                     source_df=source_df)

    def create_treeview(self, parent, df, title, target_cols, source_df=None):
        """Add a tab showing *df* in a Treeview.

        Args:
            parent: the ttk.Notebook receiving the tab.
            df: rows to display (already sorted).
            title: tab title; a title containing '明细' blanks repeated subjects.
            target_cols: columns to show, in order.
            source_df: optional frame sharing *df*'s index labels; quick edits
                made by double-click are written back into it.
        """
        frame = ttk.Frame(parent)
        parent.add(frame, text=f" {title} ")
        scroll_y = ttk.Scrollbar(frame, orient="vertical")
        scroll_x = ttk.Scrollbar(frame, orient="horizontal")
        tree = ttk.Treeview(frame, columns=target_cols, show='headings',
                            yscrollcommand=scroll_y.set, xscrollcommand=scroll_x.set)
        scroll_y.config(command=tree.yview)
        scroll_x.config(command=tree.xview)
        scroll_y.pack(side="right", fill="y")
        scroll_x.pack(side="bottom", fill="x")
        tree.pack(fill="both", expand=True)

        for col in target_cols:
            tree.heading(col, text=col.replace('\n', ' '))
            tree.column(col, width=130, anchor="center")

        tree.tag_configure('new', background=self.colors['tag_new'])
        tree.tag_configure('modified', background=self.colors['tag_mod'],
                           foreground=self.colors['primary'])
        tree.tag_configure('odd', background='white')
        tree.tag_configure('even', background='#FAFAFA')

        is_detail = '明细' in title
        row_labels = {}
        if not df.empty:
            df_display = df.fillna("")
            last_contract_id = None
            for count, (label, row) in enumerate(df_display.iterrows()):
                values = []
                for col in target_cols:
                    val = row.get(col, "")
                    if is_detail and col == '合同标的':
                        if row.get('合同编号', '') == last_contract_id:
                            val = ""
                    values.append(val)
                if is_detail:
                    last_contract_id = row.get('合同编号', '')
                status = row.get('_status', '')
                tags = [status] if status else [('even' if count % 2 == 0 else 'odd')]
                item_id = tree.insert("", "end", values=values, tags=tags)
                row_labels[item_id] = label

        if source_df is not None:
            self._tree_sources[str(tree)] = (source_df, row_labels)
        # The original passed an empty event sequence here; the handler is a
        # double-click editor, so bind the left-button double-click.
        tree.bind("<Double-1>", lambda event: self.on_double_click(event, tree))

    def on_double_click(self, event, tree):
        """Prompt for a new cell value and store it in the tree and the data."""
        region = tree.identify("region", event.x, event.y)
        if region != "cell":
            return
        column = tree.identify_column(event.x)
        row_id = tree.identify_row(event.y)
        if not row_id:
            return
        col_idx = int(column.replace('#', '')) - 1
        col_name = tree['columns'][col_idx]
        current_val = tree.item(row_id, "values")[col_idx]
        new_val = simpledialog.askstring("快速编辑", f"修改 [{col_name}]:",
                                         initialvalue=current_val, parent=self.root)
        if new_val is None:
            return
        current_values = list(tree.item(row_id, "values"))
        current_values[col_idx] = new_val
        tree.item(row_id, values=current_values)
        self._write_back_edit(tree, row_id, col_name, new_val)

    def _write_back_edit(self, tree, row_id, col_name, new_val):
        """Copy an edited cell into the DataFrame that save_files() writes."""
        source = getattr(self, '_tree_sources', {}).get(str(tree))
        if not source:
            return
        source_df, row_labels = source
        if row_id not in row_labels:
            return
        label = row_labels[row_id]
        if (col_name not in source_df.columns or not source_df.index.is_unique
                or label not in source_df.index):
            return
        # Text typed by the user may not fit a numeric column dtype.
        if source_df[col_name].dtype != object:
            source_df[col_name] = source_df[col_name].astype(object)
        source_df.at[label, col_name] = new_val

    def _sheets_to_write(self, sheets):
        """Return [(sheet name, DataFrame)] for every non-empty sheet."""
        prepared = []
        for sheet_name in SHEET_ORDER:
            if sheet_name not in sheets:
                continue
            df = self._sorted_by_contract(sheets[sheet_name])
            save_df = df.drop(columns=['_status', '_sort_price'], errors='ignore')
            if save_df.empty:
                continue
            if '明细' in sheet_name and '合同编号' in save_df.columns:
                # Detail sheets list the subject once per contract.
                mask = save_df.duplicated(subset=['合同编号'], keep='first')
                save_df.loc[mask, '合同标的'] = ""
            prepared.append((sheet_name, save_df))
        return prepared

    def save_files(self):
        """Write the previewed data to the ASD / non-ASD workbooks."""
        if not self.final_data:
            messagebox.showwarning("提示", "没有可保存的数据,请先处理文件!")
            return

        csv_path_str = self.csv_paths.get()
        first_path = csv_path_str.split(';')[0].strip() if csv_path_str else ""
        base_dir = os.path.dirname(first_path) if first_path else ""

        confirm = messagebox.askyesno(
            "确认写入", "您确定要将预览的数据写入到 Excel 文件吗?\n\n此操作将覆盖目标文件中的数据。")
        if not confirm:
            self.status_var.set("已取消写入")
            return

        self.status_var.set("💾 正在写入文件...")
        self.root.update()
        try:
            files_written = 0
            for file_type, sheets in self.final_data.items():
                target_path = ""
                if file_type == 'ASD':
                    target_path = self.asd_path.get()
                    if not target_path:
                        target_path = os.path.join(base_dir, "ASD_Combined.xlsx")
                elif file_type == 'NonASD':
                    target_path = self.non_asd_path.get()
                    if not target_path:
                        target_path = os.path.join(base_dir, "NonASD_Combined.xlsx")

                prepared = self._sheets_to_write(sheets)
                if not prepared:
                    # A workbook without any sheet cannot be saved by
                    # openpyxl; skip it instead of failing the whole save.
                    continue
                with pd.ExcelWriter(target_path, engine='openpyxl') as writer:
                    for sheet_name, save_df in prepared:
                        save_df.to_excel(writer, sheet_name=sheet_name, index=False)
                files_written += 1

            if files_written:
                self.status_var.set("✅ 写入成功!")
                messagebox.showinfo("成功", f"文件已成功写入!\n位置: {base_dir or '当前目录'}")
            else:
                messagebox.showwarning("提示", "没有可保存的数据,请先处理文件!")
        except PermissionError:
            messagebox.showerror("保存失败", "文件被占用!\n请先关闭 Excel 文件后再点击保存。")
        except Exception as e:
            messagebox.showerror("保存失败", str(e))
        finally:
            # The dialogs above are modal; once dismissed, return to idle.
            self.status_var.set("准备就绪")


if __name__ == "__main__":
    root = tk.Tk()
    try:
        # Windows only: request per-process DPI awareness; ignored elsewhere.
        from ctypes import windll
        windll.shcore.SetProcessDpiAwareness(1)
    except Exception:
        pass
    app = ContractApp(root)
    root.mainloop()