diff --git a/提速版.py b/提速版.py new file mode 100644 index 0000000..30d43fd --- /dev/null +++ b/提速版.py @@ -0,0 +1,1054 @@ +import pandas as pd +import tkinter as tk +from tkinter import ttk, filedialog, messagebox, simpledialog +import os +import re +import numpy as np +from datetime import datetime +import traceback + + +# ========================================== +# 第一部分:业务逻辑核心 +# ========================================== + +class DataProcessor: + def __init__(self): + # ==================== 表头定义 ==================== + self.cols_asd_foreign_general = [ + "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", "销售员", + "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地", + "厂家", "型号/货号", "合同标的", "数量", "单位", "币种", "折扣率", + "合同额", "总合同额", "外购", "已收款", "未收款", "收款日期", + "最晚发货期", "付款方式", "发货港", "目的港", "发货日期", + "买方单位", "买方信息\n联系人、电话、邮箱", "收货人信息", + "转为美元净合同额", "转为美元总合同额" + ] + + self.cols_nonasd_foreign_general = [ + "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", "销售员", + "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地", + "厂家", "型号/货号", "合同标的", "数量", "单位", "币种", "折扣率", + "合同额", "总合同额", "外购", "已收款", "未收款", "收款日期", + "最晚发货期", "付款方式", "发货港", "目的港", "发货日期", + "买方单位", "买方信息\n联系人、电话、邮箱", "收货人信息", + "合同币种/美元", "转为美元净合同额", "转为美元总合同额" + ] + + self.cols_domestic_general = [ + "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", "销售员", + "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地", + "买方单位", "买方信息\n联系人、电话、邮箱", + "厂家", "型号", "合同标的", "数量", "单位", "折扣率(%)", + "合同额", "合同总额", "外购", "付款方式", "最晚发货期", + "已收款", "未收款", "收款日期", + "转为美元净合同额", "转为美元总合同额" + ] + + self.cols_foreign_detail = [ + "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", "数量", "单位", + "币种", "报价单价", "报价总价", "销售单价", "销售总价", "折扣率", + "外购", "合同币种/美元", "外购转美元", "报价总价美元", "净合同额美元" + ] + + self.cols_domestic_detail = [ + "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", "数量", "单位", + "外币币种", "外币报价单价", "报价RMB单价", "报价RMB总价", + "售价RMB单价", "售价RMB总价", "折扣率(%)", "外购", + "计算汇率", "外购转美元", "报价总价美元", "净合同额美元" + ] + + self.cols_om = [ + "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", "销售员", + "最终用户单位", 
def safe_float(self, val):
    """Convert a loosely formatted numeric value to ``float`` without raising.

    Strings may contain thousands separators and currency signs
    (``,``, ``¥``, ``¥``, ``$``); these are removed before parsing.

    Args:
        val: A number, a numeric string, or a missing value (None/NaN/NaT).

    Returns:
        The parsed float. ``0.0`` is returned for blank strings, missing
        values and anything that cannot be parsed. A float NaN passed in
        directly is returned unchanged, because the int/float fast path
        runs before the missing-value check.
    """
    try:
        if isinstance(val, (int, float)):
            return float(val)
        if isinstance(val, str):
            # Strip separators and both half-width and full-width yen signs.
            for symbol in (',', '¥', '¥', '$'):
                val = val.replace(symbol, '')
            val = val.strip()
            if val == '':
                return 0.0
        if pd.isna(val):
            return 0.0
        return float(val)
    except (TypeError, ValueError, OverflowError):
        # Unparseable text, unsupported types, list-likes (ambiguous truth
        # value of pd.isna) and oversized ints all fall back to zero.
        return 0.0
def load_multiple_csvs(self, file_paths):
    """Load one or more CSV exports and merge them into a single DataFrame.

    Each file is tried as UTF-8, then GBK, then GB18030; if all three fail
    to decode, it is read as GB18030 with undecodable bytes replaced.

    Args:
        file_paths: Either a ``;``-separated string of paths or an iterable
            of path strings.

    Returns:
        ``(DataFrame, (general_factory_col, detail_factory_col))`` on
        success, or ``(None, error_message)`` when nothing usable could be
        loaded or a required column is missing.
    """
    if isinstance(file_paths, str):
        paths = [p.strip() for p in file_paths.split(';') if p.strip()]
    else:
        paths = list(file_paths)

    if not paths:
        return None, "未选择文件"

    all_dfs = []
    error_msgs = []
    col_factory_general = '厂家'

    for path in paths:
        file_name = os.path.basename(path)
        if not os.path.exists(path):
            # Record the reason so an all-missing selection is explained.
            error_msgs.append(f"文件不存在: {file_name}")
            continue

        df = None
        unreadable = False
        for enc in ('utf-8', 'gbk', 'gb18030'):
            try:
                df = pd.read_csv(path, encoding=enc)
                break
            except UnicodeDecodeError:
                continue
            except (OSError, ValueError) as exc:
                # Parser / empty-file / permission problems are not encoding
                # issues; retrying with another codec would not help.
                error_msgs.append(f"无法读取: {file_name} ({exc})")
                unreadable = True
                break
        if unreadable:
            continue

        if df is None:
            try:
                df = pd.read_csv(path, encoding='gb18030', encoding_errors='replace')
            except (OSError, ValueError, TypeError):
                error_msgs.append(f"无法读取: {file_name}")
                continue

        if '合同类型' not in df.columns:
            # Previously surfaced as a raw KeyError.
            return None, f"文件 {file_name} 缺少必需的列: 合同类型"

        df['合同类型'] = df['合同类型'].fillna('').astype(str)
        all_dfs.append(df)

    if not all_dfs:
        return None, "\n".join(error_msgs) if error_msgs else "没有有效的数据文件"

    try:
        final_df = pd.concat(all_dfs, ignore_index=True)
    except Exception as e:
        return None, f"文件合并失败: {str(e)}"

    if col_factory_general not in final_df.columns:
        return None, f"数据缺少必需的列: {col_factory_general}"

    # pandas renames a second "厂家" header to "厂家.1"; that one is used for
    # the detail sheets when present.
    col_factory_detail = '厂家.1' if '厂家.1' in final_df.columns else '厂家'
    final_df[col_factory_general] = final_df[col_factory_general].fillna('').astype(str)

    if '合同订单编号' in final_df.columns:
        final_df['原始_合同订单编号'] = final_df['合同订单编号'].astype(str).str.strip()
        # Clean_ID is the first whitespace-separated token of the order number.
        final_df['Clean_ID'] = final_df['原始_合同订单编号'].apply(lambda x: x.split()[0] if x else "")

    return final_df, (col_factory_general, col_factory_detail)
parse_single_line_subject(self, text): + res = {'name': '', 'model': '', 'qty': '', 'unit': '', 'price': '', 'sort_price': 0.0} + if not isinstance(text, str) or not text.strip(): return res + text = text.strip() + + # Model 格式解析 + if re.match(r'^Model[::]', text, re.IGNORECASE): + m = re.match( + r'Model[::]\s*(.+?)\s+([a-zA-Z\u4e00-\u9fa5]+)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)', + text, re.IGNORECASE) + if m: + res['name'] = m.group(1).strip() + res['model'] = m.group(1).strip() + res['unit'] = m.group(2) + res['qty'] = m.group(3) + res['price'] = m.group(4) + res['sort_price'] = self.safe_float(m.group(4)) + return res + else: + m2 = re.match(r'Model[::]\s*(.+?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)', text, + re.IGNORECASE) + if m2: + res['name'] = m2.group(1).strip() + res['model'] = m2.group(1).strip() + res['qty'] = m2.group(2) + res['price'] = m2.group(3) + res['sort_price'] = self.safe_float(m2.group(3)) + return res + + if '/' in text: + parts = [p.strip() for p in text.split('/')] + if len(parts) >= 1: res['name'] = parts[0] + if len(parts) >= 2: res['model'] = parts[1] + if len(parts) >= 3: + m_qty = re.match(r'^(\d+(\.\d+)?)\s*([\u4e00-\u9fa5a-zA-Z]+)?$', parts[2]) + if m_qty: + res['qty'] = m_qty.group(1) + res['unit'] = m_qty.group(3) if m_qty.group(3) else "" + else: + res['qty'] = parts[2] + if len(parts) >= 4: + res['price'] = parts[3] + res['sort_price'] = self.safe_float(parts[3]) + return res + + name_match = re.search(r'(?:中文品名|中文名称|名称|Name)[::]\s*(.*?)(?:\n|$)', text, re.IGNORECASE) + if name_match: + res['name'] = name_match.group(1).strip() + else: + res['name'] = text.split('\n')[0] + + nums = re.findall(r'\d+(?:\.\d+)?', text.replace(',', '')) + if nums: + res['sort_price'] = self.safe_float(nums[-1]) + res['price'] = nums[-1] + return res + + # ========================================== + # 数据准备函数 (Prepare Functions) + # ========================================== + + def prepare_new_data_general(self, 
csv_df, trade_type, target_cols, col_factory): + if csv_df.empty: return pd.DataFrame(columns=target_cols) + + def extract_items(row): + target_raw = str(row.get('合同标的(品名/型号/数量/单价/总价)', '')) + lines = [line.strip() for line in target_raw.split('\n') if line.strip()] + items = [] + if not lines: + items.append(self.parse_single_line_subject("")) + else: + for line in lines: items.append(self.parse_single_line_subject(line)) + return items + + parsed_series = csv_df.apply(extract_items, axis=1) + + expanded_data = [] + for idx, row in enumerate(csv_df.itertuples(index=False)): + items = parsed_series.iloc[idx] + r_dict = csv_df.iloc[idx].to_dict() + + for item in items: + row_base = { + 'Clean_ID': r_dict.get('Clean_ID', ''), + '原始_合同订单编号': r_dict.get('原始_合同订单编号', ''), + '收款账户': r_dict.get('收款账户', ''), + '签约日期': r_dict.get('签约日期', ''), + '负责人': r_dict.get('负责人', ''), + '客户名称': r_dict.get('客户名称', ''), + '联系人姓名': r_dict.get('联系人姓名', ''), + '合同买方_Raw': r_dict.get('合同买方(名称/联系人/电话/邮箱)', ''), + '进口代理_Raw': r_dict.get('进口代理(名称/USCI/地址/联系人/电话/邮箱)', ''), + '厂家_Val': r_dict.get(col_factory, ''), + '货币': r_dict.get('货币(选完产品再改)', ''), + '发货地': r_dict.get('发货地', ''), + '目的港': r_dict.get('目的港', ''), + '折扣率': r_dict.get('折扣率', ''), + '合同总额': r_dict.get('合同总额', ''), + '收款状态': r_dict.get('收款状态', ''), + '外购产品金额': r_dict.get('外购产品金额', ''), + '最新收款日期': r_dict.get('最新收款日期', ''), + '最晚发货期': r_dict.get('最晚发货期', ''), + '付款比例及期限': r_dict.get('付款比例及期限', ''), + '合同币种/美元': r_dict.get('合同币种/美元', ''), + '_item_name': item['name'], + '_item_model': item['model'], + '_item_qty': item['qty'], + '_item_unit': item['unit'], + '_item_price': item['price'], + '_sort_price': item['sort_price'] + } + expanded_data.append(row_base) + + df_expanded = pd.DataFrame(expanded_data) + if df_expanded.empty: return pd.DataFrame(columns=target_cols) + + # 排序去重:取金额最大 + df_expanded.sort_values(by=['Clean_ID', '_sort_price'], ascending=[True, False], inplace=True) + df_unique = 
df_expanded.drop_duplicates(subset=['Clean_ID'], keep='first').copy() + + result = pd.DataFrame(index=df_unique.index) + + parts = df_unique['原始_合同订单编号'].str.split(n=1, expand=True) + result['合同编号'] = parts[0] + contract_no_col = '外贸合同号' if trade_type == '外贸' else '内贸合同号' + result[contract_no_col] = parts[1] if parts.shape[1] > 1 else "" + + result['签署公司'] = df_unique['收款账户'] + result['合同签订日期'] = df_unique['签约日期'].apply(self.format_date_str) + if '签订日期' in target_cols: result['签订日期'] = result['合同签订日期'] + + result['销售员'] = df_unique['负责人'] + result['最终用户单位'] = df_unique['客户名称'] + + if any("最终用户信息" in c for c in target_cols): + col_name = next(c for c in target_cols if "最终用户信息" in c) + result[col_name] = df_unique['联系人姓名'] + + def get_buyer_info(row): + raw = row['进口代理_Raw'] if trade_type == '外贸' and str(row['进口代理_Raw']) not in ['', 'nan'] else row[ + '合同买方_Raw'] + return self.parse_buyer_info(str(raw)) + + buyer_infos = df_unique.apply(get_buyer_info, axis=1) + result['买方单位'] = [x['name'] for x in buyer_infos] + + if any("买方信息" in c for c in target_cols): + col_name = next(c for c in target_cols if "买方信息" in c) + result[col_name] = [x['contact_full'] for x in buyer_infos] + + if '收货人信息' in target_cols: result['收货人信息'] = result['买方单位'] + + result['厂家'] = df_unique['厂家_Val'] + if '币种' in target_cols: result['币种'] = df_unique['货币'] + if '发货港' in target_cols: result['发货港'] = df_unique['发货地'] + if '目的港' in target_cols: result['目的港'] = df_unique['目的港'] + + discount_col = '折扣率' if '折扣率' in target_cols else '折扣率(%)' + result[discount_col] = df_unique['折扣率'].apply(self.format_percent_str) + + result['合同标的'] = df_unique['_item_name'] + if '型号/货号' in target_cols: result['型号/货号'] = df_unique['_item_model'] + if '型号' in target_cols: result['型号'] = df_unique['_item_model'] + result['数量'] = df_unique['_item_qty'] + result['单位'] = df_unique['_item_unit'] + + if '合同额' in target_cols: result['合同额'] = df_unique['_item_price'].apply(self.format_money_str) + + total_col_name = '总合同额' if 
'总合同额' in target_cols else '合同总额' + result[total_col_name] = df_unique['合同总额'].apply(self.format_money_str) + + result['收款情况'] = df_unique['收款状态'].fillna('').astype(str).str.strip() + + is_received = result['收款情况'].str.contains('已收') + result['已收款'] = "" + result['未收款'] = "" + result.loc[is_received, '已收款'] = result.loc[is_received, total_col_name] + result.loc[is_received, '未收款'] = "0.00" + + result['外购'] = df_unique['外购产品金额'].apply(self.format_money_str) + result['收款日期'] = df_unique['最新收款日期'].apply(self.format_date_str) + + if '最晚发货期' in target_cols: result['最晚发货期'] = df_unique['最晚发货期'].apply(self.format_date_str) + if '付款方式' in target_cols: result['付款方式'] = df_unique['付款比例及期限'] + if '发货日期' in target_cols: result['发货日期'] = "" + + # [修改点] 这里只取 raw value,后续也不格式化 + if '合同币种/美元' in target_cols: + # 确保转为字符串,避免 float 警告 + result['合同币种/美元'] = df_unique['合同币种/美元'].fillna("").astype(str) + + result['_sort_price'] = df_unique['_sort_price'] + + for col in target_cols: + if col not in result.columns: result[col] = "" + + return result[target_cols + ['_sort_price']] + + def prepare_new_data_detail(self, csv_df, trade_type, target_cols, col_factory): + if csv_df.empty: return pd.DataFrame(columns=target_cols) + new_rows = csv_df.apply(lambda r: self.process_row_detail_single(r, col_factory, trade_type), axis=1) + if isinstance(new_rows, pd.Series): new_rows = pd.DataFrame([new_rows]) + for col in target_cols: + if col not in new_rows.columns: new_rows[col] = "" + return new_rows[target_cols] + + def process_row_detail_single(self, row, col_factory, trade_type): + if trade_type == '外贸': + target_cols = self.cols_foreign_detail + else: + target_cols = self.cols_domestic_detail + new_row = {col: "" for col in target_cols} + + detail_manuf_val = str(row.get(col_factory, '')) + raw_order_no = str(row.get('原始_合同订单编号', row.get('合同订单编号', ''))).strip() + parts_no = raw_order_no.split() + new_row['合同编号'] = parts_no[0] if len(parts_no) > 0 else raw_order_no + + new_row['销售员'] = 
row.get('负责人', '') + new_row['厂家'] = detail_manuf_val + new_row['货号'] = row.get('产品编码', '') + + if trade_type == '外贸': + new_row['币种'] = row.get('原币种', '') + else: + new_row['外币币种'] = row.get('原币种', '') + + target_raw = str(row.get('合同标的(品名/型号/数量/单价/总价)', '')) + if '/' in target_raw: + new_row['合同标的'] = target_raw.split('/')[0].strip() + else: + new_row['合同标的'] = target_raw.split('\n')[0].strip() + + csv_qty = str(row.get('数量', '')).strip() + if csv_qty and csv_qty.lower() != 'nan': new_row['数量'] = csv_qty + + val_product_subtotal = self.safe_float(row.get('产品小计', 0)) + if '外购' in detail_manuf_val: + new_row['外购'] = self.format_money_str(val_product_subtotal) + remark = str(row.get('备注', '')).strip() + if not remark or remark.lower() == 'nan': + outsourced = str(row.get('外购产品明细', '')).strip() + new_row['产品描述'] = outsourced if outsourced.lower() != 'nan' else "" + else: + new_row['产品描述'] = remark + else: + new_row['外购'] = "" + new_row['产品描述'] = row.get('产品名称', '') + if '外币报价单价' in new_row: new_row['外币报价单价'] = self.format_money_str(row.get('美元报价', '')) + if '报价单价' in new_row: new_row['报价单价'] = self.format_money_str(row.get('美元报价', '')) + if '报价RMB总价' in new_row: new_row['报价RMB总价'] = self.format_money_str(row.get('产品小计', '')) + if '报价总价' in new_row: new_row['报价总价'] = self.format_money_str(row.get('产品小计', '')) + + # [修改点] 汇率不格式化,只转字符串 + rate_val = str(row.get('汇率', '')).strip() + if rate_val.lower() == 'nan': rate_val = "" + + if '计算汇率' in new_row: new_row['计算汇率'] = rate_val + if '合同币种/美元' in new_row: new_row['合同币种/美元'] = rate_val + + discount_col = '折扣率' if '折扣率' in new_row else '折扣率(%)' + if discount_col in new_row: new_row[discount_col] = self.format_percent_str(row.get('折扣率', '')) + + if '售价RMB单价' in new_row: new_row['售价RMB单价'] = self.format_money_str(row.get('销售单价', '')) + if '销售单价' in new_row: new_row['销售单价'] = self.format_money_str(row.get('销售单价', '')) + if '售价RMB总价' in new_row: new_row['售价RMB总价'] = self.format_money_str(row.get('销售总价', '')) + if '销售总价' in 
new_row: new_row['销售总价'] = self.format_money_str(row.get('销售总价', '')) + + new_row['外购转美元'] = self.format_money_str(row.get('外购转美元', '')) + new_row['报价总价美元'] = self.format_money_str(row.get('报价总价美元', '')) + new_row['净合同额美元'] = self.format_money_str(row.get('净合同额美元', '')) + if '报价RMB单价' in new_row: new_row['报价RMB单价'] = self.format_money_str(row.get('报价RMB单价', '')) + + return pd.Series(new_row) + + def prepare_new_data_om(self, csv_df, target_cols): + if csv_df.empty: return pd.DataFrame(columns=target_cols) + + def extract_items(row): + target_raw = str(row.get('合同标的(品名/型号/数量/单价/总价)', '')) + lines = [line.strip() for line in target_raw.split('\n') if line.strip()] + items = [] + if not lines: + items.append(self.parse_single_line_subject("")) + else: + for line in lines: items.append(self.parse_single_line_subject(line)) + return items + + parsed_series = csv_df.apply(extract_items, axis=1) + expanded_data = [] + for idx, row in enumerate(csv_df.itertuples(index=False)): + r_dict = csv_df.iloc[idx].to_dict() + items = parsed_series.iloc[idx] + for item in items: + row_base = { + 'Clean_ID': r_dict.get('Clean_ID', ''), + '原始_合同订单编号': r_dict.get('原始_合同订单编号', ''), + '收款账户': r_dict.get('收款账户', ''), + '签约日期': r_dict.get('签约日期', ''), + '负责人': r_dict.get('负责人', ''), + '客户名称': r_dict.get('客户名称', ''), + '联系人姓名': r_dict.get('联系人姓名', ''), + '合同买方_Raw': r_dict.get('合同买方(名称/联系人/电话/邮箱)', ''), + '合同总额': r_dict.get('合同总额', ''), + '收款状态': r_dict.get('收款状态', ''), + '最新收款日期': r_dict.get('最新收款日期', ''), + '_item_name': item['name'], + '_sort_price': item['sort_price'] + } + expanded_data.append(row_base) + + df_expanded = pd.DataFrame(expanded_data) + if df_expanded.empty: return pd.DataFrame(columns=target_cols) + + df_expanded.sort_values(by=['Clean_ID', '_sort_price'], ascending=[True, False], inplace=True) + df_unique = df_expanded.drop_duplicates(subset=['Clean_ID'], keep='first').copy() + + result = pd.DataFrame(index=df_unique.index) + parts = df_unique['原始_合同订单编号'].str.split(n=1, 
expand=True) + result['合同编号'] = parts[0] + result['内贸合同号'] = parts[1] if parts.shape[1] > 1 else "" + + result['合同总额'] = df_unique['合同总额'].apply(self.format_money_str) + result['收款情况'] = df_unique['收款状态'].fillna('').astype(str).str.strip() + is_received = result['收款情况'].str.contains('已收') + result['已收款'] = "" + result['未收款'] = "" + result.loc[is_received, '已收款'] = result.loc[is_received, '合同总额'] + result.loc[is_received, '未收款'] = "0.00" + + result['签署公司'] = df_unique['收款账户'] + result['签订日期'] = df_unique['签约日期'].apply(self.format_date_str) + result['销售员'] = df_unique['负责人'] + result['最终用户单位'] = df_unique['客户名称'] + + if '最终用户信息\n联系人、电话、邮箱' in target_cols: + result['最终用户信息\n联系人、电话、邮箱'] = df_unique['联系人姓名'] + + buyer_infos = df_unique['合同买方_Raw'].astype(str).apply(self.parse_buyer_info) + result['买方单位'] = [x['name'] for x in buyer_infos] + if '买方信息\n联系人、电话、邮箱' in target_cols: + result['买方信息\n联系人、电话、邮箱'] = [x['contact_full'] for x in buyer_infos] + + result['收款日期'] = df_unique['最新收款日期'].apply(self.format_date_str) + result['合同标的'] = df_unique['_item_name'] + result['_sort_price'] = df_unique['_sort_price'] + + return result[target_cols + ['_sort_price']] + + # ========================================== + # 核心优化:智能防覆盖 + 消除类型警告 + # ========================================== + + def merge_datasets(self, old_dfs, csv_df, is_asd): + col_gen = '厂家' + col_det = '厂家.1' if '厂家.1' in csv_df.columns else '厂家' + + if is_asd: + df_subset = csv_df[csv_df[col_gen].str.contains('ASD', case=False, na=False)] + else: + df_subset = csv_df[~csv_df[col_gen].str.contains('ASD', case=False, na=False)] + + csv_foreign = df_subset[df_subset['合同类型'] == '外贸'].copy() + csv_domestic = df_subset[df_subset['合同类型'] == '内贸'].copy() + csv_om = df_subset[~df_subset['合同类型'].isin(['外贸', '内贸'])].copy() + + result_dfs = {} + + def vectorized_merge(old_df, new_df, unique_col, target_columns): + if new_df.empty: + if old_df is None or old_df.empty: return pd.DataFrame(columns=target_columns + ['_status']) + 
old_df['_status'] = '' + return old_df + + for col in target_columns: + if col not in new_df.columns: new_df[col] = "" + if '_sort_price' not in new_df.columns: new_df['_sort_price'] = 0.0 + + if old_df is None or old_df.empty: + combined = new_df.copy() + combined['_status'] = 'new' + return combined + + old_df = old_df.copy() + if unique_col not in old_df.columns: old_df[unique_col] = "" + if '_status' not in old_df.columns: old_df['_status'] = '' + + # === 消除 FutureWarning 核心 === + # 将旧数据中所有目标列强制转换为 object (字符串/混合),防止 float/int 写入 str 报错 + for col in target_columns: + if col in old_df.columns: + old_df[col] = old_df[col].astype(object) + + # === 总表 (ID 唯一) - 智能字段级更新 === + is_unique_index = (old_df[unique_col].duplicated().sum() == 0) and ( + new_df[unique_col].duplicated().sum() == 0) + + if is_unique_index: + old_df.set_index(unique_col, inplace=True, drop=False) + new_df.set_index(unique_col, inplace=True, drop=False) + + # 1. 纯新增行 + new_ids = new_df.index.difference(old_df.index) + rows_new = new_df.loc[new_ids].copy() + rows_new['_status'] = 'new' + + # 2. 共有行:逐列智能检查 + common_ids = new_df.index.intersection(old_df.index) + if not common_ids.empty: + for col in target_columns: + new_vals = new_df.loc[common_ids, col].astype(str).str.strip() + old_vals = old_df.loc[common_ids, col].fillna("").astype(str).str.strip() + + # 防覆盖核心: + # 1. 新数据非空 + # 2. 
新旧不一致 + valid_new_mask = (new_vals != "") & (new_vals != "nan") & (new_vals != "None") + diff_mask = valid_new_mask & (new_vals != old_vals) + + ids_to_update = diff_mask[diff_mask].index + + if not ids_to_update.empty: + old_df.loc[ids_to_update, col] = new_df.loc[ids_to_update, col] + old_df.loc[ids_to_update, '_status'] = 'modified' + + old_df.loc[common_ids, '_sort_price'] = new_df.loc[common_ids, '_sort_price'] + + old_df.reset_index(drop=True, inplace=True) + rows_new.reset_index(drop=True, inplace=True) + final_df = pd.concat([old_df, rows_new], ignore_index=True) + return final_df + + else: + # === 明细表 (ID 不唯一) - 增量追加 === + new_ids = set(new_df[unique_col]) - set(old_df[unique_col]) + rows_to_add = new_df[new_df[unique_col].isin(new_ids)].copy() + rows_to_add['_status'] = 'new' + final_df = pd.concat([old_df, rows_to_add], ignore_index=True) + return final_df + + target_cols_foreign = self.cols_asd_foreign_general if is_asd else self.cols_nonasd_foreign_general + old_gen = old_dfs.get('外贸', pd.DataFrame(columns=target_cols_foreign)) + new_gen_df = self.prepare_new_data_general(csv_foreign, '外贸', target_cols_foreign, col_gen) + result_dfs['外贸'] = vectorized_merge(old_gen, new_gen_df, '合同编号', target_cols_foreign) + + old_det = old_dfs.get('外贸明细', pd.DataFrame(columns=self.cols_foreign_detail)) + new_det_df = self.prepare_new_data_detail(csv_foreign, '外贸', self.cols_foreign_detail, col_det) + result_dfs['外贸明细'] = vectorized_merge(old_det, new_det_df, '合同编号', self.cols_foreign_detail) + + old_dom_gen = old_dfs.get('内贸', pd.DataFrame(columns=self.cols_domestic_general)) + new_dom_df = self.prepare_new_data_general(csv_domestic, '内贸', self.cols_domestic_general, col_gen) + result_dfs['内贸'] = vectorized_merge(old_dom_gen, new_dom_df, '合同编号', self.cols_domestic_general) + + old_dom_det = old_dfs.get('内贸明细', pd.DataFrame(columns=self.cols_domestic_detail)) + new_dom_det_df = self.prepare_new_data_detail(csv_domestic, '内贸', self.cols_domestic_detail, col_det) + 
result_dfs['内贸明细'] = vectorized_merge(old_dom_det, new_dom_det_df, '合同编号', self.cols_domestic_detail) + + old_om = old_dfs.get('OM合同', pd.DataFrame(columns=self.cols_om)) + new_om_df = self.prepare_new_data_om(csv_om, self.cols_om) + result_dfs['OM合同'] = vectorized_merge(old_om, new_om_df, '合同编号', self.cols_om) + + return result_dfs + + def apply_formatting_to_all(self, data_dict): + for sheet_name, df in data_dict.items(): + if df.empty: continue + for col in self.money_cols: + if col in df.columns: df[col] = df[col].apply(self.format_money_str) + for col in self.percent_cols: + if col in df.columns: df[col] = df[col].apply(self.format_percent_str) + for col in self.date_cols: + if col in df.columns: df[col] = df[col].apply(self.format_date_str) + return data_dict + + +# ========================================== +# 第二部分:GUI 界面 (布局修复 + 逻辑修复) +# ========================================== + +class ContractApp: + def __init__(self, root): + self.root = root + self.root.title("合同数据处理系统 V4.6 (布局与类型警告修复版)") + self.root.geometry("1300x850") + + # 允许窗口调整大小,但最小尺寸有限制 + self.root.minsize(1000, 700) + + self.colors = { + 'bg': '#F0F2F5', + 'panel': '#FFFFFF', + 'primary': '#1890FF', + 'primary_hover': '#40A9FF', + 'success': '#52C41A', + 'success_hover': '#73D13D', + 'text_main': '#262626', + 'text_sub': '#8C8C8C', + 'border': '#D9D9D9', + 'tag_new': '#FFFBE6', + 'tag_mod': '#E6F7FF' + } + + self.setup_styles() + self.processor = DataProcessor() + + self.csv_paths = tk.StringVar() + self.asd_path = tk.StringVar() + self.non_asd_path = tk.StringVar() + self.status_var = tk.StringVar(value="准备就绪") + + self.final_data = {} + self.create_widgets() + + def setup_styles(self): + self.style = ttk.Style() + self.style.theme_use('clam') + + self.style.configure("TFrame", background=self.colors['bg']) + self.style.configure("Panel.TFrame", background=self.colors['panel'], relief="flat") + self.style.configure("TLabel", background=self.colors['panel'], 
foreground=self.colors['text_main'], + font=("Microsoft YaHei UI", 10)) + self.style.configure("Header.TLabel", font=("Microsoft YaHei UI", 18, "bold"), background=self.colors['bg'], + foreground=self.colors['text_main']) + self.style.configure("SubHeader.TLabel", font=("Microsoft YaHei UI", 12, "bold"), + background=self.colors['panel'], foreground=self.colors['text_main']) + self.style.configure("Status.TLabel", background=self.colors['bg'], foreground=self.colors['text_sub'], + font=("Microsoft YaHei UI", 9)) + self.style.configure("TButton", font=("Microsoft YaHei UI", 10), borderwidth=0, padding=8) + self.style.map("TButton", background=[('active', '#E0E0E0')], relief=[('pressed', 'sunken')]) + self.style.configure("Primary.TButton", background=self.colors['primary'], foreground='white') + self.style.map("Primary.TButton", background=[('active', self.colors['primary_hover'])]) + self.style.configure("Success.TButton", background=self.colors['success'], foreground='white') + self.style.map("Success.TButton", background=[('active', self.colors['success_hover'])]) + self.style.configure("TEntry", fieldbackground="white", padding=5) + self.style.configure("Treeview", background="white", foreground=self.colors['text_main'], rowheight=30, + font=("Microsoft YaHei UI", 9), fieldbackground="white") + self.style.configure("Treeview.Heading", font=("Microsoft YaHei UI", 10, "bold"), background="#FAFAFA", + foreground=self.colors['text_main'], relief="flat") + self.style.map("Treeview", background=[('selected', self.colors['primary_hover'])], + foreground=[('selected', 'white')]) + + def create_widgets(self): + # 1. 顶部 Header (Pack Top) + header_frame = ttk.Frame(self.root) + header_frame.pack(side="top", fill="x", padx=25, pady=(25, 10)) + ttk.Label(header_frame, text="🚀 合同数据智能处理系统", style="Header.TLabel").pack(side="left") + + # 2. 底部按钮栏 (Pack Bottom 优先! 
确保永远可见) + bottom_bar = ttk.Frame(self.root, style="Panel.TFrame", padding=15) + bottom_bar.pack(side="bottom", fill="x", padx=25, pady=(0, 25)) + + legend_frame = ttk.Frame(bottom_bar, style="Panel.TFrame") + legend_frame.pack(side="left") + self.create_legend(legend_frame, "● 新增数据", self.colors['tag_new'], "#D48806") + self.create_legend(legend_frame, "● 发生变更", self.colors['tag_mod'], self.colors['primary']) + + ttk.Label(bottom_bar, textvariable=self.status_var, style="Status.TLabel").pack(side="left", padx=20) + ttk.Button(bottom_bar, text="💾 确认无误,保存写入", style="Success.TButton", command=self.save_files).pack( + side="right") + + # 3. 输入面板 (Pack Top, under header) + input_panel = ttk.Frame(self.root, style="Panel.TFrame", padding=25) + input_panel.pack(side="top", fill="x", padx=25, pady=5) + + ttk.Label(input_panel, text="文件配置与导入", style="SubHeader.TLabel").grid(row=0, column=0, columnspan=3, + sticky="w", pady=(0, 20)) + + self.create_file_row(input_panel, "📂 导入 CSV 源文件 (支持多选):", self.csv_paths, 1, is_multiple=True) + self.create_file_row(input_panel, "📘 旧 ASD Excel 文件:", self.asd_path, 2) + self.create_file_row(input_panel, "📗 旧 非ASD Excel 文件:", self.non_asd_path, 3) + + btn_frame = ttk.Frame(input_panel, style="Panel.TFrame") + btn_frame.grid(row=4, column=0, columnspan=3, pady=(20, 0), sticky="e") + + ttk.Button(btn_frame, text="▶ 开始极速处理 (仅预览)", style="Primary.TButton", + command=self.process_files).pack(side="right") + + # 4. 
def browse_file(self, variable, is_multiple=False):
    """Open a file-picker dialog and store the selection in ``variable``.

    Args:
        variable: ``tk.StringVar`` that receives the chosen path(s); it is
            left unchanged when the dialog is cancelled.
        is_multiple: When true, several CSV files may be picked and are
            stored joined with ``"; "``.
    """
    if is_multiple:
        files = filedialog.askopenfilenames(filetypes=[("CSV Files", "*.csv")])
        if files:
            variable.set("; ".join(files))
    else:
        # Tk expects a list of patterns. A single "a;b" string only works
        # with the Windows native dialog and matches nothing elsewhere.
        f = filedialog.askopenfilename(
            filetypes=[("Excel/CSV Files", ("*.csv", "*.xlsx"))])
        if f:
            variable.set(f)
def process_files(self):
    """Load the CSV sources, merge them with the old workbooks and preview.

    Nothing is written to disk here. ``self.final_data`` is replaced only
    after both merges and the formatting pass succeed, so a failure never
    leaves a partial result behind for the save button to write.
    """
    csv_paths_str = self.csv_paths.get()
    if not csv_paths_str:
        messagebox.showerror("提示", "请先选择 CSV 源文件!")
        return

    self.status_var.set("⏳ 正在读取多个数据源...")
    self.root.update()

    try:
        csv_df, headers_or_msg = self.processor.load_multiple_csvs(csv_paths_str)
        if csv_df is None:
            # Do not leave the status bar stuck on "reading...".
            self.status_var.set("❌ 发生错误")
            messagebox.showerror("读取错误", headers_or_msg)
            return

        self.status_var.set("🚀 正在极速合并数据...")
        self.root.update()

        # Build into a local dict and publish it only once complete.
        merged = {}

        asd_old = self.load_excel_safe(self.asd_path.get())
        merged['ASD'] = self.processor.merge_datasets(asd_old, csv_df, True)

        non_old = self.load_excel_safe(self.non_asd_path.get())
        merged['NonASD'] = self.processor.merge_datasets(non_old, csv_df, False)

        merged['ASD'] = self.processor.apply_formatting_to_all(merged['ASD'])
        merged['NonASD'] = self.processor.apply_formatting_to_all(merged['NonASD'])

        self.final_data = merged

        self.refresh_preview()
        self.status_var.set("✅ 预览已生成。确认无误后请点击右下角保存!")
        messagebox.showinfo("完成",
                            "数据预览已生成!\n\n注意:此时尚未写入文件。\n请在下方检查数据,确认无误后点击 [保存] 按钮。")

    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing.
        self.status_var.set("❌ 发生错误")
        traceback.print_exc()
        messagebox.showerror("运行错误", str(e))
def create_treeview(self, parent, df, title, target_cols):
    """Add a tab to ``parent`` showing ``df`` in a scrollable Treeview.

    Args:
        parent: Notebook-like widget; the new frame is added as a tab.
        df: DataFrame to display. Only the columns in ``target_cols`` are
            shown; columns missing from ``df`` are rendered as empty cells.
        title: Tab caption. When it contains '明细', the '合同标的' cell is
            blanked on rows whose '合同编号' equals the previous row's.
        target_cols: Ordered column names used as Treeview columns.

    Rows carrying a non-empty '_status' value are tagged with it ('new' and
    'modified' have colors configured here); other rows get alternating
    'even'/'odd' tags. A double click on a cell opens the quick editor via
    ``self.on_double_click``.
    """
    frame = ttk.Frame(parent)
    parent.add(frame, text=f" {title} ")

    scroll_y = ttk.Scrollbar(frame, orient="vertical")
    scroll_x = ttk.Scrollbar(frame, orient="horizontal")

    tree = ttk.Treeview(frame, columns=target_cols, show='headings',
                        yscrollcommand=scroll_y.set, xscrollcommand=scroll_x.set)

    scroll_y.config(command=tree.yview)
    scroll_x.config(command=tree.xview)

    scroll_y.pack(side="right", fill="y")
    scroll_x.pack(side="bottom", fill="x")
    tree.pack(fill="both", expand=True)

    for col in target_cols:
        # Canonical headers may contain a line break; show them on one line.
        tree.heading(col, text=col.replace('\n', ' '))
        tree.column(col, width=130, anchor="center")

    tree.tag_configure('new', background=self.colors['tag_new'])
    tree.tag_configure('modified', background=self.colors['tag_mod'], foreground=self.colors['primary'])
    tree.tag_configure('odd', background='white')
    tree.tag_configure('even', background='#FAFAFA')

    if not df.empty:
        is_detail = '明细' in title
        df_display = df.fillna("")
        last_contract_id = None

        for count, (_, row) in enumerate(df_display.iterrows()):
            current_id = row.get('合同编号', '')
            values = []
            for col in target_cols:
                val = row.get(col, "")
                # Detail sheets show the contract subject only on the first row of a contract.
                if is_detail and col == '合同标的' and current_id == last_contract_id:
                    val = ""
                values.append(val)

            if is_detail:
                last_contract_id = current_id

            status = row.get('_status', '')
            tags = [status] if status else ['even' if count % 2 == 0 else 'odd']
            tree.insert("", "end", values=values, tags=tags)

    # The event sequence was empty, so the double-click editor was never wired up.
    tree.bind("<Double-1>", lambda event: self.on_double_click(event, tree))
pd.ExcelWriter(target_path, engine='openpyxl') as writer: + valid_sheets = ['外贸', '外贸明细', '内贸', '内贸明细', 'OM合同'] + for sheet_name in valid_sheets: + if sheet_name in sheets: + df = sheets[sheet_name] + if '合同编号' in df.columns: + sort_cols = ['合同编号'] + asc_order = [True] + if '_sort_price' in df.columns: + sort_cols.append('_sort_price') + asc_order.append(False) + df = df.sort_values(by=sort_cols, ascending=asc_order) + + save_df = df.drop(columns=['_status', '_sort_price'], errors='ignore') + + if not save_df.empty: + if '明细' in sheet_name: + mask = save_df.duplicated(subset=['合同编号'], keep='first') + save_df.loc[mask, '合同标的'] = "" + save_df.to_excel(writer, sheet_name=sheet_name, index=False) + + self.status_var.set("✅ 写入成功!") + messagebox.showinfo("成功", f"文件已成功写入!\n位置: {base_dir or '当前目录'}") + except PermissionError: + messagebox.showerror("保存失败", "文件被占用!\n请先关闭 Excel 文件后再点击保存。") + except Exception as e: + messagebox.showerror("保存失败", str(e)) + finally: + if self.status_var.get() != "已取消写入": + self.status_var.set("准备就绪") + + +if __name__ == "__main__": + root = tk.Tk() + try: + from ctypes import windll + + windll.shcore.SetProcessDpiAwareness(1) + except: + pass + app = ContractApp(root) + root.mainloop() \ No newline at end of file diff --git a/页面.py b/页面.py index 100a61f..476c72c 100644 --- a/页面.py +++ b/页面.py @@ -4,6 +4,7 @@ from tkinter import ttk, filedialog, messagebox, simpledialog import os import numpy as np import re +from datetime import datetime # ========================================== @@ -12,58 +13,113 @@ import re class DataProcessor: def __init__(self): - # 1. 总表表头 (保持不变,严格去空格) - self.columns_general = [ - "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", - "销售员", "最终用户单位", "最终用户信息联系人、电话、邮箱", "最终用户所在地", + # ==================== 1. 
外贸总表表头 ==================== + self.cols_asd_foreign_general = [ + "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", "销售员", + "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地", "厂家", "型号/货号", "合同标的", "数量", "单位", "币种", "折扣率", - "合同", "总合同额", "外购", "已收款", "未收款", "收款日期", + "合同额", "总合同额", "外购", "已收款", "未收款", "收款日期", "最晚发货期", "付款方式", "发货港", "目的港", "发货日期", - "买方单位", "买方信息联系人、电话、邮箱", "收货人信息" + "买方单位", "买方信息\n联系人、电话、邮箱", "收货人信息", + "转为美元净合同额", "转为美元总合同额" ] - # 内贸总表表头 - self.columns_domestic_general = [c if c != "外贸合同号" else "内贸合同号" for c in self.columns_general] - - # 2. [关键修改] 明细表表头 (完全按照你的截图顺序和名称定义) - self.columns_detail = [ - "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", - "净合同额美元", "外购", "计算汇率", "外购转美元", "报价总价美元", - "数量", "单位", "外币币种", "外币报价单价", - "报价RMB单价", "报价RMB总价", "售价RMB单价", "售价RMB总价", "折扣率(%)" + self.cols_nonasd_foreign_general = [ + "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", "销售员", + "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地", + "厂家", "型号/货号", "合同标的", "数量", "单位", "币种", "折扣率", + "合同额", "总合同额", "外购", "已收款", "未收款", "收款日期", + "最晚发货期", "付款方式", "发货港", "目的港", "发货日期", + "买方单位", "买方信息\n联系人、电话、邮箱", "收货人信息", + "合同币种/美元", "转为美元净合同额", "转为美元总合同额" ] - # OM合同表头 (保持不变) - self.columns_om = [ - "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", - "销售员", "最终用户单位", "最终用户信息联系人、电话、邮箱", "最终用户所在地", - "买方单位", "买方信息联系人、电话、邮箱", "合同标的", "合同总额", - "已收款", "未收款", "收款日期" + # ==================== 2. 内贸总表表头 ==================== + self.cols_domestic_general = [ + "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", "销售员", + "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地", + "买方单位", "买方信息\n联系人、电话、邮箱", + "厂家", "型号", "合同标的", "数量", "单位", "折扣率(%)", + "合同额", "合同总额", "外购", "付款方式", "最晚发货期", + "已收款", "未收款", "收款日期", + "转为美元净合同额", "转为美元总合同额" ] - # [修改] 定义需要保留两位小数的金额列 (根据新表头更新) - self.money_cols = set([ - "合同", "总合同额", "外购", "已收款", "未收款", - "净合同额美元", "外购转美元", "报价总价美元", - "外币报价单价", "报价RMB单价", "报价RMB总价", - "售价RMB单价", "售价RMB总价", "外购产品金额" + # ==================== 3. 
外贸明细表头 ==================== + self.cols_foreign_detail = [ + "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", "数量", "单位", + "币种", "报价单价", "报价总价", "销售单价", "销售总价", "折扣率", + "外购", "合同币种/美元", "外购转美元", "报价总价美元", "净合同额美元" + ] + + # ==================== 4. 内贸明细表头 ==================== + self.cols_domestic_detail = [ + "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", "数量", "单位", + "外币币种", "外币报价单价", "报价RMB单价", "报价RMB总价", + "售价RMB单价", "售价RMB总价", "折扣率(%)", "外购", + "计算汇率", "外购转美元", "报价总价美元", "净合同额美元" + ] + + # ==================== 5. OM合同表头 ==================== + self.cols_om = [ + "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", "销售员", + "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地", + "买方单位", "买方信息\n联系人、电话、邮箱", "合同标的", + "合同总额", "已收款", "未收款", "收款日期" + ] + + # [逻辑] 只写在“第一行”(单价最高行)的列 + self.header_only_cols = set([ + "总合同额", "合同总额", "外购", "付款方式", "最晚发货期", + "已收款", "未收款", "收款日期", "收款情况", + "转为美元净合同额", "转为美元总合同额" ]) - # [修改] 定义需要百分比展示的列 (根据新表头更新) + # [逻辑] 金额列 (保留两位小数) + self.money_cols = set([ + "合同额", "总合同额", "合同总额", "外购", "已收款", "未收款", + "净合同额美元", "外购转美元", "报价总价美元", + "外币报价单价", "报价RMB单价", "报价RMB总价", + "售价RMB单价", "售价RMB总价", "外购产品金额", + "转为美元净合同额", "转为美元总合同额", "报价单价", "报价总价", "销售单价", "销售总价" + ]) + + # [逻辑] 比率列 (百分比展示) self.percent_cols = set([ "折扣率", "折扣率(%)", "计算汇率", "合同币种/美元" ]) - # 旧表头映射字典 (现在代码标准已更新为Excel标准,这个字典主要用于兼容总表的旧名称) - # 注意:明细表现在不需要映射了,因为 self.columns_detail 已经和 Excel 一样了 + # [新增逻辑] 日期列 (需要去除时分秒) + self.date_cols = set([ + "合同签订日期", "签订日期", "收款日期", "最晚发货期", "发货日期" + ]) + + # [逻辑] 旧表头映射 (用于读取旧Excel时兼容) self.legacy_map = { - "外币币种": "币种", # 仅用于总表可能的兼容 + "外币币种": "币种", "汇率": "计算汇率", - # 如果旧Excel里的总表还在用"折扣率(%)",映射回总表的"折扣率" - "折扣率(%)": "折扣率" + "折扣率(%)": "折扣率", + "折扣率(%)": "折扣率(%)", + "合同": "合同额" } - self.source_cols_processed = [] + # [核心] 构建所有标准列名的快速查找字典 (清洗后的key -> 标准带换行的key) + # 目的:无论Excel里是 "最终用户信息联系人..." 
def format_date_str(self, val):
    """Return ``val`` as a ``YYYY-MM-DD`` string with any time part removed.

    Args:
        val: A date-like value (string, datetime/Timestamp, ...), or a
            missing value.

    Returns:
        ``""`` for missing or blank input; the parsed date formatted as
        ``YYYY-MM-DD`` when pandas can parse it; otherwise the stripped
        text of ``val`` unchanged.
    """
    if pd.isna(val) or str(val).strip() == "":
        return ""

    s_val = str(val).strip()

    # Already a short date string: parsing would return the same text
    # (or fail and fall back to it), so skip the expensive parse.
    if isinstance(val, str) and re.fullmatch(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', s_val):
        return s_val

    # Plain numbers would be read as nanoseconds since the epoch and come
    # out as 1970-01-01; keep their text instead of inventing a date.
    if isinstance(val, (int, float, np.integer, np.floating)):
        return s_val

    try:
        dt = pd.to_datetime(val, errors='coerce')
        if pd.isnull(dt):
            return s_val  # not parseable: keep the original text
        return dt.strftime('%Y-%m-%d')
    except Exception:
        return str(val)
def parse_single_line_subject(self, text):
    """Parse one line of a contract subject into its fields.

    Two layouts are handled:

    * Slash separated, ``name/model/qty[unit]/price/...``: the parts are
      taken positionally; the third part is split into a number and an
      optional unit when it has that shape, otherwise kept whole as qty.
    * Free text: the name is taken after a ``中文品名``/``中文名称``/``名称``/
      ``Name`` label followed by an ASCII or full-width colon, falling back
      to the first line of the text; the last number found in the text
      (thousands separators removed) is used as the price.

    Args:
        text: A single subject line; non-string or blank input is accepted.

    Returns:
        dict with string fields 'name', 'model', 'qty', 'unit', 'price' and
        a float 'sort_price' (0.0 when no price was found).
    """
    res = {'name': '', 'model': '', 'qty': '', 'unit': '', 'price': '', 'sort_price': 0.0}
    if not isinstance(text, str) or not text.strip():
        return res
    text = text.strip()

    if '/' in text:
        # A string containing '/' always splits into at least two parts.
        parts = [p.strip() for p in text.split('/')]
        res['name'] = parts[0]
        res['model'] = parts[1]
        if len(parts) >= 3:
            m_qty = re.match(r'^(\d+(\.\d+)?)\s*([\u4e00-\u9fa5a-zA-Z]+)?$', parts[2])
            if m_qty:
                res['qty'] = m_qty.group(1)
                res['unit'] = m_qty.group(3) if m_qty.group(3) else ""
            else:
                res['qty'] = parts[2]
        if len(parts) >= 4:
            res['price'] = parts[3]
            res['sort_price'] = self.safe_float(parts[3])
        return res

    # Accept both the ASCII colon and the full-width colon (U+FF1A) after the label.
    name_match = re.search(r'(?:中文品名|中文名称|名称|Name)[:\uFF1A]\s*(.*?)(?:\n|$)', text, re.IGNORECASE)
    if name_match:
        res['name'] = name_match.group(1).strip()
    else:
        res['name'] = text.split('\n')[0]

    # Drop ASCII and full-width (U+FF0C) thousands separators before scanning for numbers.
    nums = re.findall(r'\d+(?:\.\d+)?', text.replace(',', '').replace('\uff0c', ''))
    if nums:
        res['sort_price'] = self.safe_float(nums[-1])
        res['price'] = nums[-1]

    return res
'')) else: @@ -220,41 +270,133 @@ class DataProcessor: if buyer_raw == '' or buyer_raw == 'nan': buyer_raw = str(row.get('合同买方(名称/联系人/电话/邮箱)', '')) parsed_buyer = self.parse_buyer_info(buyer_raw) - new_row['买方单位'] = parsed_buyer['name'] - new_row['买方信息联系人、电话、邮箱'] = parsed_buyer['contact_full'] - new_row['收货人信息'] = parsed_buyer['name'] - total_amount = row.get('合同总额', '') - status = str(row.get('收款状态', '')).strip() - new_row['总合同额'] = total_amount - new_row['收款情况'] = status - if '已收' in status: - new_row['已收款'] = total_amount - new_row['未收款'] = 0 + # 解析标的 + target_raw = str(row.get('合同标的(品名/型号/数量/单价/总价)', '')) + lines = [line.strip() for line in target_raw.split('\n') if line.strip()] + + parsed_items = [] + if not lines: + parsed_items.append({'name': '', 'model': '', 'qty': '', 'unit': '', 'price': '', 'sort_price': 0}) else: - new_row['已收款'] = "" - new_row['未收款'] = "" + for line in lines: + parsed_items.append(self.parse_single_line_subject(line)) + # 排序并只取第一行 + parsed_items.sort(key=lambda x: x['sort_price'], reverse=True) + best_item = parsed_items[0] + + new_row = {col: "" for col in target_cols} + + new_row['合同编号'] = base_data['合同编号'] + new_row[contract_no_col] = base_data[contract_no_col] new_row['签署公司'] = row.get('收款账户', '') - new_row['合同签订日期'] = row.get('签约日期', '') + + # 日期 (使用新格式化函数) + date_raw = row.get('签约日期', '') + if '合同签订日期' in new_row: new_row['合同签订日期'] = self.format_date_str(date_raw) + if '签订日期' in new_row: new_row['签订日期'] = self.format_date_str(date_raw) + new_row['销售员'] = row.get('负责人', '') new_row['最终用户单位'] = row.get('客户名称', '') - new_row['最终用户信息联系人、电话、邮箱'] = row.get('联系人姓名', '') + + # 处理带换行符的列名映射 + # 通过遍历 target_cols 找到匹配的列 + for col in target_cols: + if "最终用户信息" in col: new_row[col] = row.get('联系人姓名', '') + if "买方信息" in col: new_row[col] = parsed_buyer['contact_full'] + new_row['厂家'] = row.get(col_factory, '') - new_row['币种'] = row.get('货币(选完产品再改)', '') - new_row['外购'] = row.get('外购产品金额', '') - new_row['收款日期'] = row.get('最新收款日期', '') - 
def generate_general_row_aggregated(self, contract_id, group_df, target_cols, trade_type, is_asd, col_factory):
    """Build one summary-sheet row for a contract spread over several CSV rows.

    Every subject line of every row in ``group_df`` is parsed with
    ``parse_single_line_subject``; the item with the highest ``sort_price``
    (first one on ties) supplies the subject fields. All other fields come
    from ``process_row_general_expanded`` applied to the first row of the
    group.

    Missing subject cells (None/NaN, or the text 'nan' left by string
    conversion) are ignored instead of being parsed as an item named "nan".

    Args:
        contract_id: Group key; not used by this method.
        group_df: CSV rows that belong to the same contract.
        target_cols: Column names of the target summary sheet.
        trade_type: Passed through to ``process_row_general_expanded``.
        is_asd: Not used by this method.
        col_factory: Source column holding the manufacturer; passed through.

    Returns:
        dict for the summary row, including the helper key '_sort_price'.
    """
    subject_col = '合同标的(品名/型号/数量/单价/总价)'

    all_items = []
    for _, row in group_df.iterrows():
        raw = row.get(subject_col, '')
        if not isinstance(raw, str):
            if raw is None or pd.isna(raw):
                continue  # empty cell: nothing to parse
            raw = str(raw)
        for line in raw.split('\n'):
            line = line.strip()
            if line and line.lower() != 'nan':
                all_items.append(self.parse_single_line_subject(line))

    if not all_items:
        all_items.append({'name': '', 'model': '', 'qty': '', 'unit': '', 'price': '', 'sort_price': 0})

    # max() returns the first maximal element, matching a stable descending sort.
    best_item = max(all_items, key=lambda item: item['sort_price'])

    # Reuse the single-row mapping for every non-subject field, then
    # overwrite the subject fields with the best item of the whole group.
    first_row = group_df.iloc[0]
    final_row = self.process_row_general_expanded(first_row, trade_type, target_cols, col_factory)[0]

    final_row['合同标的'] = best_item['name']
    if '型号/货号' in final_row:
        final_row['型号/货号'] = best_item['model']
    if '型号' in final_row:
        final_row['型号'] = best_item['model']
    final_row['数量'] = best_item['qty']
    final_row['单位'] = best_item['unit']
    if '合同额' in final_row:
        final_row['合同额'] = self.format_money_str(best_item['price'])
    final_row['_sort_price'] = best_item['sort_price']

    return final_row
trade_type == '外贸': + new_row['币种'] = row.get('原币种', '') + else: + new_row['外币币种'] = row.get('原币种', '') target_raw = str(row.get('合同标的(品名/型号/数量/单价/总价)', '')) - parsed_target = self.parse_complex_subject(target_raw) - new_row['合同标的'] = parsed_target['name'] + if '/' in target_raw: + new_row['合同标的'] = target_raw.split('/')[0].strip() + else: + new_row['合同标的'] = target_raw.split('\n')[0].strip() csv_qty = str(row.get('数量', '')).strip() if csv_qty and csv_qty.lower() != 'nan': new_row['数量'] = csv_qty - else: - new_row['数量'] = parsed_target['qty'] - new_row['单位'] = parsed_target['unit'] val_product_subtotal = self.safe_float(row.get('产品小计', 0)) - if '外购' in detail_manuf_val: - new_row['外购'] = val_product_subtotal + new_row['外购'] = self.format_money_str(val_product_subtotal) remark = str(row.get('备注', '')).strip() if not remark or remark.lower() == 'nan': - outsourced_detail = str(row.get('外购产品明细', '')).strip() - if outsourced_detail and outsourced_detail.lower() != 'nan': - new_row['产品描述'] = outsourced_detail - else: - new_row['产品描述'] = "" + outsourced = str(row.get('外购产品明细', '')).strip() + new_row['产品描述'] = outsourced if outsourced.lower() != 'nan' else "" else: new_row['产品描述'] = remark else: new_row['外购'] = "" new_row['产品描述'] = row.get('产品名称', '') - # 美元报价 -> 外币报价单价 - new_row['外币报价单价'] = row.get('美元报价', '') - # 产品小计 -> 报价RMB总价 (假设逻辑) - new_row['报价RMB总价'] = row.get('产品小计', '') + if '外币报价单价' in new_row: new_row['外币报价单价'] = self.format_money_str(row.get('美元报价', '')) + if '报价单价' in new_row: new_row['报价单价'] = self.format_money_str(row.get('美元报价', '')) - new_row['计算汇率'] = row.get('汇率', '') - new_row['折扣率(%)'] = row.get('折扣率', '') - new_row['售价RMB单价'] = row.get('销售单价', '') - new_row['售价RMB总价'] = row.get('销售总价', '') - new_row['外购转美元'] = row.get('外购转美元', '') - new_row['报价总价美元'] = row.get('报价总价美元', '') - new_row['净合同额美元'] = row.get('净合同额美元', '') - new_row['报价RMB单价'] = row.get('报价RMB单价', '') # 如果CSV有这一列,如果没有则为空 + if '报价RMB总价' in new_row: new_row['报价RMB总价'] = 
def generate_om_row_aggregated(self, contract_id, group_df, target_cols):
    """Build the single OM-sheet row for a contract spread over several CSV rows.

    The contract subject is the name of the parsed subject line with the
    highest ``sort_price`` across the whole group (first one on ties); all
    other fields are read from the first row of the group.

    Missing subject cells (None/NaN, or the text 'nan' left by string
    conversion) are ignored instead of being parsed as an item named "nan".

    Args:
        contract_id: Group key; not used by this method.
        group_df: CSV rows that belong to the same contract.
        target_cols: Column names of the OM sheet.

    Returns:
        dict for the OM row, including the helper key '_sort_price'.
    """
    subject_col = '合同标的(品名/型号/数量/单价/总价)'

    all_items = []
    for _, row in group_df.iterrows():
        raw = row.get(subject_col, '')
        if not isinstance(raw, str):
            if raw is None or pd.isna(raw):
                continue  # empty cell: nothing to parse
            raw = str(raw)
        for line in raw.split('\n'):
            line = line.strip()
            if line and line.lower() != 'nan':
                all_items.append(self.parse_single_line_subject(line))

    if not all_items:
        all_items.append({'name': '', 'price': '', 'sort_price': 0})
    # max() returns the first maximal element, matching a stable descending sort.
    best_item = max(all_items, key=lambda item: item['sort_price'])

    first_row = group_df.iloc[0]
    new_row = {col: "" for col in target_cols}

    # The order number cell holds "<contract id> <domestic contract no ...>".
    order_no_raw = str(first_row.get('合同订单编号', '')).strip()
    parts_no = order_no_raw.split()
    new_row['合同编号'] = parts_no[0] if len(parts_no) > 0 else order_no_raw
    new_row['内贸合同号'] = " ".join(parts_no[1:]) if len(parts_no) > 1 else ""

    total_amount = self.format_money_str(first_row.get('合同总额', ''))
    status = str(first_row.get('收款状态', '')).strip()
    received = ""
    unreceived = ""
    if '已收' in status:
        # Marked as received: the whole amount counts as received, nothing outstanding.
        received = total_amount
        unreceived = self.format_money_str(0)

    new_row['签署公司'] = first_row.get('收款账户', '')
    new_row['签订日期'] = self.format_date_str(first_row.get('签约日期', ''))
    new_row['销售员'] = first_row.get('负责人', '')
    new_row['最终用户单位'] = first_row.get('客户名称', '')

    contact_col = '最终用户信息\n联系人、电话、邮箱'
    if contact_col in new_row:
        new_row[contact_col] = first_row.get('联系人姓名', '')

    buyer_raw = str(first_row.get('合同买方(名称/联系人/电话/邮箱)', ''))
    parsed_buyer = self.parse_buyer_info(buyer_raw)
    new_row['买方单位'] = parsed_buyer['name']

    buyer_info_col = '买方信息\n联系人、电话、邮箱'
    if buyer_info_col in new_row:
        new_row[buyer_info_col] = parsed_buyer['contact_full']

    new_row['收款日期'] = self.format_date_str(first_row.get('最新收款日期', ''))
    new_row['合同标的'] = best_item['name']
    new_row['_sort_price'] = best_item['sort_price']

    new_row['合同总额'] = total_amount
    new_row['收款情况'] = status
    new_row['已收款'] = received
    new_row['未收款'] = unreceived

    return new_row
combined.at[first_old_idx, col] - # 保护逻辑:新值非空才覆盖 if str(new_val).strip() != "": if self.normalize_for_compare(old_val) != self.normalize_for_compare(new_val): - combined.at[idx, col] = new_val + combined.at[first_old_idx, col] = new_val has_changed = True - if has_changed: - combined.at[idx, '_status'] = 'modified' + if '_sort_price' in new_first_row: + combined.at[first_old_idx, '_sort_price'] = new_first_row['_sort_price'] + if has_changed: + combined.at[first_old_idx, '_status'] = 'modified' else: new_subset_copy = new_subset.copy() new_subset_copy['_status'] = 'new' @@ -422,44 +590,57 @@ class DataProcessor: return combined - # --- 合并执行 --- + # --- 1. 外贸总表 (聚合) --- + new_gen_rows = [] + target_cols_foreign = self.cols_asd_foreign_general if is_asd else self.cols_nonasd_foreign_general if not csv_foreign.empty: - new_gen = csv_foreign.apply(lambda r: self.process_row_general(r, '外贸', col_gen), axis=1) - new_gen = new_gen.drop_duplicates(subset=['合同编号'], keep='first') - else: - new_gen = pd.DataFrame(columns=self.columns_general) - old_gen = old_dfs.get('外贸', old_dfs.get('外贸总表', pd.DataFrame(columns=self.columns_general))) - result_dfs['外贸'] = merge_logic(old_gen, new_gen, '合同编号', self.columns_general) + grouped = csv_foreign.groupby('合同订单编号') + for contract_id, group in grouped: + row_data = self.generate_general_row_aggregated(contract_id, group, target_cols_foreign, '外贸', is_asd, + col_gen) + new_gen_rows.append(row_data) + old_gen = old_dfs.get('外贸', old_dfs.get('外贸总表', pd.DataFrame(columns=target_cols_foreign))) + result_dfs['外贸'] = merge_logic_expanded(old_gen, new_gen_rows, '合同编号', target_cols_foreign) + + # --- 2. 
外贸明细 --- if not csv_foreign.empty: - new_det = csv_foreign.apply(lambda r: self.process_row_detail(r, col_det), axis=1) + new_det = csv_foreign.apply(lambda r: self.process_row_detail(r, col_det, '外贸'), axis=1) else: - new_det = pd.DataFrame(columns=self.columns_detail) - old_det = old_dfs.get('外贸明细', pd.DataFrame(columns=self.columns_detail)) - result_dfs['外贸明细'] = merge_logic(old_det, new_det, '合同编号', self.columns_detail) + new_det = pd.DataFrame(columns=self.cols_foreign_detail) + old_det = old_dfs.get('外贸明细', pd.DataFrame(columns=self.cols_foreign_detail)) + result_dfs['外贸明细'] = merge_logic_expanded(old_det, new_det.to_dict('records'), '合同编号', + self.cols_foreign_detail) + # --- 3. 内贸总表 (聚合) --- + new_dom_rows = [] if not csv_domestic.empty: - new_dom_gen = csv_domestic.apply(lambda r: self.process_row_general(r, '内贸', col_gen), axis=1) - new_dom_gen = new_dom_gen.drop_duplicates(subset=['合同编号'], keep='first') - else: - new_dom_gen = pd.DataFrame(columns=self.columns_domestic_general) - old_dom_gen = old_dfs.get('内贸', old_dfs.get('内贸总表', pd.DataFrame(columns=self.columns_domestic_general))) - result_dfs['内贸'] = merge_logic(old_dom_gen, new_dom_gen, '合同编号', self.columns_domestic_general) + grouped = csv_domestic.groupby('合同订单编号') + for contract_id, group in grouped: + row_data = self.generate_general_row_aggregated(contract_id, group, self.cols_domestic_general, '内贸', + is_asd, col_gen) + new_dom_rows.append(row_data) + old_dom_gen = old_dfs.get('内贸', old_dfs.get('内贸总表', pd.DataFrame(columns=self.cols_domestic_general))) + result_dfs['内贸'] = merge_logic_expanded(old_dom_gen, new_dom_rows, '合同编号', self.cols_domestic_general) + # --- 4. 
内贸明细 --- if not csv_domestic.empty: - new_dom_det = csv_domestic.apply(lambda r: self.process_row_detail(r, col_det), axis=1) + new_dom_det = csv_domestic.apply(lambda r: self.process_row_detail(r, col_det, '内贸'), axis=1) else: - new_dom_det = pd.DataFrame(columns=self.columns_detail) - old_dom_det = old_dfs.get('内贸明细', pd.DataFrame(columns=self.columns_detail)) - result_dfs['内贸明细'] = merge_logic(old_dom_det, new_dom_det, '合同编号', self.columns_detail) + new_dom_det = pd.DataFrame(columns=self.cols_domestic_detail) + old_dom_det = old_dfs.get('内贸明细', pd.DataFrame(columns=self.cols_domestic_detail)) + result_dfs['内贸明细'] = merge_logic_expanded(old_dom_det, new_dom_det.to_dict('records'), '合同编号', + self.cols_domestic_detail) + # --- 5. OM (聚合) --- + new_om_rows = [] if not csv_om.empty: - new_om = csv_om.apply(lambda r: self.process_row_om(r), axis=1) - new_om = new_om.drop_duplicates(subset=['合同编号'], keep='first') - else: - new_om = pd.DataFrame(columns=self.columns_om) - old_om = old_dfs.get('OM合同', old_dfs.get('其他', pd.DataFrame(columns=self.columns_om))) - result_dfs['OM合同'] = merge_logic(old_om, new_om, '合同编号', self.columns_om) + grouped = csv_om.groupby('合同订单编号') + for contract_id, group in grouped: + row_data = self.generate_om_row_aggregated(contract_id, group, self.cols_om) + new_om_rows.append(row_data) + old_om = old_dfs.get('OM合同', old_dfs.get('其他', pd.DataFrame(columns=self.cols_om))) + result_dfs['OM合同'] = merge_logic_expanded(old_om, new_om_rows, '合同编号', self.cols_om) return result_dfs @@ -472,6 +653,9 @@ class DataProcessor: for col in self.percent_cols: if col in df.columns: df[col] = df[col].apply(self.format_percent_str) + for col in self.date_cols: + if col in df.columns: + df[col] = df[col].apply(self.format_date_str) return data_dict @@ -482,7 +666,7 @@ class DataProcessor: class ContractApp: def __init__(self, root): self.root = root - self.root.title("合同数据处理系统 V3.2 (表头修正版)") + self.root.title("合同数据处理系统 V3.8 (换行符修复版)") 
self.root.geometry("1300x850") self.style = ttk.Style() @@ -573,15 +757,25 @@ class ContractApp: dfs = pd.read_excel(path, sheet_name=None) clean_dfs = {} for k, v in dfs.items(): - v.columns = v.columns.astype(str).str.replace(r'\s+', '', regex=True) + # [关键修复] 智能表头匹配:重命名表头为标准格式 + new_columns = [] + for col in v.columns: + clean_col = self.processor.clean_header_key(str(col)) + # 尝试在标准映射里找 + if clean_col in self.processor.standard_col_map: + new_columns.append(self.processor.standard_col_map[clean_col]) + # 尝试在旧映射里找 + elif col in self.processor.legacy_map: + new_columns.append(self.processor.legacy_map[col]) + else: + new_columns.append(col) # 找不到就保留原样 + v.columns = new_columns - # 总表仍可能需要 legacy_map,但明细表不需要了因为我们已经在代码里统一了列名 - v.rename(columns=self.processor.legacy_map, inplace=True) + # 去重 v = v.loc[:, ~v.columns.duplicated()] if '合同编号' in v.columns: v['合同编号'] = v['合同编号'].astype(str) - clean_dfs[k.strip()] = v return clean_dfs except Exception as e: @@ -632,20 +826,31 @@ class ContractApp: if not df.empty: if '合同编号' in df.columns: df['合同编号'] = df['合同编号'].astype(str) - df = df.sort_values(by='合同编号', ascending=True) + + sort_cols = ['合同编号'] + asc_order = [True] + if '_sort_price' in df.columns: + sort_cols.append('_sort_price') + asc_order.append(False) + df = df.sort_values(by=sort_cols, ascending=asc_order) + if '明细' in sheet_name: mask = df.duplicated(subset=['合同编号'], keep='first') df.loc[mask, '合同标的'] = "" standard_cols = [] + is_asd = (file_type == 'ASD') + if sheet_name == '外贸': - standard_cols = self.processor.columns_general + standard_cols = self.processor.cols_asd_foreign_general if is_asd else self.processor.cols_nonasd_foreign_general elif sheet_name == '内贸': - standard_cols = self.processor.columns_domestic_general + standard_cols = self.processor.cols_domestic_general elif sheet_name == 'OM合同': - standard_cols = self.processor.columns_om - elif '明细' in sheet_name: - standard_cols = self.processor.columns_detail + standard_cols = self.processor.cols_om 
+ elif sheet_name == '外贸明细': + standard_cols = self.processor.cols_foreign_detail + elif sheet_name == '内贸明细': + standard_cols = self.processor.cols_domestic_detail self.create_treeview(inner_notebook, df, sheet_name, standard_cols) @@ -656,6 +861,7 @@ class ContractApp: scroll_y = ttk.Scrollbar(frame, orient="vertical") scroll_x = ttk.Scrollbar(frame, orient="horizontal") + # 仅显示标准列 display_cols = target_cols tree = ttk.Treeview(frame, columns=display_cols, show='headings', @@ -668,7 +874,9 @@ class ContractApp: tree.pack(fill="both", expand=True) for col in display_cols: - tree.heading(col, text=col) + # 清洗显示名称(换行变空格,防止表头太高) + clean_header = col.replace('\n', ' ') + tree.heading(col, text=clean_header) tree.column(col, width=120, anchor="center") tree.tag_configure('new', background='#FFFFCC') @@ -728,14 +936,21 @@ class ContractApp: for sheet_name in valid_sheets: if sheet_name in sheets: df = sheets[sheet_name] - save_df = df.drop(columns=['_status'], errors='ignore') + + if '合同编号' in df.columns: + sort_cols = ['合同编号'] + asc_order = [True] + if '_sort_price' in df.columns: + sort_cols.append('_sort_price') + asc_order.append(False) + df = df.sort_values(by=sort_cols, ascending=asc_order) + + save_df = df.drop(columns=['_status', '_sort_price'], errors='ignore') + if not save_df.empty: - if '合同编号' in save_df.columns: - save_df['合同编号'] = save_df['合同编号'].astype(str) - save_df = save_df.sort_values(by='合同编号', ascending=True) - if '明细' in sheet_name: - mask = save_df.duplicated(subset=['合同编号'], keep='first') - save_df.loc[mask, '合同标的'] = "" + if '明细' in sheet_name: + mask = save_df.duplicated(subset=['合同编号'], keep='first') + save_df.loc[mask, '合同标的'] = "" save_df.to_excel(writer, sheet_name=sheet_name, index=False) messagebox.showinfo("成功", f"文件保存成功!\n位置: {base_dir or '当前目录'}") except PermissionError: