基本满足双边合并功能

2026-01-21 15:24:12 +08:00
parent 77b12f4d58
commit edb1dc9cfe
2 changed files with 1521 additions and 252 deletions
--- a/提速版.py
+++ b/提速版.py
--- a/页面.py
+++ b/页面.py
@ -4,6 +4,7 @@ from tkinter import ttk, filedialog, messagebox, simpledialog
 import os
 import numpy as np
 import re
 from datetime import datetime
 # ==========================================
@ -12,58 +13,113 @@ import re
 class DataProcessor:
    def __init__(self):
-        # 1. 总表表头 (保持不变，严格去空格)
+        # ==================== 1. 外贸总表表头 ====================
-        self.columns_general = [
+        self.cols_asd_foreign_general = [
-            "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期",
+            "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", "销售员",
-            "销售员", "最终用户单位", "最终用户信息联系人、电话、邮箱", "最终用户所在地",
+            "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地",
            "厂家", "型号/货号", "合同标的", "数量", "单位", "币种", "折扣率",
-            "合同", "总合同额", "外购", "已收款", "未收款", "收款日期",
+            "合同额", "总合同额", "外购", "已收款", "未收款", "收款日期",
            "最晚发货期", "付款方式", "发货港", "目的港", "发货日期",
-            "买方单位", "买方信息联系人、电话、邮箱", "收货人信息"
+            "买方单位", "买方信息\n联系人、电话、邮箱", "收货人信息",
            "转为美元净合同额", "转为美元总合同额"
        ]
-        # 内贸总表表头
+        self.cols_nonasd_foreign_general = [
-        self.columns_domestic_general = [c if c != "外贸合同号" else "内贸合同号" for c in self.columns_general]
+            "合同编号", "签署公司", "外贸合同号", "收款情况", "合同签订日期", "销售员",
-
+            "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地",
-        # 2. [关键修改] 明细表表头 (完全按照你的截图顺序和名称定义)
+            "厂家", "型号/货号", "合同标的", "数量", "单位", "币种", "折扣率",
-        self.columns_detail = [
+            "合同额", "总合同额", "外购", "已收款", "未收款", "收款日期",
-            "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述",
+            "最晚发货期", "付款方式", "发货港", "目的港", "发货日期",
-            "净合同额美元", "外购", "计算汇率", "外购转美元", "报价总价美元",
+            "买方单位", "买方信息\n联系人、电话、邮箱", "收货人信息",
-            "数量", "单位", "外币币种", "外币报价单价",
+            "合同币种/美元", "转为美元净合同额", "转为美元总合同额"
            "报价RMB单价", "报价RMB总价", "售价RMB单价", "售价RMB总价", "折扣率（％）"
        ]
-        # OM合同表头 (保持不变)
+        # ==================== 2. 内贸总表表头 ====================
-        self.columns_om = [
+        self.cols_domestic_general = [
-            "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期",
+            "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", "销售员",
-            "销售员", "最终用户单位", "最终用户信息联系人、电话、邮箱", "最终用户所在地",
+            "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地",
-            "买方单位", "买方信息联系人、电话、邮箱", "合同标的", "合同总额",
+            "买方单位", "买方信息\n联系人、电话、邮箱",
-            "已收款", "未收款", "收款日期"
+            "厂家", "型号", "合同标的", "数量", "单位", "折扣率（％）",
            "合同额", "合同总额", "外购", "付款方式", "最晚发货期",
            "已收款", "未收款", "收款日期",
            "转为美元净合同额", "转为美元总合同额"
        ]
-        # [修改] 定义需要保留两位小数的金额列 (根据新表头更新)
+        # ==================== 3. 外贸明细表头 ====================
-        self.money_cols = set([
+        self.cols_foreign_detail = [
-            "合同", "总合同额", "外购", "已收款", "未收款",
+            "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", "数量", "单位",
-            "净合同额美元", "外购转美元", "报价总价美元",
+            "币种", "报价单价", "报价总价", "销售单价", "销售总价", "折扣率",
-            "外币报价单价", "报价RMB单价", "报价RMB总价",
+            "外购", "合同币种/美元", "外购转美元", "报价总价美元", "净合同额美元"
-            "售价RMB单价", "售价RMB总价", "外购产品金额"
+        ]
        # ==================== 4. 内贸明细表头 ====================
        self.cols_domestic_detail = [
            "合同编号", "销售员", "合同标的", "厂家", "货号", "产品描述", "数量", "单位",
            "外币币种", "外币报价单价", "报价RMB单价", "报价RMB总价",
            "售价RMB单价", "售价RMB总价", "折扣率（％）", "外购",
            "计算汇率", "外购转美元", "报价总价美元", "净合同额美元"
        ]
        # ==================== 5. OM合同表头 ====================
        self.cols_om = [
            "合同编号", "签署公司", "内贸合同号", "收款情况", "签订日期", "销售员",
            "最终用户单位", "最终用户信息\n联系人、电话、邮箱", "最终用户所在地",
            "买方单位", "买方信息\n联系人、电话、邮箱", "合同标的",
            "合同总额", "已收款", "未收款", "收款日期"
        ]
        # [逻辑] 只写在“第一行”（单价最高行）的列
        self.header_only_cols = set([
            "总合同额", "合同总额", "外购", "付款方式", "最晚发货期",
            "已收款", "未收款", "收款日期", "收款情况",
            "转为美元净合同额", "转为美元总合同额"
        ])
-        # [修改] 定义需要百分比展示的列 (根据新表头更新)
+        # [逻辑] 金额列 (保留两位小数)
        self.money_cols = set([
            "合同额", "总合同额", "合同总额", "外购", "已收款", "未收款",
            "净合同额美元", "外购转美元", "报价总价美元",
            "外币报价单价", "报价RMB单价", "报价RMB总价",
            "售价RMB单价", "售价RMB总价", "外购产品金额",
            "转为美元净合同额", "转为美元总合同额", "报价单价", "报价总价", "销售单价", "销售总价"
        ])
        # [逻辑] 比率列 (百分比展示)
        self.percent_cols = set([
            "折扣率", "折扣率（％）", "计算汇率", "合同币种/美元"
        ])
-        # 旧表头映射字典 (现在代码标准已更新为Excel标准，这个字典主要用于兼容总表的旧名称)
+        # [新增逻辑] 日期列 (需要去除时分秒)
-        # 注意：明细表现在不需要映射了，因为 self.columns_detail 已经和 Excel 一样了
+        self.date_cols = set([
            "合同签订日期", "签订日期", "收款日期", "最晚发货期", "发货日期"
        ])
        # [逻辑] 旧表头映射 (用于读取旧Excel时兼容)
        self.legacy_map = {
-            "外币币种": "币种",  # 仅用于总表可能的兼容
+            "外币币种": "币种",
            "汇率": "计算汇率",
-            # 如果旧Excel里的总表还在用"折扣率(%)"，映射回总表的"折扣率"
+            "折扣率(%)": "折扣率",
-            "折扣率(%)": "折扣率"
+            "折扣率（%）": "折扣率（％）",
            "合同": "合同额"
        }
-        self.source_cols_processed = []
+        # [核心] 构建所有标准列名的快速查找字典 (清洗后的key -> 标准带换行的key)
        # 目的：无论Excel里是 "最终用户信息联系人..." 还是 "最终用户信息\n联系人...", 都能映射回标准
        self.standard_col_map = {}
        all_lists = [
            self.cols_asd_foreign_general, self.cols_nonasd_foreign_general,
            self.cols_domestic_general, self.cols_foreign_detail,
            self.cols_domestic_detail, self.cols_om
        ]
        for lst in all_lists:
            for col in lst:
                clean_key = self.clean_header_key(col)
                self.standard_col_map[clean_key] = col
    def clean_header_key(self, text):
        """Turn a header label into a whitespace-free lookup key.

        Every run of whitespace (spaces, tabs, line breaks) is removed, so a
        header written with or without an embedded newline produces the same
        key. Non-string input is not cleaned; it is returned as ``str(text)``.
        """
        if isinstance(text, str):
            whitespace_run = r'[\s\n\r]+'
            return re.sub(whitespace_run, '', text)
        return str(text)
    def safe_float(self, val):
        try:
@ -93,6 +149,20 @@ class DataProcessor:
        except:
            return str(val)
    def format_date_str(self, val):
        """Format a date-like value as ``YYYY-MM-DD``, dropping any time part.

        Args:
            val: The value to format (string, timestamp, ``None``/NaN, ...).

        Returns:
            ``""`` when ``val`` is missing or blank; the ``YYYY-MM-DD`` text
            when pandas can parse it; the stripped original text when parsing
            yields ``NaT``; ``str(val)`` if parsing or formatting raises.
        """
        if pd.isna(val) or str(val).strip() == "":
            return ""
        try:
            # errors='coerce' turns unparseable input into NaT instead of raising.
            dt = pd.to_datetime(val, errors='coerce')
            if pd.isnull(dt):
                # Not a date: keep the text as entered (minus outer whitespace).
                return str(val).strip()
            return dt.strftime('%Y-%m-%d')
        except Exception:
            # Best-effort fallback. Deliberately not a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            return str(val)
    def normalize_for_compare(self, val):
        if pd.isna(val) or val is None: return ""
        s_val = str(val).strip()
@ -127,64 +197,6 @@ class DataProcessor:
        return df, (col_factory_general, col_factory_detail)
    def parse_complex_subject(self, text):
        """Best-effort extraction of item fields from a free-text contract subject.

        Args:
            text: The raw subject text. It may use labelled lines
                (``名称：…``, ``型号：…``, ``品牌：…``) or a slash-separated form.

        Returns:
            A dict with the string fields ``name``, ``model``, ``qty``,
            ``unit`` and ``price``. Fields that cannot be determined stay
            ``''``. Non-string or blank input yields the all-empty dict.
        """
        res = {'name': '', 'model': '', 'qty': '', 'unit': '', 'price': ''}
        if not isinstance(text, str) or not text.strip():
            return res
        text = text.strip()

        # Labelled name: the first pattern that produces a non-empty value wins.
        name_patterns = [r'(?:中文品名|中文名称|名称|Name)[:：]\s*(.*?)(?:\n|$)', r'(?:英文名称)[:：]\s*(.*?)(?:\n|$)']
        for p in name_patterns:
            m = re.search(p, text, re.IGNORECASE)
            if m and not res['name']:
                res['name'] = m.group(1).strip()

        # Labelled model.
        model_patterns = [r'(?:型号|Model)[:：]\s*(.*?)(?:\n|$)']
        for p in model_patterns:
            m = re.search(p, text, re.IGNORECASE)
            if m:
                res['model'] = m.group(1).strip()

        # A labelled brand is prefixed to the model (or stands in for it).
        brand_match = re.search(r'(?:品牌|Brand)[:：]\s*(.*?)(?:\n|$)', text, re.IGNORECASE)
        if brand_match:
            brand_str = brand_match.group(1).strip()
            if res['model']:
                res['model'] = f"{brand_str} {res['model']}"
            else:
                res['model'] = brand_str

        # No labelled name: use the text before the first slash.
        if not res['name'] and '/' in text:
            parts = text.split('/')
            if len(parts) > 0:
                res['name'] = parts[0].strip()

        # Quantity: a number enclosed in slashes, otherwise "<number><unit>"
        # or "<unit><number>".
        if not res['qty']:
            qty_slash = re.search(r'/(\d+(\.\d+)?)/', text)
            if qty_slash:
                res['qty'] = qty_slash.group(1)
            else:
                qty_unit_match = re.search(r'(\d+)\s*([台个套件支箱组setpc]+)|([setpc]+)\s*(\d+)', text, re.IGNORECASE)
                if qty_unit_match:
                    if qty_unit_match.group(1):
                        res['qty'] = qty_unit_match.group(1)
                        res['unit'] = qty_unit_match.group(2)
                    else:
                        res['qty'] = qty_unit_match.group(4)
                        res['unit'] = qty_unit_match.group(3)

        # Price: the last number in the text (thousands separators removed),
        # unless that number is the quantity itself.
        nums = re.findall(r'\d+(?:\.\d+)?', text.replace(',', '').replace('，', ''))
        if nums:
            candidate = nums[-1]
            if candidate != res['qty']:
                res['price'] = candidate

        # Nothing recognised as name or model: fall back to purely positional
        # slash fields (name/model/qty/price), unstripped.
        if not res['name'] and not res['model'] and '/' in text:
            parts = text.split('/')
            if len(parts) >= 1: res['name'] = parts[0]
            if len(parts) >= 2: res['model'] = parts[1]
            if len(parts) >= 3: res['qty'] = parts[2]
            if len(parts) >= 4: res['price'] = parts[3]
        return res
    def parse_buyer_info(self, text):
        info = {'name': '', 'contact_full': ''}
        if not isinstance(text, str) or not text.strip(): return info
@ -194,25 +206,63 @@ class DataProcessor:
        info['contact_full'] = " ".join(lines[1:])
        return info
-    def process_row_general(self, row, trade_type, col_factory):
+    def parse_single_line_subject(self, text):
-        target_cols = self.columns_general if trade_type == '外贸' else self.columns_domestic_general
+        res = {'name': '', 'model': '', 'qty': '', 'unit': '', 'price': '', 'sort_price': 0.0}
-        new_row = {col: "" for col in target_cols}
+        if not isinstance(text, str) or not text.strip(): return res
        text = text.strip()
        if '/' in text:
            parts = [p.strip() for p in text.split('/')]
            if len(parts) >= 1: res['name'] = parts[0]
            if len(parts) >= 2: res['model'] = parts[1]
            if len(parts) >= 3:
                m_qty = re.match(r'^(\d+(\.\d+)?)\s*([\u4e00-\u9fa5a-zA-Z]+)?$', parts[2])
                if m_qty:
                    res['qty'] = m_qty.group(1)
                    res['unit'] = m_qty.group(3) if m_qty.group(3) else ""
                else:
                    res['qty'] = parts[2]
            if len(parts) >= 4:
                res['price'] = parts[3]
                res['sort_price'] = self.safe_float(parts[3])
            return res
        name_match = re.search(r'(?:中文品名|中文名称|名称|Name)[:：]\s*(.*?)(?:\n|$)', text, re.IGNORECASE)
        if name_match:
            res['name'] = name_match.group(1).strip()
        else:
            res['name'] = text.split('\n')[0]
        nums = re.findall(r'\d+(?:\.\d+)?', text.replace(',', ''))
        if nums:
            res['sort_price'] = self.safe_float(nums[-1])
            res['price'] = nums[-1]
        return res
    # === [核心] 总表处理逻辑 ===
    def process_row_general_expanded(self, row, trade_type, trade_cols, col_factory):
        # 使用传入的 trade_cols (已是根据ASD/NonASD选择好的标准表头)
        target_cols = trade_cols
        base_data = {}
        order_no_raw = str(row.get('合同订单编号', '')).strip()
        parts_no = order_no_raw.split()
-        new_row['合同编号'] = parts_no[0] if len(parts_no) > 0 else order_no_raw
+        base_data['合同编号'] = parts_no[0] if len(parts_no) > 0 else order_no_raw
        contract_no_col = '外贸合同号' if trade_type == '外贸' else '内贸合同号'
-        new_row[contract_no_col] = " ".join(parts_no[1:]) if len(parts_no) > 1 else ""
+        base_data[contract_no_col] = " ".join(parts_no[1:]) if len(parts_no) > 1 else ""
-        target_raw = str(row.get('合同标的（品名/型号/数量/单价/总价）', ''))
+        # 财务数据
-        parsed_target = self.parse_complex_subject(target_raw)
+        total_amount = self.format_money_str(row.get('合同总额', ''))
-
+        status = str(row.get('收款状态', '')).strip()
-        new_row['合同标的'] = parsed_target['name']
+        received = ""
-        new_row['型号/货号'] = parsed_target['model']
+        unreceived = ""
-        new_row['数量'] = parsed_target['qty']
+        if '已收' in status:
-        new_row['单位'] = parsed_target['unit']
+            received = total_amount
-        new_row['合同'] = parsed_target['price']
+            unreceived = self.format_money_str(0)
        # 买方信息
        if trade_type == '内贸':
            buyer_raw = str(row.get('合同买方（名称/联系人/电话/邮箱）', ''))
        else:
@ -220,41 +270,133 @@ class DataProcessor:
            if buyer_raw == '' or buyer_raw == 'nan':
                buyer_raw = str(row.get('合同买方（名称/联系人/电话/邮箱）', ''))
        parsed_buyer = self.parse_buyer_info(buyer_raw)
        new_row['买方单位'] = parsed_buyer['name']
        new_row['买方信息联系人、电话、邮箱'] = parsed_buyer['contact_full']
        new_row['收货人信息'] = parsed_buyer['name']
-        total_amount = row.get('合同总额', '')
+        # 解析标的
-        status = str(row.get('收款状态', '')).strip()
+        target_raw = str(row.get('合同标的（品名/型号/数量/单价/总价）', ''))
-        new_row['总合同额'] = total_amount
+        lines = [line.strip() for line in target_raw.split('\n') if line.strip()]
-        new_row['收款情况'] = status
+
-        if '已收' in status:
+        parsed_items = []
-            new_row['已收款'] = total_amount
+        if not lines:
-            new_row['未收款'] = 0
+            parsed_items.append({'name': '', 'model': '', 'qty': '', 'unit': '', 'price': '', 'sort_price': 0})
        else:
-            new_row['已收款'] = ""
+            for line in lines:
-            new_row['未收款'] = ""
+                parsed_items.append(self.parse_single_line_subject(line))
        # 排序并只取第一行
        parsed_items.sort(key=lambda x: x['sort_price'], reverse=True)
        best_item = parsed_items[0]
        new_row = {col: "" for col in target_cols}
        new_row['合同编号'] = base_data['合同编号']
        new_row[contract_no_col] = base_data[contract_no_col]
        new_row['签署公司'] = row.get('收款账户', '')
-        new_row['合同签订日期'] = row.get('签约日期', '')
+
        # 日期 (使用新格式化函数)
        date_raw = row.get('签约日期', '')
        if '合同签订日期' in new_row: new_row['合同签订日期'] = self.format_date_str(date_raw)
        if '签订日期' in new_row: new_row['签订日期'] = self.format_date_str(date_raw)
        new_row['销售员'] = row.get('负责人', '')
        new_row['最终用户单位'] = row.get('客户名称', '')
-        new_row['最终用户信息联系人、电话、邮箱'] = row.get('联系人姓名', '')
+
        # 处理带换行符的列名映射
        # 通过遍历 target_cols 找到匹配的列
        for col in target_cols:
            if "最终用户信息" in col: new_row[col] = row.get('联系人姓名', '')
            if "买方信息" in col: new_row[col] = parsed_buyer['contact_full']
        new_row['厂家'] = row.get(col_factory, '')
        new_row['币种'] = row.get('货币(选完产品再改)', '')
        new_row['外购'] = row.get('外购产品金额', '')
        new_row['收款日期'] = row.get('最新收款日期', '')
        new_row['最晚发货期'] = row.get('最晚发货期', '')
        new_row['付款方式'] = row.get('付款比例及期限', '')
        new_row['发货港'] = row.get('发货地', '')
        new_row['目的港'] = row.get('目的港', '')
        new_row['折扣率'] = row.get('折扣率', '')
-        return pd.Series(new_row)
+        if '币种' in new_row: new_row['币种'] = row.get('货币(选完产品再改)', '')
        if '发货港' in new_row: new_row['发货港'] = row.get('发货地', '')
        if '目的港' in new_row: new_row['目的港'] = row.get('目的港', '')
-    # [关键修改] 明细表处理逻辑更新，匹配新表头
+        new_row['买方单位'] = parsed_buyer['name']
-    def process_row_detail(self, row, col_factory):
+        if '收货人信息' in new_row: new_row['收货人信息'] = parsed_buyer['name']
-        new_row = {col: "" for col in self.columns_detail}
+
        discount_col = '折扣率' if '折扣率' in new_row else '折扣率（％）'
        if discount_col in new_row: new_row[discount_col] = self.format_percent_str(row.get('折扣率', ''))
        new_row['合同标的'] = best_item['name']
        if '型号/货号' in new_row: new_row['型号/货号'] = best_item['model']
        if '型号' in new_row: new_row['型号'] = best_item['model']
        new_row['数量'] = best_item['qty']
        new_row['单位'] = best_item['unit']
        # 合同额 (单行价格)
        if '合同额' in new_row: new_row['合同额'] = self.format_money_str(best_item['price'])
        # 财务总额 (整单)
        total_col_name = '总合同额' if '总合同额' in new_row else '合同总额'
        new_row[total_col_name] = total_amount
        new_row['收款情况'] = status
        new_row['外购'] = self.format_money_str(row.get('外购产品金额', ''))
        new_row['已收款'] = received
        new_row['未收款'] = unreceived
        new_row['收款日期'] = self.format_date_str(row.get('最新收款日期', ''))
        if '最晚发货期' in new_row: new_row['最晚发货期'] = self.format_date_str(row.get('最晚发货期', ''))
        if '付款方式' in new_row: new_row['付款方式'] = row.get('付款比例及期限', '')
        if '发货日期' in new_row: new_row['发货日期'] = ""  # 初始为空
        if '合同币种/美元' in new_row:
            new_row['合同币种/美元'] = row.get('合同币种/美元', '')
        new_row['_sort_price'] = best_item['sort_price']
        return [new_row]
    # === [核心] 通用总表聚合行生成逻辑 (用于处理多行CSV聚合) ===
    def generate_general_row_aggregated(self, contract_id, group_df, target_cols, trade_type, is_asd, col_factory):
        """Build one general-sheet row for a contract that spans several CSV rows.

        Every non-blank subject line of every row in ``group_df`` is parsed
        with ``parse_single_line_subject``; the item with the highest
        ``sort_price`` supplies the subject fields. All remaining fields come
        from ``process_row_general_expanded`` applied to the first row of the
        group. ``contract_id`` and ``is_asd`` are accepted but not read here.

        Returns:
            The row dict produced by ``process_row_general_expanded`` with its
            subject fields and ``_sort_price`` overwritten by the best item.
        """
        head_record = group_df.iloc[0]
        subject_key = '合同标的（品名/型号/数量/单价/总价）'

        # Collect the parsed subject lines of the whole group.
        candidates = []
        for _, record in group_df.iterrows():
            raw_subject = str(record.get(subject_key, ''))
            for piece in raw_subject.split('\n'):
                piece = piece.strip()
                if piece:
                    candidates.append(self.parse_single_line_subject(piece))
        if not candidates:
            candidates = [{'name': '', 'model': '', 'qty': '', 'unit': '', 'price': '', 'sort_price': 0}]

        # Stable descending sort: among equal prices the earliest line wins.
        top_item = sorted(candidates, key=lambda item: item['sort_price'], reverse=True)[0]

        # Skeleton row from the first record; its subject fields are then
        # replaced by the group-wide best item.
        result = self.process_row_general_expanded(head_record, trade_type, target_cols, col_factory)[0]
        result['合同标的'] = top_item['name']
        for model_key in ('型号/货号', '型号'):
            if model_key in result:
                result[model_key] = top_item['model']
        result['数量'] = top_item['qty']
        result['单位'] = top_item['unit']
        if '合同额' in result:
            result['合同额'] = self.format_money_str(top_item['price'])
        result['_sort_price'] = top_item['sort_price']
        return result
    # === 明细表处理逻辑 ===
    def process_row_detail(self, row, col_factory, trade_type):
        if trade_type == '外贸':
            target_cols = self.cols_foreign_detail
        else:
            target_cols = self.cols_domestic_detail
        new_row = {col: "" for col in target_cols}
        detail_manuf_val = str(row.get(col_factory, ''))
        order_no_raw = str(row.get('合同订单编号', '')).strip()
@ -264,87 +406,115 @@ class DataProcessor:
        new_row['厂家'] = detail_manuf_val
        new_row['货号'] = row.get('产品编码', '')
-        # 币种 -> 外币币种
+        if trade_type == '外贸':
            new_row['币种'] = row.get('原币种', '')
        else:
            new_row['外币币种'] = row.get('原币种', '')
        target_raw = str(row.get('合同标的（品名/型号/数量/单价/总价）', ''))
-        parsed_target = self.parse_complex_subject(target_raw)
+        if '/' in target_raw:
-        new_row['合同标的'] = parsed_target['name']
+            new_row['合同标的'] = target_raw.split('/')[0].strip()
        else:
            new_row['合同标的'] = target_raw.split('\n')[0].strip()
        csv_qty = str(row.get('数量', '')).strip()
        if csv_qty and csv_qty.lower() != 'nan':
            new_row['数量'] = csv_qty
        else:
            new_row['数量'] = parsed_target['qty']
            new_row['单位'] = parsed_target['unit']
        val_product_subtotal = self.safe_float(row.get('产品小计', 0))
        if '外购' in detail_manuf_val:
-            new_row['外购'] = val_product_subtotal
+            new_row['外购'] = self.format_money_str(val_product_subtotal)
            remark = str(row.get('备注', '')).strip()
            if not remark or remark.lower() == 'nan':
-                outsourced_detail = str(row.get('外购产品明细', '')).strip()
+                outsourced = str(row.get('外购产品明细', '')).strip()
-                if outsourced_detail and outsourced_detail.lower() != 'nan':
+                new_row['产品描述'] = outsourced if outsourced.lower() != 'nan' else ""
                    new_row['产品描述'] = outsourced_detail
                else:
                    new_row['产品描述'] = ""
            else:
                new_row['产品描述'] = remark
        else:
            new_row['外购'] = ""
            new_row['产品描述'] = row.get('产品名称', '')
-            # 美元报价 -> 外币报价单价
+            if '外币报价单价' in new_row: new_row['外币报价单价'] = self.format_money_str(row.get('美元报价', ''))
-            new_row['外币报价单价'] = row.get('美元报价', '')
+            if '报价单价' in new_row: new_row['报价单价'] = self.format_money_str(row.get('美元报价', ''))
            # 产品小计 -> 报价RMB总价 (假设逻辑)
            new_row['报价RMB总价'] = row.get('产品小计', '')
-        new_row['计算汇率'] = row.get('汇率', '')
+            if '报价RMB总价' in new_row: new_row['报价RMB总价'] = self.format_money_str(row.get('产品小计', ''))
-        new_row['折扣率（％）'] = row.get('折扣率', '')
+            if '报价总价' in new_row: new_row['报价总价'] = self.format_money_str(row.get('产品小计', ''))
-        new_row['售价RMB单价'] = row.get('销售单价', '')
+
-        new_row['售价RMB总价'] = row.get('销售总价', '')
+        if '计算汇率' in new_row: new_row['计算汇率'] = self.format_percent_str(row.get('汇率', ''))
-        new_row['外购转美元'] = row.get('外购转美元', '')
+        if '合同币种/美元' in new_row: new_row['合同币种/美元'] = self.format_percent_str(row.get('汇率', ''))
-        new_row['报价总价美元'] = row.get('报价总价美元', '')
+
-        new_row['净合同额美元'] = row.get('净合同额美元', '')
+        discount_col = '折扣率' if '折扣率' in new_row else '折扣率（％）'
-        new_row['报价RMB单价'] = row.get('报价RMB单价', '')  # 如果CSV有这一列，如果没有则为空
+        if discount_col in new_row: new_row[discount_col] = self.format_percent_str(row.get('折扣率', ''))
        if '售价RMB单价' in new_row: new_row['售价RMB单价'] = self.format_money_str(row.get('销售单价', ''))
        if '销售单价' in new_row: new_row['销售单价'] = self.format_money_str(row.get('销售单价', ''))
        if '售价RMB总价' in new_row: new_row['售价RMB总价'] = self.format_money_str(row.get('销售总价', ''))
        if '销售总价' in new_row: new_row['销售总价'] = self.format_money_str(row.get('销售总价', ''))
        new_row['外购转美元'] = self.format_money_str(row.get('外购转美元', ''))
        new_row['报价总价美元'] = self.format_money_str(row.get('报价总价美元', ''))
        new_row['净合同额美元'] = self.format_money_str(row.get('净合同额美元', ''))
        if '报价RMB单价' in new_row: new_row['报价RMB单价'] = self.format_money_str(row.get('报价RMB单价', ''))
        return pd.Series(new_row)
-    def process_row_om(self, row):
+    # OM表处理 (使用聚合)
-        new_row = {col: "" for col in self.columns_om}
+    def generate_om_row_aggregated(self, contract_id, group_df, target_cols):
-        order_no_raw = str(row.get('合同订单编号', '')).strip()
+        first_row = group_df.iloc[0]
        all_items = []
        for _, row in group_df.iterrows():
            target_raw = str(row.get('合同标的（品名/型号/数量/单价/总价）', ''))
            lines = [line.strip() for line in target_raw.split('\n') if line.strip()]
            if lines:
                for line in lines:
                    all_items.append(self.parse_single_line_subject(line))
        if not all_items: all_items.append({'name': '', 'price': '', 'sort_price': 0})
        all_items.sort(key=lambda x: x['sort_price'], reverse=True)
        best_item = all_items[0]
        new_row = {col: "" for col in target_cols}
        order_no_raw = str(first_row.get('合同订单编号', '')).strip()
        parts_no = order_no_raw.split()
        new_row['合同编号'] = parts_no[0] if len(parts_no) > 0 else order_no_raw
-        if len(parts_no) > 1: new_row['内贸合同号'] = " ".join(parts_no[1:])
+        new_row['内贸合同号'] = " ".join(parts_no[1:]) if len(parts_no) > 1 else ""
-        target_raw = str(row.get('合同标的（品名/型号/数量/单价/总价）', ''))
+        total_amount = self.format_money_str(first_row.get('合同总额', ''))
-        parsed_target = self.parse_complex_subject(target_raw)
+        status = str(first_row.get('收款状态', '')).strip()
-        new_row['合同标的'] = parsed_target['name']
+        received = ""
-
+        unreceived = ""
        total_amount = row.get('合同总额', '')
        status = str(row.get('收款状态', '')).strip()
        new_row['合同总额'] = total_amount
        new_row['收款情况'] = status
        if '已收' in status:
-            new_row['已收款'] = total_amount
+            received = total_amount
-            new_row['未收款'] = 0
+            unreceived = self.format_money_str(0)
        else:
            new_row['已收款'] = ""
            new_row['未收款'] = ""
-        new_row['签署公司'] = row.get('收款账户', '')
+        new_row['签署公司'] = first_row.get('收款账户', '')
-        new_row['签订日期'] = row.get('签约日期', '')
+        new_row['签订日期'] = self.format_date_str(first_row.get('签约日期', ''))
-        new_row['销售员'] = row.get('负责人', '')
+        new_row['销售员'] = first_row.get('负责人', '')
-        new_row['最终用户单位'] = row.get('客户名称', '')
+        new_row['最终用户单位'] = first_row.get('客户名称', '')
        new_row['最终用户信息联系人、电话、邮箱'] = row.get('联系人姓名', '')
-        buyer_raw = str(row.get('合同买方（名称/联系人/电话/邮箱）', ''))
+        contact_col = '最终用户信息\n联系人、电话、邮箱'
        if contact_col in new_row: new_row[contact_col] = first_row.get('联系人姓名', '')
        buyer_raw = str(first_row.get('合同买方（名称/联系人/电话/邮箱）', ''))
        parsed_buyer = self.parse_buyer_info(buyer_raw)
        new_row['买方单位'] = parsed_buyer['name']
        new_row['买方信息联系人、电话、邮箱'] = parsed_buyer['contact_full']
        new_row['收款日期'] = row.get('最新收款日期', '')
-        return pd.Series(new_row)
+        buyer_info_col = '买方信息\n联系人、电话、邮箱'
        if buyer_info_col in new_row: new_row[buyer_info_col] = parsed_buyer['contact_full']
        new_row['收款日期'] = self.format_date_str(first_row.get('最新收款日期', ''))
        new_row['合同标的'] = best_item['name']
        new_row['_sort_price'] = best_item['sort_price']
        new_row['合同总额'] = total_amount
        new_row['收款情况'] = status
        new_row['已收款'] = received
        new_row['未收款'] = unreceived
        return new_row
    def merge_datasets(self, old_dfs, csv_df, is_asd):
        col_gen = '厂家'
@ -361,31 +531,28 @@ class DataProcessor:
        result_dfs = {}
-        def merge_logic(old_df, new_rows_df, unique_col, target_columns):
+        def merge_logic_expanded(old_df, new_rows_list, unique_col, target_columns):
            if old_df is None or old_df.empty:
-                if new_rows_df.empty: return pd.DataFrame(columns=target_columns + ['_status'])
+                if not new_rows_list: return pd.DataFrame(columns=target_columns + ['_status'])
-                combined = new_rows_df.copy()
+                combined = pd.DataFrame(new_rows_list)
                combined['_status'] = 'new'
                return combined
            combined = old_df.copy()
            # 确保旧数据列名存在
            for col in target_columns:
-                if col not in combined.columns:
+                if col not in combined.columns: combined[col] = ""
-                    combined[col] = ""
+            if '_sort_price' not in combined.columns: combined['_sort_price'] = 0.0
            if unique_col in combined.columns:
                combined[unique_col] = combined[unique_col].astype(str)
            if '_status' not in combined.columns: combined['_status'] = ''
            if not new_rows_list: return combined
            new_rows_df = pd.DataFrame(new_rows_list)
            if unique_col in new_rows_df.columns:
                new_rows_df[unique_col] = new_rows_df[unique_col].astype(str)
            if '_status' not in combined.columns:
                combined['_status'] = ''
            if new_rows_df.empty:
                return combined
            new_contract_ids = new_rows_df[unique_col].unique()
            rows_to_append = []
@ -394,24 +561,25 @@ class DataProcessor:
                old_indices = combined[combined[unique_col] == cid].index
                if len(old_indices) > 0:
-                    idx = old_indices[0]
+                    first_old_idx = old_indices[0]
                    new_first_row = new_subset.iloc[0]
                    has_changed = False
                    new_row_series = new_subset.iloc[0]
                    for col in target_columns:
-                        if col in new_row_series:
+                        if col in new_first_row:
-                            new_val = new_row_series[col]
+                            new_val = new_first_row[col]
-                            old_val = combined.at[idx, col]
+                            old_val = combined.at[first_old_idx, col]
                            # 保护逻辑：新值非空才覆盖
                            if str(new_val).strip() != "":
                                if self.normalize_for_compare(old_val) != self.normalize_for_compare(new_val):
-                                    combined.at[idx, col] = new_val
+                                    combined.at[first_old_idx, col] = new_val
                                    has_changed = True
-                    if has_changed:
+                    if '_sort_price' in new_first_row:
-                        combined.at[idx, '_status'] = 'modified'
+                        combined.at[first_old_idx, '_sort_price'] = new_first_row['_sort_price']
                    if has_changed:
                        combined.at[first_old_idx, '_status'] = 'modified'
                else:
                    new_subset_copy = new_subset.copy()
                    new_subset_copy['_status'] = 'new'
@ -422,44 +590,57 @@ class DataProcessor:
            return combined
-        # --- 合并执行 ---
+        # --- 1. 外贸总表 (聚合) ---
        new_gen_rows = []
        target_cols_foreign = self.cols_asd_foreign_general if is_asd else self.cols_nonasd_foreign_general
        if not csv_foreign.empty:
-            new_gen = csv_foreign.apply(lambda r: self.process_row_general(r, '外贸', col_gen), axis=1)
+            grouped = csv_foreign.groupby('合同订单编号')
-            new_gen = new_gen.drop_duplicates(subset=['合同编号'], keep='first')
+            for contract_id, group in grouped:
-        else:
+                row_data = self.generate_general_row_aggregated(contract_id, group, target_cols_foreign, '外贸', is_asd,
-            new_gen = pd.DataFrame(columns=self.columns_general)
+                                                                col_gen)
-        old_gen = old_dfs.get('外贸', old_dfs.get('外贸总表', pd.DataFrame(columns=self.columns_general)))
+                new_gen_rows.append(row_data)
        result_dfs['外贸'] = merge_logic(old_gen, new_gen, '合同编号', self.columns_general)
        old_gen = old_dfs.get('外贸', old_dfs.get('外贸总表', pd.DataFrame(columns=target_cols_foreign)))
        result_dfs['外贸'] = merge_logic_expanded(old_gen, new_gen_rows, '合同编号', target_cols_foreign)
        # --- 2. 外贸明细 ---
        if not csv_foreign.empty:
-            new_det = csv_foreign.apply(lambda r: self.process_row_detail(r, col_det), axis=1)
+            new_det = csv_foreign.apply(lambda r: self.process_row_detail(r, col_det, '外贸'), axis=1)
        else:
-            new_det = pd.DataFrame(columns=self.columns_detail)
+            new_det = pd.DataFrame(columns=self.cols_foreign_detail)
-        old_det = old_dfs.get('外贸明细', pd.DataFrame(columns=self.columns_detail))
+        old_det = old_dfs.get('外贸明细', pd.DataFrame(columns=self.cols_foreign_detail))
-        result_dfs['外贸明细'] = merge_logic(old_det, new_det, '合同编号', self.columns_detail)
+        result_dfs['外贸明细'] = merge_logic_expanded(old_det, new_det.to_dict('records'), '合同编号',
                                                      self.cols_foreign_detail)
        # --- 3. 内贸总表 (聚合) ---
        new_dom_rows = []
        if not csv_domestic.empty:
-            new_dom_gen = csv_domestic.apply(lambda r: self.process_row_general(r, '内贸', col_gen), axis=1)
+            grouped = csv_domestic.groupby('合同订单编号')
-            new_dom_gen = new_dom_gen.drop_duplicates(subset=['合同编号'], keep='first')
+            for contract_id, group in grouped:
-        else:
+                row_data = self.generate_general_row_aggregated(contract_id, group, self.cols_domestic_general, '内贸',
-            new_dom_gen = pd.DataFrame(columns=self.columns_domestic_general)
+                                                                is_asd, col_gen)
-        old_dom_gen = old_dfs.get('内贸', old_dfs.get('内贸总表', pd.DataFrame(columns=self.columns_domestic_general)))
+                new_dom_rows.append(row_data)
-        result_dfs['内贸'] = merge_logic(old_dom_gen, new_dom_gen, '合同编号', self.columns_domestic_general)
+        old_dom_gen = old_dfs.get('内贸', old_dfs.get('内贸总表', pd.DataFrame(columns=self.cols_domestic_general)))
        result_dfs['内贸'] = merge_logic_expanded(old_dom_gen, new_dom_rows, '合同编号', self.cols_domestic_general)
        # --- 4. 内贸明细 ---
        if not csv_domestic.empty:
-            new_dom_det = csv_domestic.apply(lambda r: self.process_row_detail(r, col_det), axis=1)
+            new_dom_det = csv_domestic.apply(lambda r: self.process_row_detail(r, col_det, '内贸'), axis=1)
        else:
-            new_dom_det = pd.DataFrame(columns=self.columns_detail)
+            new_dom_det = pd.DataFrame(columns=self.cols_domestic_detail)
-        old_dom_det = old_dfs.get('内贸明细', pd.DataFrame(columns=self.columns_detail))
+        old_dom_det = old_dfs.get('内贸明细', pd.DataFrame(columns=self.cols_domestic_detail))
-        result_dfs['内贸明细'] = merge_logic(old_dom_det, new_dom_det, '合同编号', self.columns_detail)
+        result_dfs['内贸明细'] = merge_logic_expanded(old_dom_det, new_dom_det.to_dict('records'), '合同编号',
                                                      self.cols_domestic_detail)
        # --- 5. OM (聚合) ---
        new_om_rows = []
        if not csv_om.empty:
-            new_om = csv_om.apply(lambda r: self.process_row_om(r), axis=1)
+            grouped = csv_om.groupby('合同订单编号')
-            new_om = new_om.drop_duplicates(subset=['合同编号'], keep='first')
+            for contract_id, group in grouped:
-        else:
+                row_data = self.generate_om_row_aggregated(contract_id, group, self.cols_om)
-            new_om = pd.DataFrame(columns=self.columns_om)
+                new_om_rows.append(row_data)
-        old_om = old_dfs.get('OM合同', old_dfs.get('其他', pd.DataFrame(columns=self.columns_om)))
+        old_om = old_dfs.get('OM合同', old_dfs.get('其他', pd.DataFrame(columns=self.cols_om)))
-        result_dfs['OM合同'] = merge_logic(old_om, new_om, '合同编号', self.columns_om)
+        result_dfs['OM合同'] = merge_logic_expanded(old_om, new_om_rows, '合同编号', self.cols_om)
        return result_dfs
@ -472,6 +653,9 @@ class DataProcessor:
            for col in self.percent_cols:
                if col in df.columns:
                    df[col] = df[col].apply(self.format_percent_str)
            for col in self.date_cols:
                if col in df.columns:
                    df[col] = df[col].apply(self.format_date_str)
        return data_dict
@ -482,7 +666,7 @@ class DataProcessor:
 class ContractApp:
    def __init__(self, root):
        self.root = root
-        self.root.title("合同数据处理系统 V3.2 (表头修正版)")
+        self.root.title("合同数据处理系统 V3.8 (换行符修复版)")
        self.root.geometry("1300x850")
        self.style = ttk.Style()
@ -573,15 +757,25 @@ class ContractApp:
            dfs = pd.read_excel(path, sheet_name=None)
            clean_dfs = {}
            for k, v in dfs.items():
-                v.columns = v.columns.astype(str).str.replace(r'\s+', '', regex=True)
+                # [关键修复] 智能表头匹配：重命名表头为标准格式
                new_columns = []
                for col in v.columns:
                    clean_col = self.processor.clean_header_key(str(col))
                    # 尝试在标准映射里找
                    if clean_col in self.processor.standard_col_map:
                        new_columns.append(self.processor.standard_col_map[clean_col])
                    # 尝试在旧映射里找
                    elif col in self.processor.legacy_map:
                        new_columns.append(self.processor.legacy_map[col])
                    else:
                        new_columns.append(col)  # 找不到就保留原样
                v.columns = new_columns
-                # 总表仍可能需要 legacy_map，但明细表不需要了因为我们已经在代码里统一了列名
+                # 去重
                v.rename(columns=self.processor.legacy_map, inplace=True)
                v = v.loc[:, ~v.columns.duplicated()]
                if '合同编号' in v.columns:
                    v['合同编号'] = v['合同编号'].astype(str)
                clean_dfs[k.strip()] = v
            return clean_dfs
        except Exception as e:
@ -632,20 +826,31 @@ class ContractApp:
                    if not df.empty:
                        if '合同编号' in df.columns:
                            df['合同编号'] = df['合同编号'].astype(str)
-                            df = df.sort_values(by='合同编号', ascending=True)
+
                            sort_cols = ['合同编号']
                            asc_order = [True]
                            if '_sort_price' in df.columns:
                                sort_cols.append('_sort_price')
                                asc_order.append(False)
                            df = df.sort_values(by=sort_cols, ascending=asc_order)
                            if '明细' in sheet_name:
                                mask = df.duplicated(subset=['合同编号'], keep='first')
                                df.loc[mask, '合同标的'] = ""
                        standard_cols = []
                        is_asd = (file_type == 'ASD')
                        if sheet_name == '外贸':
-                            standard_cols = self.processor.columns_general
+                            standard_cols = self.processor.cols_asd_foreign_general if is_asd else self.processor.cols_nonasd_foreign_general
                        elif sheet_name == '内贸':
-                            standard_cols = self.processor.columns_domestic_general
+                            standard_cols = self.processor.cols_domestic_general
                        elif sheet_name == 'OM合同':
-                            standard_cols = self.processor.columns_om
+                            standard_cols = self.processor.cols_om
-                        elif '明细' in sheet_name:
+                        elif sheet_name == '外贸明细':
-                            standard_cols = self.processor.columns_detail
+                            standard_cols = self.processor.cols_foreign_detail
                        elif sheet_name == '内贸明细':
                            standard_cols = self.processor.cols_domestic_detail
                        self.create_treeview(inner_notebook, df, sheet_name, standard_cols)
@ -656,6 +861,7 @@ class ContractApp:
        scroll_y = ttk.Scrollbar(frame, orient="vertical")
        scroll_x = ttk.Scrollbar(frame, orient="horizontal")
        # 仅显示标准列
        display_cols = target_cols
        tree = ttk.Treeview(frame, columns=display_cols, show='headings',
@ -668,7 +874,9 @@ class ContractApp:
        tree.pack(fill="both", expand=True)
        for col in display_cols:
-            tree.heading(col, text=col)
+            # 清洗显示名称（换行变空格，防止表头太高）
            clean_header = col.replace('\n', ' ')
            tree.heading(col, text=clean_header)
            tree.column(col, width=120, anchor="center")
        tree.tag_configure('new', background='#FFFFCC')
@ -728,11 +936,18 @@ class ContractApp:
                    for sheet_name in valid_sheets:
                        if sheet_name in sheets:
                            df = sheets[sheet_name]
-                            save_df = df.drop(columns=['_status'], errors='ignore')
+
                            if '合同编号' in df.columns:
                                sort_cols = ['合同编号']
                                asc_order = [True]
                                if '_sort_price' in df.columns:
                                    sort_cols.append('_sort_price')
                                    asc_order.append(False)
                                df = df.sort_values(by=sort_cols, ascending=asc_order)
                            save_df = df.drop(columns=['_status', '_sort_price'], errors='ignore')
                            if not save_df.empty:
                                if '合同编号' in save_df.columns:
                                    save_df['合同编号'] = save_df['合同编号'].astype(str)
                                    save_df = save_df.sort_values(by='合同编号', ascending=True)
                                if '明细' in sheet_name:
                                    mask = save_df.duplicated(subset=['合同编号'], keep='first')
                                    save_df.loc[mask, '合同标的'] = ""