# KCGL/库研操作/导入数据库.py

import pandas as pd
import psycopg2

# 1. Database connection settings, passed as keyword arguments to
#    psycopg2.connect().
# NOTE(review): credentials are hard-coded here — confirm this file is only
# used against a local/test database.
DB_CONFIG = {
    "dbname": "inventory_system",
    "user": "test",
    "password": "1234",
    "host": "localhost",
    "port": "5435",
}

# 2. Path of the Excel workbook to read (relative to the working directory).
EXCEL_FILE = "筛选后的库存统计.xlsx"


def _join_category_levels(row, category_cols):
    """Join the usable category levels of one row into a "/"-separated path.

    Args:
        row: A mapping-like row (e.g. a pandas Series) supporting ``.get``.
        category_cols: Category column names, in hierarchy order.

    Returns:
        The stripped, non-empty level values joined with "/", or an empty
        string when no level holds a usable value.
    """
    parts = []
    for col in category_cols:
        value = row.get(col)
        if value is None:
            continue
        text = str(value).strip()
        # Skip blanks and the placeholder strings 'nan' / 'None' so they
        # never end up inside the stored category path.
        if text and text.lower() not in ('nan', 'none'):
            parts.append(text)
    return "/".join(parts)


def fix_category_data_no_nan():
    """Rewrite ``material_base.category`` from the category columns in EXCEL_FILE.

    The workbook is read as strings, reduced to one row per
    (资产名称, 规格型号) pair, and for each row the category levels that are
    present (at most 类别一级..类别四级) are joined with "/". Every
    ``material_base`` row is then updated when its ``name`` equals the asset
    name (bare or prefixed with "库研*") and its ``spec_model`` equals the
    spec (bare or prefixed with "KY*"), or when both the column and the
    Excel spec are missing.

    Rows without a usable name or without any usable category level are
    skipped. All updates are committed together at the end. Any exception is
    printed and the transaction is rolled back; the exception is not
    re-raised.
    """
    # Bind the handles up front so the cleanup code can test them directly
    # instead of probing locals().
    conn = None
    cur = None

    # The statement text is loop-invariant, so build it once.
    update_query = """
        UPDATE material_base
        SET category = %s
        WHERE (name = %s OR name = %s)
          AND (
            (spec_model = %s OR spec_model = %s)
            OR (spec_model IS NULL AND %s IS NULL)
          )
    """

    try:
        print("正在读取 Excel 文件...")

        # Only the first four category levels are ever considered.
        possible_category_cols = ['类别一级', '类别二级', '类别三级', '类别四级']

        # Read just the header row to find which category columns exist.
        df_header = pd.read_excel(EXCEL_FILE, nrows=0)
        actual_category_cols = [
            col for col in possible_category_cols if col in df_header.columns
        ]

        needed_columns = ['资产名称', '规格型号'] + actual_category_cols

        df = pd.read_excel(
            EXCEL_FILE, dtype=str, usecols=lambda x: x in needed_columns
        )
        # Replace missing cells with None before de-duplicating.
        df = df.where(pd.notnull(df), None)
        df = df.drop_duplicates(subset=['资产名称', '规格型号'])

        print(f"发现了 {len(df)} 种独立物料，准备修复类别并清除 'nan'...")

        conn = psycopg2.connect(**DB_CONFIG)
        cur = conn.cursor()

        update_count = 0

        for _, row in df.iterrows():
            name = row.get('资产名称')
            if not name or str(name).lower() == 'nan':
                continue

            # Normalise the spec: a missing value or the string 'nan'
            # becomes None, anything else is stripped.
            spec_model = row.get('规格型号')
            if pd.isna(spec_model) or str(spec_model).lower() == 'nan':
                clean_spec = None
            else:
                clean_spec = str(spec_model).strip()

            full_category = _join_category_levels(row, actual_category_cols)
            if not full_category:
                continue

            # Rows may be stored with or without these prefixes, so match both.
            prefixed_name = f"库研*{name}"
            prefixed_spec = f"KY*{clean_spec}" if clean_spec else None

            cur.execute(update_query, (
                full_category,
                name, prefixed_name,
                clean_spec, prefixed_spec, clean_spec,
            ))

            update_count += cur.rowcount

        conn.commit()
        print(f"✅ 完美修复！清除了讨厌的 'nan'，共修正了 {update_count} 条记录。")

    except Exception as e:
        # Top-level boundary of the script: report the failure and undo any
        # partial work instead of propagating.
        print(f"❌ 发生错误: {e}")
        if conn is not None:
            conn.rollback()
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()


# Run the category repair only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    fix_category_data_no_nan()