"""Repair the ``category`` column of ``material_base`` from an Excel export.

The script reads up to four category-level columns per material from
``EXCEL_FILE``, joins the non-empty levels with ``/`` and writes the result
into ``material_base.category`` for the matching rows.
"""

import pandas as pd
import psycopg2

# 1. Database configuration.
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment before this script is shared or committed.
DB_CONFIG = {
    'dbname': 'inventory_system',
    'user': 'test',
    'password': '1234',
    'host': 'localhost',
    'port': '5435',
}

# 2. Excel file path.
EXCEL_FILE = '筛选后的库存统计.xlsx'

# Category levels are deliberately capped at level four.
_CATEGORY_COLUMNS = ['类别一级', '类别二级', '类别三级', '类别四级']
# Columns that identify one distinct material.
_KEY_COLUMNS = ['资产名称', '规格型号']
# Lower-cased cell texts that count as "no category value".
_BLANK_CATEGORY_TOKENS = frozenset({'', 'nan', 'none'})

# A row matches on the plain or prefixed name, and on the plain or prefixed
# spec; when the Excel spec is missing, only rows whose spec_model is NULL
# match (the last placeholder receives the same value as the plain spec).
_UPDATE_CATEGORY_SQL = """
    UPDATE material_base
    SET category = %s
    WHERE (name = %s OR name = %s)
      AND (
            (spec_model = %s OR spec_model = %s)
            OR (spec_model IS NULL AND %s IS NULL)
          )
"""


def _clean_spec(spec_model):
    """Return the stripped spec text, or None when it is missing or 'nan'."""
    if pd.isna(spec_model):
        return None
    text = str(spec_model).strip()
    # Strip first so that a padded ' nan ' is also recognised as missing.
    return None if text.lower() == 'nan' else text


def _build_category(row, category_cols):
    """Join the non-blank category levels of *row* with '/'."""
    parts = []
    for col in category_cols:
        val = row.get(col)
        # pd.isna covers None, float NaN and pd.NA alike.
        if pd.isna(val):
            continue
        text = str(val).strip()
        if text.lower() not in _BLANK_CATEGORY_TOKENS:
            parts.append(text)
    return "/".join(parts)


def fix_category_data_no_nan():
    """Rewrite ``material_base.category`` from the Excel category columns.

    Reads ``EXCEL_FILE``, keeps one row per (name, spec) pair, builds a
    '/'-joined category path from the category columns that exist in the
    sheet, and updates every database row whose name equals the Excel name
    (plain or with the '库研*' prefix) and whose spec equals the Excel spec
    (plain or with the 'KY*' prefix). Rows with no usable name or with an
    empty category path are skipped.

    All updates are committed together. Any exception is printed and the
    transaction is rolled back; the exception is not re-raised. The cursor
    and connection are closed in every case.
    """
    conn = None
    cur = None
    try:
        print("正在读取 Excel 文件...")

        # Read only the header first to find which category columns exist.
        df_header = pd.read_excel(EXCEL_FILE, nrows=0)
        actual_category_cols = [
            col for col in _CATEGORY_COLUMNS if col in df_header.columns
        ]
        needed_columns = _KEY_COLUMNS + actual_category_cols

        df = pd.read_excel(
            EXCEL_FILE,
            dtype=str,
            usecols=lambda x: x in needed_columns,
        )
        df = df.where(pd.notnull(df), None)
        df = df.drop_duplicates(subset=_KEY_COLUMNS)

        print(f"发现了 {len(df)} 种独立物料,准备修复类别并清除 'nan'...")

        conn = psycopg2.connect(**DB_CONFIG)
        cur = conn.cursor()

        update_count = 0
        for _, row in df.iterrows():
            name = row.get('资产名称')
            # Skip rows without a usable asset name.
            if pd.isna(name) or not name or str(name).lower() == 'nan':
                continue

            clean_spec = _clean_spec(row.get('规格型号'))

            full_category = _build_category(row, actual_category_cols)
            if not full_category:
                continue

            prefixed_name = f"库研*{name}"
            prefixed_spec = f"KY*{clean_spec}" if clean_spec else None

            cur.execute(
                _UPDATE_CATEGORY_SQL,
                (
                    full_category,
                    name, prefixed_name,
                    clean_spec, prefixed_spec,
                    clean_spec,
                ),
            )
            update_count += cur.rowcount

        conn.commit()
        print(f"✅ 完美修复!清除了讨厌的 'nan',共修正了 {update_count} 条记录。")

    except Exception as e:
        # Top-level boundary of the script: report and undo partial work.
        print(f"❌ 发生错误: \n{e}")
        if conn is not None:
            conn.rollback()
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    fix_category_data_no_nan()