# Listing metadata: 102 lines, 3.4 KiB, Python
import pandas as pd
|
||
import psycopg2
|
||
|
||
# 1. Database connection settings; unpacked as keyword arguments into
#    psycopg2.connect(). Note that the port is kept as a string.
DB_CONFIG = dict(
    dbname='inventory_system',
    user='test',
    password='1234',
    host='localhost',
    port='5435',
)

# 2. Path of the Excel workbook that holds the category columns.
EXCEL_FILE = '筛选后的库存统计.xlsx'
|
||
|
||
|
||
# Lower-cased cell texts that stand for "no value" after a pandas/str round trip.
_MISSING_TOKENS = frozenset({'nan', 'none'})

# Loop-invariant UPDATE statement. Placeholders, in order:
#   category, name, prefixed name, spec, prefixed spec, spec (NULL test).
# The last branch matches rows whose spec_model is NULL when the Excel row
# has no usable spec either.
_UPDATE_CATEGORY_SQL = """
    UPDATE material_base
    SET category = %s
    WHERE (name = %s OR name = %s)
      AND (
        (spec_model = %s OR spec_model = %s)
        OR (spec_model IS NULL AND %s IS NULL)
      )
"""


def _clean_cell(value):
    """Return the stripped text of an Excel cell, or None if it is missing.

    A cell counts as missing when it is None/NaN, blank after stripping, or
    one of the literal texts 'nan' / 'none' (case-insensitive).
    """
    if value is None or pd.isna(value):
        return None
    text = str(value).strip()
    if not text or text.lower() in _MISSING_TOKENS:
        return None
    return text


def _join_category(row, category_cols):
    """Join the usable category levels of *row* with '/', skipping missing ones."""
    levels = (_clean_cell(row.get(col)) for col in category_cols)
    return "/".join(level for level in levels if level is not None)


def fix_category_data_no_nan():
    """Rewrite ``material_base.category`` from the Excel category columns.

    Reads EXCEL_FILE, keeps one row per (asset name, spec model) pair, builds a
    '/'-joined category path from at most the first four category levels
    (missing / 'nan' / 'none' cells are skipped) and updates every
    ``material_base`` row whose name equals the Excel name or its ``库研*``
    prefixed form and whose spec_model equals the cleaned spec, its ``KY*``
    prefixed form, or is NULL when the Excel row has no spec.

    Rows without a usable name or without any category level are skipped.
    All updates are committed together; on any error the message is printed
    and the transaction is rolled back. The cursor and connection are always
    closed. Returns None and does not re-raise.
    """
    # Initialised up front so the cleanup code can test them directly instead
    # of probing locals().
    conn = None
    cur = None
    try:
        print("正在读取 Excel 文件...")

        # Only the first four category levels are ever considered.
        possible_category_cols = ['类别一级', '类别二级', '类别三级', '类别四级']

        # Read just the header row to learn which category columns exist.
        df_header = pd.read_excel(EXCEL_FILE, nrows=0)
        actual_category_cols = [
            col for col in possible_category_cols if col in df_header.columns
        ]

        needed_columns = {'资产名称', '规格型号', *actual_category_cols}

        df = pd.read_excel(
            EXCEL_FILE,
            dtype=str,
            usecols=lambda col: col in needed_columns,
        )
        df = df.where(pd.notnull(df), None)
        df = df.drop_duplicates(subset=['资产名称', '规格型号'])

        print(f"发现了 {len(df)} 种独立物料,准备修复类别并清除 'nan'...")

        conn = psycopg2.connect(**DB_CONFIG)
        cur = conn.cursor()

        update_count = 0

        for _, row in df.iterrows():
            # The name is matched verbatim (not stripped), but rows whose name
            # is missing or blank are skipped.
            name = row.get('资产名称')
            if _clean_cell(name) is None:
                continue

            # A blank or placeholder spec becomes None so that it can match
            # rows whose spec_model is NULL.
            clean_spec = _clean_cell(row.get('规格型号'))

            full_category = _join_category(row, actual_category_cols)
            if not full_category:
                continue

            prefixed_name = f"库研*{name}"
            prefixed_spec = f"KY*{clean_spec}" if clean_spec else None

            cur.execute(_UPDATE_CATEGORY_SQL, (
                full_category,
                name, prefixed_name,
                clean_spec, prefixed_spec, clean_spec,
            ))

            update_count += cur.rowcount

        conn.commit()
        print(f"✅ 完美修复!清除了讨厌的 'nan',共修正了 {update_count} 条记录。")

    except Exception as e:
        # Top-level boundary of the script: report the error and undo any
        # partial work.
        print(f"❌ 发生错误: {e}")
        if conn is not None:
            conn.rollback()
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
|
||
|
||
|
||
# Run the category repair only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    fix_category_data_no_nan()