Files
Contract-document-crawling-…/拿取内容测试.py
2026-01-16 15:16:35 +08:00

91 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import json
import os
# ================= Configuration =================
# Every request in this script is POSTed to this single endpoint.
base_url = "http://111.198.24.44:88/index.php"

# 1. Login form fields.
login_payload = dict(
    module="Users",
    action="Authenticate",
    return_module="Users",
    return_action="Login",
    user_name="TEST",  # fill in the real user name here
    user_password="test",  # fill in the real password here
    login_theme="newskin",
)

# 2. Data-fetch parameters (the previously used filter conditions are kept).
# Each rule below is flattened into "filter[<name><index>]" form fields, where
# <index> is the rule's position in this tuple. Key order inside a rule is the
# order in which the fields end up in the payload.
# NOTE(review): the operator codes ("cts", "dcts", "btwa", "prevfy") are
# passed through verbatim; their meaning is defined by the server - confirm.
_filter_rules = (
    {
        "Fields": "subject",
        "Condition": "cts",
        "Srch_value": "W25A",
        "type": "text",
    },
    {
        "dateCondition": "prevfy",
        "Fields": "duedate",
        "Condition": "btwa",
        "Srch_value": "2025-01-01,2025-12-31",
        "type": "date",
    },
    {
        "Fields": "subject",
        "Condition": "dcts",
        "Srch_value": "取消",
        "type": "text",
    },
)

data_payload = {
    "module": "SalesOrder",
    "action": "SalesOrderAjax",
    "file": "ListViewData",
    "sorder": "",
    "start": "1",
    "pagesize": "100",
    # NOTE(review): looks like a millisecond timestamp captured from a browser
    # request - confirm whether the server needs a fresh value.
    "actionId": "1768546984243",
    "isFilter": "true",
    "search[viewscope]": "all_to_me",
    "search[viewname]": "324126",
    **{
        f"filter[{name}{index}]": value
        for index, rule in enumerate(_filter_rules)
        for name, value in rule.items()
    },
    # Number of filter rules sent above, as a string like every other field.
    "filter[search_cnt]": str(len(_filter_rules)),
    "filter[matchtype]": "all",
}

# Browser-like headers sent with both the login and the data request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Referer": base_url + "?module=SalesOrder&action=index",
}
# ================= Execution =================
# Log in, fetch the filtered list-view data, and write it to "result.json" in
# the current working directory. If the response body is not JSON, the raw
# body is written to "error_page.html" instead so it can be inspected.

# Seconds to wait for the server on each request. requests has no default
# timeout, so without this a stalled server would block the script forever.
request_timeout = 30

try:
    # One session for both requests so the login cookies are sent with the
    # data request; the with-block closes its connections on the way out.
    with requests.Session() as session:
        print("1. 正在登录...")
        session.post(
            base_url,
            data=login_payload,
            headers=headers,
            timeout=request_timeout,
        )
        # NOTE(review): a PHPSESSID cookie only shows that the server issued
        # a session; presumably it is not proof of a successful
        # authentication - confirm. The script continues either way.
        if 'PHPSESSID' in session.cookies:
            print(" 登录成功Cookie已获取。")
        else:
            print(" ⚠️ 警告:可能登录失败 (未检测到PHPSESSID)。")

        print("2. 正在获取数据并导出...")
        resp = session.post(
            base_url,
            data=data_payload,
            headers=headers,
            timeout=request_timeout,
        )

        # === Key step: save the result to a file ===
        try:
            # Only the parse is guarded. ValueError is the common base of
            # every decode error resp.json() can raise (json's, simplejson's
            # and requests' own JSONDecodeError), so the fallback below also
            # runs when simplejson is installed.
            json_data = resp.json()
        except ValueError:
            print("\n❌ 失败:服务器返回的不是 JSON 格式。")
            print("可能是 HTML 页面,已保存为 'error_page.html' 供检查。")
            with open("error_page.html", "w", encoding="utf-8") as f:
                f.write(resp.text)
        else:
            filename = "result.json"
            # ensure_ascii=False keeps Chinese text readable in the file
            # instead of \uXXXX escapes.
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(json_data, f, ensure_ascii=False, indent=4)
            print(f"\n✅ 成功!数据已保存到当前目录下的: 【{filename}】")
            print(f" 文件路径: {os.path.abspath(filename)}")
except Exception as e:
    # Top-level boundary of the script: report network, timeout and file
    # errors as a single message instead of a traceback.
    print(f"发生错误: {e}")