Files
ZDXX/2_3banben/services/crawler_106.py
2026-01-09 13:38:51 +08:00

159 lines
6.4 KiB
Python

# services/crawler_106.py
import os
import requests
import logging
from datetime import datetime
from config import Config
# Settings for the "106" crawler. The code visible in this module reads at
# least the keys "login_payload", "primary_auth" and "base_url" from it.
CONFIG = Config.CRAWLER_CONFIG["106"]
def get_temp_dir():
    """Return the absolute path of ``instance/temp``, creating it if missing.

    The directory is located two levels above this file (the parent of the
    directory containing this module), under ``instance/temp``.

    Returns:
        str: Absolute path of the temp directory, guaranteed to exist.
    """
    # Make __file__ absolute *before* walking up, so a relative __file__
    # does not resolve the parents against the current working directory.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    base_dir = os.path.dirname(module_dir)
    temp_dir = os.path.join(base_dir, 'instance', 'temp')
    # exist_ok avoids the check-then-create race when two callers arrive
    # at the same time.
    os.makedirs(temp_dir, exist_ok=True)
    return temp_dir
def get_106_dynamic_token(port):
    """Log in to the service on ``port`` and return its session token.

    Posts ``CONFIG["login_payload"]`` as JSON to ``/api/login`` on the
    fixed host and returns the response body with surrounding whitespace
    and double quotes removed.

    Args:
        port: Remote port of the target service (interpolated into the URL).

    Returns:
        str | None: The token text on HTTP 200, otherwise ``None``. Any
        error while performing the request also yields ``None`` (best
        effort); the failure is logged instead of being silently dropped.
    """
    login_url = f"http://106.75.72.40:{port}/api/login"
    try:
        resp = requests.post(login_url, json=CONFIG["login_payload"], timeout=10)
        if resp.status_code != 200:
            logging.warning("106 login on port %s returned HTTP %s", port, resp.status_code)
            return None
        return resp.text.strip().replace('"', '')
    except Exception as exc:
        # Deliberately best-effort: callers treat None as "token unavailable".
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        logging.warning("106 login on port %s failed: %s", port, exc)
        return None
def find_closest_item(items, is_date_level=True):
    """Pick the entry whose timestamp is closest to the current local time.

    Args:
        items: List of dicts. Each entry is labelled by its ``'name'`` or,
            when that is empty, by the last ``/``-separated component of
            its ``'path'``.
        is_date_level: When true, the label itself is parsed as a date in
            ``%Y_%m_%d`` format. When false, the entry's ``'modified'``
            value is parsed as an ISO-8601 timestamp (a ``Z`` suffix is
            accepted).

    Returns:
        tuple | None: ``(diff_seconds, item, label)`` for the closest entry,
        where ``diff_seconds`` is the absolute distance from now. ``None``
        if ``items`` is empty, not a list, or contains no parsable entry.
        On ties the earliest entry in ``items`` wins.
    """
    if not items or not isinstance(items, list):
        return None

    now = datetime.now()
    best = None
    for item in items:
        if not isinstance(item, dict):
            # Malformed listing entry: skip it rather than abort the scan.
            continue
        try:
            name_val = item.get('name') or ''
            path_val = item.get('path') or ''
            label = name_val if name_val else path_val.split('/')[-1]
            if is_date_level:
                stamp = datetime.strptime(label, "%Y_%m_%d")
            else:
                mod_str = item.get('modified', '')
                # fromisoformat() before Python 3.11 rejects a trailing 'Z'.
                stamp = datetime.fromisoformat(mod_str.replace('Z', '+00:00'))
            if stamp.tzinfo is not None:
                # Convert aware timestamps to local wall-clock time before
                # comparing with the naive local `now`; merely stripping the
                # tzinfo would skew the distance by the UTC offset.
                stamp = stamp.astimezone().replace(tzinfo=None)
            diff = abs((now - stamp).total_seconds())
        except (ValueError, TypeError, AttributeError, OverflowError, OSError):
            # Unparsable or wrongly typed fields: ignore this entry.
            continue
        # Strict '<' keeps the first of equally close entries.
        if best is None or diff < best[0]:
            best = (diff, item, label)
    return best
# Fixed host that exposes every proxied service on its own port.
_BASE_URL_106 = "http://106.75.72.40"


def _download_106_binary(data_packet, name, port, full_path, file_item, headers):
    """Download ``full_path`` via ``/api/raw`` into the temp dir and record it in ``data_packet``."""
    download_url = f"{_BASE_URL_106}:{port}/api/raw{full_path}"
    # The context manager releases the connection on every path, including
    # the non-200 one where the body is never read.
    with requests.get(download_url, headers=headers, timeout=20, stream=True) as res3:
        if res3.status_code != 200:
            data_packet['status'] = '异常'
            data_packet['value'] = f"下载失败: {res3.status_code}"
            return
        payload = res3.content

    # `name` comes from the remote listing; strip path separators so it
    # cannot point outside the temp directory.
    clean_name = name.replace('/', '_').replace('\\', '_')
    safe_name = f"{clean_name}_{datetime.now().strftime('%H%M%S')}.db"
    temp_path = os.path.join(get_temp_dir(), safe_name)
    with open(temp_path, 'wb') as f:
        f.write(payload)
    data_packet['temp_file'] = temp_path  # handed on to the consumer of the result
    data_packet['value'] = f"Binary Downloaded: {len(payload)} bytes"
    data_packet['raw_json'] = file_item  # the file's listing entry stands in for raw data


def _fetch_106_json(data_packet, port, full_path, headers):
    """Fetch ``full_path`` via ``/api/resources`` and store its JSON body in ``data_packet``."""
    file_api_url = f"{_BASE_URL_106}:{port}/api/resources{full_path}"
    res3 = requests.get(file_api_url, headers=headers, timeout=20)
    try:
        json_content = res3.json()
        data_packet['raw_json'] = json_content  # keep the complete payload
        data_packet['value'] = json_content.get('content', '')
    except (ValueError, AttributeError):
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        data_packet['value'] = "JSON解析失败"


def _crawl_106_proxy(item, name, is_tower_underscore, today_str, data_packet):
    """Fill ``data_packet`` with today's newest file for one online proxy."""
    port = item.get('conf', {}).get('remote_port')
    token = get_106_dynamic_token(port)
    if not token:
        data_packet['status'] = '异常'
        data_packet['value'] = "Token获取失败"
        return

    headers = {"Authorization": CONFIG["primary_auth"], "x-auth": token}
    # The two proxy families use differently cased root folders.
    api_root = "/api/resources/Data/" if is_tower_underscore else "/api/resources/data/"

    res1 = requests.get(f"{_BASE_URL_106}:{port}{api_root}", headers=headers, timeout=10)
    best_date = find_closest_item(res1.json().get('items', []), True)
    if not best_date or best_date[2] != today_str:
        data_packet['value'] = "未找到今日文件夹"
        data_packet['target_time'] = best_date[2] if best_date else "N/A"
        return

    data_packet['target_time'] = best_date[2]  # date of the data actually found
    date_path = f"{api_root}{best_date[2]}/"
    res2 = requests.get(f"{_BASE_URL_106}:{port}{date_path}", headers=headers, timeout=10)
    best_file = find_closest_item(res2.json().get('items', []), False)
    if not best_file:
        data_packet['value'] = "今日文件夹为空"
        return

    file_item = best_file[1]
    full_path = file_item.get('path') or f"{date_path}{file_item.get('name')}"
    if is_tower_underscore:
        _fetch_106_json(data_packet, port, full_path, headers)
    else:
        _download_106_binary(data_packet, name, port, full_path, file_item, headers)


def run_106_logic():
    """Crawl the "106" site and return one result dict per matching proxy.

    Fetches the proxy list from ``CONFIG["base_url"]`` and keeps entries
    whose name ends with ``_data`` (case-insensitive) and contains ``TOWER``.
    Offline proxies are reported as such. For online ones, the newest file
    in today's date folder is fetched as JSON when the name contains
    ``TOWER_``, otherwise it is downloaded as a binary file into the temp
    directory.

    Returns:
        list[dict]: Each dict has the keys ``source``, ``name``, ``status``,
        ``value``, ``target_time``, ``raw_json`` and ``temp_file``. A failure
        for a single proxy is recorded in that proxy's dict; a failure while
        fetching or iterating the proxy list is logged and the results
        collected so far are returned.
    """
    results = []
    print(">>> [106爬虫] 启动...")
    today_str = datetime.now().strftime("%Y_%m_%d")
    main_headers = {"Authorization": CONFIG["primary_auth"], "User-Agent": "Mozilla/5.0"}
    try:
        resp = requests.get(CONFIG["base_url"], headers=main_headers, timeout=20)
        # `or []` also covers an explicit JSON null.
        proxies = resp.json().get('proxies') or []
        for item in proxies:
            if not isinstance(item, dict):
                continue  # malformed entry must not abort the whole crawl
            name = item.get('name') or ''
            if not name.lower().endswith('_data'):
                continue
            name_upper = name.upper()
            if "TOWER" not in name_upper:
                continue
            # "TOWER_" names serve JSON; every other "TOWER" name serves a binary file.
            is_tower_underscore = "TOWER_" in name_upper

            # Base result record for this proxy.
            data_packet = {
                'source': '106网站',
                'name': name,
                'status': '正常',
                'value': '',
                'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'raw_json': {},
                'temp_file': None
            }
            if str(item.get('status')).lower() != 'online':
                data_packet['status'] = '离线'
                data_packet['value'] = f"状态: {item.get('status')}"
                results.append(data_packet)
                continue

            try:
                _crawl_106_proxy(item, name, is_tower_underscore, today_str, data_packet)
            except Exception as e:
                # Per-proxy boundary: record the failure and carry on.
                data_packet['status'] = '异常'
                data_packet['value'] = str(e)[:50]
            results.append(data_packet)
    except Exception as e:
        # Top-level boundary: keep whatever was collected so far.
        logging.exception("106 Crawler Error: %s", e)
    return results