打包上传的2.0版本
This commit is contained in:
2
2_1banben/services/__init__.py
Normal file
2
2_1banben/services/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
# services/__init__.py
|
||||
# 这是一个空文件,用于将 services 文件夹标识为 Python 包。
|
||||
37
2_1banben/services/core.py
Normal file
37
2_1banben/services/core.py
Normal file
@ -0,0 +1,37 @@
|
||||
# services/core.py
|
||||
import logging
|
||||
import threading
|
||||
from .crawler_106 import run_106_logic
|
||||
from .crawler_82 import run_82_logic
|
||||
|
||||
task_lock = threading.Lock()


def execute_monitor_task():
    """Run every crawler once and return the combined result.

    Returns:
        dict with keys:
            'device_list': merged list of item dicts from both crawlers,
            'target_time': None (per-item timestamps live on each item),
            'temp_file_path': None (legacy field; file paths live on each item),
        or None when a previous run is still in progress.
    """
    # Atomic check-and-acquire. The original did `if task_lock.locked(): return`
    # followed by `with task_lock:` — a race: two callers could both pass the
    # check, and the loser would block and run anyway (serialized) instead of
    # being skipped. acquire(blocking=False) removes the window entirely.
    if not task_lock.acquire(blocking=False):
        logging.warning(">>> 任务正在运行中,跳过")
        return None
    try:
        logging.info(">>> 开始执行监控任务...")

        # 1. Fetch the item list from the "106" site crawler.
        list_106 = run_106_logic()

        # 2. Fetch the item list from the "82" site crawler.
        list_82 = run_82_logic()

        # 3. Merge both lists into a single flat list.
        combined_list = list_106 + list_82

        logging.info(f">>> 任务完成,共获取 {len(combined_list)} 条数据")

        return {
            'device_list': combined_list,
            'target_time': None,  # each item carries its own timestamp
            'temp_file_path': None  # legacy field; each item carries its own file path
        }
    finally:
        task_lock.release()
|
||||
159
2_1banben/services/crawler_106.py
Normal file
159
2_1banben/services/crawler_106.py
Normal file
@ -0,0 +1,159 @@
|
||||
# services/crawler_106.py
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from config import Config
|
||||
|
||||
# Crawler settings for the "106" site (base_url, primary_auth, login_payload).
CONFIG = Config.CRAWLER_CONFIG["106"]
|
||||
|
||||
|
||||
def get_temp_dir():
    """Return (creating it if necessary) the shared temp directory.

    The directory is <project_root>/instance/temp, where project_root is the
    parent of the package that contains this module.
    """
    base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    temp_dir = os.path.join(base_dir, 'instance', 'temp')
    # exist_ok avoids the exists()/makedirs() race of the original, which
    # could raise FileExistsError when two crawler threads create it at once.
    os.makedirs(temp_dir, exist_ok=True)
    return temp_dir
|
||||
|
||||
|
||||
def get_106_dynamic_token(port):
    """Log in to the service behind *port* and return its auth token.

    Returns the token string (quotes stripped) on HTTP 200, or None on any
    failure (network error, timeout, non-200 status).
    """
    try:
        login_url = f"http://106.75.72.40:{port}/api/login"
        resp = requests.post(login_url, json=CONFIG["login_payload"], timeout=10)
        # The endpoint returns the token as a quoted string body.
        return resp.text.strip().replace('"', '') if resp.status_code == 200 else None
    # Narrowed from a bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit. All transport-level failures derive from RequestException.
    except requests.RequestException:
        return None
|
||||
|
||||
|
||||
def find_closest_item(items, is_date_level=True):
    """Pick the entry whose timestamp is closest to now.

    Args:
        items: list of dicts from the file-browser API; each may carry
            'name', 'path' and (for files) an ISO-8601 'modified' field.
        is_date_level: True  -> parse the item name as a "%Y_%m_%d" folder,
                       False -> parse the item's 'modified' timestamp instead.

    Returns:
        A tuple (abs_seconds_from_now, item, parsed_name) for the closest
        entry, or None when items is empty/not a list or nothing parses.
    """
    if not items or not isinstance(items, list):
        return None
    now = datetime.now()
    scored = []
    for item in items:
        name_val = item.get('name', '')
        # Fall back to the last path segment when 'name' is missing/empty.
        target_str = name_val or item.get('path', '').split('/')[-1]
        try:
            if is_date_level:
                parsed = datetime.strptime(target_str, "%Y_%m_%d")
            else:
                # 'modified' is ISO-8601, possibly with a trailing 'Z'.
                mod_str = item.get('modified', '')
                parsed = datetime.fromisoformat(mod_str.replace('Z', '+00:00'))
            diff = abs((now - parsed.replace(tzinfo=None)).total_seconds())
            scored.append((diff, item, target_str))
        # Narrowed from a bare `except:`; unparsable entries are skipped.
        except (ValueError, TypeError, AttributeError):
            continue
    # min() is O(n); the original sorted the whole list just to take [0].
    # Ties resolve to the first occurrence, matching the stable sort before.
    return min(scored, key=lambda t: t[0]) if scored else None
|
||||
|
||||
|
||||
def run_106_logic():
    """Crawl the "106" file-server site and return a list of result dicts.

    Fetches the proxy list from CONFIG["base_url"], keeps only entries whose
    name ends with "_data" and contains "TOWER", then for each online proxy:
    obtains a per-port token, locates today's date folder, picks the most
    recently modified file, and either downloads it as a binary .db file
    ("TOWER" without underscore) or reads it as JSON ("TOWER_" variants).

    Returns:
        list of data_packet dicts, each with keys: source, name, status,
        value, target_time, raw_json, temp_file. Entries that fail keep
        status '异常' / '离线' with a diagnostic in 'value'.
    """
    results = []
    print(">>> [106爬虫] 启动...")
    today_str = datetime.now().strftime("%Y_%m_%d")
    main_headers = {"Authorization": CONFIG["primary_auth"], "User-Agent": "Mozilla/5.0"}

    try:
        resp = requests.get(CONFIG["base_url"], headers=main_headers, timeout=20)
        proxies = resp.json().get('proxies', [])

        for item in proxies:
            name = item.get('name', '')
            # Only "*_data" proxies are of interest.
            if not name.lower().endswith('_data'): continue
            name_upper = name.upper()
            # "TOWER_x" and plain "TOWERx" variants use different API casing
            # and different content handling (JSON vs binary) below.
            is_tower_underscore = "TOWER_" in name_upper
            is_tower_i = "TOWER" in name_upper and not is_tower_underscore
            if not (is_tower_underscore or is_tower_i): continue

            # Build the base data packet; fields are overwritten as we learn more.
            data_packet = {
                'source': '106网站',
                'name': name,
                'status': '正常',
                'value': '',
                'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'raw_json': {},
                'temp_file': None
            }

            # Offline proxies are recorded and skipped.
            if str(item.get('status')).lower() != 'online':
                data_packet['status'] = '离线'
                data_packet['value'] = f"状态: {item.get('status')}"
                results.append(data_packet)
                continue

            try:
                port = item.get('conf', {}).get('remote_port')
                token = get_106_dynamic_token(port)
                if not token:
                    data_packet['status'] = '异常'
                    data_packet['value'] = "Token获取失败"
                    results.append(data_packet)
                    continue

                headers = {"Authorization": CONFIG["primary_auth"], "x-auth": token}
                # Path casing differs between the two proxy families.
                api_root = "/api/resources/Data/" if is_tower_underscore else "/api/resources/data/"

                # Step 1: list date folders, pick the one closest to now.
                res1 = requests.get(f"http://106.75.72.40:{port}{api_root}", headers=headers, timeout=10)
                best_date = find_closest_item(res1.json().get('items', []), True)

                # Only today's folder counts as fresh data.
                if not best_date or best_date[2] != today_str:
                    data_packet['value'] = "未找到今日文件夹"
                    data_packet['target_time'] = best_date[2] if best_date else "N/A"
                    results.append(data_packet)
                    continue

                data_packet['target_time'] = best_date[2]  # actual data timestamp
                date_path = f"{api_root}{best_date[2]}/"
                # Step 2: list files in today's folder, pick the most recent one.
                res2 = requests.get(f"http://106.75.72.40:{port}{date_path}", headers=headers, timeout=10)
                best_file = find_closest_item(res2.json().get('items', []), False)

                if not best_file:
                    data_packet['value'] = "今日文件夹为空"
                    results.append(data_packet)
                    continue

                file_item = best_file[1]
                full_path = file_item.get('path') or f"{date_path}{file_item.get('name')}"

                # Core logic: fetch the file content.
                if is_tower_i:
                    # Binary branch: download the raw file to the temp dir.
                    download_url = f"http://106.75.72.40:{port}/api/raw{full_path}"
                    res3 = requests.get(download_url, headers=headers, timeout=20, stream=True)
                    if res3.status_code == 200:
                        safe_name = f"{name}_{datetime.now().strftime('%H%M%S')}.db"
                        temp_path = os.path.join(get_temp_dir(), safe_name)
                        with open(temp_path, 'wb') as f:
                            f.write(res3.content)

                        data_packet['temp_file'] = temp_path  # handed on to the API layer
                        data_packet['value'] = f"Binary Downloaded: {len(res3.content)} bytes"
                        data_packet['raw_json'] = file_item  # file metadata stands in for raw data
                    else:
                        data_packet['status'] = '异常'
                        data_packet['value'] = f"下载失败: {res3.status_code}"
                else:
                    # JSON branch: read the file via the resources API.
                    file_api_url = f"http://106.75.72.40:{port}/api/resources{full_path}"
                    res3 = requests.get(file_api_url, headers=headers, timeout=20)
                    try:
                        json_content = res3.json()
                        data_packet['raw_json'] = json_content  # keep the complete payload
                        data_packet['value'] = json_content.get('content', '')
                    # NOTE(review): bare except — swallows everything, not just
                    # JSON decode errors; consider narrowing to ValueError.
                    except:
                        data_packet['value'] = "JSON解析失败"

                results.append(data_packet)

            except Exception as e:
                # Per-proxy failure: record a truncated diagnostic, keep going.
                data_packet['status'] = '异常'
                data_packet['value'] = str(e)[:50]
                results.append(data_packet)

    except Exception as e:
        # Top-level failure (proxy list unreachable, bad JSON, ...): log and
        # return whatever was collected so far.
        logging.error(f"106 Crawler Error: {e}")

    return results
|
||||
62
2_1banben/services/crawler_82.py
Normal file
62
2_1banben/services/crawler_82.py
Normal file
@ -0,0 +1,62 @@
|
||||
# services/crawler_82.py
|
||||
import requests
|
||||
import json
|
||||
import logging
|
||||
from lxml import etree
|
||||
from config import Config
|
||||
from datetime import datetime
|
||||
|
||||
# Crawler settings for the "82" site (base_url, login credentials).
CONFIG = Config.CRAWLER_CONFIG["82"]
|
||||
|
||||
|
||||
def run_82_logic():
    """Crawl the "82" weather site and return a list of result dicts.

    Logs in once, scrapes the station ids from the station-list page, then
    queries the latest weather data for every station.

    Returns:
        list of data_packet dicts with keys: source, name, status, value,
        target_time, raw_json, temp_file. Returns [] when the site is
        unreachable or the station page cannot be parsed.
    """
    results = []
    print(">>> [82爬虫] 启动...")
    session = requests.Session()

    try:
        # Authenticate once; the session cookie carries the login state.
        session.post(f"{CONFIG['base_url']}/login.php", data=CONFIG["login"], timeout=10)
        resp = session.post(f"{CONFIG['base_url']}/GetStationList.php", timeout=10)
        html = etree.HTML(resp.content)
        if html is None:
            return []

        # Station ids are the <option value="..."> entries of the page;
        # empty values are skipped.
        stations = html.xpath('//option/@value')
        for sid in [s for s in stations if s]:
            data_packet = {
                'source': '82网站',
                'name': str(sid),
                'status': '正常',
                'value': '',
                'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'raw_json': {},
                'temp_file': None
            }
            try:
                # The endpoint expects the raw station id as the POST body.
                r = session.post(f"{CONFIG['base_url']}/getLastWeatherData.php", data=str(sid),
                                 headers={'Content-Type': 'text/plain'}, timeout=10)
                try:
                    data = r.json()
                # Narrowed from a bare `except:`; requests' JSONDecodeError
                # subclasses ValueError. Anything else is handled by the
                # per-station handler below.
                except ValueError:
                    data = None

                if data:
                    d_list = data.get('date', [])
                    latest = str(d_list[-1]) if d_list else "N/A"
                    data_packet['target_time'] = latest
                    data_packet['value'] = f"Data Points: {len(d_list)}"
                    data_packet['raw_json'] = data  # keep the complete JSON payload
                else:
                    data_packet['status'] = '异常'
                    data_packet['value'] = "返回空数据"

            except Exception as e:
                # Log the cause instead of discarding it (the original bound
                # `e` but never used it).
                logging.warning(f"82 station {sid} failed: {e}")
                data_packet['status'] = '异常'
                data_packet['value'] = "单个采集失败"

            results.append(data_packet)

    except Exception as e:
        # Top-level failure: log and return whatever was collected so far.
        logging.error(f"82 Crawler Error: {e}")

    return results
|
||||
Reference in New Issue
Block a user