# -*- coding: utf-8 -*-
import atexit
import json
import os
import socket
import subprocess
import sys
import threading
import time
from pathlib import Path
from typing import Optional, Union, Dict, List

from Utils.LogManager import LogManager


class FlaskSubprocessManager:
    """Process-wide singleton that supervises the Flask child process.

    - A single watchdog thread is the only monitoring point.
    - Liveness is decided by TCP connects to ``comm_port`` / ``comm_port + 1``.
    - A dead child, or repeated port-check failures, triggers a restart
      (subject to a cooldown and a sliding-window rate limit).
    - Works both when running as a frozen executable and as a plain
      Python script.
    - After a successful restart the device snapshot is pushed again.

    ``stop()`` disarms the watchdog; ``start()`` re-arms it.
    """

    _instance = None
    # Guards singleton creation and every mutation of ``process`` /
    # ``_watchdog_running``.  Re-entrant because helpers nest.
    _lock = threading.RLock()

    _WATCHDOG_INTERVAL = 1.2        # seconds between watchdog ticks
    _PROBE_TIMEOUT = 0.4            # seconds for a single TCP probe
    _SNAPSHOT_READY_TIMEOUT = 10.0  # max wait for the child to listen

    def __new__(cls):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                try:
                    cls._instance._initialize()
                except BaseException:
                    # Do not keep a half-initialised singleton around.
                    cls._instance = None
                    raise
            return cls._instance

    # ========================= Initialisation =========================
    def _initialize(self) -> None:
        """Set up state, register the exit hook and clear stale listeners."""
        self.process: Optional[subprocess.Popen] = None
        self.comm_port = 34566

        self._watchdog_running = False
        self._stop_event = threading.Event()

        self._restart_cooldown = 5        # minimum seconds between restarts
        self._restart_fail_threshold = 3  # consecutive probe failures before restart
        self._restart_fail_count = 0

        self._restart_window = 600        # sliding window: 10 minutes
        self._restart_limit = 5           # max restart attempts per window
        self._restart_record: List[float] = []

        if os.name == "nt":
            # Hide the console window of spawned processes on Windows.
            si = subprocess.STARTUPINFO()
            si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            si.wShowWindow = 0
            self._si = si
        else:
            self._si = None

        atexit.register(self.stop)
        self._kill_orphans()
        LogManager.info("FlaskSubprocessManager 初始化完成", udid="flask")

    # ========================= Utilities =========================
    def _log(self, level, msg) -> None:
        """Print ``msg`` and forward it to LogManager at the given level.

        ``level`` is ``"info"`` or ``"warn"``; anything else logs as error.
        """
        print(msg)
        if level == "info":
            LogManager.info(msg, udid="flask")
        elif level == "warn":
            LogManager.warning(msg, udid="flask")
        else:
            LogManager.error(msg, udid="flask")

    def _kill_orphans(self) -> None:
        """Kill whatever still listens on 127.0.0.1:comm_port (Windows only).

        Best effort: failures are logged and never propagate.
        """
        if os.name != "nt":
            return
        try:
            out = subprocess.check_output(
                ["netstat", "-ano"],
                text=True,
                errors="replace",
                startupinfo=self._si,
            )
            needle = f"127.0.0.1:{self.comm_port}"
            for line in out.splitlines():
                if needle not in line or "LISTENING" not in line:
                    continue
                # The PID is the last column of ``netstat -ano`` output.
                pid = int(line.strip().split()[-1])
                if pid == os.getpid():
                    continue
                subprocess.run(
                    ["taskkill", "/F", "/PID", str(pid)],
                    capture_output=True,
                    startupinfo=self._si,
                )
                self._log("warn", f"[FlaskMgr] 杀死残留 Flask 实例 PID={pid}")
        except Exception as e:
            self._log("warn", f"[FlaskMgr] 清理残留进程失败: {e}")

    def _can_connect(self, port: int) -> bool:
        """Return True if a TCP connect to 127.0.0.1:``port`` succeeds."""
        try:
            with socket.create_connection(
                ("127.0.0.1", port), timeout=self._PROBE_TIMEOUT
            ):
                return True
        except OSError:
            return False

    def _port_alive(self) -> bool:
        """Return True if either ``comm_port`` or ``comm_port + 1`` accepts."""
        return self._can_connect(self.comm_port) or self._can_connect(
            self.comm_port + 1
        )

    def _wait_comm_port(self, timeout: float) -> bool:
        """Wait up to ``timeout`` seconds for ``comm_port`` to accept.

        Returns False on timeout or as soon as a stop is requested.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if self._stop_event.is_set():
                return False
            if self._can_connect(self.comm_port):
                return True
            self._stop_event.wait(0.3)
        return False

    # ========================= Start =========================
    def start(self) -> None:
        """Start the Flask child (if not already running) and arm the watchdog."""
        with self._lock:
            # An explicit start re-arms monitoring after an earlier stop().
            self._stop_event.clear()

            if self.process and self.process.poll() is None:
                self._log("warn", "[FlaskMgr] Flask 已在运行,跳过")
                return

            self._launch()

    def _build_command(self):
        """Return ``(cmd, cwd)`` for the child in frozen or script mode."""
        # ``sys.frozen`` is set when running from a packaged executable and
        # absent when the .py file is run directly by the interpreter.
        if bool(getattr(sys, "frozen", False)):
            # Packaged mode: re-invoke our own executable.
            exe = Path(sys.executable).resolve()
            return [str(exe), "--role=flask"], str(exe.parent)

        # Development mode: run Module/Main.py with the current interpreter.
        project_root = Path(__file__).resolve().parents[1]
        main_py = project_root / "Module" / "Main.py"
        cmd = [sys.executable, "-u", str(main_py), "--role=flask"]
        return cmd, str(project_root)

    def _launch(self) -> None:
        """Spawn the child, its stdout reader and (once) the watchdog.

        Unlike ``start()`` this does not touch the stop flag, so the
        watchdog can use it without overriding a concurrent ``stop()``.
        """
        with self._lock:
            # The child learns the comm port from its environment.
            env = os.environ.copy()
            env["FLASK_COMM_PORT"] = str(self.comm_port)

            cmd, cwd = self._build_command()
            self._log("info", f"[FlaskMgr] 启动 Flask: {cmd}")

            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                # Undecodable output must not kill the reader thread;
                # otherwise the pipe fills up and the child blocks.
                errors="replace",
                env=env,
                cwd=cwd,
                bufsize=1,
                startupinfo=self._si,
                start_new_session=True,
            )
            self.process = proc

            # Drain the child's stdout asynchronously and log each line.
            threading.Thread(
                target=self._read_stdout, args=(proc,), daemon=True
            ).start()

            # Only one watchdog thread may exist at a time.
            if not self._watchdog_running:
                threading.Thread(target=self._watchdog_loop, daemon=True).start()
                self._watchdog_running = True

            self._log("info", f"[FlaskMgr] Flask 子进程已启动 PID={proc.pid}")

    def _read_stdout(self, proc: Optional[subprocess.Popen] = None) -> None:
        """Forward every stdout line of ``proc`` to the log until EOF.

        ``proc`` defaults to the current ``self.process``; the reader works
        on that snapshot so a concurrent ``stop()`` cannot pull the object
        out from under it.
        """
        if proc is None:
            proc = self.process
        if not proc or not proc.stdout:
            return
        try:
            for line in iter(proc.stdout.readline, ""):
                if line:
                    self._log("info", f"[Flask] {line.rstrip()}")
        except (OSError, ValueError):
            # The pipe was closed underneath us; nothing more to read.
            pass

    # ========================= Stop =========================
    def stop(self) -> None:
        """Stop the child and disarm the watchdog until the next ``start()``."""
        with self._lock:
            # Without this the watchdog would resurrect the child.
            self._stop_event.set()
            self._terminate_process()

    def _terminate_process(self) -> None:
        """Terminate the current child (escalating to kill) and forget it."""
        with self._lock:
            proc = self.process
            if not proc:
                return
            try:
                try:
                    proc.terminate()
                except OSError:
                    pass

                try:
                    proc.wait(timeout=3)
                except (subprocess.TimeoutExpired, OSError):
                    pass

                if proc.poll() is None:
                    try:
                        proc.kill()
                        # Reap the child so it does not linger as a zombie.
                        proc.wait(timeout=3)
                    except (subprocess.TimeoutExpired, OSError):
                        pass

                self._log("warn", "[FlaskMgr] 已停止 Flask 子进程")
            finally:
                self.process = None

    # ========================= Watchdog =========================
    def _watchdog_loop(self) -> None:
        """Monitor the child until a stop is requested."""
        self._log("info", "[FlaskWD] 看门狗已启动")

        while True:
            self._stop_event.wait(self._WATCHDOG_INTERVAL)

            # Decide about exiting under the lock so that start() either
            # sees this thread as running or spawns a fresh one.
            with self._lock:
                if self._stop_event.is_set():
                    self._watchdog_running = False
                    break

            try:
                self._watchdog_tick()
            except Exception as e:
                # The watchdog must survive unexpected errors.
                self._log("error", f"[FlaskWD] 看门狗异常: {e}")

        self._log("info", "[FlaskWD] 看门狗已退出")

    def _watchdog_tick(self) -> None:
        """Run one health check and restart the child if needed."""
        # Snapshot: stop() may set self.process to None concurrently.
        proc = self.process

        # 1) Child exited.
        if proc is None or proc.poll() is not None:
            self._log("error", "[FlaskWD] Flask 子进程退出,准备重启")
            self._restart()
            return

        # 2) Ports unreachable.
        if not self._port_alive():
            self._restart_fail_count += 1
            self._log("warn", f"[FlaskWD] 端口检测失败 {self._restart_fail_count}/"
                              f"{self._restart_fail_threshold}")

            if self._restart_fail_count >= self._restart_fail_threshold:
                self._restart()
            return

        # 3) Ports healthy.
        self._restart_fail_count = 0

    # ========================= Restart core =========================
    def _restart(self) -> None:
        """Restart the child, honouring the rate limit and the cooldown."""
        now = time.time()

        # Sliding-window rate limit.
        self._restart_record = [
            t for t in self._restart_record if now - t < self._restart_window
        ]
        if len(self._restart_record) >= self._restart_limit:
            self._log("error", "[FlaskWD] 10 分钟内重启次数太多,暂停监控")
            return

        # Cooldown between consecutive attempts.
        if self._restart_record and now - self._restart_record[-1] < self._restart_cooldown:
            self._log("warn", "[FlaskWD] 冷却中,暂不重启")
            return

        self._log("warn", "[FlaskWD] >>> 重启 Flask 子进程 <<<")

        try:
            self._terminate_process()

            # Pause before relaunching; abort if a stop was requested.
            if self._stop_event.wait(1):
                return

            with self._lock:
                if self._stop_event.is_set():
                    return
                # Count the attempt before launching so a failing launch
                # is still subject to the cooldown and the rate limit.
                self._restart_record.append(now)
                self._launch()

            self._restart_fail_count = 0
        except Exception as e:
            self._log("error", f"[FlaskWD] 重启失败: {e}")
            return

        # Re-sync device state only after a successful relaunch.
        self._push_snapshot()

    # ========================= Device snapshot =========================
    def _push_snapshot(self) -> None:
        """Re-send every DeviceInfo model to the freshly started child.

        Waits (bounded) for the comm port first: the child cannot be
        listening the instant ``Popen`` returns, so sending immediately
        would drop the snapshot.  Best effort; failures are logged.
        """
        if not self._wait_comm_port(self._SNAPSHOT_READY_TIMEOUT):
            if not self._stop_event.is_set():
                self._log("warn", "[FlaskMgr] 通信端口未就绪,跳过设备快照推送")
            return

        try:
            # Imported lazily, as in the original code.
            from Module.DeviceInfo import DeviceInfo

            info = DeviceInfo()
            with info._lock:
                for m in info._models.values():
                    self.send(m.toDict())
        except Exception as e:
            self._log("warn", f"[FlaskMgr] 推送设备快照失败: {e}")

    # ========================= Send =========================
    def send(self, data: Union[str, Dict]) -> bool:
        """Send one newline-terminated message to the child's comm port.

        A ``dict`` is serialised to JSON first.  Returns True on success
        and False if the message could not be delivered.
        """
        if isinstance(data, dict):
            data = json.dumps(data, ensure_ascii=False)

        try:
            with socket.create_connection(
                ("127.0.0.1", self.comm_port), timeout=2
            ) as s:
                s.sendall((data + "\n").encode())
            return True
        except Exception:
            return False

    @classmethod
    def get_instance(cls):
        """Return the process-wide singleton instance."""
        return cls()