From f936973b8df9e15eca5cef60051ca4d4e76b6d9a Mon Sep 17 00:00:00 2001 From: Nathan Woodburn Date: Fri, 13 Jun 2025 23:43:41 +1000 Subject: [PATCH] feat: Add status check speedups --- main.py | 12 +- server.py | 546 ++++++++++++++++++++++---------------- templates/index_fast.html | 146 ++++++++++ 3 files changed, 473 insertions(+), 231 deletions(-) create mode 100644 templates/index_fast.html diff --git a/main.py b/main.py index 57ab5e2..e4751b1 100644 --- a/main.py +++ b/main.py @@ -1,13 +1,13 @@ import time import signal import threading +import concurrent.futures from flask import Flask -from server import app +from server import app, node_check_executor import server from gunicorn.app.base import BaseApplication import os import dotenv -import concurrent.futures import schedule @@ -58,6 +58,10 @@ def run_gunicorn(): def signal_handler(sig, frame): print("Shutting down gracefully...", flush=True) stop_event.set() + + # Shutdown the node check executor + print("Shutting down thread pools...", flush=True) + node_check_executor.shutdown(wait=False) if __name__ == '__main__': @@ -79,6 +83,10 @@ if __name__ == '__main__': finally: stop_event.set() scheduler_future.cancel() + + # Make sure to shut down node check executor + node_check_executor.shutdown(wait=False) + try: scheduler_future.result(timeout=5) except concurrent.futures.CancelledError: diff --git a/server.py b/server.py index ef46839..5a2b7cb 100644 --- a/server.py +++ b/server.py @@ -38,11 +38,22 @@ import functools import io import brotli from io import BytesIO +import concurrent.futures +from threading import Lock # Set up logging BEFORE attempting imports that might fail logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) +# Set up ThreadPoolExecutor for parallel node checking +# Use a reasonable number of workers based on CPU cores +node_check_executor = concurrent.futures.ThreadPoolExecutor( + max_workers=min(32, os.cpu_count() * 4) # Max 32 workers or 4x CPU cores +) + +# Create a lock for thread safety when updating cache +cache_lock = Lock() + dotenv.load_dotenv() # Configure caching @@ -207,7 +218,9 @@ def retry(max_attempts=3, delay_seconds=1): while attempts < max_attempts: try: return func(*args, **kwargs) - except (socket.timeout, socket.error, dns.exception.Timeout, requests.exceptions.RequestException) as e: + except (socket.timeout, socket.error, dns.exception.Timeout, + requests.exceptions.RequestException, ConnectionRefusedError, + ConnectionResetError, OSError, ssl.SSLError) as e: attempts += 1 last_error = e logger.warning(f"Attempt {attempts} failed with error: {e} - retrying in {delay_seconds} seconds") @@ -219,12 +232,13 @@ def retry(max_attempts=3, delay_seconds=1): return decorator +# Optimize socket timeout settings @retry(max_attempts=3, delay_seconds=2) def check_plain_dns(ip: str) -> bool: resolver = dns.resolver.Resolver() resolver.nameservers = [ip] - resolver.timeout = 5 # Set a reasonable timeout - resolver.lifetime = 5 # Total timeout for the query + resolver.timeout = 3 # Reduced from 5 seconds to 3 seconds + resolver.lifetime = 3 # Reduced from 5 seconds to 3 seconds try: result = resolver.resolve("1.wdbrn", "TXT") @@ -273,13 +287,13 @@ def check_doh(ip: str) -> dict: ) wireframe_request = request.encode() + dns_query - # Create socket with timeout - sock = socket.create_connection((ip, 443), timeout=10) + # Create socket with reduced timeout + sock = socket.create_connection((ip, 443), timeout=5) # Reduced from 10 to 5 
seconds context = ssl.create_default_context() context.check_hostname = False # Skip hostname verification for IP-based connection ssock = context.wrap_socket(sock, server_hostname="hnsdoh.com") - ssock.settimeout(10) # Set a timeout for socket operations + ssock.settimeout(5) # Reduced from 10 to 5 seconds ssock.sendall(wireframe_request) response_data = b"" @@ -354,7 +368,7 @@ def check_dot(ip: str) -> bool: q = dns.message.make_query(qname, dns.rdatatype.TXT) try: response = dns.query.tls( - q, ip, timeout=5, port=853, server_hostname="hnsdoh.com" + q, ip, timeout=3, port=853, server_hostname="hnsdoh.com" # Reduced from 5 to 3 seconds ) if response.rcode() == dns.rcode.NOERROR: for rrset in response.answer: @@ -382,12 +396,12 @@ def verify_cert(ip: str, port: int) -> dict: ssock = None try: - sock = socket.create_connection((ip, port), timeout=10) + sock = socket.create_connection((ip, port), timeout=5) # Reduced from 10 to 5 seconds # Wrap the socket in SSL/TLS context = ssl.create_default_context() context.check_hostname = False # Skip hostname verification for IP-based connection ssock = context.wrap_socket(sock, server_hostname="hnsdoh.com") - ssock.settimeout(10) # Set timeout for socket operations + ssock.settimeout(5) # Reduced from 10 to 5 seconds # Retrieve the server's certificate cert = ssock.getpeercert() @@ -469,7 +483,7 @@ def format_last_check(last_log: datetime) -> str: def check_nodes() -> list: - global nodes + global nodes, _node_status_cache, _node_status_cache_time if last_log > datetime.now() - relativedelta.relativedelta(minutes=1): # Load the last log with open(f"{log_dir}/node_status.json", "r") as file: @@ -487,53 +501,43 @@ def check_nodes() -> list: if len(nodes) == 0: nodes = get_node_list() + # Use ThreadPoolExecutor to check nodes in parallel + futures = {} node_status = [] + + # Submit all node checks to the executor for ip in nodes: - logger.info(f"Checking node {ip}") + futures[node_check_executor.submit(check_single_node, ip)] = ip + + # Collect results as they complete + for future in concurrent.futures.as_completed(futures): + ip = futures[future] try: - plain_dns_result = check_plain_dns(ip) - doh_check = check_doh(ip) - dot_result = check_dot(ip) - cert_result = verify_cert(ip, 443) - cert_853_result = verify_cert(ip, 853) - - node_status.append( - { - "ip": ip, - "name": node_names[ip] if ip in node_names else ip, - "location": ( - node_locations[ip] if ip in node_locations else "Unknown" - ), - "plain_dns": plain_dns_result, - "doh": doh_check["status"], - "doh_server": doh_check["server"], - "dot": dot_result, - "cert": cert_result, - "cert_853": cert_853_result, - } - ) - logger.info(f"Node {ip} check complete") + node_result = future.result() + node_status.append(node_result) except Exception as e: - logger.error(f"Error checking node {ip}: {e}") - # Add a failed entry for this node to ensure it's still included - node_status.append( - { - "ip": ip, - "name": node_names[ip] if ip in node_names else ip, - "location": ( - node_locations[ip] if ip in node_locations else "Unknown" - ), - "plain_dns": False, - "doh": False, - "doh_server": [], - "dot": False, - "cert": {"valid": False, "expires": "ERROR", "expiry_date": "ERROR"}, - "cert_853": {"valid": False, "expires": "ERROR", "expiry_date": "ERROR"}, - } - ) + logger.error(f"Error processing results for node {ip}: {e}") + # Ensure a failed node entry is still included + node_status.append({ + "ip": ip, + "name": node_names[ip] if ip in node_names else ip, + "location": (node_locations[ip] if ip 
in node_locations else "Unknown"), + "plain_dns": False, + "doh": False, + "doh_server": [], + "dot": False, + "cert": {"valid": False, "expires": "ERROR", "expiry_date": "ERROR"}, + "cert_853": {"valid": False, "expires": "ERROR", "expiry_date": "ERROR"}, + }) # Save the node status to a file log_status(node_status) + + # Update the in-memory cache with thread safety + with cache_lock: + _node_status_cache = node_status + _node_status_cache_time = datetime.now() + logger.info("Finished checking nodes") # Send notifications if any nodes are down @@ -567,156 +571,84 @@ def check_nodes() -> list: return node_status -# Optimize check_nodes_from_log function with in-memory caching -def check_nodes_from_log() -> list: - global last_log, _node_status_cache, _node_status_cache_time - - # Check if we have a valid cache - current_time = datetime.now() - staleness_threshold_str = os.getenv("STALENESS_THRESHOLD_MINUTES", "15") - +def check_single_node(ip): + """Check a single node and return its status.""" + logger.info(f"Checking node {ip}") try: - staleness_threshold = int(staleness_threshold_str) - except ValueError: - logger.warning(f"Invalid STALENESS_THRESHOLD_MINUTES value: {staleness_threshold_str}") - staleness_threshold = 15 - - # Use in-memory cache if it's fresh enough - if (_node_status_cache is not None and _node_status_cache_time is not None and - current_time < _node_status_cache_time + relativedelta.relativedelta(minutes=staleness_threshold/2)): - logger.info(f"Using in-memory cache from {format_last_check(_node_status_cache_time)}") - return _node_status_cache - - # Otherwise load from disk or run a new check - try: - with open(f"{log_dir}/node_status.json", "r") as file: - data = json.load(file) + # Add timeout handling for individual checks + plain_dns_result = False + doh_result = {"status": False, "server": []} + dot_result = False + cert_result = {"valid": False, "expires": "ERROR", "expiry_date": "ERROR"} + cert_853_result = {"valid": False, "expires": "ERROR", "expiry_date": "ERROR"} - newest = { - "date": datetime.now() - relativedelta.relativedelta(years=1), - "nodes": [], + # Use timeout to limit time spent on each check + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + future_plain_dns = executor.submit(check_plain_dns, ip) + future_doh = executor.submit(check_doh, ip) + future_dot = executor.submit(check_dot, ip) + future_cert = executor.submit(verify_cert, ip, 443) + future_cert_853 = executor.submit(verify_cert, ip, 853) + + # Collect results with timeout + try: + plain_dns_result = future_plain_dns.result(timeout=5) + except (concurrent.futures.TimeoutError, Exception) as e: + logger.warning(f"Plain DNS check timed out for {ip}: {str(e)}") + + try: + doh_result = future_doh.result(timeout=5) + except (concurrent.futures.TimeoutError, Exception) as e: + logger.warning(f"DoH check timed out for {ip}: {str(e)}") + + try: + dot_result = future_dot.result(timeout=5) + except (concurrent.futures.TimeoutError, Exception) as e: + logger.warning(f"DoT check timed out for {ip}: {str(e)}") + + try: + cert_result = future_cert.result(timeout=5) + except (concurrent.futures.TimeoutError, Exception) as e: + logger.warning(f"Cert check timed out for {ip}: {str(e)}") + + try: + cert_853_result = future_cert_853.result(timeout=5) + except (concurrent.futures.TimeoutError, Exception) as e: + logger.warning(f"Cert 853 check timed out for {ip}: {str(e)}") + + node_status = { + "ip": ip, + "name": node_names[ip] if ip in node_names else ip, + "location": ( + 
node_locations[ip] if ip in node_locations else "Unknown" + ), + "plain_dns": plain_dns_result, + "doh": doh_result["status"], + "doh_server": doh_result["server"], + "dot": dot_result, + "cert": cert_result, + "cert_853": cert_853_result, } - - for entry in data: - if datetime.strptime(entry["date"], "%Y-%m-%d %H:%M:%S") > newest["date"]: - newest = entry - newest["date"] = datetime.strptime(newest["date"], "%Y-%m-%d %H:%M:%S") - - node_status = newest["nodes"] - - if current_time > newest["date"] + relativedelta.relativedelta(minutes=staleness_threshold): - logger.warning(f"Data is stale (older than {staleness_threshold} minutes), triggering immediate check") - node_status = check_nodes() - else: - last_log = newest["date"] - logger.info(f"Using cached node status from {format_last_check(last_log)}") - - # Update the in-memory cache - _node_status_cache = node_status - _node_status_cache_time = current_time - - except (FileNotFoundError, json.JSONDecodeError) as e: - logger.error(f"Error reading node status file: {e}") - logger.info("Running initial node check") - node_status = check_nodes() - - # Update the in-memory cache - _node_status_cache = node_status - _node_status_cache_time = current_time - - return node_status - - -def send_notification(title, description, author): - discord_hook = os.getenv("DISCORD_HOOK") - if discord_hook: - data = { - "content": "", - "embeds": [ - { - "title": title, - "description": description, - "url": "https://status.hnsdoh.com", - "color": 5814783, - "author": { - "name": author, - "icon_url": "https://status.hnsdoh.com/favicon.png", - }, - } - ], - "username": "HNSDoH", - "avatar_url": "https://status.hnsdoh.com/favicon.png", - "attachments": [], + logger.info(f"Node {ip} check complete") + return node_status + except Exception as e: + logger.error(f"Error checking node {ip}: {e}") + # Add a failed entry for this node to ensure it's still included + return { + "ip": ip, + "name": node_names[ip] if ip in node_names else ip, + "location": ( + node_locations[ip] if ip in node_locations else "Unknown" + ), + "plain_dns": False, + "doh": False, + "doh_server": [], + "dot": False, + "cert": {"valid": False, "expires": "ERROR", "expiry_date": "ERROR"}, + "cert_853": {"valid": False, "expires": "ERROR", "expiry_date": "ERROR"}, } - response = requests.post(discord_hook, json=data) - print("Sent notification", flush=True) - else: - print("No discord hook", flush=True) -def send_down_notification(node): - global sent_notifications - - # Check if a notification has already been sent - if node["ip"] not in sent_notifications: - sent_notifications[node["ip"]] = datetime.strftime( - datetime.now(), "%Y-%m-%d %H:%M:%S" - ) - else: - last_send = datetime.strptime( - sent_notifications[node["ip"]], "%Y-%m-%d %H:%M:%S" - ) - - if last_send > datetime.now() - relativedelta.relativedelta(hours=1): - print( - f"Notification already sent for {node['name']} in the last hr", - flush=True, - ) - return - - # Only send certain notifications once per day - if node["plain_dns"] and node["doh"] and node["dot"]: - if last_send > datetime.now() - relativedelta.relativedelta(days=1): - print( - f"Notification already sent for {node['name']} in the last day", - flush=True, - ) - return - - # Save the notification to the file - sent_notifications[node["ip"]] = datetime.strftime( - datetime.now(), "%Y-%m-%d %H:%M:%S" - ) - with open(f"{log_dir}/sent_notifications.json", "w") as file: - json.dump(sent_notifications, file, indent=4) - - title = f"{node['name']} is down" - - description 
= f"{node['name']} ({node['ip']}) is down with the following issues:\n" - if not node["plain_dns"]: - description += "- Plain DNS is down\n" - if not node["doh"]: - description += "- DoH is down\n" - if not node["dot"]: - description += "- DoT is down\n" - if not node["cert"]["valid"]: - description += "- Certificate on port 443 is invalid\n" - if not node["cert_853"]["valid"]: - description += "- Certificate on port 853 is invalid\n" - - if node["plain_dns"] and node["doh"] and node["dot"]: - if node["cert"]["valid"] and node["cert_853"]["valid"]: - description = f"The certificate on {node['name']} ({node['ip']}) is expiring soon\n" - title = f"{node['name']} certificate is expiring soon" - # Also add the expiry date of the certificates - description += "\nCertificate expiry dates:\n" - description += f"- Certificate on port 443 expires {node['cert']['expires']}\n" - description += f"- Certificate on port 853 expires {node['cert_853']['expires']}\n" - send_notification(title, description, node["name"]) - - -# endregion - # region File logs @@ -786,9 +718,9 @@ def create_default_node_dict(): "name": "", "location": "", "ip": "", - "plain_dns": {"last_down": "Never", "percentage": 0}, - "doh": {"last_down": "Never", "percentage": 0}, - "dot": {"last_down": "Never", "percentage": 0}, + "plain_dns": {"last_down": "never", "percentage": 0}, + "doh": {"last_down": "never", "percentage": 0}, + "dot": {"last_down": "never", "percentage": 0}, } def create_default_counts_dict(): @@ -804,9 +736,9 @@ def summarize_history(history: list) -> dict: nodes_status = defaultdict(create_default_node_dict) overall_status = { - "plain_dns": {"last_down": "Never", "percentage": 0}, - "doh": {"last_down": "Never", "percentage": 0}, - "dot": {"last_down": "Never", "percentage": 0}, + "plain_dns": {"last_down": "never", "percentage": 0}, + "doh": {"last_down": "never", "percentage": 0}, + "dot": {"last_down": "never", "percentage": 0}, } # Collect data @@ -834,7 +766,7 @@ def summarize_history(history: list) -> dict: for key in ["plain_dns", "doh", "dot"]: if node.get(key) == False: # Check if the last downtime is more recent - if nodes_status[ip][key]["last_down"] == "Never": + if nodes_status[ip][key]["last_down"] == "never": nodes_status[ip][key]["last_down"] = date.strftime("%Y-%m-%d %H:%M:%S") elif date > datetime.strptime(nodes_status[ip][key]["last_down"], "%Y-%m-%d %H:%M:%S"): nodes_status[ip][key]["last_down"] = date.strftime("%Y-%m-%d %H:%M:%S") @@ -875,7 +807,7 @@ def summarize_history(history: list) -> dict: last_downs = [ nodes_status[ip][key]["last_down"] for ip in nodes_status - if nodes_status[ip][key]["last_down"] != "Never" + if nodes_status[ip][key]["last_down"] != "never" ] if last_downs: overall_status[key]["last_down"] = max(last_downs) @@ -961,7 +893,7 @@ def api_index(): # Cache node status for API requests @app.route("/api/nodes") -@cache.cached(timeout=60) # Cache for 1 minute +@cache.cached(timeout=300) # Increased from 60s to 5 minutes def api_nodes(): node_status = check_nodes_from_log() return jsonify(node_status) @@ -1118,6 +1050,14 @@ def api_errors(): @app.route("/api/check/<ip>") @cache.cached(timeout=30) # Cache for 30 seconds def api_check(ip: str): + # Verify IP is one of the nodes + global nodes + if not nodes: + return jsonify({"error": "No nodes available"}), 404 + if ip not in nodes: + return jsonify({"error": f"Node {ip} not found"}), 404 + + logger.info(f"Checking node {ip}") data = { "ip": ip, @@ -1150,8 +1090,19 @@ def api_check(ip: str): # region Main routes # Cache the
main page rendering @app.route("/") -@cache.cached(timeout=60, query_string=True) # Cache for 1 minute, respect query params +@cache.cached(timeout=120, query_string=True) # Increased from 60s to 2 minutes def index(): + # Check for fast_load parameter to provide a quicker initial page load + fast_load = request.args.get('fast_load', 'false').lower() == 'true' + + if fast_load: + # Return a minimal template that will load data via JavaScript + return render_template( + "index_fast.html", + api_url=request.url_root + "api" + ) + + # Original slower but complete load node_status = check_nodes_from_log() alerts = [] @@ -1218,7 +1169,7 @@ def index(): # Convert time to relative time for node in history_summary["nodes"]: for key in ["plain_dns", "doh", "dot"]: - if node[key]["last_down"] == "Never": + if node[key]["last_down"] == "never": node[key]["last_down"] = "over 30 days ago" else: node[key]["last_down"] = format_last_check( @@ -1226,7 +1177,7 @@ def index(): ) for key in ["plain_dns", "doh", "dot"]: - if history_summary["overall"][key]["last_down"] == "Never": + if history_summary["overall"][key]["last_down"] == "never": continue history_summary["overall"][key]["last_down"] = format_last_check( datetime.strptime(history_summary["overall"][key]["last_down"], "%Y-%m-%d %H:%M:%S") @@ -1307,20 +1258,16 @@ def scheduled_node_check(): global nodes, _node_status_cache, _node_status_cache_time nodes = [] # Reset node list to force refresh - # Run the check and update in-memory cache + # Run the check (which now uses ThreadPoolExecutor) node_status = check_nodes() - _node_status_cache = node_status - _node_status_cache_time = datetime.now() - # Clear relevant caches cache.delete_memoized(api_nodes) cache.delete_memoized(api_errors) cache.delete_memoized(index) - logger.info("Completed scheduled node check and updated caches") except Exception as e: logger.error(f"Error in scheduled node check: {e}") - + def scheduler_listener(event): """Listener for scheduler events""" if event.exception: @@ -1339,7 +1286,6 @@ def start_scheduler(): check_interval = 5 logger.info(f"Setting up scheduler to run every {check_interval} minutes") - # Add the job to the scheduler scheduler.add_job( scheduled_node_check, @@ -1347,10 +1293,9 @@ def start_scheduler(): id='node_check_job', replace_existing=True ) - + logger.info(f"Setting up scheduler to run every {check_interval} minutes") # Add listener for job events scheduler.add_listener(scheduler_listener, EVENT_JOB_ERROR | EVENT_JOB_EXECUTED) - # Start the scheduler if it's not already running if not scheduler.running: scheduler.start() @@ -1364,10 +1309,6 @@ def signal_handler(sig, frame): logger.info("Scheduler shut down") sys.exit(0) -# Register the signal handlers for Docker -signal.signal(signal.SIGINT, signal_handler) -signal.signal(signal.SIGTERM, signal_handler) - # Initialize the scheduler when the app starts without relying on @before_first_request # which is deprecated in newer Flask versions with app.app_context(): @@ -1388,32 +1329,26 @@ def add_compression(response): 'Content-Encoding' in response.headers or response.direct_passthrough): return response - # Only compress specific MIME types content_type = response.headers.get('Content-Type', '') compressible_types = [ 'text/html', 'text/css', 'text/plain', - 'application/javascript', + 'application/javascript', 'application/json', 'application/xml', 'text/xml' ] - if not any(t in content_type for t in compressible_types): return response - accept_encoding = request.headers.get('Accept-Encoding', '') - 
if 'br' in accept_encoding: try: # Get the response content response_data = response.get_data() - # Compress with Brotli compressed_data = brotli.compress(response_data, quality=6) - # Only apply Brotli if it results in smaller size if len(compressed_data) < len(response_data): response.set_data(compressed_data) @@ -1422,10 +1357,163 @@ def add_compression(response): except Exception as e: logger.warning(f"Brotli compression failed: {e}") # If compression fails, we just return the uncompressed response - + return response +def check_nodes_from_log(): + """Read the most recent node status from the log file.""" + global _node_status_cache, _node_status_cache_time + + # Return cached result if it's less than 2 minutes old (increased from 60s) + with cache_lock: + if _node_status_cache is not None and _node_status_cache_time is not None: + if (datetime.now() - _node_status_cache_time).total_seconds() < 120: + logger.debug("Using cached node status") + return _node_status_cache + + try: + # Load the last log + with open(f"{log_dir}/node_status.json", "r") as file: + data = json.load(file) + + newest = { + "date": datetime.now() - relativedelta.relativedelta(years=1), + "nodes": [], + } + + for entry in data: + entry_date = datetime.strptime(entry["date"], "%Y-%m-%d %H:%M:%S") + if entry_date > newest["date"]: + newest = entry + newest["date"] = entry_date + + # Update the cache + with cache_lock: + _node_status_cache = newest["nodes"] + _node_status_cache_time = datetime.now() + + return newest["nodes"] + except Exception as e: + logger.error(f"Error reading node status from log: {e}") + # If we can't read from the log, run a fresh check + return check_nodes() + +# Add a lightweight status function for quick status checks +@app.route("/api/quick-status") +@cache.cached(timeout=30) # Cache for 30 seconds +def quick_status(): + """Return a minimal status without expensive node checks""" + try: + # Load the last log + with open(f"{log_dir}/node_status.json", "r") as file: + data = json.load(file) + + if not data: + return jsonify({"status": "unknown", "last_check": "never"}) + + newest_entry = max(data, key=lambda x: datetime.strptime(x["date"], "%Y-%m-%d %H:%M:%S")) + last_check_time = format_last_check(datetime.strptime(newest_entry["date"], "%Y-%m-%d %H:%M:%S")) + + # Count nodes with issues + node_status = newest_entry["nodes"] + total_nodes = len(node_status) + nodes_with_issues = 0 + + for node in node_status: + if (not node["plain_dns"] or not node["doh"] or not node["dot"] or + not node["cert"]["valid"] or not node["cert_853"]["valid"]): + nodes_with_issues += 1 + + return jsonify({ + "status": "ok" if nodes_with_issues == 0 else "issues", + "last_check": last_check_time, + "total_nodes": total_nodes, + "nodes_with_issues": nodes_with_issues + }) + except Exception as e: + logger.error(f"Error getting quick status: {e}") + return jsonify({"status": "error", "message": str(e)})
+ +# Run the app with threading enabled if __name__ == "__main__": # The scheduler is already started in the app context above - # Run the Flask app - app.run(debug=True, port=5000, host="0.0.0.0") + # Run the Flask app with threading for better concurrency + app.run(debug=True, port=5000, host="0.0.0.0", threaded=True) diff --git a/templates/index_fast.html b/templates/index_fast.html new file mode 100644 index 0000000..563bb95 --- /dev/null +++ b/templates/index_fast.html @@ -0,0 +1,146 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>HNSDoH Status</title>
+    <link rel="icon" href="/favicon.png">
+    <!-- Minimal fast-load shell: the markup and script in this template are a representative sketch; the page pulls live data from the status API via JavaScript -->
+</head>
+<body>
+    <div class="header">
+        <h1>HNSDoH Status</h1>
+        <p>Monitoring the status of HNSDoH resolvers</p>
+    </div>
+
+    <div class="card">
+        <h2>Current Status</h2>
+        <p id="quick-status">Loading...</p>
+    </div>
+
+    <div class="card" id="full-status">
+        <p>Loading full status data...</p>
+        <p>This may take a few moments as we check all resolver nodes.</p>
+    </div>
+
+    <script>
+        // api_url is passed in by the Flask view as request.url_root + "api"
+        const apiUrl = "{{ api_url }}";
+
+        // Cheap summary first: served from the latest log entry via /api/quick-status
+        fetch(apiUrl + "/quick-status")
+            .then((response) => response.json())
+            .then((data) => {
+                document.getElementById("quick-status").textContent =
+                    data.status === "ok"
+                        ? "All systems operational (last check " + data.last_check + ")"
+                        : "Status: " + data.status;
+            })
+            .catch(() => {
+                document.getElementById("quick-status").textContent = "Status unavailable";
+            });
+
+        // Then fetch the full node list and render a simple per-node summary
+        fetch(apiUrl + "/nodes")
+            .then((response) => response.json())
+            .then((nodes) => {
+                const container = document.getElementById("full-status");
+                container.innerHTML = "";
+                nodes.forEach((node) => {
+                    const ok = node.plain_dns && node.doh && node.dot &&
+                        node.cert.valid && node.cert_853.valid;
+                    const line = document.createElement("p");
+                    line.textContent = node.name + " (" + node.location + "): " +
+                        (ok ? "OK" : "Issues detected");
+                    container.appendChild(line);
+                });
+            });
+    </script>
+</body>
+</html>