From 68c6d8dfa4a95ed2fb7841d11e192f6543710770 Mon Sep 17 00:00:00 2001
From: Nathan Woodburn
Date: Fri, 23 May 2025 10:45:15 +1000
Subject: [PATCH] feat: Add proxy pac generator

---
 .env.example         |  11 +++++
 README.md            |  44 ++++++++++++++++-
 server.py            | 139 ++++++++++++++++++++++++++++++++++++++++++-
 templates/index.html |   2 +
 4 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 .env.example

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..e0299f2
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,11 @@
+# Server configuration
+WORKERS=1
+THREADS=2
+
+# Proxy PAC configuration
+# The proxy server address (hostname:port)
+PROXY_ADDRESS=127.0.0.1:8080
+
+# Additional TLDs to skip beyond the IANA list
+# The server automatically skips all TLDs from IANA plus some special ones
+ADDITIONAL_SKIP_TLDS=lan
diff --git a/README.md b/README.md
index 2f77bf4..7eb500e 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,45 @@
 # python-webserver-template
 
-Python3 website template including git actions
\ No newline at end of file
+Python3 website template including git actions
+
+# Proxy PAC Server
+
+A Python-based web server for hosting a Proxy Auto-Configuration (PAC) file. The server is built with Flask and can be deployed using Docker.
+
+## Features
+
+- Serves a dynamic proxy.pac file
+- Automatically skips all IANA registered TLDs (from https://data.iana.org/TLD/tlds-alpha-by-domain.txt)
+- Configurable proxy address and additional TLDs to bypass
+- Simple web interface
+
+## Configuration
+
+Environment variables can be used to configure the server:
+
+- `WORKERS`: Number of Gunicorn workers (default: 1)
+- `THREADS`: Number of threads per worker (default: 2)
+- `PROXY_ADDRESS`: The proxy server address (default: 127.0.0.1:9590)
+- `ADDITIONAL_SKIP_TLDS`: Comma-separated list of additional TLDs to bypass the proxy
+
+## Usage
+
+1. Clone the repository
+2. Copy `.env.example` to `.env` and modify as needed
+3. Run the server with Docker or Python
+
+### Docker
+
+```bash
+docker build -t proxy-pac-server .
+docker run -p 5000:5000 --env-file .env proxy-pac-server
+```
+
+### Python
+
+```bash
+pip install -r requirements.txt
+python main.py
+```
+
+The PAC file will be available at `http://your-server:5000/proxy.pac`
\ No newline at end of file
diff --git a/server.py b/server.py
index 240a1a8..f8081cf 100644
--- a/server.py
+++ b/server.py
@@ -13,13 +13,131 @@ from flask import (
 import os
 import json
 import requests
-from datetime import datetime
+from datetime import datetime, timedelta
 import dotenv
 
 dotenv.load_dotenv()
 
 app = Flask(__name__)
 
+# Proxy PAC configuration
+PROXY_ADDRESS = os.getenv('PROXY_ADDRESS', '127.0.0.1:9590')
+# URL for IANA TLD list
+IANA_TLD_URL = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
+# Seconds to wait for IANA before giving up, so a download can never hang forever
+IANA_TIMEOUT_SECONDS = 10
+# How long a downloaded list stays fresh, and how soon to retry after a failure
+TLD_REFRESH_INTERVAL = timedelta(days=1)
+TLD_RETRY_INTERVAL = timedelta(minutes=5)
+# Default TLDs to always skip (in addition to IANA list)
+DEFAULT_SKIP_TLDS = [
+    'localhost', 'test', 'invalid', 'example', 'local',
+    'arpa', 'onion', 'internal', 'private', 'home'
+]
+# Additional TLDs to skip specified in environment (comma-separated).
+# Entries are trimmed and lowercased so "lan, Home" behaves like "lan,home".
+ADDITIONAL_SKIP_TLDS = [
+    tld.strip().strip('.').lower()
+    for tld in os.getenv('ADDITIONAL_SKIP_TLDS', '').split(',')
+    if tld.strip().strip('.')
+]
+
+# IANA TLD list cache
+tld_list_cache = {
+    'tlds': [],
+    'last_updated': None,  # time of the last successful download
+    'last_attempt': None,  # time of the last download attempt, successful or not
+    'initialized': False
+}
+
+def get_iana_tlds():
+    """Return every TLD that should bypass the proxy.
+
+    The IANA TLD list is downloaded at most once per TLD_REFRESH_INTERVAL and
+    kept in tld_list_cache. If a download fails, the previously cached list
+    (possibly empty) keeps being used and the next attempt waits for
+    TLD_RETRY_INTERVAL, so an outage does not add a blocking download to
+    every request.
+
+    Returns:
+        A sorted list of unique TLDs: the cached IANA list plus
+        DEFAULT_SKIP_TLDS and ADDITIONAL_SKIP_TLDS.
+    """
+    now = datetime.now()
+    last_updated = tld_list_cache['last_updated']
+    last_attempt = tld_list_cache['last_attempt']
+    stale = last_updated is None or now - last_updated > TLD_REFRESH_INTERVAL
+    may_retry = last_attempt is None or now - last_attempt > TLD_RETRY_INTERVAL
+
+    if stale and may_retry:
+        tld_list_cache['last_attempt'] = now
+        try:
+            response = requests.get(IANA_TLD_URL, timeout=IANA_TIMEOUT_SECONDS)
+        except requests.RequestException as e:
+            # Keep serving whatever list is already cached
+            print(f"Error downloading IANA TLD list: {e}")
+        else:
+            if response.status_code == 200:
+                # Skip '#' comment lines (the header), convert to lowercase
+                tlds = [line.strip().lower() for line in response.text.splitlines()
+                        if line.strip() and not line.startswith('#')]
+                tld_list_cache['tlds'] = tlds
+                tld_list_cache['last_updated'] = now
+                tld_list_cache['initialized'] = True
+                print(f"Downloaded {len(tlds)} TLDs from IANA")
+            else:
+                # Keep serving whatever list is already cached
+                print(f"Failed to download IANA TLD list: {response.status_code}")
+
+    # Combine IANA TLDs with our default skip TLDs and additional skip TLDs
+    return sorted(set(tld_list_cache['tlds'] + DEFAULT_SKIP_TLDS + ADDITIONAL_SKIP_TLDS))
+
+def generate_pac_script(proxy_addr, skip_tlds):
+    """Generate a Proxy Auto-Configuration script.
+
+    Args:
+        proxy_addr: Proxy address as "host:port"; returned by the script for
+            every host that is not bypassed.
+        skip_tlds: TLDs (lowercase strings) whose hosts connect directly.
+
+    Returns:
+        JavaScript source defining FindProxyForURL(url, host).
+    """
+    # JSON-encode both values so a quote or backslash cannot break out of the
+    # generated JavaScript. An object literal gives constant-time lookups and
+    # avoids Array.prototype.includes (ES2016), which legacy PAC engines may lack.
+    skipped_tlds = json.dumps(dict.fromkeys(skip_tlds, 1))
+    proxy_result = json.dumps(f"PROXY {proxy_addr}")
+    return f"""
+function FindProxyForURL(url, host) {{
+    var skipped = {skipped_tlds};
+
+    // skip any TLD in the list
+    var tld = host;
+    var lastDot = tld.lastIndexOf('.');
+    if (lastDot != -1) {{
+        tld = tld.substring(lastDot + 1);
+    }}
+    tld = tld.toLowerCase();
+
+    if (Object.prototype.hasOwnProperty.call(skipped, tld)) {{
+        return 'DIRECT';
+    }}
+
+    // skip IP addresses
+    var isIpV4Addr = /^\\d+\\.\\d+\\.\\d+\\.\\d+$/;
+    if (isIpV4Addr.test(host)) {{
+        return "DIRECT";
+    }}
+
+    // loosely check if IPv6
+    if (lastDot == -1 && host.split(':').length > 2) {{
+        return "DIRECT";
+    }}
+
+    return {proxy_result};
+}}
+"""
 
 def find(name, path):
     for root, dirs, files in os.walk(path):
@@ -59,6 +177,16 @@ def faviconPNG():
     return send_from_directory("templates/assets/img", "favicon.png")
 
 
+@app.route("/proxy.pac")
+def proxy_pac():
+    """Serve the Proxy Auto-Configuration file."""
+    proxy_addr = PROXY_ADDRESS
+    skip_tlds = get_iana_tlds()
+
+    response = make_response(generate_pac_script(proxy_addr, skip_tlds))
+    response.headers["Content-Type"] = "application/x-ns-proxy-autoconfig"
+    return response
+
 @app.route("/.well-known/")
 def wellknown(path):
     # Try to proxy to https://nathan.woodburn.au/.well-known/
@@ -110,5 +238,14 @@ def not_found(e):
 
 
 # endregion
+
+# Replace before_first_request with a startup initialization
+# This runs once at server startup
+with app.app_context():
+    get_iana_tlds()
+
 if __name__ == "__main__":
+    # Initialize TLD list at startup if running directly
+    if not tld_list_cache['initialized']:
+        get_iana_tlds()
     app.run(debug=True, port=5000, host="0.0.0.0")
diff --git a/templates/index.html b/templates/index.html
index bb349f8..d1deee5 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -13,6 +13,8 @@

Nathan.Woodburn/

+

Proxy Auto-Configuration Server

+

Download proxy.pac