From 50275ba48230a81bfcd8f3376132e8f0d45d15b0 Mon Sep 17 00:00:00 2001
From: Nathan Woodburn
Date: Thu, 27 Feb 2025 22:22:40 +1100
Subject: [PATCH] feat: Added proxy

---
 requirements.txt   |  4 +++-
 server.py          | 26 ++++++++++++++++++++
 templates/500.html | 21 ++++++++++++++++
 tools.py           | 60 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 106 insertions(+), 5 deletions(-)
 create mode 100644 templates/500.html

diff --git a/requirements.txt b/requirements.txt
index b19f980..4e7f470 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,6 @@ requests
 python-dotenv
 dnspython
 cryptography
-datetime
\ No newline at end of file
+datetime
+beautifulsoup4
+requests-doh
\ No newline at end of file
diff --git a/server.py b/server.py
index 2423c94..535300c 100644
--- a/server.py
+++ b/server.py
@@ -24,6 +24,8 @@ dotenv.load_dotenv()
 
 app = Flask(__name__)
 
+BLOCKED_PATHS = ["https.js"]
+
 def find(name, path):
     for root, dirs, files in os.walk(path):
         if name in files:
@@ -79,6 +81,30 @@ def wellknown(path):
 def index():
     return render_template("index.html")
 
+@app.route("/proxy/<path:url>")
+def proxy(url: str):
+    # Decode the URL
+    url = urllib.parse.unquote(url)
+    # Get the last path segment and refuse blocked resources
+    path = url.split("/")[-1]
+    if path in BLOCKED_PATHS:
+        # Serve the 404 page, but flag the request as forbidden
+        return render_template("404.html"), 403
+
+    content: requests.Response = tools.proxy(url)
+    if not content.ok:
+        return render_template("500.html"), 500
+
+    # Mirror the upstream content type (empty string if the header is missing)
+    contentType = content.headers.get("Content-Type", "")
+    if "text/html" in contentType:
+        # Rewrite links in HTML so navigation keeps flowing through the proxy
+        return tools.cleanProxyContent(content.text, url, request.host_url)
+
+    response = make_response(content.content)
+    response.headers["Content-Type"] = contentType
+    return response
+
 @app.route("/<path:path>")
 def catch_all(path: str):
diff --git a/templates/500.html b/templates/500.html
new file mode 100644
index 0000000..6e46054
--- /dev/null
+++ b/templates/500.html
@@ -0,0 +1,21 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Nathan.Woodburn/</title>
+</head>
+<body>
+<div>
+    <div>
+        <h1>500 | Internal Server Error</h1>
+    </div>
+    <div>
+        <p>Sorry, we can't seem to display this page. Maybe try again or your request might not be valid</p>
+    </div>
+    <div>
+        <a href="/">Go back to the homepage</a>
+    </div>
+</div>
+</body>
+</html>
\ No newline at end of file
diff --git a/tools.py b/tools.py
index ce90f87..16882bf 100644
--- a/tools.py
+++ b/tools.py
@@ -1,4 +1,5 @@
 import random
+from urllib.parse import urlparse
 import dns.resolver
 import subprocess
 import tempfile
@@ -8,10 +9,19 @@ from cryptography.hazmat.backends import default_backend
 import datetime
 from dns import resolver
 import requests
+import re
+from bs4 import BeautifulSoup
+from requests_doh import DNSOverHTTPSSession, add_dns_provider
+import urllib3
+
 
 resolver = dns.resolver.Resolver()
 resolver.nameservers = ["194.50.5.28","194.50.5.27","194.50.5.26"]
 resolver.port = 53
+add_dns_provider("HNSDoH", "https://hnsdoh.com/dns-query")
+
+# Disable TLS warnings for proxied requests made with verify=False
+urllib3.disable_warnings()
 
 def check_ssl(domain: str):
@@ -179,14 +189,56 @@ def curl(url: str):
         url = "http://" + url
     try:
         # curl --doh-url https://hnsdoh.com/dns-query {url} --insecure
-        commmand = f"curl --doh-url https://hnsdoh.com/dns-query {url} --insecure --silent"
-        response = subprocess.run(commmand, shell=True, capture_output=True, text=True)
+        command = f"curl --doh-url https://hnsdoh.com/dns-query {url} --insecure --silent"
+        response = subprocess.run(command, shell=True, capture_output=True, text=True)
         if response.returncode != 0:
             return {"success": False, "error": response.stderr}
         else:
             return {"success": True, "result": response.stdout}
-    except:
-        return {"success": False, "error": "An error occurred"}
+
+    except Exception as e:
+        return {"success": False, "error": "An error occurred", "message": str(e)}
+
+
+def proxy(url: str) -> requests.Response:
+    # Fetch the URL, resolving its hostname over Handshake DoH; TLS verification
+    # is skipped because Handshake sites rarely have CA-signed certificates
+    session = DNSOverHTTPSSession("HNSDoH")
+    r = session.get(url, verify=False)
+    return r
+
+
+def cleanProxyContent(htmlContent: str, url: str, proxyHost: str):
+    # Origin of the proxied site, and the proxy prefix that should replace it
+    hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
+    proxyUrl = f"{proxyHost}proxy/{hostUrl}"
+    # A plain string replace would miss relative links, so rewrite the DOM:
+    # htmlContent = htmlContent.replace(hostUrl,proxyUrl)
+
+    # Parse the HTML and route every resource reference through the proxy
+    soup = BeautifulSoup(htmlContent, 'html.parser')
+    for linkType in ['link', 'img', 'script', 'a']:
+        links = soup.find_all(linkType)
+        for link in links:
+            for attrib in ['src', 'href']:
+                if link.has_attr(attrib):
+                    # Root-relative URL: prefix it with the proxied origin
+                    if str(link[attrib]).startswith('/'):
+                        link.attrs[attrib] = proxyUrl + link[attrib]
+                        continue
+                    # Absolute URL: rewrite only if it targets the proxied origin
+                    if str(link[attrib]).startswith('http'):
+                        link.attrs[attrib] = str(link[attrib]).replace(hostUrl, proxyUrl)
+                        continue
+                    # Leave non-HTTP schemes untouched
+                    ignored = False
+                    for ignore in ["data:", "mailto:", "tel:", "javascript:", "blob:"]:
+                        if str(link[attrib]).startswith(ignore):
+                            ignored = True
+                            break
+                    if not ignored:
+                        # Relative URL: resolve it against the proxied origin
+                        link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}"
+
+    return soup.prettify()
+
 # if __name__ == "__main__":
 #     print(curl("https://dso.dprofile"))
\ No newline at end of file
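
A minimal usage sketch of the new endpoint, assuming the Flask app from server.py is running locally on port 5000; the target URL, the port, and the exact percent-encoding the route accepts are illustrative assumptions, not part of this patch:

    # Fetch a page through the new /proxy/<path:url> endpoint.
    import urllib.parse
    import requests

    target = "http://example.com/index.html"  # hypothetical upstream site
    proxied = "http://localhost:5000/proxy/" + urllib.parse.quote(target, safe="")

    r = requests.get(proxied)
    print(r.status_code, r.headers.get("Content-Type"))
    # HTML responses pass through tools.cleanProxyContent, so their src/href
    # attributes should point back at http://localhost:5000/proxy/... rather
    # than the original origin; other content types are returned verbatim.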