From 50275ba48230a81bfcd8f3376132e8f0d45d15b0 Mon Sep 17 00:00:00 2001
From: Nathan Woodburn
Date: Thu, 27 Feb 2025 22:22:40 +1100
Subject: [PATCH] feat: Added proxy

---
 requirements.txt   |  4 +++-
 server.py          | 26 ++++++++++++++++++++
 templates/500.html | 21 ++++++++++++++++
 tools.py           | 60 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 106 insertions(+), 5 deletions(-)
 create mode 100644 templates/500.html

diff --git a/requirements.txt b/requirements.txt
index b19f980..4e7f470 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,6 @@ requests
 python-dotenv
 dnspython
 cryptography
-datetime
\ No newline at end of file
+datetime
+beautifulsoup4
+requests-doh
\ No newline at end of file
diff --git a/server.py b/server.py
index 2423c94..535300c 100644
--- a/server.py
+++ b/server.py
@@ -24,6 +24,8 @@ dotenv.load_dotenv()
 
 app = Flask(__name__)
 
+BLOCKED_PATHS = ["https.js"]
+
 def find(name, path):
     for root, dirs, files in os.walk(path):
         if name in files:
@@ -79,6 +81,30 @@ def wellknown(path):
 def index():
     return render_template("index.html")
 
+@app.route("/proxy/<path:url>")
+def proxy(url: str):
+    # Decode the URL
+    url = urllib.parse.unquote(url)
+    # Get the last path segment and refuse blocked resources
+    path = url.split("/")[-1]
+    if path in BLOCKED_PATHS:
+        # Serve the 404 page, but flag the request as forbidden
+        return render_template("404.html"), 403
+
+    content: requests.Response = tools.proxy(url)
+    if not content.ok:
+        return render_template("500.html"), 500
+
+    # Mirror the upstream content type (empty string if the header is missing)
+    contentType = content.headers.get("Content-Type", "")
+    if "text/html" in contentType:
+        # Rewrite links in HTML so navigation keeps flowing through the proxy
+        return tools.cleanProxyContent(content.text, url, request.host_url)
+
+    response = make_response(content.content)
+    response.headers["Content-Type"] = contentType
+    return response
+
 @app.route("/<path:path>")
 def catch_all(path: str):
diff --git a/templates/500.html b/templates/500.html
new file mode 100644
index 0000000..6e46054
--- /dev/null
+++ b/templates/500.html
@@ -0,0 +1,21 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Nathan.Woodburn/</title>
+</head>
+<body>
+<div>
+    <div>
+        <h1>500 | Internal Server Error</h1>
+    </div>
+    <div>
+        <p>Sorry, we can't seem to display this page. Maybe try again or your request might not be valid</p>
+    </div>
+    <div>
+        <a href="/">Go back to the homepage</a>
+    </div>
+</div>
+</body>
+</html>
\ No newline at end of file
diff --git a/tools.py b/tools.py
index ce90f87..16882bf 100644
--- a/tools.py
+++ b/tools.py
@@ -1,4 +1,5 @@
 import random
+from urllib.parse import urlparse
 import dns.resolver
 import subprocess
 import tempfile
@@ -8,10 +9,19 @@ from cryptography.hazmat.backends import default_backend
 import datetime
 from dns import resolver
 import requests
+import re
+from bs4 import BeautifulSoup
+from requests_doh import DNSOverHTTPSSession, add_dns_provider
+import urllib3
+
 
 resolver = dns.resolver.Resolver()
 resolver.nameservers = ["194.50.5.28","194.50.5.27","194.50.5.26"]
 resolver.port = 53
+add_dns_provider("HNSDoH", "https://hnsdoh.com/dns-query")
+
+# Disable TLS warnings for proxied requests made with verify=False
+urllib3.disable_warnings()
 
 def check_ssl(domain: str):
@@ -179,14 +189,56 @@ def curl(url: str):
         url = "http://" + url
     try:
         # curl --doh-url https://hnsdoh.com/dns-query {url} --insecure
-        commmand = f"curl --doh-url https://hnsdoh.com/dns-query {url} --insecure --silent"
-        response = subprocess.run(commmand, shell=True, capture_output=True, text=True)
+        command = f"curl --doh-url https://hnsdoh.com/dns-query {url} --insecure --silent"
+        response = subprocess.run(command, shell=True, capture_output=True, text=True)
         if response.returncode != 0:
             return {"success": False, "error": response.stderr}
         else:
             return {"success": True, "result": response.stdout}
-    except:
-        return {"success": False, "error": "An error occurred"}
+
+    except Exception as e:
+        return {"success": False, "error": "An error occurred", "message": str(e)}
+
+
+def proxy(url: str) -> requests.Response:
+    # Fetch the URL, resolving its hostname over Handshake DoH; TLS verification
+    # is skipped because Handshake sites rarely have CA-signed certificates
+    session = DNSOverHTTPSSession("HNSDoH")
+    r = session.get(url, verify=False)
+    return r
+
+
+def cleanProxyContent(htmlContent: str, url: str, proxyHost: str):
+    # Origin of the proxied site, and the proxy prefix that should replace it
+    hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
+    proxyUrl = f"{proxyHost}proxy/{hostUrl}"
+    # A plain string replace would miss relative links, so rewrite the DOM:
+    # htmlContent = htmlContent.replace(hostUrl,proxyUrl)
+
+    # Parse the HTML and route every resource reference through the proxy
+    soup = BeautifulSoup(htmlContent, 'html.parser')
+    for linkType in ['link', 'img', 'script', 'a']:
+        links = soup.find_all(linkType)
+        for link in links:
+            for attrib in ['src', 'href']:
+                if link.has_attr(attrib):
+                    # Root-relative URL: prefix it with the proxied origin
+                    if str(link[attrib]).startswith('/'):
+                        link.attrs[attrib] = proxyUrl + link[attrib]
+                        continue
+                    # Absolute URL: rewrite only if it targets the proxied origin
+                    if str(link[attrib]).startswith('http'):
+                        link.attrs[attrib] = str(link[attrib]).replace(hostUrl, proxyUrl)
+                        continue
+                    # Leave non-HTTP schemes untouched
+                    ignored = False
+                    for ignore in ["data:", "mailto:", "tel:", "javascript:", "blob:"]:
+                        if str(link[attrib]).startswith(ignore):
+                            ignored = True
+                            break
+                    if not ignored:
+                        # Relative URL: resolve it against the proxied origin
+                        link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}"
+
+    return soup.prettify()
+
 # if __name__ == "__main__":
 #     print(curl("https://dso.dprofile"))
\ No newline at end of file
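
A minimal usage sketch of the new endpoint, assuming the Flask app from server.py is running locally on port 5000; the target URL, the port, and the exact percent-encoding the route accepts are illustrative assumptions, not part of this patch:

    # Fetch a page through the new /proxy/<path:url> endpoint.
    import urllib.parse
    import requests

    target = "http://example.com/index.html"  # hypothetical upstream site
    proxied = "http://localhost:5000/proxy/" + urllib.parse.quote(target, safe="")

    r = requests.get(proxied)
    print(r.status_code, r.headers.get("Content-Type"))
    # HTML responses pass through tools.cleanProxyContent, so their src/href
    # attributes should point back at http://localhost:5000/proxy/... rather
    # than the original origin; other content types are returned verbatim.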