From 7f0a13f44b34d64c461a094e562e3b6f7aa5cfdf Mon Sep 17 00:00:00 2001 From: Nathan Woodburn <github@nathan.woodburn.au> Date: Thu, 27 Feb 2025 23:35:23 +1100 Subject: [PATCH] feat: Add better cleaning for dprofile --- server.py | 11 +++++++++- tools.py | 62 +++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/server.py b/server.py index 535300c..afc9654 100644 --- a/server.py +++ b/server.py @@ -94,12 +94,21 @@ def proxy(url: str): content: requests.Response = tools.proxy(url) if not content.ok: + print(content.text) return render_template("500.html"), 500 # Get the content type contentType = content.headers.get("Content-Type") if "text/html" in contentType: - return tools.cleanProxyContent(content.text,url,request.host_url) + response = make_response(tools.cleanProxyContent(content.text,url,request.host_url)) + response.headers["Content-Type"] = contentType + return response + + # Clean JS + if "text/javascript" in contentType or 'application/javascript' in contentType: + response = make_response(tools.proxyCleanJS(content.text,url,request.host_url)) + response.headers["Content-Type"] = contentType + return response response = make_response(content.content) response.headers["Content-Type"] = contentType diff --git a/tools.py b/tools.py index 1e97fe3..1541f98 100644 --- a/tools.py +++ b/tools.py @@ -13,7 +13,7 @@ import re from bs4 import BeautifulSoup from requests_doh import DNSOverHTTPSSession, add_dns_provider import urllib3 - +import socket resolver = dns.resolver.Resolver() resolver.nameservers = ["194.50.5.28","194.50.5.27","194.50.5.26"] @@ -199,21 +199,28 @@ def curl(url: str): except Exception as e: return {"success": False, "error": "An error occurred", "message": str(e)} + +class ProxyError(Exception): + def __init__(self, message): + self.message = message + self.text = message + self.ok = False + self.status_code = 500 + super().__init__(self.message) + + def proxy(url: str) -> requests.Response: - session = DNSOverHTTPSSession("HNSDoH") - r = session.get(url,verify=False) - return r - - - + try: + session = DNSOverHTTPSSession("HNSDoH") + r = session.get(url,verify=False,timeout=30) + return r + except Exception as e: + return ProxyError(str(e)) def cleanProxyContent(htmlContent: str,url:str, proxyHost: str): # Set proxy host to https if not 127.0.0.1 or localhost if ":5000" not in proxyHost: proxyHost = proxyHost.replace("http","https") - - - # Find all instances of the url in the html hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}" @@ -243,9 +250,42 @@ def cleanProxyContent(htmlContent: str,url:str, proxyHost: str): break if not ignored: link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}" - + + scripts = soup.find_all('script') + for script in scripts: + if len(script.text) > 0: + script.text = proxyCleanJS(script.text,url,proxyHost) return soup.prettify() +def proxyCleanJS(jsContent: str, url: str, proxyHost: str): + # Set proxy host to https if not 127.0.0.1 or localhost + if ":5000" not in proxyHost: + proxyHost = proxyHost.replace("http","https") + + hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}" + proxyUrl = f"{proxyHost}proxy/{hostUrl}" + + if "dprofile" in url: + jsContent = jsContent.replace("window.location.hostname", f"\"{urlparse(url).netloc}\"") + jsContent = jsContent.replace("src=\"img", f"src=\"{proxyUrl}/img") + + return jsContent + + + # Replace all instances of the url with the proxy url + hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}" + proxyUrl = f"{proxyHost}proxy/{hostUrl}" + + jsContent = jsContent.replace(hostUrl,proxyUrl) + # Common ways to get current url + for locator in ["window.location.href","window.location","location.href","location"]: + jsContent = jsContent.replace(locator,proxyUrl) + + + return jsContent + + + # if __name__ == "__main__": # print(curl("https://dso.dprofile")) \ No newline at end of file