From 003e14343bbb01ef1cf72796f66f463e97dfd089 Mon Sep 17 00:00:00 2001
From: Nathan Woodburn <github@nathan.woodburn.au>
Date: Fri, 28 Feb 2025 13:34:01 +1100
Subject: [PATCH] fix: Add better js parsing

---
 server.py |  8 ++++----
 tools.py  | 22 ++++++++++++++--------
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/server.py b/server.py
index afc9654..90582c8 100644
--- a/server.py
+++ b/server.py
@@ -93,7 +93,7 @@ def proxy(url: str):
 
     content: requests.Response = tools.proxy(url)
 
-    if not content.ok:
+    if not content.status_code < 500:
         print(content.text)
         return render_template("500.html"), 500
 
@@ -102,17 +102,17 @@ def proxy(url: str):
     if "text/html" in contentType:
         response = make_response(tools.cleanProxyContent(content.text,url,request.host_url))
         response.headers["Content-Type"] = contentType
-        return response
+        return response, content.status_code
 
     # Clean JS
     if "text/javascript" in contentType or 'application/javascript' in contentType:
         response = make_response(tools.proxyCleanJS(content.text,url,request.host_url))
         response.headers["Content-Type"] = contentType
-        return response
+        return response, content.status_code
 
     response = make_response(content.content)
     response.headers["Content-Type"] = contentType
-    return response
+    return response, content.status_code
 
 
 @app.route("/<path:path>")
diff --git a/tools.py b/tools.py
index edfa370..ad0eb5a 100644
--- a/tools.py
+++ b/tools.py
@@ -190,7 +190,7 @@ def curl(url: str):
     try:
         # curl --doh-url https://hnsdoh.com/dns-query {url} --insecure
         command = f"curl --doh-url https://hnsdoh.com/dns-query {url} --insecure --silent"
-        response = subprocess.run(command, shell=True, capture_output=True, text=True)
+        response = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=10)
         if response.returncode != 0:
             return {"success": False, "error": response.stderr}
         else:
@@ -212,10 +212,6 @@ class ProxyError(Exception):
 def proxy(url: str) -> requests.Response:
     try:
         session = requests_doh.DNSOverHTTPSSession("HNSDoH")
-        ip = socket.gethostbyname("hnsdoh.com")
-        print(f"Resolved IP: {ip}")
-        ip = requests_doh.resolve_dns("dso.dprofile")
-        print(f"Resolved IP: {ip}")
         r = session.get(url,verify=False,timeout=30)
         return r
     except Exception as e:
@@ -253,12 +249,22 @@ def cleanProxyContent(htmlContent: str,url:str, proxyHost: str):
                     ignored = True
                     break
             if not ignored:
-                link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}"
+                # link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}"
+                # Add path also
+                link.attrs[attrib] = f"{proxyUrl}/{urlparse(link[attrib]).path}/{link[attrib]}"
 
     scripts = soup.find_all('script')
     for script in scripts:
-        if len(script.text) > 0:
-            script.text = proxyCleanJS(script.text,url,proxyHost)
+        if script.has_attr("text"):
+            script.attrs["text"] = proxyCleanJS(script.text,url,proxyHost)
+            continue
+        if not script.has_attr("contents"):
+            continue
+        if len(script.contents) > 0:
+            newScript = soup.new_tag("script")
+            for content in script.contents:
+                newScript.append(proxyCleanJS(content,url,proxyHost))
+            script.replace_with(newScript)
 
 
     return soup.prettify()
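
Note on the server.py hunks: returning (response, content.status_code) uses Flask's
(body, status) tuple return form, so the proxied reply now forwards the upstream
status code instead of always answering 200.

Below is a minimal standalone sketch of the rebuild-and-replace approach the tools.py
hunk takes for inline scripts, assuming BeautifulSoup 4 with the stock html.parser.
The clean() helper is a placeholder for proxyCleanJS, and the sketch checks
script.contents directly rather than going through has_attr(); it is an illustration
of the technique, not the patch itself.

from bs4 import BeautifulSoup

def clean(js: str) -> str:
    # Placeholder for proxyCleanJS: only marks the code as rewritten.
    return "/* rewritten */ " + js

html = '<html><body><script>fetch("/api/data");</script></body></html>'
soup = BeautifulSoup(html, "html.parser")

for script in soup.find_all("script"):
    if not script.contents:          # external scripts (src only) have no body to clean
        continue
    new_script = soup.new_tag("script")
    for content in script.contents:  # rebuild the tag from the cleaned JS text
        new_script.append(clean(content))
    script.replace_with(new_script)

print(soup.prettify())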