fix: Add better JS parsing

This commit is contained in:
Nathan Woodburn 2025-02-28 13:34:01 +11:00
parent edbfa66249
commit 003e14343b
Signed by: nathanwoodburn
GPG Key ID: 203B000478AD0EF1
2 changed files with 18 additions and 12 deletions

View File

@ -93,7 +93,7 @@ def proxy(url: str):
content: requests.Response = tools.proxy(url)
if not content.ok:
if not content.status_code < 500:
print(content.text)
return render_template("500.html"), 500
@ -102,17 +102,17 @@ def proxy(url: str):
if "text/html" in contentType:
response = make_response(tools.cleanProxyContent(content.text,url,request.host_url))
response.headers["Content-Type"] = contentType
return response
return response, content.status_code
# Clean JS
if "text/javascript" in contentType or 'application/javascript' in contentType:
response = make_response(tools.proxyCleanJS(content.text,url,request.host_url))
response.headers["Content-Type"] = contentType
return response
return response, content.status_code
response = make_response(content.content)
response.headers["Content-Type"] = contentType
return response
return response, content.status_code
@app.route("/<path:path>")

View File

@ -190,7 +190,7 @@ def curl(url: str):
try:
# curl --doh-url https://hnsdoh.com/dns-query {url} --insecure
command = f"curl --doh-url https://hnsdoh.com/dns-query {url} --insecure --silent"
response = subprocess.run(command, shell=True, capture_output=True, text=True)
response = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=10)
if response.returncode != 0:
return {"success": False, "error": response.stderr}
else:
@ -212,10 +212,6 @@ class ProxyError(Exception):
def proxy(url: str) -> requests.Response:
try:
session = requests_doh.DNSOverHTTPSSession("HNSDoH")
ip = socket.gethostbyname("hnsdoh.com")
print(f"Resolved IP: {ip}")
ip = requests_doh.resolve_dns("dso.dprofile")
print(f"Resolved IP: {ip}")
r = session.get(url,verify=False,timeout=30)
return r
except Exception as e:
@ -253,12 +249,22 @@ def cleanProxyContent(htmlContent: str,url:str, proxyHost: str):
ignored = True
break
if not ignored:
link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}"
# link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}"
# Add path also
link.attrs[attrib] = f"{proxyUrl}/{urlparse(link[attrib]).path}/{link[attrib]}"
scripts = soup.find_all('script')
for script in scripts:
if len(script.text) > 0:
script.text = proxyCleanJS(script.text,url,proxyHost)
if script.has_attr("text"):
script.attrs["text"] = proxyCleanJS(script.text,url,proxyHost)
continue
if not script.has_attr("contents"):
continue
if len(script.contents) > 0:
newScript = soup.new_tag("script")
for content in script.contents:
newScript.append(proxyCleanJS(content,url,proxyHost))
script.replace_with(newScript)
return soup.prettify()