feat: Add better cleaning for dprofile

Nathan Woodburn 2025-02-27 23:35:23 +11:00
parent 6d77096a15
commit 7f0a13f44b
Signed by: nathanwoodburn
GPG Key ID: 203B000478AD0EF1
2 changed files with 61 additions and 12 deletions


@@ -94,12 +94,21 @@ def proxy(url: str):
     content: requests.Response = tools.proxy(url)
     if not content.ok:
         print(content.text)
         return render_template("500.html"), 500

     # Get the content type
     contentType = content.headers.get("Content-Type")
     if "text/html" in contentType:
-        return tools.cleanProxyContent(content.text,url,request.host_url)
+        response = make_response(tools.cleanProxyContent(content.text,url,request.host_url))
+        response.headers["Content-Type"] = contentType
+        return response

+    # Clean JS
+    if "text/javascript" in contentType or 'application/javascript' in contentType:
+        response = make_response(tools.proxyCleanJS(content.text,url,request.host_url))
+        response.headers["Content-Type"] = contentType
+        return response
     response = make_response(content.content)
     response.headers["Content-Type"] = contentType
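
For context on the pattern used above: make_response wraps the cleaned string in a Response object whose headers can then be overwritten, which is what lets the route forward the upstream Content-Type instead of Flask's default text/html. A minimal standalone sketch (the route and payload below are placeholders, not code from this repo):

from flask import Flask, make_response

app = Flask(__name__)

@app.route("/demo")
def demo():
    payload = "console.log('hi');"  # stand-in for cleaned upstream JS
    response = make_response(payload)  # would otherwise be served as text/html
    response.headers["Content-Type"] = "text/javascript"  # forward the upstream type
    return response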


@@ -13,7 +13,7 @@ import re
 from bs4 import BeautifulSoup
 from requests_doh import DNSOverHTTPSSession, add_dns_provider
 import urllib3
 import socket

 resolver = dns.resolver.Resolver()
 resolver.nameservers = ["194.50.5.28","194.50.5.27","194.50.5.26"]
@@ -199,22 +199,29 @@ def curl(url: str):
     except Exception as e:
         return {"success": False, "error": "An error occurred", "message": str(e)}

+class ProxyError(Exception):
+    def __init__(self, message):
+        self.message = message
+        self.text = message
+        self.ok = False
+        self.status_code = 500
+        super().__init__(self.message)

 def proxy(url: str) -> requests.Response:
-    session = DNSOverHTTPSSession("HNSDoH")
-    r = session.get(url,verify=False)
-    return r
+    try:
+        session = DNSOverHTTPSSession("HNSDoH")
+        r = session.get(url,verify=False,timeout=30)
+        return r
+    except Exception as e:
+        return ProxyError(str(e))

 def cleanProxyContent(htmlContent: str,url:str, proxyHost: str):
     # Set proxy host to https if not 127.0.0.1 or localhost
     if ":5000" not in proxyHost:
         proxyHost = proxyHost.replace("http","https")
     # Find all instances of the url in the html
     hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
     proxyUrl = f"{proxyHost}proxy/{hostUrl}"
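
The reason ProxyError mimics a requests.Response is that the route in the first file only inspects content.ok and content.text, so a failed DoH lookup or timeout can reuse the existing 500 path without a special case. A rough usage sketch (the unreachable hostname is a placeholder):

import tools

content = tools.proxy("https://does-not-resolve.example/")  # placeholder host that will fail
if not content.ok:
    # ProxyError exposes .ok, .text and .status_code like requests.Response does,
    # so the caller can log the error and render its 500 page as usual
    print(content.status_code, content.text)
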
@@ -244,8 +251,41 @@ def cleanProxyContent(htmlContent: str,url:str, proxyHost: str):
             if not ignored:
                 link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}"

+    scripts = soup.find_all('script')
+    for script in scripts:
+        if len(script.text) > 0:
+            script.text = proxyCleanJS(script.text,url,proxyHost)

     return soup.prettify()

+def proxyCleanJS(jsContent: str, url: str, proxyHost: str):
+    # Set proxy host to https if not 127.0.0.1 or localhost
+    if ":5000" not in proxyHost:
+        proxyHost = proxyHost.replace("http","https")
+    hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
+    proxyUrl = f"{proxyHost}proxy/{hostUrl}"
+    if "dprofile" in url:
+        jsContent = jsContent.replace("window.location.hostname", f"\"{urlparse(url).netloc}\"")
+        jsContent = jsContent.replace("src=\"img", f"src=\"{proxyUrl}/img")
+        return jsContent
+
+    # Replace all instances of the url with the proxy url
+    hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
+    proxyUrl = f"{proxyHost}proxy/{hostUrl}"
+    jsContent = jsContent.replace(hostUrl,proxyUrl)
+
+    # Common ways to get current url
+    for locator in ["window.location.href","window.location","location.href","location"]:
+        jsContent = jsContent.replace(locator,proxyUrl)
+    return jsContent

 # if __name__ == "__main__":
 #     print(curl("https://dso.dprofile"))
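
For a concrete sense of the new dprofile branch in proxyCleanJS, a rough usage sketch; the proxy host http://hns.example/ is a placeholder, dso.dprofile comes from the commented test above, and running it requires the module's dependencies (requests_doh, bs4, dnspython) to be installed:

import tools

js = """fetch("https://" + window.location.hostname + "/api");
el.innerHTML = '<img src="img/logo.png">';"""

cleaned = tools.proxyCleanJS(js, "https://dso.dprofile/", "http://hns.example/")
print(cleaned)
# window.location.hostname  ->  "dso.dprofile" (a quoted literal)
# src="img/logo.png"        ->  src="https://hns.example/proxy/https://dso.dprofile/img/logo.png"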