feat: Add better cleaning for dprofile

This commit is contained in:
Nathan Woodburn 2025-02-27 23:35:23 +11:00
parent 6d77096a15
commit 7f0a13f44b
Signed by: nathanwoodburn
GPG Key ID: 203B000478AD0EF1
2 changed files with 61 additions and 12 deletions

View File

@ -94,12 +94,21 @@ def proxy(url: str):
content: requests.Response = tools.proxy(url) content: requests.Response = tools.proxy(url)
if not content.ok: if not content.ok:
print(content.text)
return render_template("500.html"), 500 return render_template("500.html"), 500
# Get the content type # Get the content type
contentType = content.headers.get("Content-Type") contentType = content.headers.get("Content-Type")
if "text/html" in contentType: if "text/html" in contentType:
return tools.cleanProxyContent(content.text,url,request.host_url) response = make_response(tools.cleanProxyContent(content.text,url,request.host_url))
response.headers["Content-Type"] = contentType
return response
# Clean JS
if "text/javascript" in contentType or 'application/javascript' in contentType:
response = make_response(tools.proxyCleanJS(content.text,url,request.host_url))
response.headers["Content-Type"] = contentType
return response
response = make_response(content.content) response = make_response(content.content)
response.headers["Content-Type"] = contentType response.headers["Content-Type"] = contentType

View File

@ -13,7 +13,7 @@ import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from requests_doh import DNSOverHTTPSSession, add_dns_provider from requests_doh import DNSOverHTTPSSession, add_dns_provider
import urllib3 import urllib3
import socket
resolver = dns.resolver.Resolver() resolver = dns.resolver.Resolver()
resolver.nameservers = ["194.50.5.28","194.50.5.27","194.50.5.26"] resolver.nameservers = ["194.50.5.28","194.50.5.27","194.50.5.26"]
@ -199,21 +199,28 @@ def curl(url: str):
except Exception as e: except Exception as e:
return {"success": False, "error": "An error occurred", "message": str(e)} return {"success": False, "error": "An error occurred", "message": str(e)}
class ProxyError(Exception):
    """Exception that doubles as a failed-response stand-in.

    Besides the usual exception message it carries the attributes that
    callers read off a response object: ``text`` (the error message),
    ``ok`` (always ``False``) and ``status_code`` (always ``500``).
    """

    def __init__(self, message):
        # Initialise the Exception base with the same message first.
        super().__init__(message)
        # Response-like attributes.
        self.message = self.text = message
        self.ok = False
        self.status_code = 500
def proxy(url: str) -> requests.Response:
    """Fetch *url* through the "HNSDoH" DNS-over-HTTPS session.

    Args:
        url: Absolute URL to request.

    Returns:
        The response on success. On any failure the error is NOT raised;
        a ``ProxyError`` instance is returned instead, carrying the error
        text in ``.text`` with ``.ok`` set to ``False`` and ``.status_code``
        set to 500, so callers can test ``.ok`` on either result.
    """
    try:
        # The context manager closes the session (and its pooled
        # connections) on exit; the body has already been read by then
        # because the request is not streamed.
        with DNSOverHTTPSSession("HNSDoH") as session:
            # NOTE(review): verify=False disables TLS certificate checks;
            # confirm this is intentional for the proxied sites.
            return session.get(url, verify=False, timeout=30)
    except Exception as e:
        # Deliberate catch-all boundary: every failure is reported to the
        # caller as a response-like error object.
        return ProxyError(str(e))
def cleanProxyContent(htmlContent: str,url:str, proxyHost: str): def cleanProxyContent(htmlContent: str,url:str, proxyHost: str):
# Set proxy host to https if not 127.0.0.1 or localhost # Set proxy host to https if not 127.0.0.1 or localhost
if ":5000" not in proxyHost: if ":5000" not in proxyHost:
proxyHost = proxyHost.replace("http","https") proxyHost = proxyHost.replace("http","https")
# Find all instances of the url in the html # Find all instances of the url in the html
hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}" hostUrl = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
@ -243,9 +250,42 @@ def cleanProxyContent(htmlContent: str,url:str, proxyHost: str):
break break
if not ignored: if not ignored:
link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}" link.attrs[attrib] = f"{proxyUrl}/{link[attrib]}"
scripts = soup.find_all('script')
for script in scripts:
if len(script.text) > 0:
script.text = proxyCleanJS(script.text,url,proxyHost)
return soup.prettify() return soup.prettify()
def proxyCleanJS(jsContent: str, url: str, proxyHost: str):
    """Rewrite JavaScript fetched from *url* so it works behind the proxy.

    Args:
        jsContent: JavaScript source text to rewrite.
        url: URL the script was fetched from; its scheme and host form the
            origin that gets rewritten.
        proxyHost: Base URL of this proxy, ending in ``/``.

    Returns:
        The rewritten JavaScript source.

    For URLs containing ``dprofile`` only two targeted substitutions are
    made: ``window.location.hostname`` becomes the quoted origin host, and
    relative ``src="img`` paths are pointed at the proxy. For every other
    URL, occurrences of the origin are prefixed with the proxy URL, and
    ``window.location.href`` / ``window.location`` / ``location.href`` /
    ``location`` are replaced with the proxy URL as a quoted string literal.
    """
    import json
    import re

    # Upgrade the proxy host to https unless it is the dev server on :5000.
    # Only a leading "http://" is rewritten, so an already-https host is
    # left alone instead of becoming "httpss://".
    if ":5000" not in proxyHost and proxyHost.startswith("http://"):
        proxyHost = "https://" + proxyHost[len("http://"):]

    target = urlparse(url)
    hostUrl = f"{target.scheme}://{target.netloc}"
    proxyUrl = f"{proxyHost}proxy/{hostUrl}"

    if "dprofile" in url:
        jsContent = jsContent.replace("window.location.hostname", f"\"{target.netloc}\"")
        jsContent = jsContent.replace("src=\"img", f"src=\"{proxyUrl}/img")
        return jsContent

    # Emit the proxy URL as a JS string literal; pasting it in unquoted
    # would produce invalid JavaScript.
    proxyLiteral = json.dumps(proxyUrl)

    # One pass handles both rewrites, so text inserted by one substitution
    # is never re-scanned by the other. The lookarounds keep identifiers
    # such as "geolocation" and property accesses such as "obj.location"
    # untouched.
    pattern = re.compile(
        re.escape(hostUrl)
        + r"|(?<![\w$.])(?:window\.)?location(?:\.href)?(?![\w$])"
    )

    def _rewrite(match):
        # Origin URLs are rewritten in place; location lookups become the
        # quoted proxy URL.
        return proxyUrl if match.group(0) == hostUrl else proxyLiteral

    return pattern.sub(_rewrite, jsContent)
# if __name__ == "__main__": # if __name__ == "__main__":
# print(curl("https://dso.dprofile")) # print(curl("https://dso.dprofile"))