feat: Add tool to estimate date of a webpage
All checks were successful
Build Docker / BuildImage (push) Successful in 2m34s
All checks were successful
Build Docker / BuildImage (push) Successful in 2m34s
This commit is contained in:
171
tools.py
171
tools.py
@@ -1,18 +1,44 @@
|
||||
from flask import Request, render_template, jsonify, make_response
|
||||
import os
|
||||
from functools import cache
|
||||
import datetime
|
||||
from typing import Optional, Dict, Union, Tuple
|
||||
import re
|
||||
from dateutil.parser import parse
|
||||
|
||||
# HTTP status codes
|
||||
HTTP_OK = 200
|
||||
HTTP_BAD_REQUEST = 400
|
||||
HTTP_NOT_FOUND = 404
|
||||
|
||||
|
||||
def getClientIP(request: Request) -> str:
    """
    Get the client's IP address from the request.

    The first entry of the ``X-Forwarded-For`` header is preferred. When the
    header is missing, or its first entry is blank, ``request.remote_addr``
    is used instead.

    Args:
        request (Request): The Flask request object

    Returns:
        str: The client's IP address, or "unknown" if none is available
    """
    ip = None
    x_forwarded_for = request.headers.get("X-Forwarded-For")
    if x_forwarded_for:
        # Entries are comma separated and commonly padded with spaces
        # ("a, b, c"), so trim the first entry before using it.
        ip = x_forwarded_for.split(",")[0].strip()

    # A blank first entry (e.g. ", 10.0.0.1") is not a usable address;
    # fall back to the socket peer address in that case.
    if not ip:
        ip = request.remote_addr
    if not ip:
        ip = "unknown"
    return ip
|
||||
|
||||
|
||||
def getGitCommit():
|
||||
def getGitCommit() -> str:
|
||||
"""
|
||||
Get the current git commit hash.
|
||||
|
||||
Returns:
|
||||
str: The current git commit hash or a failure message
|
||||
"""
|
||||
# if .git exists, get the latest commit hash
|
||||
if os.path.isdir(".git"):
|
||||
git_dir = ".git"
|
||||
@@ -35,77 +61,156 @@ def getGitCommit():
|
||||
|
||||
def isCurl(request: Request) -> bool:
    """
    Check if the request is from curl or hurl.

    Args:
        request (Request): The Flask request object

    Returns:
        bool: True if the User-Agent header contains "curl" or "hurl",
            False otherwise (including when the header is missing or empty)
    """
    headers = request.headers
    # A missing or empty User-Agent can never match, so normalise it to ""
    # and let the substring tests below return False.
    user_agent = (headers.get("User-Agent") or "") if headers else ""
    return "curl" in user_agent or "hurl" in user_agent
|
||||
|
||||
|
||||
def isCrawler(request: Request) -> bool:
    """
    Check if the request is from a web crawler (e.g., Googlebot, Bingbot).

    Matching is case-insensitive: crawlers do not agree on capitalisation
    (Bing's published user-agent strings use lowercase ``bingbot``), so a
    case-sensitive test for "Bingbot" would miss them.

    Args:
        request (Request): The Flask request object

    Returns:
        bool: True if the User-Agent header names Googlebot or Bingbot,
            False otherwise (including when the header is missing or empty)
    """
    headers = request.headers
    # Normalise a missing/empty header to "" and lower-case once so both
    # substring tests are case-insensitive.
    user_agent = ((headers.get("User-Agent") or "") if headers else "").lower()
    return "googlebot" in user_agent or "bingbot" in user_agent
|
||||
|
||||
|
||||
@cache
def getAddress(coin: str) -> str:
    """
    Get the wallet address for a cryptocurrency.

    The address is read from ``.well-known/wallets/<COIN>``, where ``<COIN>``
    is the upper-cased ``coin`` argument. Results are memoised per ``coin``
    value by ``functools.cache``.

    Args:
        coin (str): The cryptocurrency code

    Returns:
        str: The wallet file's contents, or an empty string if the code is
            not a plain file name or no such wallet file exists
    """
    name = coin.upper()
    # The code becomes part of a filesystem path, so refuse anything that
    # could step outside the wallets directory.
    if not name or name in (".", "..") or "/" in name or "\\" in name or os.sep in name:
        return ""

    wallet_path = f".well-known/wallets/{name}"
    if not os.path.isfile(wallet_path):
        return ""
    with open(wallet_path, encoding="utf-8") as file:
        return file.read()
|
||||
|
||||
|
||||
def getFilePath(name: str, path: str) -> Optional[str]:
    """
    Find a file in a directory tree.

    Walks ``path`` with ``os.walk`` and returns the first directory entry
    whose file name equals ``name``.

    Args:
        name (str): The filename to find
        path (str): The root directory to search

    Returns:
        Optional[str]: The joined path to the first match, or None if the
            file is not found (or ``path`` cannot be walked)
    """
    # Only the file lists are needed; the sub-directory list is ignored.
    for root, _dirs, files in os.walk(path):
        if name in files:
            return os.path.join(root, name)
    return None
|
||||
|
||||
|
||||
def json_response(request: Request, message: Union[str, Dict] = "404 Not Found", code: int = 404):
    """
    Create a JSON response with standard formatting.

    A string message is wrapped as ``{"status", "message", "ip"}``. A dict
    message is sent as-is with ``status`` and ``ip`` added (overwriting any
    existing keys of the same name); the caller's dict is not modified.

    Args:
        request (Request): The Flask request object
        message (Union[str, Dict]): The response message or data
        code (int): The HTTP status code

    Returns:
        tuple: The object returned by ``jsonify`` and the HTTP status code
    """
    client_ip = getClientIP(request)
    if isinstance(message, dict):
        # Build a new dict so the caller's object is not mutated (it may be
        # shared or reused between requests).
        payload = {**message, "status": code, "ip": client_ip}
    else:
        payload = {
            "status": code,
            "message": message,
            "ip": client_ip,
        }
    return jsonify(payload), code
|
||||
|
||||
|
||||
def error_response(
    request: Request,
    message: str = "404 Not Found",
    code: int = 404,
    force_json: bool = False
) -> Union[Tuple[Dict, int], object]:
    """
    Create an error response in JSON or HTML format.

    JSON is returned when ``force_json`` is set or the client is detected by
    ``isCurl``. Otherwise ``templates/<code>.html`` is rendered if it exists,
    falling back to ``404.html``, and the message is also exposed in the
    ``X-Error-Message`` response header.

    Args:
        request (Request): The Flask request object
        message (str): The error message
        code (int): The HTTP status code
        force_json (bool): Whether to force JSON response regardless of client

    Returns:
        Union[Tuple[Dict, int], object]: The JSON or HTML response
    """
    if force_json or isCurl(request):
        return json_response(request, message, code)

    # Use the code-specific template when one exists, else the generic page.
    template_name = f"{code}.html" if os.path.isfile(f"templates/{code}.html") else "404.html"
    response = make_response(
        render_template(template_name, code=code, message=message), code
    )

    # Header values may not contain line breaks and must be latin-1
    # encodable, so sanitise the message before exposing it as a header.
    header_value = str(message).replace("\r", " ").replace("\n", " ")
    header_value = header_value.encode("latin-1", "replace").decode("latin-1")
    response.headers["X-Error-Message"] = header_value
    return response
|
||||
|
||||
|
||||
def parse_date(date_groups: list[str]) -> str | None:
    """
    Parse a list of date components into YYYY-MM-DD format.

    Uses dateutil.parser for robust parsing.
    Works for:
    - DD Month YYYY
    - Month DD, YYYY
    - YYYY-MM-DD
    - YYYYMMDD
    - Month YYYY (defaults day to 1)
    - Handles ordinal suffixes (st, nd, rd, th)

    Args:
        date_groups (list[str]): Date fragments (e.g. regex groups); empty
            or None entries are ignored

    Returns:
        str | None: The date as YYYY-MM-DD, or None if the input cannot be
            parsed or does not contain a year
    """
    sentinel_year = 1900
    try:
        # Join the usable fragments into a single string; optional regex
        # groups may be None or "" and must not poison the whole parse.
        date_str = " ".join(group for group in date_groups if group).strip()

        # Remove ordinal suffixes ("1st" -> "1"); the word boundary keeps the
        # match from eating the start of a longer token.
        date_str = re.sub(r"(\d+)(?:st|nd|rd|th)\b", r"\1", date_str, flags=re.IGNORECASE)

        # Missing fields are filled from the default (day=1, month=1).
        dt = parse(date_str, default=datetime.datetime(sentinel_year, 1, 1))

        if dt.year == sentinel_year:
            # Either the year was missing (filled from the default) or the
            # text really says 1900. Re-parse with a different default year:
            # if the result moves, the year came from the default -> reject.
            probe = parse(date_str, default=datetime.datetime(sentinel_year + 1, 1, 1))
            if probe.year != sentinel_year:
                return None

        return dt.strftime("%Y-%m-%d")

    except (ValueError, TypeError, OverflowError):
        # dateutil raises ValueError (ParserError) for unparseable text and
        # OverflowError for out-of-range numbers; TypeError covers bad input
        # types such as a non-iterable or non-string fragment.
        return None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user