diff --git a/blueprints/api.py b/blueprints/api.py index 986c769..ed69315 100644 --- a/blueprints/api.py +++ b/blueprints/api.py @@ -2,16 +2,25 @@ from flask import Blueprint, request, jsonify import os import datetime import requests +import re from mail import sendEmail -from tools import getClientIP, getGitCommit, json_response +from tools import getClientIP, getGitCommit, json_response, parse_date from blueprints.sol import sol_bp +from dateutil import parser as date_parser +# Constants +HTTP_OK = 200 +HTTP_BAD_REQUEST = 400 +HTTP_UNAUTHORIZED = 401 +HTTP_NOT_FOUND = 404 +HTTP_UNSUPPORTED_MEDIA = 415 +HTTP_SERVER_ERROR = 500 api_bp = Blueprint('api', __name__) # Register solana blueprint - api_bp.register_blueprint(sol_bp) +# Load configuration NC_CONFIG = requests.get( "https://cloud.woodburn.au/s/4ToXgFe3TnnFcN7/download/website-conf.json" ).json() @@ -23,6 +32,7 @@ if 'time-zone' not in NC_CONFIG: @api_bp.route("/") @api_bp.route("/help") def help(): + """Provide API documentation and help.""" return jsonify({ "message": "Welcome to Nathan.Woodburn/ API! This is a personal website. For more information, visit https://nathan.woodburn.au", "endpoints": { @@ -37,71 +47,78 @@ def help(): "base_url": "/api/v1", "version": getGitCommit(), "ip": getClientIP(request), - "status": 200 + "status": HTTP_OK }) @api_bp.route("/version") def version(): + """Get the current version of the website.""" return jsonify({"version": getGitCommit()}) @api_bp.route("/time") def time(): + """Get the current time in the configured timezone.""" timezone_offset = datetime.timedelta(hours=NC_CONFIG["time-zone"]) timezone = datetime.timezone(offset=timezone_offset) - time = datetime.datetime.now(tz=timezone) + current_time = datetime.datetime.now(tz=timezone) return jsonify({ - "timestring": time.strftime("%A, %B %d, %Y %I:%M %p"), - "timestamp": time.timestamp(), + "timestring": current_time.strftime("%A, %B %d, %Y %I:%M %p"), + "timestamp": current_time.timestamp(), "timezone": NC_CONFIG["time-zone"], - "timeISO": time.isoformat(), + "timeISO": current_time.isoformat(), "ip": getClientIP(request), - "status": 200 + "status": HTTP_OK }) @api_bp.route("/timezone") def timezone(): + """Get the current timezone setting.""" return jsonify({ "timezone": NC_CONFIG["time-zone"], "ip": getClientIP(request), - "status": 200 + "status": HTTP_OK }) + @api_bp.route("/message") def message(): + """Get the message from the configuration.""" return jsonify({ "message": NC_CONFIG["message"], "ip": getClientIP(request), - "status": 200 + "status": HTTP_OK }) @api_bp.route("/ip") def ip(): + """Get the client's IP address.""" return jsonify({ "ip": getClientIP(request), - "status": 200 + "status": HTTP_OK }) @api_bp.route("/email", methods=["POST"]) def email_post(): + """Send an email via the API (requires API key).""" # Verify json if not request.is_json: - return json_response(request, "415 Unsupported Media Type", 415) + return json_response(request, "415 Unsupported Media Type", HTTP_UNSUPPORTED_MEDIA) # Check if api key sent data = request.json if not data: - return json_response(request, "400 Bad Request", 400) + return json_response(request, "400 Bad Request", HTTP_BAD_REQUEST) if "key" not in data: - return json_response(request, "400 Bad Request 'key' missing", 400) + return json_response(request, "400 Bad Request 'key' missing", HTTP_BAD_REQUEST) if data["key"] != os.getenv("EMAIL_KEY"): - return json_response(request, "401 Unauthorized", 401) + return json_response(request, "401 Unauthorized", HTTP_UNAUTHORIZED) # TODO: Add client info to email return sendEmail(data) @@ -109,6 +126,7 @@ def email_post(): @api_bp.route("/project") def project(): + """Get information about the current git project.""" gitinfo = { "website": None, } @@ -129,12 +147,125 @@ def project(): gitinfo["website"] = git["repo"]["website"] except Exception as e: print(f"Error getting git data: {e}") - return json_response(request, "500 Internal Server Error", 500) + return json_response(request, "500 Internal Server Error", HTTP_SERVER_ERROR) return jsonify({ "repo_name": repo_name, "repo_description": repo_description, "repo": gitinfo, "ip": getClientIP(request), - "status": 200 + "status": HTTP_OK }) + + +@api_bp.route("/page_date") +def page_date(): + url = request.args.get("url") + if not url: + return json_response(request, "400 Bad Request 'url' missing", HTTP_BAD_REQUEST) + + verbose = request.args.get("verbose", "").lower() in ["true", "1", "yes", "y"] + + if not url.startswith(("https://", "http://")): + return json_response(request, "400 Bad Request 'url' invalid", HTTP_BAD_REQUEST) + + try: + r = requests.get(url, timeout=5) + r.raise_for_status() + except requests.exceptions.RequestException as e: + return json_response(request, f"400 Bad Request 'url' unreachable: {e}", HTTP_BAD_REQUEST) + + page_text = r.text + + # Remove ordinal suffixes globally + page_text = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', page_text, flags=re.IGNORECASE) + # Remove HTML comments + page_text = re.sub(r'', '', page_text, flags=re.DOTALL) + + date_patterns = [ + r'(\d{4})[/-](\d{1,2})[/-](\d{1,2})', # YYYY-MM-DD + r'(\d{1,2})[/-](\d{1,2})[/-](\d{4})', # DD-MM-YYYY + r'(?:Last updated:|Updated:|Updated last:)?\s*(\d{1,2})\s+([A-Za-z]{3,9})[, ]?\s*(\d{4})', # DD Month YYYY + r'(?:\b\w+\b\s+){0,3}([A-Za-z]{3,9})\s+(\d{1,2}),?\s*(\d{4})', # Month DD, YYYY with optional words + r'\b(\d{4})(\d{2})(\d{2})\b', # YYYYMMDD + r'(?:Last updated:|Updated:|Last update)?\s*([A-Za-z]{3,9})\s+(\d{4})', # Month YYYY only + ] + + + + # Structured data patterns + json_date_patterns = { + r'"datePublished"\s*:\s*"([^"]+)"': "published", + r'"dateModified"\s*:\s*"([^"]+)"': "modified", + r']*?)property\s*=\s*"article:published_time"\s+content\s*=\s*"([^"]+)"': "published", + r']*?)property\s*=\s*"article:modified_time"\s+content\s*=\s*"([^"]+)"': "modified", + r' str: + """ + Get the client's IP address from the request. + + Args: + request (Request): The Flask request object + + Returns: + str: The client's IP address + """ x_forwarded_for = request.headers.get("X-Forwarded-For") if x_forwarded_for: ip = x_forwarded_for.split(",")[0] else: ip = request.remote_addr + if ip is None: + ip = "unknown" return ip -def getGitCommit(): +def getGitCommit() -> str: + """ + Get the current git commit hash. + + Returns: + str: The current git commit hash or a failure message + """ # if .git exists, get the latest commit hash if os.path.isdir(".git"): git_dir = ".git" @@ -35,77 +61,156 @@ def getGitCommit(): def isCurl(request: Request) -> bool: """ - Check if the request is from curl or hurl + Check if the request is from curl or hurl. Args: request (Request): The Flask request object + Returns: - bool: True if the request is from curl, False otherwise - + bool: True if the request is from curl or hurl, False otherwise """ if request.headers and request.headers.get("User-Agent"): - # Check if curl - if "curl" in request.headers.get("User-Agent", ""): - return True - if "hurl" in request.headers.get("User-Agent",""): - return True + user_agent = request.headers.get("User-Agent", "") + return "curl" in user_agent or "hurl" in user_agent return False def isCrawler(request: Request) -> bool: """ - Check if the request is from a web crawler (e.g., Googlebot, Bingbot) + Check if the request is from a web crawler (e.g., Googlebot, Bingbot). + Args: request (Request): The Flask request object + Returns: bool: True if the request is from a web crawler, False otherwise """ - if request.headers and request.headers.get("User-Agent"): - # Check if Googlebot or Bingbot - if "Googlebot" in request.headers.get( - "User-Agent", "" - ) or "Bingbot" in request.headers.get("User-Agent", ""): - return True + user_agent = request.headers.get("User-Agent", "") + return "Googlebot" in user_agent or "Bingbot" in user_agent return False @cache def getAddress(coin: str) -> str: + """ + Get the wallet address for a cryptocurrency. + + Args: + coin (str): The cryptocurrency code + + Returns: + str: The wallet address or empty string if not found + """ address = "" - if os.path.isfile(".well-known/wallets/" + coin.upper()): - with open(".well-known/wallets/" + coin.upper()) as file: + wallet_path = f".well-known/wallets/{coin.upper()}" + if os.path.isfile(wallet_path): + with open(wallet_path) as file: address = file.read() return address -def getFilePath(name, path): +def getFilePath(name: str, path: str) -> Optional[str]: + """ + Find a file in a directory tree. + + Args: + name (str): The filename to find + path (str): The root directory to search + + Returns: + Optional[str]: The full path to the file or None if not found + """ for root, dirs, files in os.walk(path): if name in files: return os.path.join(root, name) + return None -def json_response(request: Request, message: str = "404 Not Found", code: int = 404): - return jsonify( - { - "status": code, - "message": message, - "ip": getClientIP(request), - } - ), code +def json_response(request: Request, message: Union[str, Dict] = "404 Not Found", code: int = 404): + """ + Create a JSON response with standard formatting. + + Args: + request (Request): The Flask request object + message (Union[str, Dict]): The response message or data + code (int): The HTTP status code + + Returns: + Tuple[Dict, int]: The JSON response and HTTP status code + """ + if isinstance(message, dict): + # Add status and ip to dict + message["status"] = code + message["ip"] = getClientIP(request) + return jsonify(message), code + + return jsonify({ + "status": code, + "message": message, + "ip": getClientIP(request), + }), code -def error_response(request: Request, message: str = "404 Not Found", code: int = 404, force_json: bool = False): +def error_response( + request: Request, + message: str = "404 Not Found", + code: int = 404, + force_json: bool = False +) -> Union[Tuple[Dict, int], object]: + """ + Create an error response in JSON or HTML format. + + Args: + request (Request): The Flask request object + message (str): The error message + code (int): The HTTP status code + force_json (bool): Whether to force JSON response regardless of client + + Returns: + Union[Tuple[Dict, int], object]: The JSON or HTML response + """ if force_json or isCurl(request): return json_response(request, message, code) # Check if .html exists in templates + template_name = f"{code}.html" if os.path.isfile(f"templates/{code}.html") else "404.html" response = make_response(render_template( - "404.html", code=code, message=message), code) - if os.path.isfile(f"templates/{code}.html"): - response = make_response(render_template( - f"{code}.html", code=code, message=message), code) + template_name, code=code, message=message), code) # Add message to response headers response.headers["X-Error-Message"] = message return response + + +def parse_date(date_groups: list[str]) -> str | None: + """ + Parse a list of date components into YYYY-MM-DD format. + Uses dateutil.parser for robust parsing. + Works for: + - DD Month YYYY + - Month DD, YYYY + - YYYY-MM-DD + - YYYYMMDD + - Month YYYY (defaults day to 1) + - Handles ordinal suffixes (st, nd, rd, th) + """ + try: + # Join date groups into a single string + date_str = " ".join(date_groups).strip() + + # Remove ordinal suffixes + date_str = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str, flags=re.IGNORECASE) + + # Parse with dateutil, default day=1 if missing + dt = parse(date_str, default=datetime.datetime(1900, 1, 1)) + + # If year is missing, parse will fallback to 1900 → reject + if dt.year == 1900: + return None + + return dt.strftime("%Y-%m-%d") + + except (ValueError, TypeError): + return None +