diff --git a/sitemap/.gitignore b/sitemap/.gitignore
new file mode 100644
index 0000000..1d17dae
--- /dev/null
+++ b/sitemap/.gitignore
@@ -0,0 +1 @@
+.venv
diff --git a/sitemap/Dockerfile b/sitemap/Dockerfile
new file mode 100644
index 0000000..f6d933b
--- /dev/null
+++ b/sitemap/Dockerfile
@@ -0,0 +1,17 @@
+FROM python:3.13-alpine AS base
+ENV PYTHONUNBUFFERED=1
+WORKDIR /app
+COPY crontab /etc/crontabs/root
+COPY requirements.txt ./
+RUN apk add --no-cache gcc musl-dev libffi-dev cronie && \
+    pip install --no-cache-dir -r requirements.txt
+
+FROM base AS runner
+WORKDIR /app
+COPY . .
+ENV API_BASE_URL=
+ENV FRONTEND_URL=
+ENV STORAGE_PATH=/app/static
+RUN touch /var/log/cron.log && chmod 0644 /var/log/cron.log
+VOLUME [ "/app/static/sitemap.xml" ]
+CMD ["/bin/sh", "-c", "python /app/gen_sitemap.py && crond -f && tail -f /var/log/cron.log"]
diff --git a/sitemap/README.md b/sitemap/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/sitemap/crontab b/sitemap/crontab
new file mode 100644
index 0000000..05a7fcf
--- /dev/null
+++ b/sitemap/crontab
@@ -0,0 +1,3 @@
+0 0 * * * /usr/local/bin/python /app/gen_sitemap.py
+0 8 * * * /usr/local/bin/python /app/gen_sitemap.py
+0 16 * * * /usr/local/bin/python /app/gen_sitemap.py
diff --git a/sitemap/gen_sitemap.py b/sitemap/gen_sitemap.py
new file mode 100644
index 0000000..4e89d69
--- /dev/null
+++ b/sitemap/gen_sitemap.py
@@ -0,0 +1,123 @@
+"""Generate ``sitemap.xml`` for the frontend from the posts served by the API.
+
+Configuration is read from environment variables:
+
+* ``API_BASE_URL`` -- base URL of the API; the post list is fetched from ``post``
+  below it.
+* ``FRONTEND_URL`` -- base URL of the site that the sitemap entries point at.
+* ``STORAGE_PATH`` -- directory that receives ``sitemap.xml`` (default
+  ``./static``).
+"""
+
+import os
+from urllib.parse import urljoin
+import xml.etree.ElementTree as ET
+
+import requests
+from pydantic import BaseModel
+
+
+API_BASE_URL = os.environ.get("API_BASE_URL")
+FRONTEND_URL = os.environ.get("FRONTEND_URL")
+STORAGE_PATH = os.environ.get("STORAGE_PATH", "./static")
+
+# Seconds to wait for the API. requests applies no timeout unless one is
+# passed, so without this a stalled API would hang the cron job indefinitely.
+REQUEST_TIMEOUT = 30
+
+SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9"
+# The file is written as UTF-8 in main(), which is what the declaration states.
+XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>\n'
+
+
+class SitemapItem(BaseModel):
+    """One ``<url>`` entry of the sitemap."""
+
+    # Absolute URL of the page.
+    loc: str
+    # Value for ``<lastmod>``; the element is left out when this is unset.
+    lastmod: str | None = None
+
+
+def _join_url(base: str, path: str) -> str:
+    """Return *path* resolved below *base*, keeping every segment of *base*.
+
+    ``urljoin`` drops the last path segment of a base that has no trailing
+    slash (``https://host/api`` + ``post`` gives ``https://host/post``), so the
+    base is normalised to end with exactly one slash before joining.
+    """
+    return urljoin(base.rstrip("/") + "/", path)
+
+
+def get_posts():
+    """Fetch the post list from the API and return the decoded JSON body.
+
+    Raises:
+        requests.HTTPError: If the API responds with an error status.
+        requests.Timeout: If the API does not answer within REQUEST_TIMEOUT.
+    """
+    url = _join_url(API_BASE_URL, "post")
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+
+    return response.json()
+
+
+def map_post_to_sitemap_item(post: dict) -> SitemapItem:
+    """Convert one API post record into its sitemap entry.
+
+    Reads ``post["id"]`` and ``post["published_time"]``. Only the first ten
+    characters of the timestamp are kept -- presumably the ``YYYY-MM-DD`` part
+    of an ISO 8601 string; confirm against the API.
+    """
+    loc = _join_url(FRONTEND_URL, f"post/{post['id']}")
+    lastmod = post["published_time"][:10]
+    return SitemapItem(loc=loc, lastmod=lastmod)
+
+
+def generate_sitemap(items: list[SitemapItem]) -> str:
+    """Serialise *items* into a sitemap XML document.
+
+    Returns:
+        The XML declaration followed by a ``<urlset>`` element holding one
+        ``<url>`` per item; ``<lastmod>`` is emitted only for items that have
+        one.
+    """
+    root = ET.Element("urlset", xmlns=SITEMAP_NAMESPACE)
+
+    for item in items:
+        url_element = ET.SubElement(root, "url")
+        ET.SubElement(url_element, "loc").text = item.loc
+        if item.lastmod:
+            ET.SubElement(url_element, "lastmod").text = item.lastmod
+
+    return XML_DECLARATION + ET.tostring(root, encoding="unicode")
+
+
+def main():
+    """Build the sitemap and write it to ``<STORAGE_PATH>/sitemap.xml``.
+
+    Raises:
+        ValueError: If API_BASE_URL or FRONTEND_URL is unset or empty.
+    """
+    if not API_BASE_URL:
+        raise ValueError("API_BASE_URL environment variable is not set.")
+    if not FRONTEND_URL:
+        raise ValueError("FRONTEND_URL environment variable is not set.")
+
+    posts = get_posts()
+    static_pages = [
+        SitemapItem(loc=FRONTEND_URL),
+        SitemapItem(loc=_join_url(FRONTEND_URL, "post")),
+    ]
+    sitemap_items = [*static_pages, *map(map_post_to_sitemap_item, posts)]
+
+    sitemap = generate_sitemap(sitemap_items)
+    sitemap_path = os.path.join(STORAGE_PATH, "sitemap.xml")
+    os.makedirs(STORAGE_PATH, exist_ok=True)
+    with open(sitemap_path, "w", encoding="utf-8") as f:
+        f.write(sitemap)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sitemap/requirements.txt b/sitemap/requirements.txt
new file mode 100644
index 0000000..903705e
--- /dev/null
+++ b/sitemap/requirements.txt
@@ -0,0 +1,2 @@
+pydantic
+requests