"""Build ``sitemap.xml`` for the frontend from the posts exposed by the API.

Configuration is read from environment variables:

* ``API_BASE_URL`` -- base URL of the API; the ``post`` endpoint is queried.
* ``FRONTEND_URL`` -- base URL used to build the ``<loc>`` entries.
* ``STORAGE_PATH`` -- directory that receives ``sitemap.xml``
  (defaults to ``./static``).
"""
import os
import xml.etree.ElementTree as ET
from typing import Optional
from urllib.parse import urljoin

import requests
from pydantic import BaseModel

API_BASE_URL = os.environ.get("API_BASE_URL")
FRONTEND_URL = os.environ.get("FRONTEND_URL")
STORAGE_PATH = os.environ.get("STORAGE_PATH", "./static")


class SitemapItem(BaseModel):
    """A single ``<url>`` entry of the sitemap."""

    # Absolute URL of the page; emitted as <loc>.
    loc: str
    # Last-modification date; emitted as <lastmod> only when set.
    lastmod: Optional[str] = None


def _join_url(base: str, path: str) -> str:
    """Join *path* onto *base* without dropping the last segment of *base*.

    ``urljoin("https://host/api", "post")`` yields ``https://host/post``
    because the base is treated as a file-like resource. Guaranteeing a
    trailing slash makes the base behave as a directory, so the result is
    ``https://host/api/post``. Bases that are a bare host or already end in
    ``/`` produce the same result as a plain ``urljoin``.
    """
    if not base.endswith("/"):
        base += "/"
    return urljoin(base, path)


def get_posts(timeout: float = 30.0):
    """Fetch the posts from the ``post`` endpoint of the API.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body of the response. ``main`` iterates over it and
        treats every element as a post mapping.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within *timeout*.
    """
    url = _join_url(API_BASE_URL, "post")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def map_post_to_sitemap_item(post: dict) -> SitemapItem:
    """Convert one post mapping into a :class:`SitemapItem`.

    Args:
        post: Mapping with at least the keys ``"id"`` and
            ``"published_time"``.

    Returns:
        An item whose ``loc`` is ``<FRONTEND_URL>/post/<id>`` and whose
        ``lastmod`` is the first ten characters of ``published_time``.

    Raises:
        KeyError: If ``"id"`` or ``"published_time"`` is missing.
    """
    loc = _join_url(FRONTEND_URL, f"post/{post['id']}")
    # NOTE(review): presumably an ISO-8601 timestamp, so the first ten
    # characters are the YYYY-MM-DD date -- confirm against the API schema.
    lastmod = post["published_time"][:10]
    return SitemapItem(loc=loc, lastmod=lastmod)


def generate_sitemap(items: list[SitemapItem]) -> str:
    """Render *items* as a sitemap XML document.

    Args:
        items: Entries to emit, in order. ``lastmod`` is written only for
            items where it is set (truthy).

    Returns:
        The XML document as text: an XML declaration followed by the
        ``<urlset>`` element.
    """
    # The declaration states UTF-8, which matches the encoding ``main`` uses
    # when it writes the file.
    header = '<?xml version="1.0" encoding="UTF-8"?>\n'
    root = ET.Element(
        "urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
    )
    for item in items:
        url_element = ET.SubElement(root, "url")
        ET.SubElement(url_element, "loc").text = item.loc
        if item.lastmod:
            ET.SubElement(url_element, "lastmod").text = item.lastmod
    return header + ET.tostring(root, encoding="unicode")


def main() -> None:
    """Fetch the posts, build the sitemap and write it to ``STORAGE_PATH``.

    Raises:
        ValueError: If ``API_BASE_URL`` or ``FRONTEND_URL`` is not set.
    """
    if not API_BASE_URL:
        raise ValueError("API_BASE_URL environment variable is not set.")
    if not FRONTEND_URL:
        raise ValueError("FRONTEND_URL environment variable is not set.")

    posts = get_posts()

    # Pages that exist independently of the posts returned by the API.
    static_pages = [
        SitemapItem(loc=FRONTEND_URL),
        SitemapItem(loc=_join_url(FRONTEND_URL, "post")),
    ]
    sitemap_items = [*static_pages, *map(map_post_to_sitemap_item, posts)]
    sitemap = generate_sitemap(sitemap_items)

    sitemap_path = os.path.join(STORAGE_PATH, "sitemap.xml")
    os.makedirs(STORAGE_PATH, exist_ok=True)
    with open(sitemap_path, "w", encoding="utf-8") as f:
        f.write(sitemap)


if __name__ == "__main__":
    main()