blog/sitemap/gen_sitemap.py

68 lines
1.9 KiB
Python

import os
import xml.etree.ElementTree as ET
from typing import Optional
from urllib.parse import urljoin

import requests
from pydantic import BaseModel
# Runtime configuration, read once from the process environment at import time.
# The two URLs have no fallback (they are None when unset); main() rejects
# empty values before doing any work.
API_BASE_URL = os.getenv("API_BASE_URL")
FRONTEND_URL = os.getenv("FRONTEND_URL")
# Directory that receives the generated sitemap.xml.
STORAGE_PATH = os.getenv("STORAGE_PATH", "./static")
class SitemapItem(BaseModel):
    """One entry of the sitemap: a page URL plus an optional modification date."""

    # Absolute URL of the page.
    loc: str
    # Last-modification date string, or None when no date is known.
    # Declared Optional so the annotation matches the None default and an
    # explicit ``lastmod=None`` passes validation as well.
    lastmod: Optional[str] = None
def get_posts(timeout: float = 10.0):
    """Fetch the posts from the backend API.

    Sends a GET request to ``urljoin(API_BASE_URL, "post")`` and returns the
    decoded JSON body of the response.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            generator if the API stalls.

    Returns:
        The decoded JSON payload. ``main()`` iterates it as a sequence of
        post dicts.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # NOTE(review): urljoin replaces the last path segment of a base URL that
    # has no trailing slash -- confirm API_BASE_URL ends with "/" if it
    # carries a path prefix.
    url = urljoin(API_BASE_URL, "post")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()
def map_post_to_sitemap_item(post: dict) -> SitemapItem:
    """Convert one post dict into its sitemap entry.

    Args:
        post: Post record; must contain ``"id"`` and may contain
            ``"published_time"``.

    Returns:
        A ``SitemapItem`` whose ``loc`` is ``post/<id>`` resolved against
        ``FRONTEND_URL``. ``lastmod`` is the first 10 characters of
        ``published_time`` (presumably the ``YYYY-MM-DD`` part of an ISO
        timestamp -- confirm against the API), or left unset when the post
        has no publication time.

    Raises:
        KeyError: If the post has no ``"id"``.
    """
    loc = urljoin(FRONTEND_URL, f"post/{post['id']}")
    published_time = post.get("published_time")
    if not published_time:
        # A missing or null publication time must not abort the whole run;
        # lastmod is optional, so emit the URL without it.
        return SitemapItem(loc=loc)
    return SitemapItem(loc=loc, lastmod=published_time[:10])
def generate_sitemap(items: list[SitemapItem]) -> str:
    """Serialize sitemap entries into an XML document string.

    Args:
        items: Entries to render; each must expose ``loc`` and ``lastmod``.

    Returns:
        The XML declaration followed by a ``<urlset>`` element holding one
        ``<url>`` per entry. ``<lastmod>`` is written only for entries whose
        ``lastmod`` is truthy.
    """
    urlset = ET.Element("urlset")
    urlset.set("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9")

    for entry in items:
        node = ET.SubElement(urlset, "url")
        fields = [("loc", entry.loc)]
        if entry.lastmod:
            fields.append(("lastmod", entry.lastmod))
        for tag, value in fields:
            child = ET.SubElement(node, tag)
            child.text = value

    # encoding="unicode" yields a str without a declaration, so prepend one.
    body = ET.tostring(urlset, encoding="unicode")
    return '<?xml version="1.0" encoding="UTF-8"?>\n' + body
def main():
    """Build sitemap.xml from the static pages and the posts served by the API.

    Checks that both URL settings are present, fetches the posts, renders
    the sitemap and writes it to ``<STORAGE_PATH>/sitemap.xml``, creating the
    directory if needed.

    Raises:
        ValueError: If ``API_BASE_URL`` or ``FRONTEND_URL`` is unset or empty.
    """
    if not API_BASE_URL:
        raise ValueError("API_BASE_URL environment variable is not set.")
    if not FRONTEND_URL:
        raise ValueError("FRONTEND_URL environment variable is not set.")

    fetched_posts = get_posts()

    # Static pages come first, followed by one entry per post.
    entries = [
        SitemapItem(loc=FRONTEND_URL),
        SitemapItem(loc=urljoin(FRONTEND_URL, "post")),
    ]
    entries.extend(map_post_to_sitemap_item(post) for post in fetched_posts)

    xml_text = generate_sitemap(entries)

    os.makedirs(STORAGE_PATH, exist_ok=True)
    target = os.path.join(STORAGE_PATH, "sitemap.xml")
    with open(target, "w", encoding="utf-8") as out:
        out.write(xml_text)
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()