"""Scrape discount listings from folderz.nl and store them in MySQL.

Each page's HTML is sent to a Gemini model that returns structured product
records; those records are persisted as ``Discount`` rows.
"""

import logging
import os
import time
from datetime import datetime, timezone, timedelta
from urllib.parse import urljoin

import requests
from google import genai
from pydantic import BaseModel
from sqlalchemy import DateTime, Float, String, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, Session

LOG = logging.getLogger(__name__)

# Upper bound (seconds) for the HTTP fetch so a stalled server cannot hang
# the scraper forever.
REQUEST_TIMEOUT_SECONDS = 30


class ModelProductResponse(BaseModel):
    """Schema the model is asked to fill in for every product on a page."""

    name: str
    store: str
    price: float
    # Number of days, counted from the moment of scraping, the offer is valid.
    expires_in_days: int
    current_page: int
    # Link to the following page; main() resolves it against the site root.
    next_page_url: str


class Base(DeclarativeBase):
    """Declarative base for the ORM models in this module."""


class Discount(Base):
    """A single scraped discount, stored in the ``discount`` table."""

    __tablename__ = "discount"

    id: Mapped[int] = mapped_column(primary_key=True)
    product: Mapped[str] = mapped_column(String(255))
    store: Mapped[str] = mapped_column(String(255))
    price: Mapped[float] = mapped_column(Float)
    since: Mapped[datetime] = mapped_column(DateTime)
    until: Mapped[datetime] = mapped_column(DateTime)


def fetch_page_data(client: genai.Client, url: str) -> list[ModelProductResponse]:
    """Download ``url`` and let the model extract its products.

    Args:
        client: Gemini client used to run the extraction prompt.
        url: Address of the page to download.

    Returns:
        The products parsed by the model; an empty list when the model
        response could not be parsed into the requested schema.

    Raises:
        requests.HTTPError: If the page responds with an error status.
        requests.Timeout: If the page does not respond in time.
    """
    page = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    page.raise_for_status()

    prompt = f"Parse HTML input into JSON and lowercase all names.\n\n{page.text}"
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_schema": list[ModelProductResponse],
        },
    )
    # ``parsed`` is None when the SDK cannot coerce the reply into the schema;
    # normalise to a list so callers can always iterate.
    return response.parsed or []


def main():
    """Walk the paginated discount listing and persist every product found.

    Stops when a page yields no link to a following page, or when the link
    points at a page that was already processed.
    """
    engine = create_engine("mysql+pymysql://folderz:folderz@localhost:3306/folderz")
    Base.metadata.create_all(engine)

    key = os.getenv("API_KEY")
    client = genai.Client(api_key=key)

    base_url = "https://www.folderz.nl"
    url = base_url + "/populaire-aanbiedingen"
    # Pages already processed; guards against the model returning a link
    # that would send the scraper around in a circle.
    visited: set[str] = set()

    while url:
        visited.add(url)
        LOG.info("querying %s", url)

        discounts: list[Discount] = []
        next_page_url = ""
        for response in fetch_page_data(client, url):
            # Every record carries the pagination link; keep the last
            # non-empty one.
            next_page_url = response.next_page_url or next_page_url
            now = datetime.now(timezone.utc)
            discounts.append(
                Discount(
                    product=response.name,
                    store=response.store,
                    price=response.price,
                    since=now,
                    until=now + timedelta(days=response.expires_in_days),
                )
            )

        # Open the session only for the write so no DB connection is held
        # during the (slow) HTTP and model calls.
        if discounts:
            with Session(engine) as session:
                session.add_all(discounts)
                session.commit()

        if not next_page_url:
            # Empty page or no further link: nothing left to scrape.
            break

        # urljoin copes with absolute links and with paths lacking a
        # leading slash, both of which plain concatenation would mangle.
        url = urljoin(base_url, next_page_url)
        if url in visited:
            LOG.info("next page %s was already processed; stopping", url)
            break

        # Be polite to the remote site between page requests.
        time.sleep(1)


if __name__ == "__main__":
    main()