summary | refs | log | tree | commit | diff
path: root/folderz-parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'folderz-parser.py')
-rw-r--r-- folderz-parser.py | 96
1 files changed, 0 insertions, 96 deletions
diff --git a/folderz-parser.py b/folderz-parser.py
deleted file mode 100644
index c859332..0000000
--- a/folderz-parser.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import os
-import requests
-import time
-import logging
-
-from datetime import datetime, timezone, timedelta
-
-from sqlalchemy import DateTime, Float, String, create_engine
-from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, Session
-from pydantic import BaseModel
-from google import genai
-
-
-LOG = logging.getLogger(__name__)
-
-
-class ModelProductResponse(BaseModel):
- name: str
- store: str
- price: float
- expires_in_days: int
- current_page: int
- next_page_url: str
-
-
-class Base(DeclarativeBase):
- pass
-
-
-class Discount(Base):
- __tablename__ = "discount"
-
- id: Mapped[int] = mapped_column(primary_key=True)
- product: Mapped[str] = mapped_column(String(255))
- store: Mapped[str] = mapped_column(String(255))
- price: Mapped[float] = mapped_column(Float)
- since: Mapped[datetime] = mapped_column(DateTime)
- until: Mapped[datetime] = mapped_column(DateTime)
-
-
-def fetch_page_data(client: genai.Client, url: str) -> list[ModelProductResponse]:
- curl = requests.get(url)
- curl.raise_for_status()
-
- prompt = f"Parse HTML input into JSON and lowercase all names.\n\n{curl.text}"
-
- response = client.models.generate_content(
- model="gemini-2.0-flash",
- contents=prompt,
- config={
- "response_mime_type": "application/json",
- "response_schema": list[ModelProductResponse],
- },
- )
-
- return response.parsed
-
-
-def main():
- engine = create_engine("mysql+pymysql://folderz:folderz@localhost:3306/folderz")
- Base.metadata.create_all(engine)
-
- key = os.getenv("API_KEY")
- client = genai.Client(api_key=key)
-
- base_url = "https://www.folderz.nl"
- url = base_url + "/populaire-aanbiedingen"
-
- while url:
- discounts: list[Discount] = []
-
- with Session(engine) as session:
- LOG.info(f"querying {url}")
-
- for response in fetch_page_data(client, url):
- url = base_url + response.next_page_url
- now = datetime.now(timezone.utc)
-
- discount = Discount(
- product=response.name,
- store=response.store,
- price=response.price,
- since=now,
- until=now + timedelta(days=response.expires_in_days),
- )
-
- discounts.append(discount)
-
- session.add_all(discounts)
- session.commit()
-
- time.sleep(1)
-
-
-if __name__ == "__main__":
- main()