summary | refs | log | tree | commit | diff
path: root/folderz-parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'folderz-parser.py')
-rw-r--r-- folderz-parser.py 58
1 files changed, 58 insertions, 0 deletions
diff --git a/folderz-parser.py b/folderz-parser.py
new file mode 100644
index 0000000..610833a
--- /dev/null
+++ b/folderz-parser.py
@@ -0,0 +1,58 @@
+import os
+import requests
+
+from datetime import datetime, timezone
+
+from pydantic import BaseModel
+from google import genai
+
+
+# Schema for one product as extracted by the model: main() passes
+# list[ModelProductResponse] as the response schema and reads the parsed
+# result back as instances of this class.
+# NOTE(review): documented with comments rather than a docstring on purpose;
+# pydantic can copy a class docstring into the generated JSON schema as its
+# description, which could change the schema sent with the request.
+class ModelProductResponse(BaseModel):
+ # Product name; the prompt in main() asks to lowercase all names.
+ name: str
+ # Store name, as requested by the prompt in main().
+ store: str
+ # Product price. NOTE(review): the currency/unit is not stated anywhere
+ # in this file -- confirm.
+ price: float
+ # Pagination fields. The prompt in main() does not mention them, so the
+ # model presumably fills them in from the field names alone -- TODO confirm.
+ current_page: int
+ next_page_url: str
+
+
+# A model-extracted product plus a timestamp; main() sets `date` to
+# datetime.now(timezone.utc) when it builds each instance.
+class Product(ModelProductResponse):
+ date: datetime
+
+
+def main() -> None:
+    """Scrape folderz.nl's popular offers and extract products with Gemini.
+
+    Downloads the page HTML, sends it to the ``gemini-2.0-flash`` model with
+    a JSON response schema of ``list[ModelProductResponse]``, prints each
+    item's pagination fields, and wraps every item in a ``Product`` stamped
+    with the current UTC time.
+
+    Raises:
+        requests.RequestException: If the page cannot be fetched within the
+            timeout or the server answers with an error status.
+        RuntimeError: If the model reply yields no parsed product list.
+    """
+    # NOTE(review): key is None when API_KEY is unset; confirm how
+    # genai.Client behaves in that case.
+    key = os.getenv("API_KEY")
+    client = genai.Client(api_key=key)
+
+    base_url = "https://www.folderz.nl"
+    init_url = base_url + "/populaire-aanbiedingen"
+
+    # requests does not time out by default, so an unresponsive server would
+    # hang the script indefinitely; bound the wait explicitly.
+    page = requests.get(init_url, timeout=30)
+    page.raise_for_status()
+
+    # The continuation lines below belong to the string literal and are kept
+    # exactly as they were.
+    prompt_template = """Given an HTML input, give me the product name(s), store(s) and price(s). Please lowercase all names.
+
+ %s
+ """
+
+    response = client.models.generate_content(
+        model="gemini-2.0-flash",
+        contents=prompt_template % page.text,
+        config={
+            "response_mime_type": "application/json",
+            "response_schema": list[ModelProductResponse],
+        },
+    )
+
+    # `parsed` can be None when the reply could not be turned into the
+    # schema; fail with a clear message instead of a TypeError from
+    # iterating None.
+    parsed = response.parsed
+    if parsed is None:
+        raise RuntimeError("Gemini response could not be parsed into products")
+
+    model_responses: list[ModelProductResponse] = parsed
+    # NOTE(review): products is collected but not used past this point in
+    # the visible code.
+    products: list[Product] = []
+
+    # A distinct loop name keeps the API response above from being shadowed.
+    for item in model_responses:
+        print(item.current_page, item.next_page_url)
+
+        products.append(
+            Product(**item.model_dump(), date=datetime.now(timezone.utc))
+        )
+
+
+# Run the scraper only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+ main()