diff options
author | Jasper Ras <jaspert.ras@gmail.com> | 2025-02-19 22:40:03 +0100 |
---|---|---|
committer | Jasper Ras <jaspert.ras@gmail.com> | 2025-02-19 22:40:03 +0100 |
commit | a0e06aba76af1c267fb65421f6589844df8b19cf (patch) | |
tree | 5be75b8832d9f97e2dd2bff88f4b3cac8de7b7c1 /folderz-parser.py | |
parent | 74f86ab57f824f4f446d6f102ae8ad0038be5de1 (diff) |
parse aanbiedingen
Diffstat (limited to 'folderz-parser.py')
-rw-r--r-- | folderz-parser.py | 58 |
1 file changed, 58 insertions, 0 deletions
"""Fetch the popular-offers page from folderz.nl and extract products with Gemini.

Reconstructed from the diff that adds ``folderz-parser.py`` as a new file.
"""

import os
from datetime import datetime, timezone

import requests
from google import genai
from pydantic import BaseModel

# Upper bound (seconds) on the page fetch; without it a stalled server
# would block the script forever.
REQUEST_TIMEOUT_SECONDS = 30


class ModelProductResponse(BaseModel):
    """Schema the model is asked to fill in for each product it finds."""

    name: str
    store: str
    price: float
    current_page: int
    next_page_url: str


class Product(ModelProductResponse):
    """A model-extracted product stamped with the time it was recorded."""

    date: datetime


def main() -> list[Product]:
    """Download the offers page, have the model extract products, and collect them.

    The API key is read from the ``API_KEY`` environment variable. For every
    extracted entry the current page number and next-page URL are printed.

    Returns:
        The extracted products, each stamped with the current UTC time.

    Raises:
        requests.HTTPError: If the offers page responds with an error status.
        requests.Timeout: If the offers page does not answer in time.
        RuntimeError: If the model reply could not be parsed into the schema.
    """
    key = os.getenv("API_KEY")
    client = genai.Client(api_key=key)

    base_url = "https://www.folderz.nl"
    init_url = base_url + "/populaire-aanbiedingen"

    page = requests.get(init_url, timeout=REQUEST_TIMEOUT_SECONDS)
    page.raise_for_status()

    prompt_template = """Given an HTML input, give me the product name(s), store(s) and price(s). Please lowercase all names.

    %s
    """

    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt_template % page.text,
        config={
            "response_mime_type": "application/json",
            "response_schema": list[ModelProductResponse],
        },
    )

    model_responses: list[ModelProductResponse] | None = response.parsed
    if model_responses is None:
        # Fail with a clear message instead of a TypeError from iterating None.
        raise RuntimeError("model response could not be parsed into products")

    products: list[Product] = []

    # A distinct loop name keeps the API response object from being shadowed.
    for item in model_responses:
        print(item.current_page, item.next_page_url)

        products.append(
            Product(**item.model_dump(), date=datetime.now(timezone.utc))
        )

    return products


if __name__ == "__main__":
    main()