diff options
Diffstat (limited to 'folderz-parser.py')
-rw-r--r-- | folderz-parser.py | 58 |
1 files changed, 58 insertions, 0 deletions
# folderz-parser.py -- reconstructed from a flattened "new file" diff
# (index 0000000..610833a).
import os
import requests

from datetime import datetime, timezone

from pydantic import BaseModel
from google import genai

# Upper bound (seconds) on the page fetch. Without an explicit timeout,
# requests will wait indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30


# Shape of a single item the model is asked to return (used as the
# response schema). Documented with comments rather than a docstring so the
# schema handed to the model is not altered by a generated description.
class ModelProductResponse(BaseModel):
    name: str
    store: str
    price: float
    current_page: int
    next_page_url: str


# A model-returned product plus the timestamp at which it was recorded.
class Product(ModelProductResponse):
    date: datetime


def main():
    """Fetch the folderz.nl popular-offers page and extract products from it.

    The page HTML is downloaded, embedded in a prompt and sent to the
    ``gemini-2.0-flash`` model with a JSON response schema of
    ``list[ModelProductResponse]``. Each parsed item is printed (page number
    and next-page URL) and collected as a ``Product`` stamped with the
    current UTC time.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        requests.Timeout: if the page request exceeds
            ``REQUEST_TIMEOUT_SECONDS``.
        RuntimeError: if the model response could not be parsed into the
            requested schema.
    """
    key = os.getenv("API_KEY")
    client = genai.Client(api_key=key)

    base_url = "https://www.folderz.nl"
    init_url = base_url + "/populaire-aanbiedingen"

    page = requests.get(init_url, timeout=REQUEST_TIMEOUT_SECONDS)
    page.raise_for_status()

    proompt = """Given an HTML input, give me the product name(s), store(s) and price(s). Please lowercase all names.

    %s
    """

    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=proompt % page.text,
        config={
            "response_mime_type": "application/json",
            "response_schema": list[ModelProductResponse],
        },
    )

    model_responses: list[ModelProductResponse] | None = response.parsed
    if model_responses is None:
        # Fail with a clear message instead of "'NoneType' object is not
        # iterable" when the model output does not match the schema.
        raise RuntimeError(
            "model response could not be parsed into list[ModelProductResponse]"
        )

    products: list[Product] = []

    # Distinct loop name so the API response object is not shadowed.
    for item in model_responses:
        print(item.current_page, item.next_page_url)

        products.append(
            Product(**item.model_dump(), date=datetime.now(timezone.utc))
        )


if __name__ == "__main__":
    main()