diff options
author | Jasper Ras <jaspert.ras@gmail.com> | 2025-02-19 22:40:03 +0100 |
---|---|---|
committer | Jasper Ras <jaspert.ras@gmail.com> | 2025-02-19 22:40:03 +0100 |
commit | a0e06aba76af1c267fb65421f6589844df8b19cf (patch) | |
tree | 5be75b8832d9f97e2dd2bff88f4b3cac8de7b7c1 | |
parent | 74f86ab57f824f4f446d6f102ae8ad0038be5de1 (diff) |
parse aanbiedingen
-rw-r--r-- | devenv.nix | 2 |
-rw-r--r-- | folderz-parser.py | 58 |
-rw-r--r-- | hello.py | 24 |
-rw-r--r-- | pyproject.toml | 2 |
-rw-r--r-- | uv.lock | 8 |
5 files changed, 68 insertions, 26 deletions
@@ -15,7 +15,7 @@ # processes.cargo-watch.exec = "cargo-watch"; # https://devenv.sh/services/ - # services.postgres.enable = true; + services.postgres.enable = true; enterShell = '' source .devenv/state/venv/bin/activate diff --git a/folderz-parser.py b/folderz-parser.py new file mode 100644 index 0000000..610833a --- /dev/null +++ b/folderz-parser.py @@ -0,0 +1,58 @@ +import os +import requests + +from datetime import datetime, timezone + +from pydantic import BaseModel +from google import genai + + +class ModelProductResponse(BaseModel): + name: str + store: str + price: float + current_page: int + next_page_url: str + + +class Product(ModelProductResponse): + date: datetime + + +def main(): + key = os.getenv("API_KEY") + client = genai.Client(api_key=key) + + base_url = "https://www.folderz.nl" + init_url = base_url + "/populaire-aanbiedingen" + + curl = requests.get(init_url) + curl.raise_for_status() + + proompt = """Given an HTML input, give me the product name(s), store(s) and price(s). Please lowercase all names. 
+ + %s + """ + + response = client.models.generate_content( + model="gemini-2.0-flash", + contents=proompt % curl.text, + config={ + "response_mime_type": "application/json", + "response_schema": list[ModelProductResponse], + }, + ) + + model_responses: list[ModelProductResponse] = response.parsed + products: list[Product] = [] + + for response in model_responses: + print(response.current_page, response.next_page_url) + + products.append( + Product(**response.model_dump(), date=datetime.now(timezone.utc)) + ) + + +if __name__ == "__main__": + main() diff --git a/hello.py b/hello.py deleted file mode 100644 index 36b8e05..0000000 --- a/hello.py +++ /dev/null @@ -1,24 +0,0 @@ -import sys -import os -import requests - -from google import genai - - -def main(): - key = os.getenv("API_KEY") - client = genai.Client(api_key=key) - # input = sys.argv[1] - - curl = requests.get("https://www.folderz.nl/") - curl.raise_for_status() - - # print(curl.text) - - response = client.models.generate_content(model="gemini-2.0-flash", contents=input) - - print(response.text) - - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml index 2a605be..c13fff9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,4 +6,6 @@ readme = "README.md" requires-python = ">=3.12" dependencies = [ "google-genai>=1.2.0", + "pydantic>=2.10.6", + "requests>=2.32.3", ] @@ -69,10 +69,16 @@ version = "0.1.0" source = { virtual = "." } dependencies = [ { name = "google-genai" }, + { name = "pydantic" }, + { name = "requests" }, ] [package.metadata] -requires-dist = [{ name = "google-genai", specifier = ">=1.2.0" }] +requires-dist = [ + { name = "google-genai", specifier = ">=1.2.0" }, + { name = "pydantic", specifier = ">=2.10.6" }, + { name = "requests", specifier = ">=2.32.3" }, +] [[package]] name = "google-auth" |