summary refs log tree commit diff
diff options
context:
space:
mode:
author Jasper Ras <jaspert.ras@gmail.com> 2025-02-19 22:40:03 +0100
committer Jasper Ras <jaspert.ras@gmail.com> 2025-02-19 22:40:03 +0100
commit a0e06aba76af1c267fb65421f6589844df8b19cf (patch)
tree 5be75b8832d9f97e2dd2bff88f4b3cac8de7b7c1
parent 74f86ab57f824f4f446d6f102ae8ad0038be5de1 (diff)
parse aanbiedingen
-rw-r--r-- devenv.nix 2
-rw-r--r-- folderz-parser.py 58
-rw-r--r-- hello.py 24
-rw-r--r-- pyproject.toml 2
-rw-r--r-- uv.lock 8
5 files changed, 68 insertions, 26 deletions
diff --git a/devenv.nix b/devenv.nix
index 327bf00..297e292 100644
--- a/devenv.nix
+++ b/devenv.nix
@@ -15,7 +15,7 @@
# processes.cargo-watch.exec = "cargo-watch";
# https://devenv.sh/services/
- # services.postgres.enable = true;
+ services.postgres.enable = true;
enterShell = ''
source .devenv/state/venv/bin/activate
diff --git a/folderz-parser.py b/folderz-parser.py
new file mode 100644
index 0000000..610833a
--- /dev/null
+++ b/folderz-parser.py
@@ -0,0 +1,58 @@
+import os
+import requests
+
+from datetime import datetime, timezone
+
+from pydantic import BaseModel
+from google import genai
+
+
+class ModelProductResponse(BaseModel):
+ """One product entry in the structured JSON requested from the Gemini model.
+
+ Used as the element type of the ``response_schema`` passed to
+ ``generate_content`` and of the parsed result read back in ``main``.
+ """
+ # Product name; the prompt asks the model to lowercase all names.
+ name: str
+ # Store name as extracted by the model.
+ store: str
+ # Price as extracted by the model (currency is not specified in this file).
+ price: float
+ # Printed per item by main(); presumably pagination info for the
+ # offers listing -- TODO confirm how the model is expected to fill these.
+ current_page: int
+ next_page_url: str
+
+
+class Product(ModelProductResponse):
+ """A model-extracted product entry extended with a timestamp."""
+ # Set by main() to the current UTC time when the Product is constructed.
+ date: datetime
+
+
+def main():
+    """Fetch the folderz.nl popular-offers page and extract products via Gemini.
+
+    Downloads the HTML of ``/populaire-aanbiedingen``, sends it to the
+    ``gemini-2.0-flash`` model with a JSON response schema of
+    ``list[ModelProductResponse]``, prints each item's ``current_page`` and
+    ``next_page_url``, and wraps every item in a ``Product`` stamped with
+    the current UTC time.
+
+    Raises:
+        requests.HTTPError: If the page request returns an error status.
+        requests.Timeout: If the site does not answer within the timeout.
+        RuntimeError: If the model output could not be parsed into the schema.
+    """
+    key = os.getenv("API_KEY")
+    client = genai.Client(api_key=key)
+
+    base_url = "https://www.folderz.nl"
+    init_url = base_url + "/populaire-aanbiedingen"
+
+    # requests applies no timeout by default; without one a stalled server
+    # would block the script indefinitely.
+    page = requests.get(init_url, timeout=30)
+    page.raise_for_status()
+
+    proompt = """Given an HTML input, give me the product name(s), store(s) and price(s). Please lowercase all names.
+
+ %s
+ """
+
+    response = client.models.generate_content(
+        model="gemini-2.0-flash",
+        contents=proompt % page.text,
+        config={
+            "response_mime_type": "application/json",
+            "response_schema": list[ModelProductResponse],
+        },
+    )
+
+    # `parsed` is optional in the SDK and stays None when the model output
+    # does not validate against the schema; fail with a clear message
+    # instead of a TypeError from iterating None.
+    model_responses: list[ModelProductResponse] | None = response.parsed
+    if model_responses is None:
+        raise RuntimeError(
+            "Gemini response could not be parsed into the product schema"
+        )
+
+    # One timestamp for the whole batch so all products of a single scrape
+    # carry the same date.
+    scraped_at = datetime.now(timezone.utc)
+    products: list[Product] = []
+
+    # Distinct loop name so the API `response` above is not shadowed.
+    for item in model_responses:
+        print(item.current_page, item.next_page_url)
+        products.append(Product(**item.model_dump(), date=scraped_at))
+
+    # TODO: `products` is collected but not yet persisted or returned.
+
+
+# Run the scraper only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+ main()
diff --git a/hello.py b/hello.py
deleted file mode 100644
index 36b8e05..0000000
--- a/hello.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import sys
-import os
-import requests
-
-from google import genai
-
-
-def main():
- key = os.getenv("API_KEY")
- client = genai.Client(api_key=key)
- # input = sys.argv[1]
-
- curl = requests.get("https://www.folderz.nl/")
- curl.raise_for_status()
-
- # print(curl.text)
-
- response = client.models.generate_content(model="gemini-2.0-flash", contents=input)
-
- print(response.text)
-
-
-if __name__ == "__main__":
- main()
diff --git a/pyproject.toml b/pyproject.toml
index 2a605be..c13fff9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,4 +6,6 @@ readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"google-genai>=1.2.0",
+ "pydantic>=2.10.6",
+ "requests>=2.32.3",
]
diff --git a/uv.lock b/uv.lock
index 631b974..d532dea 100644
--- a/uv.lock
+++ b/uv.lock
@@ -69,10 +69,16 @@ version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "google-genai" },
+ { name = "pydantic" },
+ { name = "requests" },
]
[package.metadata]
-requires-dist = [{ name = "google-genai", specifier = ">=1.2.0" }]
+requires-dist = [
+ { name = "google-genai", specifier = ">=1.2.0" },
+ { name = "pydantic", specifier = ">=2.10.6" },
+ { name = "requests", specifier = ">=2.32.3" },
+]
[[package]]
name = "google-auth"