1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
import os
import requests
import time
import logging
from datetime import datetime, timezone, timedelta
from sqlalchemy import DateTime, Float, String, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, Session
from pydantic import BaseModel
from google import genai
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
class ModelProductResponse(BaseModel):
    """One product entry in the structured reply requested from the model.

    ``fetch_page_data`` passes ``list[ModelProductResponse]`` as the response
    schema, so the field names and types below define the JSON shape the
    model is asked to produce.
    """

    # Product name; stored as ``Discount.product`` by ``main``.
    name: str
    # Store offering the discount.
    store: str
    # Discounted price.
    price: float
    # Days the offer remains valid; ``main`` adds this to the current time
    # to compute the end of the offer.
    expires_in_days: int
    # Page number reported for the page this entry came from.
    current_page: int
    # Link to the following page; ``main`` appends it to the site base URL.
    next_page_url: str
class Base(DeclarativeBase):
    """Declarative base class whose metadata collects this module's tables."""
class Discount(Base):
    """ORM mapping for one row of the ``discount`` table."""

    __tablename__ = "discount"

    # Primary key.
    id: Mapped[int] = mapped_column(primary_key=True)

    # Product name and the store offering it (each up to 255 characters).
    product: Mapped[str] = mapped_column(String(255))
    store: Mapped[str] = mapped_column(String(255))

    # Discounted price.
    price: Mapped[float] = mapped_column(Float)

    # Validity window of the offer: start and end timestamps.
    since: Mapped[datetime] = mapped_column(DateTime)
    until: Mapped[datetime] = mapped_column(DateTime)
def fetch_page_data(
    client: genai.Client, url: str, timeout: float = 30.0
) -> list[ModelProductResponse]:
    """Download *url* and ask the model to extract its products.

    The page's HTML is embedded in a prompt and sent to the
    ``gemini-2.0-flash`` model, which is asked for a JSON reply matching
    ``list[ModelProductResponse]``.

    Args:
        client: Client used to issue the ``generate_content`` request.
        url: Address of the page to download.
        timeout: Seconds to wait on the HTTP request before giving up.
            Without a timeout a stalled server would block the caller
            indefinitely.

    Returns:
        The ``parsed`` attribute of the model response.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()

    prompt = f"Parse HTML input into JSON and lowercase all names.\n\n{page.text}"
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_schema": list[ModelProductResponse],
        },
    )
    # NOTE(review): ``parsed`` is presumably None when the reply cannot be
    # parsed into the schema; confirm against the SDK and handle in callers.
    return response.parsed
def main():
    """Crawl the popular-offers pages of folderz.nl and store every offer.

    Starting at ``/populaire-aanbiedingen``, each page is passed to
    ``fetch_page_data``; every returned product becomes a ``Discount`` row
    whose validity window starts now and ends ``expires_in_days`` later.
    Rows are committed once per page, and the crawl pauses one second
    between pages.

    The crawl stops when a page yields no products, when the last product
    carries no next-page link, or when the next link points to a page that
    was already visited. Without these checks the loop would never end,
    because ``base_url + next_page_url`` is always a non-empty string.
    """
    # Local import: only this function needs URL resolution.
    from urllib.parse import urljoin

    engine = create_engine("mysql+pymysql://folderz:folderz@localhost:3306/folderz")
    Base.metadata.create_all(engine)

    client = genai.Client(api_key=os.getenv("API_KEY"))

    base_url = "https://www.folderz.nl"
    url = base_url + "/populaire-aanbiedingen"
    visited: set[str] = set()

    while url and url not in visited:
        visited.add(url)
        LOG.info("querying %s", url)

        # Treat a missing result the same as an empty page.
        products = fetch_page_data(client, url) or []
        if not products:
            break

        # One timestamp per page so all rows of a page share the same start.
        now = datetime.now(timezone.utc)
        discounts = [
            Discount(
                product=product.name,
                store=product.store,
                price=product.price,
                since=now,
                until=now + timedelta(days=product.expires_in_days),
            )
            for product in products
        ]
        with Session(engine) as session:
            session.add_all(discounts)
            session.commit()

        # As before, the last entry of the page decides where to go next.
        next_page = products[-1].next_page_url
        if not next_page:
            break
        # Resolve against the current page so that root-relative, relative
        # and absolute links all yield a usable URL.
        url = urljoin(url, next_page)
        time.sleep(1)
if __name__ == "__main__":
    # With no handler configured, the INFO-level progress messages emitted
    # through LOG are discarded (the default threshold is WARNING), so the
    # script would otherwise run silently.
    logging.basicConfig(level=logging.INFO)
    main()
|