Skip to content

Commit

Permalink
fix: Parse Qwerty restaurant from their website
Browse files Browse the repository at this point in the history
  • Loading branch information
pkoprda authored and jdobes committed Jan 27, 2025
1 parent 1b65029 commit e8e0a11
Showing 1 changed file with 53 additions and 83 deletions.
136 changes: 53 additions & 83 deletions api/restaurants/qwerty.py
Original file line number Diff line number Diff line change
@@ -1,98 +1,68 @@
from datetime import date, timedelta
# -*- coding: utf-8 -*-
import logging
import os

from playwright.sync_api import sync_playwright, expect
from datetime import date, timedelta
from unidecode import unidecode

from .utils import fetch_html

NAME = "Qwerty"
URL = "https://www.facebook.com/QwertyRestaurant"
URL = "https://qwerty-restaurant--catering3.webnode.cz/menu/"
GPS = (49.235655201154906, 16.573111921257983)

PLAYWRIGHT_PROXY = os.getenv("PLAYWRIGHT_PROXY", "")

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def parse_menu():
result = {}
last_monday = date.today() - timedelta(days=date.today().weekday())
with sync_playwright() as playwright:
kwargs = {
"viewport": {"width": 1080, "height": 1920},
"locale": "cs-CZ"
}
if PLAYWRIGHT_PROXY:
kwargs["proxy"] = {"server": PLAYWRIGHT_PROXY}
browser = playwright.firefox.launch(headless=True)
context = browser.new_context(**kwargs)
page = context.new_page()
page.goto(URL)

cookie_button = page.get_by_role("button", name="Odmítnout")
expect(cookie_button).to_be_visible()
cookie_button.click()

login_close_button = page.get_by_role("button", name="Zavřít")
expect(login_close_button).to_be_visible()
login_close_button.click()

# Wait for initial posts to load
page.wait_for_load_state("networkidle")

posts = page.locator("[data-ad-preview='message']").all()
logger.debug("Checking last %d posts.", len(posts))
for idx, post in enumerate(posts):
show_more_button = post.get_by_role("button", name="Zobrazit víc")
show_more_clicked = False
if show_more_button.is_visible():
show_more_clicked = True
show_more_button.click()
lines = post.locator("[style='text-align: start;']").all()
logger.debug("Post %d, show_more_clicked=%s, lines=%d", idx+1, show_more_clicked, len(lines))

current_date = None
weekly_section = False
weekly_offer = []
# Strip first line with dates
for line in lines:
txt = line.text_content()
day_identifier = unidecode(txt.lower())
if "pondeli" == day_identifier:
current_date = last_monday
elif "utery" == day_identifier:
current_date = last_monday + timedelta(days=1)
elif "streda" == day_identifier:
current_date = last_monday + timedelta(days=2)
elif "ctvrtek" == day_identifier:
current_date = last_monday + timedelta(days=3)
elif "patek" == day_identifier:
current_date = last_monday + timedelta(days=4)
elif ("tydenni" in day_identifier or "tydne" in day_identifier) and not "polevka" in day_identifier:
current_date = None
weekly_section = True
elif current_date and txt:
result.setdefault(current_date, []).append(txt)
elif weekly_section and txt:
weekly_offer.append(txt)

if weekly_offer:
for _, offer in result.items():
offer.append("")
offer.append("All week:")
offer.extend(weekly_offer)
def format_line(line):
return (
line.text.strip().replace("\n", " ").replace("1)", "\n1)").replace("Kč", "Kč\n")
)

if result:
logger.debug("Seems like post %d contains menu, skipping the rest.", idx+1)
break

#page.pause()
#page.screenshot(path="example.png")
browser.close()

if not result:
logger.debug("No menu found in posts.")
def parse_menu():
current_date = None
last_monday = date.today() - timedelta(days=date.today().weekday())
result = {}
weekly_section = False
weekly_offer = []
html = fetch_html(URL)
if html:
for line in html.find_all("div", {"class": "mt-i cf"}):
txt = format_line(line)
day_identifier = unidecode(txt.lower())

if "pondeli" in day_identifier:
current_date = last_monday
txt = txt.replace("PONDĚLÍ", "")
elif "utery" in day_identifier:
current_date = last_monday + timedelta(days=1)
txt = txt.replace("ÚTERÝ", "")
elif "streda" in day_identifier:
current_date = last_monday + timedelta(days=2)
txt = txt.replace("STŘEDA", "")
elif "ctvrtek" in day_identifier:
current_date = last_monday + timedelta(days=3)
txt = txt.replace("ČTVRTEK", "")
elif "patek" in day_identifier:
current_date = last_monday + timedelta(days=4)
txt = txt.replace("PÁTEK", "")
elif "tydenni" in day_identifier and not "polevka" in day_identifier:
current_date = None
weekly_section = True
txt = txt.replace("TÝDENNÍ MENU", "")

if current_date and txt:
result.setdefault(current_date, []).append(txt)
elif weekly_section and txt:
weekly_offer.append(txt)

if weekly_offer:
for _, offer in result.items():
offer.append("All week:")
offer.extend(weekly_offer)

if not result:
logger.error("No menu found.")

return result

Expand Down

0 comments on commit e8e0a11

Please sign in to comment.