from datetime import date
import re

from playwright.sync_api import sync_playwright

# Matches a comma-decimal amount such as "12,34" inside a price text.
_PRICE_PATTERN = re.compile(r'(\d+,\d{2})')

# JavaScript evaluated in the page: returns the text content of the first
# <script> element mentioning `window.dataLayer`, or null if there is none.
_DATA_LAYER_SCRIPT_JS = """
() => {
    const scripts = document.querySelectorAll('script');
    for (const script of scripts) {
        if (script.textContent.includes('window.dataLayer')) {
            return script.textContent;
        }
    }
    return null;
}
"""


def get_game_details(url: str):
    """
    Scrape game details from a Nintendo eShop webpage.

    The page is loaded in a headless Chromium browser and the following keys
    are filled in when the corresponding data is found: ``price``,
    ``is_discounted``, ``discount_percentage``, ``original_price``,
    ``discount_end_date``, ``gameTitle``, ``offdeviceNsuID``, ``releaseDate``,
    ``fsId``, ``description`` and ``image_url``. The result is then passed
    through ``sanitizeScraperResults`` to fill in defaults.

    Args:
        url (str): The URL of the Nintendo eShop game page.

    Returns:
        dict: The extracted (and sanitized) information, or None if an error
        occurs while extracting it (the error is printed).

    Raises:
        Exception: Errors raised while opening the page or navigating to
        ``url`` are not caught here and propagate to the caller.
    """
    game_data = {}

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # Outer try/finally: the browser is closed even when navigation fails.
        try:
            page = browser.new_page()
            page.goto(url)

            try:
                _scrape_pricing(page, game_data)
                _scrape_data_layer(page, game_data)
                _scrape_description(page, game_data)
                _scrape_image_url(page, game_data)
                return sanitizeScraperResults(game_data)
            except Exception as e:
                # Best-effort scraper: any extraction failure yields None.
                print(f"An error occurred: {e}")
                return None
        finally:
            browser.close()


def _scrape_pricing(page, game_data):
    """Fill in price, discount, original price and discount end date."""
    price_selector = 'div.plm-price__main'
    # Block until the price container is present; raises on timeout.
    page.wait_for_selector(price_selector)
    price_element = page.query_selector(price_selector)

    if price_element:
        price_match = _PRICE_PATTERN.search(price_element.inner_text().strip())
        if price_match:
            game_data['price'] = price_match.group(1)

        discount_element = price_element.query_selector('span.plm-price__discount')
        game_data['is_discounted'] = bool(discount_element)
        if discount_element:
            discount_match = re.search(
                r'\(-\s*(\d+)%\)', discount_element.inner_text().strip()
            )
            if discount_match:
                game_data['discount_percentage'] = int(discount_match.group(1))

    # query_selector returns the first match (or None) and, unlike a Locator
    # action, does not raise when several elements match the selector.
    original_price_element = page.query_selector('div.plm-price__original')
    if original_price_element:
        original_price_match = _PRICE_PATTERN.search(
            original_price_element.inner_text().strip()
        )
        if original_price_match:
            game_data['original_price'] = original_price_match.group(1)

    disclaimer_element = page.query_selector('div.plm-price__disclaimer')
    if disclaimer_element:
        date_match = re.search(
            r'jusqu\'au\s*:\s*(\d{2}/\d{2}/\d{4})',
            disclaimer_element.inner_text().strip(),
            re.IGNORECASE,
        )
        if date_match:
            game_data['discount_end_date'] = date_match.group(1)


def _scrape_data_layer(page, game_data):
    """Fill in title, NSUID, release date and fsId from the dataLayer script."""
    script_content = page.evaluate(_DATA_LAYER_SCRIPT_JS)
    if not script_content:
        return

    field_patterns = (
        ('gameTitle', r'"gameTitle":\s*"([^"]+)"'),
        ('offdeviceNsuID', r'"offdeviceNsuID":\s*"([^"]+)"'),
        # The other keys are matched with quoted names; accept optional quotes
        # here so a quoted "releaseDate" key is not silently missed.
        ('releaseDate', r'"?releaseDate"?:\s*"([^"]+)"'),
        ('fsId', r'"pageID":\s*"([^"]+)"'),
    )
    for key, pattern in field_patterns:
        match = re.search(pattern, script_content)
        if match:
            game_data[key] = match.group(1)


def _scrape_description(page, game_data):
    """Fill in the description by joining the content block's paragraphs."""
    description_element = page.query_selector('div.col-xs-12.content')
    if description_element:
        paragraphs = description_element.query_selector_all('p')
        game_data['description'] = "\n\n".join(
            paragraph.inner_text().strip() for paragraph in paragraphs
        )


def _scrape_image_url(page, game_data):
    """Fill in the image URL from the og:image meta tag, else itemprop=image."""
    # query_selector returns None immediately when nothing matches. A Locator's
    # get_attribute would instead wait for the element and raise on timeout,
    # so the fallback selector would never be tried when og:image is absent.
    for selector in ('meta[property="og:image"]', 'meta[itemprop="image"]'):
        meta_element = page.query_selector(selector)
        image_url = meta_element.get_attribute('content') if meta_element else None
        if image_url:
            game_data['image_url'] = image_url
            return


def sanitizeScraperResults(game_data: dict) -> dict:
    """
    Fill in default values for pricing keys missing from the scraped data.

    The dictionary is modified in place and also returned.

    Args:
        game_data (dict): Scraped data; must contain ``price`` whenever
            ``original_price`` is absent.

    Returns:
        dict: The same dictionary, with ``original_price``,
        ``discount_end_date``, ``discount_percentage`` and ``is_discounted``
        guaranteed to be present.

    Raises:
        KeyError: If both ``original_price`` and ``price`` are missing.
    """
    # Not setdefault(): the fallback must only be looked up when it is needed,
    # otherwise a missing 'price' would raise even if 'original_price' exists.
    if 'original_price' not in game_data:
        game_data['original_price'] = game_data['price']

    # Placeholder end date used when no discount end date was scraped.
    game_data.setdefault('discount_end_date', '24/12/2030')
    # NOTE(review): this default is the string "0" while a scraped percentage
    # is stored as an int; kept as-is because callers may rely on it.
    game_data.setdefault('discount_percentage', "0")
    game_data.setdefault('is_discounted', False)
    return game_data


# Manual smoke test, kept disabled by wrapping it in a string literal.
"""
if __name__ == "__main__":
    url = "https://www.nintendo.com/fr-fr/Jeux/Jeux-Nintendo-Switch/Trinity-Trigger-2361693.html"
    game_data = get_game_details(url)
    if game_data:
        print("--- Game Details ---")
        for key, value in game_data.items():
            print(f"{key}: {value}")
    else:
        print("Could not retrieve game details.")
"""