https://www.sofascore.com/
Я создаю этот тестовый код перед использованием на устройстве E2 в своем плагине
Код: Выделить всё
# python3 -m venv venv
# source venv/bin/activate
# pip install playwright
# playwright install
import json
import codecs
import os
import asyncio
from datetime import date
# Import the main asynchronous Playwright API
from playwright.async_api import async_playwright
# --- Configuration ---
# Output file for the fetched events. os.path.expanduser() replaces the
# leading "~" with the current user's home directory, so the JSON lands in
# the "Desktop" folder beneath it.
sofa_debug_path = os.path.expanduser("~/Desktop/sofascore_data.json")
# --- Main Asynchronous Logic ---
async def fetch_and_save_data():
    """
    Fetch today's scheduled football events from SofaScore and save them.

    A headless Chromium browser (Playwright) is used instead of a plain HTTP
    client: the main site is opened first so the browser context holds
    whatever cookies the site sets, then the API URL is loaded in the same
    context. The "events" list of the JSON response is written, pretty-printed
    as UTF-8, to ``sofa_debug_path``.

    Returns:
        None. Progress and errors are reported with ``print``; any exception
        is caught and printed rather than propagated.
    """
    # 1. Build the API URL for the current date.
    # NOTE(review): date.today() is the *local* calendar date. If the API
    # groups events by a different time zone, matches close to midnight may
    # be listed under an adjacent date -- confirm against the API.
    today_iso = date.today().isoformat()
    url = 'https://api.sofascore.com/api/v1/sport/football/scheduled-events/{0}'.format(today_iso)
    print(f"Starting browser automation to fetch data for: {today_iso}")
    print(f"Target URL: {url}")

    try:
        async with async_playwright() as p:
            # 2. Launch the browser (headless=True means it runs invisibly).
            browser = await p.chromium.launch(headless=True)
            try:
                context = await browser.new_context()
                page = await context.new_page()

                # 3. Warm up the session by visiting the main site first so
                # the context collects cookies before the API is requested.
                print("Warming up browser context on sofascore.com...")
                await page.goto(
                    "https://www.sofascore.com/",
                    wait_until="networkidle",
                    timeout=30000,
                )

                # 4. Load the API endpoint inside the warmed-up context.
                print("Fetching API data using the warm context...")
                response = await page.goto(url, wait_until="load", timeout=10000)

                # 5. Check the response. page.goto() may return None, so
                # guard before reading the status code.
                if response is None or response.status != 200:
                    status = "no response" if response is None else response.status
                    print(f"Error fetching data: Status Code {status} from {url}")
                    print("The browser-based request was blocked or the URL is incorrect.")
                    return

                # 6. Read the raw response body. page.content() would return
                # the rendered page markup, not the plain JSON text.
                raw_text = await response.text()
            finally:
                # 7. Close the browser on every path (success, early return,
                # or exception) while the Playwright driver is still alive.
                await browser.close()
            print("Browser closed.")

        # 8. Parse the JSON string into a Python dictionary.
        data = json.loads(raw_text)

        # 9. Extract the event list and pretty-print it, keeping non-ASCII
        # characters (team names) readable.
        events = data.get('events', [])
        print(f"Found {len(events)} events.")
        formatted_data_str = json.dumps(events, indent=4, ensure_ascii=False)

        # 10. Write the formatted data to the output file.
        print(f"Writing data to: {sofa_debug_path}")
        with open(sofa_debug_path, "w", encoding="utf-8") as f:
            f.write(formatted_data_str)
        print("\nSuccessfully fetched and saved data using Playwright!")
    except Exception as e:
        # Top-level boundary of the script: report the failure instead of
        # crashing. Browser cleanup has already happened in the finally above.
        print(f"An error occurred during Playwright execution: {e}")
# Run the coroutine only when the file is executed as a script, not when it
# is imported (e.g. from the plugin this test code is meant for).
if __name__ == "__main__":
    asyncio.run(fetch_and_save_data())
Пример:
эти два матча я не могу найти в json-файле
Код: Выделить всё
Albania
Kategoria Superiore
18:00-
Egnatia
Vllaznia
18:00-
Vora
Partizani
Подробнее здесь: https://stackoverflow.com/questions/798 ... ascore-com
Мобильная версия