Парсер на Python возвращает пустой список (я думаю, это проблема выбора класса HTML)

Парсер на Python возвращает пустой список (я думаю, это проблема выбора класса HTML) ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Парсер на Python возвращает пустой список (я думаю, это проблема выбора класса HTML)

Цитата

Сообщение Anonymous » 19 янв 2025, 15:38

Идея такова: я хочу собрать название квартиры и ее цену в виде списка для каждой квартиры на веб-сайте.
Я сделал простой парсер на Python, но, похоже, я не могу получить никаких значений, так как он возвращает пустой список.
Мое предположение: я просто не могу найти правильный класс/контейнер, содержащий эту информацию, поэтому он возвращает пустой список.

Код: Выделить всё

# Importing selenium, CSV, and time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
import csv
import time
from webdriver_manager.chrome import ChromeDriverManager

# Run Chrome in the background (headless), without GPU acceleration or the sandbox.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# Let webdriver_manager install a chromedriver and hand its path to Selenium.
service = Service(ChromeDriverManager().install())

# The options object has to be passed explicitly; without it none of the
# flags configured above are applied to the browser session.
driver = webdriver.Chrome(service=service, options=chrome_options)

try:
    # Open the developer's page.
    print("Opening the page...")
    driver.get('https://etalongroup.ru/msk/object/voxhall/')
    print("The page is opened.")

    # Fixed delay so the page has time to finish loading before the HTML is read.
    time.sleep(30)

    # Snapshot the HTML as the browser currently sees it.
    page_source = driver.page_source
finally:
    # Always shut the browser down, even if loading the page raised.
    driver.quit()

# Parse the captured HTML with bs4.
soup = BeautifulSoup(page_source, 'html.parser')

# Prices: a CSS selector matches spans that carry both classes regardless of
# their order or of any additional classes, whereas class_='th-h4 text-scarlet'
# only matches when the class attribute is exactly that string.
price_elements = soup.select('span.th-h4.text-scarlet')

# Titles: every <div> that has an aria-label attribute.
title_elements = soup.find_all('div', attrs={'aria-label': True})

# NOTE(review): prices and titles are paired purely by document order; this
# assumes the i-th labelled <div> belongs to the i-th price — verify against
# the page markup.
apartments = [
    {
        'Title': title_element['aria-label'].strip(),
        'Price': price_element.text.strip(),
    }
    for price_element, title_element in zip(price_elements, title_elements)
]

print(apartments)

# Script completion message
print("The script has finished executing.")

В результате я ожидаю получить список или словарь с данными вида [n123 - 30 000 000] и т. д. для каждого объекта, представленного на сайте.

Подробнее здесь: https://stackoverflow.com/questions/79367570/parser-on-python-returns-an-empty-list-i-guess-its-an-html-class-selection-issu

1737290325

Anonymous

Идея такова: я хочу собрать название квартиры и ее цену в виде списка для каждой квартиры на веб-сайте.
Я сделал простой парсер на Python, но, похоже, я не могу получить никаких значений, так как он возвращает пустой список.
Мое предположение: я просто не могу найти правильный класс/контейнер, содержащий эту информацию, поэтому он возвращает пустой список.[code]# Importing selenium, CSV, and time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
import csv
import time
from webdriver_manager.chrome import ChromeDriverManager

# Run Chrome in the background (headless), without GPU acceleration or the sandbox.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# Let webdriver_manager install a chromedriver and hand its path to Selenium.
service = Service(ChromeDriverManager().install())

# The options object has to be passed explicitly; without it none of the
# flags configured above are applied to the browser session.
driver = webdriver.Chrome(service=service, options=chrome_options)

try:
    # Open the developer's page.
    print("Opening the page...")
    driver.get('https://etalongroup.ru/msk/object/voxhall/')
    print("The page is opened.")

    # Fixed delay so the page has time to finish loading before the HTML is read.
    time.sleep(30)

    # Snapshot the HTML as the browser currently sees it.
    page_source = driver.page_source
finally:
    # Always shut the browser down, even if loading the page raised.
    driver.quit()

# Parse the captured HTML with bs4.
soup = BeautifulSoup(page_source, 'html.parser')

# Prices: a CSS selector matches spans that carry both classes regardless of
# their order or of any additional classes, whereas class_='th-h4 text-scarlet'
# only matches when the class attribute is exactly that string.
price_elements = soup.select('span.th-h4.text-scarlet')

# Titles: every <div> that has an aria-label attribute.
title_elements = soup.find_all('div', attrs={'aria-label': True})

# NOTE(review): prices and titles are paired purely by document order; this
# assumes the i-th labelled <div> belongs to the i-th price — verify against
# the page markup.
apartments = [
    {
        'Title': title_element['aria-label'].strip(),
        'Price': price_element.text.strip(),
    }
    for price_element, title_element in zip(price_elements, title_elements)
]

print(apartments)

# Script completion message
print("The script has finished executing.")
[/code]
В результате я ожидаю получить список или словарь с данными вида [n123 - 30 000 000] и т. д. для каждого объекта, представленного на сайте.

Подробнее здесь: [url]https://stackoverflow.com/questions/79367570/parser-on-python-returns-an-empty-list-i-guess-its-an-html-class-selection-issu[/url]