Я пытаюсь парсить веб-сайт, используя следующее руководство: как парсить данные из магазина с помощью красивого супа
Однако я столкнулся с ошибкой, о которой не знаю. как решить.
Ниже мой код:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from time import sleep
# Create object for Chrome options
chrome_options = Options()
# Set Chrome driver options
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument('--disable-infobars')
chrome_options.add_argument('start-maximized')
chrome_options.add_argument(r'user-data-dir=C:\Users\behke\AppData\Local\Google\Chrome\User Data\Default')
chrome_options.add_argument("disable-infobars")
chrome_options.add_experimental_option("prefs", {
"profile.default_content_setting_values.notifications": 2
})
# Set the path to ChromeDriver executable
chromedriver_path = r'C:\Users\behke\Documents\chromedriver_win32\chromedriver.exe'
# Initialize the WebDriver with Chrome options
browser = webdriver.Chrome(executable_path=chromedriver_path)
# Define the base URL
base_url = 'https://shopee.com.my/search?keyword=foundation'
# Get the webpage
browser.get(base_url)
# Set a delay for waiting for elements to load
delay = 5
# Empty lists to store scraped data
item_cost, item_init_cost, item_loc = [], [], []
item_name, items_sold, discount_percent = [], [], []
# Main scraping loop
while True:
try:
WebDriverWait(browser, delay)
print("Page is ready")
sleep(5)
html = browser.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
soup = BeautifulSoup(html, "html.parser")
# Find item names
for item_n in soup.find_all('div', class_='text-base/5 truncate'):
item_name.append(item_n.text)
# Find item prices
for item_c in soup.find_all('span', class_='text-base/5 truncate'):
item_cost.append(item_c.text)
# Find initial item cost
for item_ic in soup.find_all('div', class_ = 'truncate flex items-baseline'):
item_init_cost.append(item_ic.text)
# Find total number of items sold/month
for items_s in soup.find_all('div', class_ = 'truncate text-shopee-black87 text-xs min-h-4 flex-shrink-1'):
items_sold.append(items_s.text)
# Find item discount percent
for dp in soup.find_all('div', class_ = 'text-shopee-primary font-medium bg-shopee-pink py-0.5 px-1 text-sp10/3 h-4 shrink-0 mr-1'):
discount_percent.append(dp.text)
# Find item location
for il in soup.find_all('class', class_ = 'ml-[3px] align-middle'):
item_loc.append(il.text)
break # Exit the loop once scraping is done
except TimeoutException:
print("Loading took too much time! - Try again")
# Once scraping is complete, you can further process or analyze the scraped data as needed
# Close the browser
browser.quit()
При запуске я получаю следующее сообщение об ошибке.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [7], in ()
22 chromedriver_path = r'C:\Users\behke\Documents\chromedriver_win32\chromedriver.exe'
24 # Initialize the WebDriver with Chrome options
---> 25 browser = webdriver.Chrome(executable_path=chromedriver_path)
27 # Define the base URL
28 base_url = 'https://shopee.com.my/search?keyword=foundation'
TypeError: __init__() got an unexpected keyword argument 'executable_path'
Что я делал ранее:
Я использую Chrome версии 123.0.6312.124 (официальная сборка) (64-разрядная версия). Я загрузил chromedriver, и двойной щелчок по chromedriver.exe показывает следующее на экране cmd.
Starting ChromeDriver 114.0.5735.90 (386bc09e8f4f2e025eddae123f36f6263096ae49-refs/branch-heads/5735@{#1052}) on port 9515
Only local connections are allowed.
Please see https://chromedriver.chromium.org/secur ... iderations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
Подробнее здесь: https://stackoverflow.com/questions/783 ... river-path
Ошибка веб-скрапинга — webdriver.Chrome(executable_path=chromedriver_path) ⇐ Python
-
- Похожие темы
- Ответы
- Просмотры
- Последнее сообщение