Я пытаюсь собрать (спарсить) данные с этого сайта
https://www.eurobasket.com/Basketball-Box-Score.aspx?Game=2009_1211_2563_2684-Lebanon
На веб-сайте есть две таблицы:
но данные, отображаемые в строках таблиц, отличаются от данных в исходном HTML-коде (который виден в инспекторе элементов браузера).
например, это данные для первой строки:
SMdRl-XIuQ, zRij
45
4-9 (38.7%)
0-9 (96.3%)
5-5 (5%)
5
6
6
1
6
5
5
6
5
8
86
5
5
но на экране имя игрока — Жан Абдель-Нур, а не SMdRl-XIuQ, zRij; то же самое и с числами.
Я пробовал Selenium, но это не помогло
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def extract_box_score_from_url(url):
    """Scrape both teams' player box scores from a page into one DataFrame.

    The page is loaded in a Selenium-driven Chrome instance, the rendered
    HTML is parsed with BeautifulSoup, and the player rows found in the
    first two ``div.dvbs`` containers are combined.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A ``pandas.DataFrame`` whose columns are the cell texts of the second
        header row of the first stats table, followed by ``'Team'`` and
        ``'Opponent'``. Rows whose cell count does not match the number of
        header cells are dropped.

    Raises:
        AttributeError, IndexError: If the expected tables, links, header
            rows or stats containers are missing from the page.
    """
    driver = webdriver.Chrome()  # Ensure ChromeDriver is installed and in PATH
    try:
        driver.get(url)
        html_content = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise the Chrome process is left running.
        driver.quit()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Team names are the first link inside each of the two 'aannew' tables.
    team = soup.find('table', {'id': 'aannew'}).find('a').text.strip()
    opponent = soup.find_all('table', {'id': 'aannew'})[1].find('a').text.strip()

    # Column names come from the second header row of the first stats table.
    stats_divs = soup.find_all('div', class_='dvbs')
    header_rows = stats_divs[0].find('thead').find_all('tr')
    headers = [th.get_text(strip=True) for th in header_rows[1].find_all('th')]
    headers += ['Team', 'Opponent']

    def extract_team_stats(dvbs):
        """Return one list of stripped cell texts per player row in *dvbs*."""
        # NOTE(review): cell text is read verbatim from the markup; any
        # display-time transformation applied by the page is not reflected.
        rows = dvbs.find('tbody').find_all('tr', class_=['my_pStats1', 'my_pStats2'])
        return [
            [col.get_text(strip=True) for col in row.find_all('td')]
            for row in rows
        ]

    team_stats = extract_team_stats(stats_divs[0])
    opponent_stats = extract_team_stats(stats_divs[1])

    # Keep only rows that line up with the headers once the two extra
    # Team/Opponent cells are appended.
    num_columns = len(headers)
    team_stats = [row + [team, opponent] for row in team_stats if len(row) + 2 == num_columns]
    opponent_stats = [row + [opponent, team] for row in opponent_stats if len(row) + 2 == num_columns]

    combined_stats = team_stats + opponent_stats
    df = pd.DataFrame(combined_stats, columns=headers)
    return df
# Box-score page to scrape (full address, not the shortened display form).
url = "https://www.eurobasket.com/Basketball-Box-Score.aspx?Game=2009_1211_2563_2684-Lebanon"
df = extract_box_score_from_url(url)
# Bare expression: displays the frame in a notebook/REPL; no effect in a plain script.
df
Можете ли вы помочь мне найти способ собрать эти данные? Я пробовал Selenium
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def extract_box_score_from_url(url):
    """Scrape both teams' player box scores from a page into one DataFrame.

    The page is loaded in a Selenium-driven Chrome instance, the rendered
    HTML is parsed with BeautifulSoup, and the player rows found in the
    first two ``div.dvbs`` containers are combined.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A ``pandas.DataFrame`` whose columns are the cell texts of the second
        header row of the first stats table, followed by ``'Team'`` and
        ``'Opponent'``. Rows whose cell count does not match the number of
        header cells are dropped.

    Raises:
        AttributeError, IndexError: If the expected tables, links, header
            rows or stats containers are missing from the page.
    """
    driver = webdriver.Chrome()  # Ensure ChromeDriver is installed and in PATH
    try:
        driver.get(url)
        html_content = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise the Chrome process is left running.
        driver.quit()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Team names are the first link inside each of the two 'aannew' tables.
    team = soup.find('table', {'id': 'aannew'}).find('a').text.strip()
    opponent = soup.find_all('table', {'id': 'aannew'})[1].find('a').text.strip()

    # Column names come from the second header row of the first stats table.
    stats_divs = soup.find_all('div', class_='dvbs')
    header_rows = stats_divs[0].find('thead').find_all('tr')
    headers = [th.get_text(strip=True) for th in header_rows[1].find_all('th')]
    headers += ['Team', 'Opponent']

    def extract_team_stats(dvbs):
        """Return one list of stripped cell texts per player row in *dvbs*."""
        # NOTE(review): cell text is read verbatim from the markup; any
        # display-time transformation applied by the page is not reflected.
        rows = dvbs.find('tbody').find_all('tr', class_=['my_pStats1', 'my_pStats2'])
        return [
            [col.get_text(strip=True) for col in row.find_all('td')]
            for row in rows
        ]

    team_stats = extract_team_stats(stats_divs[0])
    opponent_stats = extract_team_stats(stats_divs[1])

    # Keep only rows that line up with the headers once the two extra
    # Team/Opponent cells are appended.
    num_columns = len(headers)
    team_stats = [row + [team, opponent] for row in team_stats if len(row) + 2 == num_columns]
    opponent_stats = [row + [opponent, team] for row in opponent_stats if len(row) + 2 == num_columns]

    combined_stats = team_stats + opponent_stats
    df = pd.DataFrame(combined_stats, columns=headers)
    return df
# Box-score page to scrape (full address, not the shortened display form).
url = "https://www.eurobasket.com/Basketball-Box-Score.aspx?Game=2009_1211_2563_2684-Lebanon"
df = extract_box_score_from_url(url)
# Bare expression: displays the frame in a notebook/REPL; no effect in a plain script.
df
Подробнее здесь: https://stackoverflow.com/questions/79330149/html-in-the-inspect-element-is-different-that-the-one-displayed-on-screen
HTML в инспекторе элементов отличается от отображаемого на экране. ⇐ Html
Программисты Html
-
Anonymous
1736051820
Anonymous
Я пытаюсь собрать (спарсить) данные с этого сайта
https://www.eurobasket.com/Basketball-Box-Score.aspx?Game=2009_1211_2563_2684-Lebanon
На веб-сайте есть две таблицы:
но данные, отображаемые в строках таблиц, отличаются от данных в исходном HTML-коде (который виден в инспекторе элементов браузера).
например, это данные для первой строки:
[url=https://basketball.asia-basket.com/player/Jean-Abdel-Nour/45278]SMdRl-XIuQ, zRij[/url]
45
4-9 (38.7%)
0-9 (96.3%)
5-5 (5%)
5
6
6
1
6
5
5
6
5
8
86
5
5
но на экране имя игрока — Жан Абдель-Нур, а не SMdRl-XIuQ, zRij; то же самое и с числами.
Я пробовал Selenium, но это не помогло
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def extract_box_score_from_url(url):
    """Scrape both teams' player box scores from a page into one DataFrame.

    The page is loaded in a Selenium-driven Chrome instance, the rendered
    HTML is parsed with BeautifulSoup, and the player rows found in the
    first two ``div.dvbs`` containers are combined.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A ``pandas.DataFrame`` whose columns are the cell texts of the second
        header row of the first stats table, followed by ``'Team'`` and
        ``'Opponent'``. Rows whose cell count does not match the number of
        header cells are dropped.

    Raises:
        AttributeError, IndexError: If the expected tables, links, header
            rows or stats containers are missing from the page.
    """
    driver = webdriver.Chrome()  # Ensure ChromeDriver is installed and in PATH
    try:
        driver.get(url)
        html_content = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise the Chrome process is left running.
        driver.quit()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Team names are the first link inside each of the two 'aannew' tables.
    team = soup.find('table', {'id': 'aannew'}).find('a').text.strip()
    opponent = soup.find_all('table', {'id': 'aannew'})[1].find('a').text.strip()

    # Column names come from the second header row of the first stats table.
    stats_divs = soup.find_all('div', class_='dvbs')
    header_rows = stats_divs[0].find('thead').find_all('tr')
    headers = [th.get_text(strip=True) for th in header_rows[1].find_all('th')]
    headers += ['Team', 'Opponent']

    def extract_team_stats(dvbs):
        """Return one list of stripped cell texts per player row in *dvbs*."""
        # NOTE(review): cell text is read verbatim from the markup; any
        # display-time transformation applied by the page is not reflected.
        rows = dvbs.find('tbody').find_all('tr', class_=['my_pStats1', 'my_pStats2'])
        return [
            [col.get_text(strip=True) for col in row.find_all('td')]
            for row in rows
        ]

    team_stats = extract_team_stats(stats_divs[0])
    opponent_stats = extract_team_stats(stats_divs[1])

    # Keep only rows that line up with the headers once the two extra
    # Team/Opponent cells are appended.
    num_columns = len(headers)
    team_stats = [row + [team, opponent] for row in team_stats if len(row) + 2 == num_columns]
    opponent_stats = [row + [opponent, team] for row in opponent_stats if len(row) + 2 == num_columns]

    combined_stats = team_stats + opponent_stats
    df = pd.DataFrame(combined_stats, columns=headers)
    return df
# Box-score page to scrape.
url = "https://www.eurobasket.com/Basketball-Box-Score.aspx?Game=2009_1211_2563_2684-Lebanon"
# Load the page and build the combined player table for both teams.
df = extract_box_score_from_url(url)
# Bare expression: displays the frame in a notebook/REPL; no effect in a plain script.
df
Можете ли вы помочь мне найти способ собрать эти данные? Я пробовал Selenium
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def extract_box_score_from_url(url):
    """Scrape both teams' player box scores from a page into one DataFrame.

    The page is loaded in a Selenium-driven Chrome instance, the rendered
    HTML is parsed with BeautifulSoup, and the player rows found in the
    first two ``div.dvbs`` containers are combined.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A ``pandas.DataFrame`` whose columns are the cell texts of the second
        header row of the first stats table, followed by ``'Team'`` and
        ``'Opponent'``. Rows whose cell count does not match the number of
        header cells are dropped.

    Raises:
        AttributeError, IndexError: If the expected tables, links, header
            rows or stats containers are missing from the page.
    """
    driver = webdriver.Chrome()  # Ensure ChromeDriver is installed and in PATH
    try:
        driver.get(url)
        html_content = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise the Chrome process is left running.
        driver.quit()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Team names are the first link inside each of the two 'aannew' tables.
    team = soup.find('table', {'id': 'aannew'}).find('a').text.strip()
    opponent = soup.find_all('table', {'id': 'aannew'})[1].find('a').text.strip()

    # Column names come from the second header row of the first stats table.
    stats_divs = soup.find_all('div', class_='dvbs')
    header_rows = stats_divs[0].find('thead').find_all('tr')
    headers = [th.get_text(strip=True) for th in header_rows[1].find_all('th')]
    headers += ['Team', 'Opponent']

    def extract_team_stats(dvbs):
        """Return one list of stripped cell texts per player row in *dvbs*."""
        # NOTE(review): cell text is read verbatim from the markup; any
        # display-time transformation applied by the page is not reflected.
        rows = dvbs.find('tbody').find_all('tr', class_=['my_pStats1', 'my_pStats2'])
        return [
            [col.get_text(strip=True) for col in row.find_all('td')]
            for row in rows
        ]

    team_stats = extract_team_stats(stats_divs[0])
    opponent_stats = extract_team_stats(stats_divs[1])

    # Keep only rows that line up with the headers once the two extra
    # Team/Opponent cells are appended.
    num_columns = len(headers)
    team_stats = [row + [team, opponent] for row in team_stats if len(row) + 2 == num_columns]
    opponent_stats = [row + [opponent, team] for row in opponent_stats if len(row) + 2 == num_columns]

    combined_stats = team_stats + opponent_stats
    df = pd.DataFrame(combined_stats, columns=headers)
    return df
# Box-score page to scrape.
url = "https://www.eurobasket.com/Basketball-Box-Score.aspx?Game=2009_1211_2563_2684-Lebanon"
# Load the page and build the combined player table for both teams.
df = extract_box_score_from_url(url)
# Bare expression: displays the frame in a notebook/REPL; no effect in a plain script.
df
Подробнее здесь: [url]https://stackoverflow.com/questions/79330149/html-in-the-inspect-element-is-different-that-the-one-displayed-on-screen[/url]
Ответить
1 сообщение
• Страница 1 из 1
Перейти
- Кемерово-IT
- ↳ Javascript
- ↳ C#
- ↳ JAVA
- ↳ Elasticsearch aggregation
- ↳ Python
- ↳ Php
- ↳ Android
- ↳ Html
- ↳ Jquery
- ↳ C++
- ↳ IOS
- ↳ CSS
- ↳ Excel
- ↳ Linux
- ↳ Apache
- ↳ MySql
- Детский мир
- Для души
- ↳ Музыкальные инструменты даром
- ↳ Печатная продукция даром
- Внешняя красота и здоровье
- ↳ Одежда и обувь для взрослых даром
- ↳ Товары для здоровья
- ↳ Физкультура и спорт
- Техника - даром!
- ↳ Автомобилистам
- ↳ Компьютерная техника
- ↳ Плиты: газовые и электрические
- ↳ Холодильники
- ↳ Стиральные машины
- ↳ Телевизоры
- ↳ Телефоны, смартфоны, планшеты
- ↳ Швейные машинки
- ↳ Прочая электроника и техника
- ↳ Фототехника
- Ремонт и интерьер
- ↳ Стройматериалы, инструмент
- ↳ Мебель и предметы интерьера даром
- ↳ Сантехника
- Другие темы
- ↳ Разное даром
- ↳ Давай меняться!
- ↳ Отдам\возьму за копеечку
- ↳ Работа и подработка в Кемерове
- ↳ Давай с тобой поговорим...
Мобильная версия