Возможно, вам придется установить необходимые зависимости с помощью команды pip.
- pip install requests
- pip install bs4
- pip install selenium
- pip install pandas
- pip install openpyxl
- pip install xlsxwriter
Код: Выделить всё
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import pandas as pd
# Scrape the first HTML table from a Wikipedia article and save it as a
# timestamped Excel workbook in the current working directory.
from datetime import datetime

url = 'https://en.wikipedia.org/wiki/List_of_largest_companies_in_the_United_States_by_revenue'

# Send a descriptive User-Agent (Wikimedia asks automated clients to identify
# themselves), bound the wait with a timeout, and fail loudly on an HTTP error
# instead of parsing an error page as if it were the article.
page = requests.get(
    url,
    headers={'User-Agent': 'wiki-table-to-excel/1.0 (python-requests)'},
    timeout=30,
)
page.raise_for_status()
soup = BeautifulSoup(page.text, "html.parser")

tables = soup.find_all('table')
if not tables:
    raise RuntimeError('No <table> elements found at ' + url)
# Change the index to pick another table on the page (0 is the first one).
table = tables[0]

table_rows = table.find_all('tr')
if not table_rows:
    raise RuntimeError('The selected table has no rows')

# Column titles come from the first row only, so <th> cells used as row
# headers further down the table do not turn into extra columns. If the first
# row has no <th> at all, fall back to every <th> in the table.
world_titles = table_rows[0].find_all('th') or table.find_all('th')
word_table_titles = [title.text.strip() for title in world_titles]

# Collect all rows first and build the DataFrame once; growing a DataFrame
# row by row with df.loc[len(df)] re-allocates on every insert.
records = []
for row in table_rows[1:]:
    cells = [cell.text.strip() for cell in row.find_all('td')]
    if not cells:
        # Rows without <td> cells (e.g. repeated header rows) carry no data.
        continue
    if len(cells) != len(word_table_titles):
        raise ValueError(
            'Row has %d cells but the table has %d columns: %r'
            % (len(cells), len(word_table_titles), cells)
        )
    records.append(cells)

df = pd.DataFrame(records, columns=word_table_titles)

# The timestamp uses '-' rather than ':' so the file name is valid on Windows.
filename = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
with pd.ExcelWriter(filename + ' Output.xlsx') as writer:
    df.to_excel(writer, index=False)
Подробнее здесь: https://stackoverflow.com/questions/791 ... excel-file