import io
import os

import numpy as np
import pandas as pd
import requests
# Script: download an archived Wikipedia page listing countries by nominal GDP,
# take rows 1-10 of one of its HTML tables, convert the GDP column from
# millions to billions of USD and save the result as a CSV file.
# Every failure path prints a message and exits with status 1.
print("Saving file to:", os.getcwd())

# Wayback Machine snapshot (timestamp 2023-09-02) of the Wikipedia page.
URL = "https://web.archive.org/web/20230902185326/https://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28nominal%29"
# Position of the wanted table in the list returned by pd.read_html
# (adjust if the printed preview below shows a different table).
TABLE_INDEX = 3
# Destination of the generated CSV file.
OUTPUT_PATH = r"C:\Users\Path\Largest_economies.csv"
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# Fetch the HTML content of the webpage.
try:
    response = requests.get(URL, timeout=REQUEST_TIMEOUT)
except requests.RequestException as e:
    print(f"Failed to fetch the page: {e}")
    raise SystemExit(1) from e

if response.status_code == 200:
    print("Successfully fetched the page!")
    html_content = response.text
else:
    print(f"Failed to fetch the page. Status code: {response.status_code}")
    raise SystemExit(1)

# Extract tables from the webpage. The markup is wrapped in StringIO because
# passing literal HTML text to read_html is deprecated in recent pandas.
try:
    tables = pd.read_html(io.StringIO(html_content))
except ValueError as e:
    print(f"Error reading HTML tables: {e}")
    raise SystemExit(1) from e

if not tables:
    print("No tables found in the HTML content.")
    raise SystemExit(1)

# Inspect all extracted tables.
for i, table in enumerate(tables):
    print(f"Table {i}:")
    print(table.head())
    print("\n")

# Select the required table, guarding against a page with fewer tables.
if TABLE_INDEX >= len(tables):
    print(f"Table {TABLE_INDEX} not found. Only {len(tables)} tables were extracted.")
    raise SystemExit(1)

df = tables[TABLE_INDEX]
print("Selected table:")
print(df.head())

# Replace the headers with positional indices so columns can be picked by number.
df.columns = range(df.shape[1])
print("Columns after renaming:", df.columns)

# Keep columns 0 and 2 only.
# NOTE(review): presumably country name and GDP estimate - confirm against
# the table preview printed above.
if 2 in df.columns:
    df = df[[0, 2]]
else:
    print("Column 2 not found. Available columns:", df.columns)
    raise SystemExit(1)

# Retain rows 1-10 (row 0 is skipped). The explicit copy makes the frame
# independent of the parsed table, so the column assignments below do not
# trigger SettingWithCopyWarning.
df = df.iloc[1:11, :].copy()

# Rename columns.
df.columns = ['Country', 'GDP (Million USD)']

# Convert GDP from Million USD to Billion USD and round to 2 decimal places.
try:
    gdp_million = df['GDP (Million USD)'].astype(float)
except (ValueError, TypeError) as e:
    print(f"GDP column contains non-numeric values: {e}")
    raise SystemExit(1) from e

df['GDP (Million USD)'] = np.round(gdp_million / 1000, 2)

# Rename the column header to 'GDP (Billion USD)'.
df = df.rename(columns={'GDP (Million USD)': 'GDP (Billion USD)'})

# Save the DataFrame to a CSV file, creating the target folder when needed
# and reporting any filesystem error instead of dying with a traceback.
output_dir = os.path.dirname(OUTPUT_PATH)
try:
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(OUTPUT_PATH, index=False)
except OSError as e:
    print(f"Failed to save the CSV file to {OUTPUT_PATH!r}: {e}")
    raise SystemExit(1) from e

print("The top 10 economies by GDP have been saved to 'Largest_economies.csv'.")
Проверил текущий рабочий каталог с помощью os.getcwd(), явно указал путь к файлу в методе to_csv(), проверил права на запись, сохраняя файл в более простые места (например, на рабочий стол), обернул вызов to_csv() в блок try-except для перехвата ошибок, а также убедился в правильности кодировки и в существовании папки, необходимой для успешного создания файла.
[code]import requests import pandas as pd import numpy as np import os print("Saving file to:", os.getcwd())
# Define the URL URL = "https://web.archive.org/web/20230902185326/https://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28nominal%29"
# Fetch the HTML content of the webpage response = requests.get(URL) if response.status_code == 200: print("Successfully fetched the page!") html_content = response.text else: print(f"Failed to fetch the page. Status code: {response.status_code}") exit()
# Extract tables from the webpage try: tables = pd.read_html(html_content) if not tables: print("No tables found in the HTML content.") exit() except ValueError as e: print(f"Error reading HTML tables: {e}") exit()
# Inspect all extracted tables for i, table in enumerate(tables): print(f"Table {i}:") print(table.head()) print("\n")
# Select the required table (adjust index if necessary) df = tables[3] # Replace 3 with the correct index if needed print("Selected table:") print(df.head())
# Dynamically rename and inspect columns df.columns = range(df.shape[1]) # Replace headers with numerical indices print("Columns after renaming:", df.columns)
# Handle missing columns dynamically if 2 in df.columns: df = df[[0, 2]] # Select columns 0 and 2 else: print("Column 2 not found. Available columns:", df.columns) exit()
# Retain rows for the top 10 economies df = df.iloc[1:11, :]
# Convert GDP from Million USD to Billion USD and round to 2 decimal places df['GDP (Million USD)'] = df['GDP (Million USD)'].astype(float) df['GDP (Million USD)'] = np.round(df['GDP (Million USD)'] / 1000, 2)
# Rename the column header to 'GDP (Billion USD)' df.rename(columns={'GDP (Million USD)': 'GDP (Billion USD)'}, inplace=True)
# Save the DataFrame to a CSV file df.to_csv(r"C:\Users\Path\Largest_economies.csv", index=False)
print("The top 10 economies by GDP have been saved to 'Largest_economies.csv'.") [/code] Проверил текущий рабочий каталог с помощью os.getcwd(), явно установил путь к файлу в методе to_csv(), проверил разрешения на запись, сохранив его в более простых местах, таких как рабочий стол, завернув в to_csv( ) в блоке try-кроме для обнаружения ошибок и обеспечения правильной кодировки и существования папки для успешного создания файла