Файл CSV не создаётся в ожидаемой папке. Результат выводится в окне терминала

Файл CSV не создаётся в ожидаемой папке. Результат выводится в окне терминала ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Файл CSV не создаётся в ожидаемой папке. Результат выводится в окне терминала

Цитата

Сообщение Anonymous » 26 янв 2025, 19:41

import requests
import pandas as pd
import numpy as np
import os
import io
import sys
from pathlib import Path

# Archived snapshot of Wikipedia's "List of countries by GDP (nominal)" page.
URL = "https://web.archive.org/web/20230902185326/https://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28nominal%29"
# Position of the GDP table among the tables parsed from the page
# (adjust if the page layout changes).
TABLE_INDEX = 3
# Destination of the generated CSV file.
OUTPUT_PATH = Path(r"C:\Users\Path\Largest_economies.csv")
# Seconds to wait for the server, so a stalled connection cannot hang the script.
REQUEST_TIMEOUT = 30


def fetch_html(url, timeout=REQUEST_TIMEOUT):
    """Download *url* and return the response body as text.

    Exits the script with a non-zero status when the request fails or the
    server answers with anything other than HTTP 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        sys.exit(f"Failed to fetch the page: {exc}")
    if response.status_code != 200:
        sys.exit(f"Failed to fetch the page. Status code: {response.status_code}")
    print("Successfully fetched the page!")
    return response.text


def read_tables(html_content):
    """Parse every HTML table in *html_content* into a list of DataFrames.

    Exits the script with a non-zero status when no table can be parsed.
    """
    try:
        # Recent pandas deprecates passing literal HTML to read_html;
        # wrapping the text in StringIO works on old and new versions alike.
        tables = pd.read_html(io.StringIO(html_content))
    except ValueError as exc:
        sys.exit(f"Error reading HTML tables: {exc}")
    if not tables:
        sys.exit("No tables found in the HTML content.")
    return tables


def top_economies(table, count=10):
    """Return the *count* largest economies with GDP in billions of USD.

    The first column of *table* is taken as the country name and the third
    as GDP in millions of USD. Row 0 is skipped and the following *count*
    rows are kept. *table* itself is not modified.

    Raises:
        ValueError: if *table* has fewer than three columns, or a GDP
            value cannot be converted to a number.
    """
    if table.shape[1] < 3:
        raise ValueError(
            f"Column 2 not found. Available columns: {list(table.columns)}"
        )
    # Select by position and take an explicit copy, so the assignments below
    # modify an independent frame rather than a slice of the parsed table
    # (avoids SettingWithCopyWarning).
    result = table.iloc[1:count + 1, [0, 2]].copy()
    result.columns = ["Country", "GDP (Billion USD)"]
    # Convert from millions to billions, rounded to 2 decimal places.
    result["GDP (Billion USD)"] = np.round(
        result["GDP (Billion USD)"].astype(float) / 1000, 2
    )
    return result


def main():
    """Fetch the GDP page, extract the top 10 economies and save them as CSV."""
    print("Working directory:", os.getcwd())
    # Report the real destination: the file goes to OUTPUT_PATH, which is
    # not necessarily the working directory.
    print("Saving file to:", OUTPUT_PATH)

    tables = read_tables(fetch_html(URL))

    # Inspect all extracted tables.
    for i, table in enumerate(tables):
        print(f"Table {i}:")
        print(table.head())
        print("\n")

    if TABLE_INDEX >= len(tables):
        sys.exit(f"Table {TABLE_INDEX} not found; only {len(tables)} tables were parsed.")
    df = tables[TABLE_INDEX]
    print("Selected table:")
    print(df.head())

    try:
        df = top_economies(df)
    except ValueError as exc:
        sys.exit(str(exc))

    try:
        # to_csv does not create missing folders, so make sure the target exists.
        OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(OUTPUT_PATH, index=False)
    except OSError as exc:
        sys.exit(f"Could not write '{OUTPUT_PATH}': {exc}")

    print("The top 10 economies by GDP have been saved to 'Largest_economies.csv'.")


if __name__ == "__main__":
    main()

Я проверил текущий рабочий каталог с помощью os.getcwd(), явно указал путь к файлу в методе to_csv(), проверил права на запись, сохраняя файл в более простых местах (например, на рабочем столе), обернул вызов to_csv() в блок try-except для перехвата ошибок, а также убедился в правильности кодировки и в существовании папки, необходимых для успешного создания файла.

Подробнее здесь: https://stackoverflow.com/questions/793 ... nal-window

1737909701

Anonymous

[code]import requests
import pandas as pd
import numpy as np
import os
import io
import sys
from pathlib import Path

# Archived snapshot of Wikipedia's "List of countries by GDP (nominal)" page.
URL = "https://web.archive.org/web/20230902185326/https://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28nominal%29"
# Position of the GDP table among the tables parsed from the page
# (adjust if the page layout changes).
TABLE_INDEX = 3
# Destination of the generated CSV file.
OUTPUT_PATH = Path(r"C:\Users\Path\Largest_economies.csv")
# Seconds to wait for the server, so a stalled connection cannot hang the script.
REQUEST_TIMEOUT = 30


def fetch_html(url, timeout=REQUEST_TIMEOUT):
    """Download *url* and return the response body as text.

    Exits the script with a non-zero status when the request fails or the
    server answers with anything other than HTTP 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        sys.exit(f"Failed to fetch the page: {exc}")
    if response.status_code != 200:
        sys.exit(f"Failed to fetch the page. Status code: {response.status_code}")
    print("Successfully fetched the page!")
    return response.text


def read_tables(html_content):
    """Parse every HTML table in *html_content* into a list of DataFrames.

    Exits the script with a non-zero status when no table can be parsed.
    """
    try:
        # Recent pandas deprecates passing literal HTML to read_html;
        # wrapping the text in StringIO works on old and new versions alike.
        tables = pd.read_html(io.StringIO(html_content))
    except ValueError as exc:
        sys.exit(f"Error reading HTML tables: {exc}")
    if not tables:
        sys.exit("No tables found in the HTML content.")
    return tables


def top_economies(table, count=10):
    """Return the *count* largest economies with GDP in billions of USD.

    The first column of *table* is taken as the country name and the third
    as GDP in millions of USD. Row 0 is skipped and the following *count*
    rows are kept. *table* itself is not modified.

    Raises:
        ValueError: if *table* has fewer than three columns, or a GDP
            value cannot be converted to a number.
    """
    if table.shape[1] < 3:
        raise ValueError(
            f"Column 2 not found. Available columns: {list(table.columns)}"
        )
    # Select by position and take an explicit copy, so the assignments below
    # modify an independent frame rather than a slice of the parsed table
    # (avoids SettingWithCopyWarning).
    result = table.iloc[1:count + 1, [0, 2]].copy()
    result.columns = ["Country", "GDP (Billion USD)"]
    # Convert from millions to billions, rounded to 2 decimal places.
    result["GDP (Billion USD)"] = np.round(
        result["GDP (Billion USD)"].astype(float) / 1000, 2
    )
    return result


def main():
    """Fetch the GDP page, extract the top 10 economies and save them as CSV."""
    print("Working directory:", os.getcwd())
    # Report the real destination: the file goes to OUTPUT_PATH, which is
    # not necessarily the working directory.
    print("Saving file to:", OUTPUT_PATH)

    tables = read_tables(fetch_html(URL))

    # Inspect all extracted tables.
    for i, table in enumerate(tables):
        print(f"Table {i}:")
        print(table.head())
        print("\n")

    if TABLE_INDEX >= len(tables):
        sys.exit(f"Table {TABLE_INDEX} not found; only {len(tables)} tables were parsed.")
    df = tables[TABLE_INDEX]
    print("Selected table:")
    print(df.head())

    try:
        df = top_economies(df)
    except ValueError as exc:
        sys.exit(str(exc))

    try:
        # to_csv does not create missing folders, so make sure the target exists.
        OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(OUTPUT_PATH, index=False)
    except OSError as exc:
        sys.exit(f"Could not write '{OUTPUT_PATH}': {exc}")

    print("The top 10 economies by GDP have been saved to 'Largest_economies.csv'.")


if __name__ == "__main__":
    main()
[/code]
Я проверил текущий рабочий каталог с помощью os.getcwd(), явно указал путь к файлу в методе to_csv(), проверил права на запись, сохраняя файл в более простых местах (например, на рабочем столе), обернул вызов to_csv() в блок try-except для перехвата ошибок, а также убедился в правильности кодировки и в существовании папки, необходимых для успешного создания файла.

Подробнее здесь: [url]https://stackoverflow.com/questions/79388899/csv-file-not-generating-in-expected-folder-prints-out-in-terminal-window[/url]