Что я пробовал:
Я пытался выбрать элемент div, используя class_="paywall" и class_="styles_leadArticleText__etnRf".
Я добавил User-Agent в свои заголовки, чтобы убедиться, что меня не заблокировали.
Я использовал verify=False, чтобы обойти проблемы с SSL-сертификатом на моём локальном компьютере.
Проблема: переменная article_body всегда оказывается равной None, из-за чего full_text получает значение "N/A". Я подозреваю, что на сайте используются динамические классы или что я неправильно обхожу структуру страницы.
Ссылка на веб-сайт: https://renewablesnow.com/news/india-bo ... 5-1287332/
Возникли проблемы с извлечением выделенного текста на прикрепленной фотографии.

Мой код:
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 4 12:25:27 2026
@author: hbasamh
"""
import requests
from bs4 import BeautifulSoup
import urllib3
import csv
import re
# Suppress urllib3's InsecureRequestWarning: the scraper below calls
# requests.get(..., verify=False), which connects without certificate
# verification and would otherwise emit this warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Seconds to wait on the server before abandoning a request.
_REQUEST_TIMEOUT = 30

# Class string of the <div> that is searched for after each
# "Sector" / "Region/Country" / "Topics" label.
_TAG_CONTAINER_CLASS = "mt-0 mt-xl-2 d-flex flex-row flex-wrap"


def _text_or_na(element):
    """Return the stripped text of *element*, or "N/A" when it is None."""
    return element.get_text(strip=True) if element is not None else "N/A"


def _find_article_body(soup):
    """Return the <div> holding the article text, or None if none matches.

    A multi-class string passed as ``class_`` only matches an element whose
    class attribute is exactly that string, so the two exact lookups are
    tried first and then relaxed to the single distinguishing class. That
    way extra or reordered utility classes do not defeat the search.
    """
    return (
        soup.find('div', class_="d-flex flex-column align-items-center pt-4 mt-2 paywall")
        or soup.find('div', class_="styles_leadArticleText__etnRf text-just")
        or soup.find('div', class_="paywall")
        or soup.find('div', class_="styles_leadArticleText__etnRf")
    )


def _linked_tags_after(soup, label_pattern):
    """Collect the <h6> texts of the links that follow a label.

    Finds the first string in *soup* matching *label_pattern*
    (case-insensitive), then the next tag-container <div> after it, and
    returns the stripped text of the <h6> inside each <a> of that container.
    Returns an empty list when the label or the container is missing.
    """
    label = soup.find(string=re.compile(label_pattern, re.I))
    if label is None:
        return []
    container = label.find_next("div", class_=_TAG_CONTAINER_CLASS)
    if container is None:
        return []
    headings = (link.find("h6") for link in container.find_all("a"))
    return [heading.get_text(strip=True) for heading in headings if heading is not None]


def scrape_renewables_article(url):
    """Download one article page and extract its fields.

    Args:
        url: Address of the article page to fetch.

    Returns:
        A dict with the keys 'Title', 'Date_Time', 'Author', 'Content',
        'Sectors', 'Regions', 'Topics' and 'URL'. Text fields are "N/A" and
        list fields are empty when the corresponding element is not found.
        Returns None if any exception is raised while fetching or parsing;
        the error is printed, not propagated.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        # NOTE(security): verify=False disables TLS certificate validation.
        # It is kept because the author needs it on their machine; prefer
        # fixing the local CA bundle and removing it.
        # The timeout keeps a stalled server from hanging the script forever.
        response = requests.get(
            url, headers=headers, verify=False, timeout=_REQUEST_TIMEOUT
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # 1. Title
        title = _text_or_na(soup.find('h1'))

        # 2 & 3. Date and time, read from the span with the classes below.
        # Example text given by the author: "Dec 29, 2025 | 5:24:20 PM"
        # (split on "|" if separate date and time fields are needed).
        date_element = soup.find('span', class_="pe-0 pe-sm-2 border-end col-sm-6 text-right-center-sm text-14-sm")
        full_date = _text_or_na(date_element)

        # 4. Author
        author_element = soup.find('span', class_="text-decoration-underline text-dark c-p ms-1")
        author = _text_or_na(author_element)

        # 5. Article text: all <p> texts inside the body container.
        # NOTE(review): if this is still "N/A", the text is probably not in
        # the downloaded HTML at all (e.g. rendered client-side) - confirm
        # by inspecting response.text.
        article_body = _find_article_body(soup)
        if article_body is not None:
            full_text = " ".join(
                p.get_text(strip=True) for p in article_body.find_all('p')
            )
        else:
            full_text = "N/A"

        # 6, 7, 8. Sectors, regions and topics share the same markup.
        sectors = _linked_tags_after(soup, "Sector")
        print(f"Sectors: {sectors}")
        regions = _linked_tags_after(soup, "Region/Country")
        print(f"Regions: {regions}")
        topics = _linked_tags_after(soup, "Topics")
        print(f"Topics: {topics}")

        return {
            'Title': title,
            'Date_Time': full_date,
            'Author': author,
            'Content': full_text,
            'Sectors': sectors,
            'Regions': regions,
            'Topics': topics,
            'URL': url
        }
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller skip
        # this URL instead of aborting the whole run.
        print(f"Error scraping {url}: {e}")
        return None
# --- Execution ---
# NOTE(review): this URL looks truncated (it contains " ... "); replace it
# with the full article address before running.
urls = [
    "https://renewablesnow.com/news/india-bo ... 5-1287332/"
]

# Keep only the successful scrapes; the scraper returns None on failure.
data_list = []
for link in urls:
    result = scrape_renewables_article(link)
    if result:
        data_list.append(result)

# Save to CSV. The guard avoids an IndexError on data_list[0] when every
# scrape failed and the list is empty.
if data_list:
    # Column names come from the first scraped record.
    keys = data_list[0].keys()
    with open('renewables_data.csv', 'w', newline='', encoding='utf-8') as f:
        dict_writer = csv.DictWriter(f, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(data_list)
    print("Data saved to renewables_data.csv")
else:
    print("No articles were scraped; renewables_data.csv was not written.")
Подробнее здесь: https://stackoverflow.com/questions/798 ... pite-using
Мобильная версия