Код: Выделить всё
def parse(self, response):
    """Extract title/demographics records from a page and follow pagination.

    Args:
        response: The downloaded page. It must provide ``xpath()``,
            ``urljoin()`` and ``url``.

    Yields:
        One dict per ``div`` whose ``class`` attribute is exactly ``item``
        and that has both a non-empty ``h3`` text and a non-empty
        ``span[@class='Demographics']`` text. Each dict has the keys
        ``HNWI_Title`` and ``Demographics``. After the items, a
        ``scrapy.Request`` for the next page is yielded when a next-page
        link is found.
    """
    extracted = 0
    for node in response.xpath("//div[@class='item']"):
        # .get() returns None when the node is absent; normalise to "" so a
        # missing field skips this element instead of raising AttributeError.
        hnwi_title = (node.xpath(".//h3/text()").get() or "").strip()
        demographics_text = (
            node.xpath(".//span[@class='Demographics']/text()").get() or ""
        ).strip()

        if not (hnwi_title and demographics_text):
            self.logger.warning(
                "Skipping item with missing title or demographics on %s",
                response.url,
            )
            continue

        item_data = {
            'HNWI_Title': hnwi_title,
            'Demographics': demographics_text,
        }
        extracted += 1
        self.logger.debug("Extracted item: %s", item_data)
        # Yield each record on its own: a spider callback must produce
        # individual items/requests, not a list wrapping them.
        yield item_data

    if extracted:
        self.logger.info("Total items extracted: %d", extracted)
    else:
        self.logger.warning("No items extracted from %s", response.url)

    # NOTE(review): the class value 'next -page' contains a space and only
    # matches an attribute spelled exactly that way; this looks like a typo
    # for 'next-page' -- confirm against the target site's markup.
    next_page = response.xpath("//a[@class = 'next -page']/@href").get()
    if next_page:
        yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
Подробнее здесь: https://stackoverflow.com/questions/790 ... b-scraping