Парсинг веб-страниц / Парсинг веб-страниц Zomato с помощью BeautifulSoupPython

Программы на Python
Ответить Пред. темаСлед. тема
Anonymous
 Парсинг веб-страниц / Парсинг веб-страниц Zomato с помощью BeautifulSoup

Сообщение Anonymous »

Я попробовал парсинг веб-страниц со ссылкой на https://datascienceplus.com/zomato-web- ... in-python/
Просто скопировал и вставил код на сайт, но получаю ошибку на втором шаге.
import requests
from bs4 import BeautifulSoup

#Used headers/agent because the request was timed out and asking for an agent.
#Using following code we can fake the agent.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
response = requests.get("https://www.zomato.com/bangalore/top-re ... rs=headers)

Ниже приведен код ошибки.
Я использовал ноутбук Jupiter. Знаете, почему я это получаю?
Я совершенно новичок в этом и даже не до конца понимаю, что делают эти переменные (заголовки, ответ).
---------------------------------------------------------------------------
RemoteDisconnected Traceback (most recent call last)
~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601

~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
383 # otherwise it looks like a programming error was the cause.
--> 384 six.raise_from(e, None)
385 except (SocketTimeout, BaseSSLError, SocketError) as e:

~/anaconda3/lib/python3.7/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 try:
--> 380 httplib_response = conn.getresponse()
381 except Exception as e:

~/anaconda3/lib/python3.7/http/client.py in getresponse(self)
1320 try:
-> 1321 response.begin()
1322 except ConnectionError:

~/anaconda3/lib/python3.7/http/client.py in begin(self)
295 while True:
--> 296 version, status, reason = self._read_status()
297 if status != CONTINUE:

~/anaconda3/lib/python3.7/http/client.py in _read_status(self)
264 # sending a valid response.
--> 265 raise RemoteDisconnected("Remote end closed connection without"
266 " response")

RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred:

ProtocolError Traceback (most recent call last)
~/anaconda3/lib/python3.7/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )

~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
637 retries = retries.increment(method, url, error=e, _pool=self,
--> 638 _stacktrace=sys.exc_info()[2])
639 retries.sleep()

~/anaconda3/lib/python3.7/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
367 if read is False or not self._is_method_retryable(method):
--> 368 raise six.reraise(type(error), error, _stacktrace)
369 elif read is not None:

~/anaconda3/lib/python3.7/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value

~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601

~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
383 # otherwise it looks like a programming error was the cause.
--> 384 six.raise_from(e, None)
385 except (SocketTimeout, BaseSSLError, SocketError) as e:

~/anaconda3/lib/python3.7/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 try:
--> 380 httplib_response = conn.getresponse()
381 except Exception as e:

~/anaconda3/lib/python3.7/http/client.py in getresponse(self)
1320 try:
-> 1321 response.begin()
1322 except ConnectionError:

~/anaconda3/lib/python3.7/http/client.py in begin(self)
295 while True:
--> 296 version, status, reason = self._read_status()
297 if status != CONTINUE:

~/anaconda3/lib/python3.7/http/client.py in _read_status(self)
264 # sending a valid response.
--> 265 raise RemoteDisconnected("Remote end closed connection without"
266 " response")

ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

During handling of the above exception, another exception occurred:

ConnectionError Traceback (most recent call last)
in
2 #Using following code we can fake the agent.
3 headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
----> 4 response = requests.get("https://www.zomato.com/bangalore/top-re ... rs=headers)

~/anaconda3/lib/python3.7/site-packages/requests/api.py in get(url, params, **kwargs)
73
74 kwargs.setdefault('allow_redirects', True)
---> 75 return request('get', url, params=params, **kwargs)
76
77

~/anaconda3/lib/python3.7/site-packages/requests/api.py in request(method, url, **kwargs)
58 # cases, and look like a memory leak in others.
59 with sessions.Session() as session:
---> 60 return session.request(method=method, url=url, **kwargs)
61
62

~/anaconda3/lib/python3.7/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp

~/anaconda3/lib/python3.7/site-packages/requests/sessions.py in send(self, request, **kwargs)
644
645 # Send the request
--> 646 r = adapter.send(request, **kwargs)
647
648 # Total elapsed time of the request (approximately)

~/anaconda3/lib/python3.7/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
496
497 except (ProtocolError, socket.error) as err:
--> 498 raise ConnectionError(err, request=request)
499
500 except MaxRetryError as e:

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Подробнее здесь: https://stackoverflow.com/questions/680 ... utifulsoup
Реклама
Ответить Пред. темаСлед. тема

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

  • Похожие темы
    Ответы
    Просмотры
    Последнее сообщение
  • Интернет -соскабливание / Zomato Web Scraping с BeautifulSoup
    Anonymous » » в форуме Python
    0 Ответы
    4 Просмотры
    Последнее сообщение Anonymous
  • Парсинг веб-страниц с помощью Python BeautifulSoup в Spyder IDE
    Anonymous » » в форуме Python
    0 Ответы
    28 Просмотры
    Последнее сообщение Anonymous
  • Парсинг веб-страниц с помощью Python BeautifulSoup в Spyder IDE
    Anonymous » » в форуме Python
    0 Ответы
    12 Просмотры
    Последнее сообщение Anonymous
  • Парсинг веб-страниц с помощью Python BeautifulSoup в Spyder IDE
    Anonymous » » в форуме Python
    0 Ответы
    18 Просмотры
    Последнее сообщение Anonymous
  • Парсинг веб-страниц с помощью Python BeautifulSoup в Spyder IDE
    Anonymous » » в форуме Python
    0 Ответы
    15 Просмотры
    Последнее сообщение Anonymous

Вернуться в «Python»