Я использую Jupyter Notebook.
код
import pandas as pd
url='https://www.fdic.gov/bank-failures/failed-bank-list'
Код: Выделить всё
dfs=pd.read_html(url)
Код: Выделить всё
# The pd.read_html(url) call above causes the following error:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
File /lib/python312.zip/urllib/request.py:1344, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1343 try:
-> 1344 h.request(req.get_method(), req.selector, req.data, headers,
1345 encode_chunked=req.has_header('Transfer-encoding'))
1346 except OSError as err: # timeout error
File /lib/python312.zip/http/client.py:1327, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1326 """Send a complete request to the server."""
-> 1327 self._send_request(method, url, body, headers, encode_chunked)
File /lib/python312.zip/http/client.py:1373, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1372 body = _encode(body, 'body')
-> 1373 self.endheaders(body, encode_chunked=encode_chunked)
File /lib/python312.zip/http/client.py:1322, in HTTPConnection.endheaders(self, message_body, encode_chunked)
1321 raise CannotSendHeader()
-> 1322 self._send_output(message_body, encode_chunked=encode_chunked)
File /lib/python312.zip/http/client.py:1081, in HTTPConnection._send_output(self, message_body, encode_chunked)
1080 del self._buffer[:]
-> 1081 self.send(msg)
1083 if message_body is not None:
1084
1085 # create a consistent interface to message_body
File /lib/python312.zip/http/client.py:1025, in HTTPConnection.send(self, data)
1024 if self.auto_open:
-> 1025 self.connect()
1026 else:
File /lib/python312.zip/http/client.py:1461, in HTTPSConnection.connect(self)
1459 "Connect to a host on a given (SSL) port."
-> 1461 super().connect()
1463 if self._tunnel_host:
File /lib/python312.zip/http/client.py:991, in HTTPConnection.connect(self)
990 sys.audit("http.client.connect", self, self.host, self.port)
--> 991 self.sock = self._create_connection(
992 (self.host,self.port), self.timeout, self.source_address)
993 # Might fail in OSs that don't implement TCP_NODELAY
File /lib/python312.zip/socket.py:852, in create_connection(address, timeout, source_address, all_errors)
851 if not all_errors:
--> 852 raise exceptions[0]
853 raise ExceptionGroup("create_connection failed", exceptions)
File /lib/python312.zip/socket.py:837, in create_connection(address, timeout, source_address, all_errors)
836 sock.bind(source_address)
--> 837 sock.connect(sa)
838 # Break explicitly a reference cycle
OSError: [Errno 23] Host is unreachable
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
Cell In[9], line 2
1 url='https://www.fdic.gov/bank-failures/failed-bank-list'
----> 2 dfs=pd.read_html(url)
File /lib/python3.12/site-packages/pandas/io/html.py:1246, in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, thousands, encoding, decimal, converters, na_values, keep_default_na, displayed_only, extract_links, dtype_backend, storage_options)
1230 if isinstance(io, str) and not any(
1231 [
1232 is_file_like(io),
(...)
1236 ]
1237 ):
1238 warnings.warn(
1239 "Passing literal html to 'read_html' is deprecated and "
1240 "will be removed in a future version. To read from a "
(...)
1243 stacklevel=find_stack_level(),
1244 )
-> 1246 return _parse(
1247 flavor=flavor,
1248 io=io,
1249 match=match,
1250 header=header,
1251 index_col=index_col,
1252 skiprows=skiprows,
1253 parse_dates=parse_dates,
1254 thousands=thousands,
1255 attrs=attrs,
1256 encoding=encoding,
1257 decimal=decimal,
1258 converters=converters,
1259 na_values=na_values,
1260 keep_default_na=keep_default_na,
1261 displayed_only=displayed_only,
1262 extract_links=extract_links,
1263 dtype_backend=dtype_backend,
1264 storage_options=storage_options,
1265 )
File /lib/python3.12/site-packages/pandas/io/html.py:989, in _parse(flavor, io, match, attrs, encoding, displayed_only, extract_links, storage_options, **kwargs)
978 p = parser(
979 io,
980 compiled_match,
(...)
985 storage_options,
986 )
988 try:
--> 989 tables = p.parse_tables()
990 except ValueError as caught:
991 # if `io` is an io-like object, check if it's seekable
992 # and try to rewind it before trying the next parser
993 if hasattr(io, "seekable") and io.seekable():
File /lib/python3.12/site-packages/pandas/io/html.py:249, in _HtmlFrameParser.parse_tables(self)
241 def parse_tables(self):
242 """
243 Parse and return all tables from the DOM.
244
(...)
247 list of parsed (header, body, footer) tuples from tables.
248 """
--> 249 tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
250 return (self._parse_thead_tbody_tfoot(table) for table in tables)
File /lib/python3.12/site-packages/pandas/io/html.py:812, in _LxmlFrameParser._build_doc(self)
810 pass
811 else:
--> 812 raise e
813 else:
814 if not hasattr(r, "text_content"):
File /lib/python3.12/site-packages/pandas/io/html.py:791, in _LxmlFrameParser._build_doc(self)
789 try:
790 if is_url(self.io):
--> 791 with get_handle(
792 self.io, "r", storage_options=self.storage_options
793 ) as f:
794 r = parse(f.handle, parser=parser)
795 else:
796 # try to parse the input in the simplest way
File /lib/python3.12/site-packages/pandas/io/common.py:728, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
725 codecs.lookup_error(errors)
727 # open URLs
--> 728 ioargs = _get_filepath_or_buffer(
729 path_or_buf,
730 encoding=encoding,
731 compression=compression,
732 mode=mode,
733 storage_options=storage_options,
734 )
736 handle = ioargs.filepath_or_buffer
737 handles: list[BaseBuffer]
File /lib/python3.12/site-packages/pandas/io/common.py:384, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
382 # assuming storage_options is to be interpreted as headers
383 req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options)
--> 384 with urlopen(req_info) as req:
385 content_encoding = req.headers.get("Content-Encoding", None)
386 if content_encoding == "gzip":
387 # Override compression based on Content-Encoding header
File /lib/python3.12/site-packages/pandas/io/common.py:289, in urlopen(*args, **kwargs)
283 """
284 Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
285 the stdlib.
286 """
287 import urllib.request
--> 289 return urllib.request.urlopen(*args, **kwargs)
File /lib/python312.zip/urllib/request.py:215, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
213 else:
214 opener = _opener
--> 215 return opener.open(url, data, timeout)
File /lib/python312.zip/urllib/request.py:515, in OpenerDirector.open(self, fullurl, data, timeout)
512 req = meth(req)
514 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 515 response = self._open(req, data)
517 # post-process response
518 meth_name = protocol+"_response"
File /lib/python312.zip/urllib/request.py:532, in OpenerDirector._open(self, req, data)
529 return result
531 protocol = req.type
--> 532 result = self._call_chain(self.handle_open, protocol, protocol +
533 '_open', req)
534 if result:
535 return result
File /lib/python312.zip/urllib/request.py:492, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
490 for handler in handlers:
491 func = getattr(handler, meth_name)
--> 492 result = func(*args)
493 if result is not None:
494 return result
File /lib/python312.zip/urllib/request.py:1392, in HTTPSHandler.https_open(self, req)
1391 def https_open(self, req):
-> 1392 return self.do_open(http.client.HTTPSConnection, req,
1393 context=self._context)
File /lib/python312.zip/urllib/request.py:1347, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1344 h.request(req.get_method(), req.selector, req.data, headers,
1345 encode_chunked=req.has_header('Transfer-encoding'))
1346 except OSError as err: # timeout error
-> 1347 raise URLError(err)
1348 r = h.getresponse()
1349 except:
URLError: <urlopen error [Errno 23] Host is unreachable>
Подробнее здесь: https://stackoverflow.com/questions/793 ... -in-pandas