Код: Выделить всё
def get_tables(url):
res = requests.get(url)
comm = re.compile("")
soup = BeautifulSoup(comm.sub("",res.text),'lxml')
all_tables = soup.findAll("tbody")
team_table = all_tables[0]
player_table = all_tables[1]
return player_table, team_table
def get_frame(features, player_table):
pre_df_player = dict()
features_wanted_player = features
rows_player = player_table.find_all('tr')
for row in rows_player:
if(row.find('th',{"scope":"row"}) != None):
for f in features_wanted_player:
cell = row.find("td",{"data-stat": f})
a = cell.text.strip().encode()
text=a.decode("utf-8")
if(text == ''):
text = '0'
if((f!='player')&(f!='nationality')&(f!='position')&(f!='squad')&(f!='age')&(f!='birth_year')):
text = float(text.replace(',',''))
if f in pre_df_player:
pre_df_player[f].append(text)
else:
pre_df_player[f] = [text]
df_player = pd.DataFrame.from_dict(pre_df_player)
return df_player
def frame_for_category(category,top,end,features):
url = (top + category + end)
player_table, team_table = get_tables(url)
df_player = get_frame(features, player_table)
return df_player
def get_outfield_data(top, end):
df1 = frame_for_category('stats',top,end,stats)
...
return df
df_2018 = get_outfield_data('https://fbref.com/en/comps/Big5/2017-2018/','/players/2017-2018-Big-5-European-Leagues-Stats')
df_2018["player"] = df_2018["player"] + ', 2017-18'
...
df.head()
Код: Выделить всё
Cell In\[19\], line 2
1 player_table, team_table = get_tables('https://fbref.com/en/comps/Big5/2017-2018/stats/players/2017-2018-Big-5-European-Leagues-Stats')
\----\> 2 df_player = get_frame(stats, player_table)
3 pintf(df_player)
Cell In\[18\], line 21, in get_frame(features, player_table)
19 for f in features_wanted_player:
20 cell = row.find("td",{"data-stat": f})
\---\> 21 a = cell.text.strip().encode()
22 text=a.decode("utf-8")
23 if(text == ''):
AttributeError: 'NoneType' object has no attribute 'text'
Я возобновил код, но вы можете найти его на этой странице git: https://github.com/victorballesteros8/f ... /tree/main
Я попробовал использовать код в IDE и отладить его, чтобы выяснить, в чем причина проблемы, но из-за такого большого количества данных их трудно найти.
Подробнее здесь: https://stackoverflow.com/questions/772 ... crape-data