(ОШИБКА) Выберите один объект и все float и int в pandas groupby — Python

Программы на Python
Ответить
Anonymous
 (ОШИБКА) Выберите один объект и все float и int в pandas groupby.

Сообщение Anonymous »

У меня есть этот фрейм данных.

Код: Выделить всё

import pandas as pd

x = {
"year": ["2012", "2012", "2013", "2014", "2012", "2014", "2013", "2013", "2012", "2013", "2012", "2014", "2014", "2013", "2012", "2014"],
"class": ["A", "B", "C", "A", "C", "B", "B", "C", "A", "C", "B", "C", "A", "C", "B", "A"],
"gender": ["M", "F", "F", "M", "F", "M", "M", "F", "F", "F", "M", "M", "F", "M", "F", "F"],
"score1": ["6", "6", "8", "10", "6", "7", "6", "7", "8", "7", "10", "9", "9", "9", "8", "9"],
"score2": ["5", "9", "10", "5", "10", "9", "5", "7", "8", "9", "8", "8", "5", "5", "8", "5"],
"score3": ["5", "9", "9", "7", "8", "5", "9", "5", "7", "6", "5", "10", "8", "8", "6", "8"],
"score4": ["10", "8", "8", "10", "9", "8", "10", "9", "7", "8", "10", "9", "7", "7", "10", "7"]
}

data = pd.DataFrame(x)
Изображение
Я хочу найти медиану для каждого столбца с dtype 'int64'. Для этого я группирую данные по столбцу class в своём df.

Код: Выделить всё

data.groupby('class').median()
Но при выполнении этого кода возникает ошибка.

Код: Выделить всё

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\groupby.py:1490, in GroupBy._cython_agg_general.<locals>.array_func(values)
1489 try:
-> 1490     result = self.grouper._cython_operation(
1491         "aggregate",
1492         values,
1493         how,
1494         axis=data.ndim - 1,
1495         min_count=min_count,
1496         **kwargs,
1497     )
1498 except NotImplementedError:
1499     # generally if we have numeric_only=False
1500     # and non-applicable functions
1501     # try to python agg
1502     # TODO: shouldn't min_count matter?

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\ops.py:959, in BaseGrouper._cython_operation(self, kind, values, how, axis, min_count, **kwargs)
958 ngroups = self.ngroups
--> 959 return cy_op.cython_operation(
960     values=values,
961     axis=axis,
962     min_count=min_count,
963     comp_ids=ids,
964     ngroups=ngroups,
965     **kwargs,
966 )

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\ops.py:657, in WrappedCythonOp.cython_operation(self, values, axis, min_count, comp_ids, ngroups, **kwargs)
649     return self._ea_wrap_cython_operation(
650         values,
651         min_count=min_count,
(...)
654         **kwargs,
655     )
--> 657 return self._cython_op_ndim_compat(
658     values,
659     min_count=min_count,
660     ngroups=ngroups,
661     comp_ids=comp_ids,
662     mask=None,
663     **kwargs,
664 )

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\ops.py:497, in WrappedCythonOp._cython_op_ndim_compat(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
495     return res.T
--> 497 return self._call_cython_op(
498     values,
499     min_count=min_count,
500     ngroups=ngroups,
501     comp_ids=comp_ids,
502     mask=mask,
503     result_mask=result_mask,
504     **kwargs,
505 )

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\ops.py:541, in WrappedCythonOp._call_cython_op(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
540 out_shape = self._get_output_shape(ngroups, values)
--> 541 func = self._get_cython_function(self.kind, self.how, values.dtype, is_numeric)
542 values = self._get_cython_vals(values)

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\ops.py:167, in WrappedCythonOp._get_cython_function(cls, kind, how, dtype, is_numeric)
165 if how in ["median", "cumprod"]:
166     # no fused types -> no __signatures__
--> 167     raise NotImplementedError(
168         f"function is not implemented for this dtype: "
169         f"[how->{how},dtype->{dtype_str}]"
170     )
171 if "object" not in f.__signatures__:
172     # raise NotImplementedError here rather than TypeError later

NotImplementedError: function is not implemented for this dtype: [how->median,dtype->object]

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\nanops.py:786, in nanmedian(values, axis, skipna, mask)
785 try:
--> 786     values = values.astype("f8")
787 except ValueError as err:
788     # e.g.  "could not convert string to float: 'a'"

ValueError: could not convert string to float: 'M'

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
Cell In[135], line 1
----> 1 data.groupby('class').median()

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\groupby.py:1883, in GroupBy.median(self, numeric_only)
1862 @final
1863 def median(self, numeric_only: bool = False):
1864     """
1865     Compute median of groups, excluding missing values.
1866
(...)
1881         Median of values within each group.
1882     """
-> 1883     result = self._cython_agg_general(
1884         "median",
1885         alt=lambda x: Series(x).median(numeric_only=numeric_only),
1886         numeric_only=numeric_only,
1887     )
1888     return result.__finalize__(self.obj, method="groupby")

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\groupby.py:1507, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, **kwargs)
1503         result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
1505     return result
-> 1507 new_mgr = data.grouped_reduce(array_func)
1508 res = self._wrap_agged_manager(new_mgr)
1509 out = self._wrap_aggregated_output(res)

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:1503, in BlockManager.grouped_reduce(self, func)
1499 if blk.is_object:
1500     # split on object-dtype blocks bc some columns may raise
1501     #  while others do not.
1502     for sb in blk._split():
-> 1503         applied = sb.apply(func)
1504         result_blocks = extend_blocks(applied, result_blocks)
1505 else:

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\internals\blocks.py:329, in Block.apply(self, func, **kwargs)
323 @final
324 def apply(self, func, **kwargs) -> list[Block]:
325     """
326     apply the function to my values; return a block if we are not
327     one
328     """
--> 329     result = func(self.values, **kwargs)
331     return self._split_op_result(result)

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\groupby.py:1503, in GroupBy._cython_agg_general.<locals>.array_func(values)
1490     result = self.grouper._cython_operation(
1491         "aggregate",
1492         values,
(...)
1496         **kwargs,
1497     )
1498 except NotImplementedError:
1499     # generally if we have numeric_only=False
1500     # and non-applicable functions
1501     # try to python agg
1502     # TODO: shouldn't min_count matter?
-> 1503     result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
1505 return result

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\groupby.py:1457, in GroupBy._agg_py_fallback(self, values, ndim, alt)
1452     ser = df.iloc[:, 0]
1454 # We do not get here with UDFs, so we know that our dtype
1455 #  should always be preserved by the implemented aggregations
1456 # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
-> 1457 res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
1459 if isinstance(values, Categorical):
1460     # Because we only get here with known dtype-preserving
1461     #  reductions, we cast back to Categorical.
1462     # TODO: if we ever get "rank" working, exclude it here.
1463     res_values = type(values)._from_sequence(res_values, dtype=values.dtype)

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\ops.py:994, in BaseGrouper.agg_series(self, obj, func, preserve_dtype)
987 if len(obj) > 0 and not isinstance(obj._values, np.ndarray):
988     # we can preserve a little bit more aggressively with EA dtype
989     #  because maybe_cast_pointwise_result will do a try/except
990     #  with _from_sequence.   NB we are assuming here that _from_sequence
991     #  is sufficiently strict that it casts appropriately.
992     preserve_dtype = True
--> 994 result = self._aggregate_series_pure_python(obj, func)
996 npvalues = lib.maybe_convert_objects(result, try_float=False)
997 if preserve_dtype:

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\ops.py:1015, in BaseGrouper._aggregate_series_pure_python(self, obj, func)
1012 splitter = self._get_splitter(obj, axis=0)
1014 for i, group in enumerate(splitter):
-> 1015     res = func(group)
1016     res = libreduction.extract_result(res)
1018     if not initialized:
1019         # We only do this validation on the first iteration

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\groupby\groupby.py:1885, in GroupBy.median.<locals>.<lambda>(x)
1862 @final
1863 def median(self, numeric_only: bool = False):
1864     """
1865     Compute median of groups, excluding missing values.
1866
(...)
1881         Median of values within each group.
1882     """
1883     result = self._cython_agg_general(
1884         "median",
-> 1885         alt=lambda x: Series(x).median(numeric_only=numeric_only),
1886         numeric_only=numeric_only,
1887     )
1888     return result.__finalize__(self.obj, method="groupby")

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\generic.py:11623, in NDFrame._add_numeric_operations.<locals>.median(self, axis, skipna, numeric_only, **kwargs)
11606 @doc(
11607     _num_doc,
11608     desc="Return the median of the values over the requested axis.",
(...)
11621     **kwargs,
11622 ):
> 11623     return NDFrame.median(self, axis, skipna, numeric_only, **kwargs)

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\generic.py:11212, in NDFrame.median(self, axis, skipna, numeric_only, **kwargs)
11205 def median(
11206     self,
11207     axis: Axis | None = 0,
(...)
11210     **kwargs,
11211 ) -> Series | float:
> 11212     return self._stat_function(
11213         "median", nanops.nanmedian, axis, skipna, numeric_only, **kwargs
11214     )

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\generic.py:11158, in NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
11154     nv.validate_stat_func((), kwargs, fname=name)
11156 validate_bool_kwarg(skipna, "skipna", none_allowed=False)
> 11158 return self._reduce(
11159     func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
11160 )

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\series.py:4670, in Series._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
4665     raise TypeError(
4666         f"Series.{name} does not allow {kwd_name}={numeric_only} "
4667         "with non-numeric dtypes."
4668     )
4669 with np.errstate(all="ignore"):
-> 4670     return op(delegate, skipna=skipna, **kwds)

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\nanops.py:158, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
156         result = alt(values, axis=axis, skipna=skipna, **kwds)
157 else:
--> 158     result = alt(values, axis=axis, skipna=skipna, **kwds)
160 return result

File c:\ProgramData\anaconda3\Lib\site-packages\pandas\core\nanops.py:789, in nanmedian(values, axis, skipna, mask)
786         values = values.astype("f8")
787     except ValueError as err:
788         # e.g. "could not convert string to float: 'a'"
--> 789         raise TypeError(str(err)) from err
790 if mask is not None:
791     values[mask] = np.nan

TypeError: could not convert string to float: 'M'
Из сообщения об ошибке выше видно, что groupby пытается агрегировать и столбец gender. Но когда я смотрю, как кто-то на YouTube делает это с тем же фреймом данных и тем же кодом, всё в порядке и ошибок нет.
[img]https://snipboard.io/QHfbBl.jpg[/img]

Итак, вопрос:
  • Почему это происходит? Это потому, что я использую новейшую версию Python/Pandas? (Я работаю на Python 3.11.5 и Pandas 2.0.3, а видео на YouTube, которое я смотрел, было опубликовано 2 года назад.)
  • Я что-то упустил в вызове groupby?


Подробнее здесь: https://stackoverflow.com/questions/791 ... as-groupby
Ответить

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

Вернуться в «Python»