Я запускаю следующие ячейки:
Код: Выделить всё
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
cols = ['County', 'City', 'State', 'ZIP Code', 'Model Year', 'Make', 'Model', 'Electric Vehicle Type', 'Clean Alternative Fuel Vehicle (CAFV) Eligibility']
for col in cols:
le.fit(t[col])
x[col] = le.transform(x[col])
print(le.classes_)
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.5, random_state = 0)
r2_score(y_test, lm.predict(x_test))
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state = 0)
regressor.fit(x_train, y_train)
r2_score(y_test, regressor.predict(x_test))
r2_score(y_train, regressor.predict(x_train))
uv = np.nanpercentile(df2['Base MSRP'], [99])[0]*2
df2['Base MSRP'][(df2['Base MSRP']>uv)] = uv
df2 = df2[df2['Model Year'] != 'N/'] # Filter out rows where 'Model Year' is 'N/'
for col in cols:
df2[col] = df2[col].replace('N/', -1)
le.fit(df2[col])
df2[col] = le.transform(df2[col])
print(le.classes_)
le = preprocessing.LabelEncoder()
cols = ['County', 'City', 'State', 'ZIP Code', 'Model Year', 'Make', 'Model', 'Electric Vehicle Type', 'Clean Alternative Fuel Vehicle (CAFV) Eligibility']
for col in cols:
le.fit(t[col])
df2[col] = le.transform(df2[col])
print(le.classes_)
При выполнении последней ячейки возникает следующая ошибка:
Код: Выделить всё
TypeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_16424\1094749331.py in
1 for col in cols:
2 le.fit(t[col])
----> 3 df2[col] = le.transform(df2[col])
4 print(le.classes_)
~\.conda\envs\electricvehiclepriceprediction\lib\site-packages\sklearn\preprocessing\_label.py in transform(self, y)
136 return np.array([])
137
--> 138 return _encode(y, uniques=self.classes_)
139
140 def inverse_transform(self, y):
~\.conda\envs\electricvehiclepriceprediction\lib\site-packages\sklearn\utils\_encode.py in _encode(values, uniques, check_unknown)
185 else:
186 if check_unknown:
--> 187 diff = _check_unknown(values, uniques)
188 if diff:
189 raise ValueError(f"y contains previously unseen labels: {str(diff)}")
~\.conda\envs\electricvehiclepriceprediction\lib\site-packages\sklearn\utils\_encode.py in _check_unknown(values, known_values, return_mask)
259
260 # check for nans in the known_values
--> 261 if np.isnan(known_values).any():
262 diff_is_nan = np.isnan(diff)
263 if diff_is_nan.any():
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
Я пробовал использовать следующий код:
Код: Выделить всё
le = preprocessing.LabelEncoder()
cols = ['County', 'City', 'State', 'ZIP Code', 'Model Year', 'Make', 'Model', 'Electric Vehicle Type', 'Clean Alternative Fuel Vehicle (CAFV) Eligibility']
for col in cols:
le.fit(t[col])
df2[col] = le.transform(df2[col])
print(le.classes_)
Чтобы решить эту проблему, я попытался заполнить пропущенные значения («N/») вместо того, чтобы удалять их, с помощью следующего кода:
Код: Выделить всё
for col in cols:
le.fit(t[col].fillna('Missing')) # Impute missing values with 'Missing'
df2[col] = le.transform(df2[col].fillna('Missing'))
print(le.classes_)
Вот ссылка на мой блокнот: https://github.com/SteveAustin583/electric-vehicle-price-prediction-revengers/blob/main/revengers.ipynb
Вот ссылка на набор данных:
https://www.kaggle.com/datasets/rithura ... hicle-data
Как решить эту проблему?
Подробнее здесь: https://stackoverflow.com/questions/792 ... n-a-machin