Код: Выделить всё
coordinates = np.where(mask.transpose())[::-1]
AttributeError: 'bool' object has no attribute 'transpose'
Код: Выделить всё
from xgboost import XGBClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectFromModel
from itertools import combinations
import pandas as pd
import numpy as np
#read in data
training_data = pd.read_csv('train.csv')
testing_data = pd.read_csv('test.csv')
#seperate X and Y
X_train_full = training_data.copy()
y = X_train_full.Survived
X_train_full.drop(['Survived'], axis=1, inplace=True)
y_test = testing_data
#get all str columns
cat_columns1 = [cname for cname in X_train_full.columns if
X_train_full[cname].dtype == "object"]
interactions = pd.DataFrame(index= X_train_full)
#create new features
for combination in combinations(cat_columns1,2):
imputer = SimpleImputer(strategy='constant')
new_col_name = '_'.join(combination)
col1 = X_train_full[combination[0]]
col2 = X_train_full[combination[1]]
col1 = np.array(col1).reshape(-1,1)
col2 = np.array(col2).reshape(-1,1)
col1 = imputer.fit_transform(col1)
col2 = imputer.fit_transform(col2)
new_vals = col1 + '_' + col2
OneHot = OneHotEncoder()
interactions[new_col_name] = OneHot.fit_transform(new_vals)
interactions = interactions.reset_index(drop = True)
#create new dataframe with new features included
new_df = X_train_full.join(interactions)
#do the same for the test file
interactions2 = pd.DataFrame(index= y_test)
for combination in combinations(cat_columns1,2):
imputer = SimpleImputer(strategy='constant')
new_col_name = '_'.join(combination)
col1 = y_test[combination[0]]
col2 = y_test[combination[1]]
col1 = np.array(col1).reshape(-1,1)
col2 = np.array(col2).reshape(-1,1)
col1 = imputer.fit_transform(col1)
col2 = imputer.fit_transform(col2)
new_vals = col1 + '_' + col2
OneHot = OneHotEncoder()
interactions2[new_col_name] = OneHot.fit_transform(new_vals)
interactions2[new_col_name] = new_vals
interactions2 = interactions2.reset_index(drop = True)
y_test = y_test.join(interactions2)
#get names of cat columns (with new features added)
cat_columns = [cname for cname in new_df.columns if
new_df[cname].dtype == "object"]
# Select numerical columns
num_columns = [cname for cname in new_df.columns if
new_df[cname].dtype in ['int64', 'float64']]
#set up pipeline
numerical_transformer = SimpleImputer(strategy = 'constant')
categorical_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='constant')),
('onehot', OneHotEncoder(handle_unknown='ignore'))
])
preprocessor = ColumnTransformer(
transformers=[
('num', numerical_transformer, num_columns),
('cat', categorical_transformer, cat_columns)
])
model = XGBClassifier()
my_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
('model', model)
])
#fit model
my_pipeline.fit(new_df,y)
https://www.kaggle.com/c/titanic/ данные
Я не могу понять, что вызывает эту проблему. Любая помощь будет очень признательна.
Подробнее здесь: https://stackoverflow.com/questions/625 ... se-when-at