
Другие методы предварительной обработки сработали (то есть нет проблем со структурой набора данных и метриками).
Есть ли у кого-нибудь опыт работы с этой библиотекой и этой функцией?
Буду рад любым советам!
**Чтобы попробовать это самостоятельно (на основе немецкого кредитного набора данных):**
Библиотеки
!pip install aif360
from aif360.algorithms.preprocessing import LFR
import pandas as pd
import numpy as np
import scipy.optimize as optim
from aif360.algorithms import Transformer as TR
from aif360.algorithms.preprocessing.lfr_helpers import helpers as lfr_helpers
from aif360.algorithms.preprocessing import Reweighing
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing import LFR, Reweighing
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, roc_curve, auc
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
#import tensorflow as tf
from aif360.datasets import GermanDataset, BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import DisparateImpactRemover
from aif360.algorithms.preprocessing import OptimPreproc
from IPython.display import Markdown, display
import cvxpy as cp
import numpy as np
from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools
# Load the raw credit table from disk and evaluate a preview of its first rows.
data = pd.read_csv('original_data.csv')
data.head()

# --- Example: fairness analysis on the 'Gender' attribute ---
# Wrap the frame in an aif360 StandardDataset: 'Creditability' is the label
# (value 1 is favorable), 'Gender' is the protected attribute (value 0 is
# declared privileged), and three columns are dropped from the features.
dataset_gender = StandardDataset(
    data,
    label_name='Creditability',
    favorable_classes=[1],
    protected_attribute_names=['Gender'],
    privileged_classes=[[0]],
    features_to_drop=['Age', 'Foreign_worker', 'Age_grouped'],
)

# Shuffled split at the 0.7 mark (presumably 70% train / 30% test -- confirm
# against the aif360 docs). No seed is passed, so the split differs per run.
dataset_orig_train_gender, dataset_orig_test_gender = dataset_gender.split(
    [0.7], shuffle=True
)

# Group definitions consumed by the metric below (and by later cells).
privileged_groups_gender = [{'Gender': 0}]
unprivileged_groups_gender = [{'Gender': 1}]

# Baseline fairness metrics on the untouched training split.
metric_orig_train_gender = BinaryLabelDatasetMetric(
    dataset_orig_train_gender,
    unprivileged_groups=unprivileged_groups_gender,
    privileged_groups=privileged_groups_gender,
)
print("SPD gender= %f" % metric_orig_train_gender.statistical_parity_difference())
print("DI gender = %f" % metric_orig_train_gender.disparate_impact())
КОД ДЛЯ ОПТИМИЗИРОВАННОЙ ПРЕДВАРИТЕЛЬНОЙ ОБРАБОТКИ:
# Settings handed to OptimPreproc; the numeric values are kept as posted.
# NOTE(review): aif360's OptimPreproc.fit appears to also read
# optim_options['distortion_fun'] (a callable scoring old -> new feature
# changes), which is absent here -- confirm against the library docs, as a
# missing key would make fit() fail. A suitable function depends on this
# dataset's columns, so none is supplied here.
optim_options = {
    "epsilon": 0.05,
    "clist": [0.99, 1.99, 2.99],
    "dlist": [0.1, 0.05, 0],
}

# Build the transformer, then fit it on the gender training split.
OP = OptimPreproc(OptTools, optim_options)
OP = OP.fit(dataset_orig_train_gender)

# Transform the same training split (transform_Y=True presumably lets labels
# be changed as well -- confirm), then align the result with the original
# dataset via align_datasets.
dataset_preprocess_train_gender = OP.transform(
    dataset_orig_train_gender, transform_Y=True
)
dataset_preprocess_train_gender = dataset_orig_train_gender.align_datasets(
    dataset_preprocess_train_gender
)
Подробнее здесь: https://stackoverflow.com/questions/743 ... processing
Мобильная версия