- 106 наблюдений равны 0 (недействительно)
- 44 наблюдения равны 1 (действительно)
Вот это весь мой код:
Код: Выделить всё
# Logistic Regression
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import scipy
from scipy.stats import spearmanr
from pylab import rcParams
import seaborn as sb
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import scale
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import preprocessing
address = "dummy_csv-150.csv"
trades = pd.read_csv(address)
trades.columns=['location','app','el','rp','rule1','rule2','rule3','validity','transactions']
trades.head()
trade_data = trades.ix[:,(1,8)].values
trade_data_names = ['app','transactions']
# set dependent/response variable
y = trades.ix[:,7].values
# center around the data mean
X= scale(trade_data)
LogReg = LogisticRegression()
LogReg.fit(X,y)
print(LogReg.score(X,y))
y_pred = LogReg.predict(X)
from sklearn.metrics import classification_report
print(classification_report(y,y_pred))
log_prediction = LogReg.predict_log_proba(
[
[2, 14],[3,1], [1, 503],[1, 122],[1, 101],[1, 610],[1, 2120],[3, 85],[3, 91],[2, 167],[2, 553],[2, 144]
])
prediction = LogReg.predict([[2, 14],[3,1], [1, 503],[1, 122],[1, 101],[1, 610],[1, 2120],[3, 85],[3, 91],[2, 167],[2, 553],[2, 144]])
Код: Выделить всё
LogReg = LogisticRegression()
LogReg.fit(X,y)
Код: Выделить всё
X = array([[1, 345],
[1, 222],
[1, 500],
[2, 120]]....)
Нормализованный X, который передается в модель, в о т т а к о й : < / p > < b r / > < c o d e > [ [ - 1 . 6 7 1 7 7 6 5 9 0 . 1 4 3 9 6 5 0 3 ] < b r / > [ - 1 . 6 7 1 7 7 6 5 9 - 0 . 1 4 5 3 8 9 3 2 ] < b r / > [ - 1 . 6 7 1 7 7 6 5 9 0 . 5 0 8 5 9 8 5 6 ] < b r / > [ - 1 . 6 7 1 7 7 6 5 9 - 0 . 3 8 5 3 4 1 7 ] < b r / > [ - 1 . 6 7 1 7 7 6 5 9 - 0 . 4 3 2 3 9 1 1 9 ] < b r / > [ - 1 . 6 7 1 7 7 6 5 9 0 . 7 4 3 8 4 6 ] < b r / > [ - 1 . 6 7 1 7 7 6 5 9 4 . 3 2 1 9 5 9 5 3 ] < b r / > [ 0 . 9 5 6 5 7 8 0 5 - 0 . 4 6 0 6 2 0 8 9 ] < b r / > [ 0 . 9 5 6 5 7 8 0 5 - 0 . 4 5 5 9 1 5 9 4 ] < b r / > [ 0 . 9 5 6 5 7 8 0 5 - 0 . 3 7 8 2 8 4 2 8 ] < b r / > [ 0 . 9 5 6 5 7 8 0 5 - 0 . 5 2 8 8 4 2 6 4 ] < b r / > [ 0 . 9 5 6 5 7 8 0 5 - 0 . 2 0 4 2 0 1 1 8 ] < b r / > [ 0 . 9 5 6 5 7 8 0 5 - 0 . 6 3 7 0 5 6 4 6 ] < b r / > [ 0 . 9 5 6 5 7 8 0 5 - 0 . 6 5 5 8 7 6 2 6 ] < b r / > [ 0 . 95657805 -0.66763863]
[-0.35759927 -0.25125067]
[-0.35759927 0.60975496]
[-0.35759927 -0.33358727]
[-0.35759927 -0.20420118]
[-0.35759927 1.37195666]
[-0.35759927 0.27805607]
[-0.35759927 0.09456307]
[-0.35759927 0.03810368]
[-0.35759927 -0.41121892]
[-0.35759927 -0.64411389]
[-0.35759927 -0.69586832]
[ 0.95657805 -0.57353966]
[ 0.95657805 -0.57353966]
[ 0.95657805 -0.53825254]
[ 0.95657805 -0.53354759]
[ 0.95657805 -0.52413769]
[ 0.95657805 -0.57589213]
[ 0.95657805 0.03810368]
[ 0.95657805 -0.66293368]
[ 0.95657805 2.86107294]
[-1.67177659 0.14396503]
[-1.67177659 -0.14538932]
[-1.67177659 0.50859856]
[-1.67177659 -0.3853417 ]
[-1.67177659 -0.43239119]
[-1.67177659 0.743846 ]
[-1.67177659 4.32195953]
[ 0.95657805 -0.46062089]
[ 0.95657805 -0.45591594]
[ 0.95657805 -0.37828428]
[ 0.95657805 -0.52884264]
[ 0.95657805 -0.20420118]
[ 0.95657805 -0.63705646]
[ 0.95657805 -0.65587626]
[ 0.95657805 -0.66763863]
[-0.35759927 -0.25125067]
[-0.35759927 0.60975496]
[-0.35759927 -0.33358727]
[-0.35759927 -0.20420118]
[-0.35759927 1.37195666]
[-0.35759927 0.27805607]
[-0.35759927 0.09456307]
[-0.35759927 0.03810368]
[-0.35759927 -0.41121892]
[-0.35759927 -0.64411389]
[-0.35759927 -0.69586832]
[ 0.95657805 -0.57353966]
[ 0.95657805 -0.57353966]
[ 0.95657805 -0.53825254]
[ 0.95657805 -0.53354759]
[ 0.95657805 -0.52413769]
[ 0.95657805 -0.57589213]
[ 0.95657805 0.03810368]
[ 0.95657805 -0.66293368]
[ 0.95657805 2.86107294]
[-1.67177659 0.14396503]
[-1.67177659 -0.14538932]
[-1.67177659 0.50859856]
[-1.67177659 -0.3853417 ]
[-1.67177659 -0.43239119]
[-1.67177659 0.743846 ]
[-1.67177659 4.32195953]
[ 0.95657805 -0.46062089]
[ 0.95657805 -0.45591594]
[ 0.95657805 -0.37828428]
[ 0.95657805 -0.52884264]
[ 0.95657805 -0.20420118]
[ 0.95657805 -0.63705646]
[ 0.95657805 -0.65587626]
[ 0.95657805 -0.66763863]
[-0.35759927 -0.25125067]
[-0.35759927 0.60975496]
[-0.35759927 -0.33358727]
[-0.35759927 -0.20420118]
[-0.35759927 1.37195666]
[-0.35759927 0.27805607]
[-0.35759927 0.09456307]
[-0.35759927 0.03810368]
[-0.35759927 -0.41121892]
[-0.35759927 -0.64411389]
[-0.35759927 -0.69586832]
[ 0.95657805 -0.57353966]
[ 0.95657805 -0.57353966]
[ 0.95657805 -0.53825254]
[ 0.95657805 -0.53354759]
[ 0.95657805 -0.52413769]
[ 0.95657805 -0.57589213]
[ 0.95657805 0.03810368]
[ 0.95657805 -0.66293368]
[ 0.95657805 2.86107294]
[-1.67177659 0.14396503]
[-1.67177659 -0.14538932]
[-1.67177659 0.50859856]
[-1.67177659 -0.3853417 ]
[-1.67177659 -0.43239119]
[-1.67177659 0.743846 ]
[-1.67177659 4.32195953]
[ 0.95657805 -0.46062089]
[ 0.95657805 -0.45591594]
[ 0.95657805 -0.37828428]
[ 0.95657805 -0.52884264]
[ 0.95657805 -0.20420118]
[ 0.95657805 -0.63705646]
[ 0.95657805 -0.65587626]
[ 0.95657805 -0.66763863]
[-0.35759927 -0.25125067]
[-0.35759927 0.60975496]
[-0.35759927 -0.33358727]
[-0.35759927 -0.20420118]
[-0.35759927 1.37195666]
[-0.35759927 0.27805607]
[-0.35759927 0.09456307]
[-0.35759927 0.03810368]
[-0.35759927 -0.41121892]
[-0.35759927 -0.64411389]
[-0.35759927 -0.69586832]
[ 0.95657805 -0.57353966]
[ 0.95657805 -0.57353966]
[ 0.95657805 -0.53825254]
[ 0.95657805 -0.53354759]
[ 0.95657805 -0.52413769]
[ 0.95657805 -0.57589213]
[ 0.95657805 0.03810368]
[ 0.95657805 -0.66293368]
[ 0.95657805 2.86107294]
[-0.35759927 0.60975496]
[-0.35759927 -0.33358727]
[-0.35759927 -0.20420118]
[-0.35759927 1.37195666]
[-0.35759927 0.27805607]
[-0.35759927 0.09456307]
[-0.35759927 0.03810368]]
и Y:
Код: Выделить всё
[0 0 0 0 0 0 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 0 0
0 0 0 0 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0
0 0 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0
1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0]
Код: Выделить всё
precision recall f1-score support
0 0.78 1.00 0.88 98
1 1.00 0.43 0.60 49
avg / total 0.85 0.81 0.78 147
Когда я запускаю model.predict_log_proba(test_data), я получаю интервалы вероятности, которые выглядят следующим образом: это:
Код: Выделить всё
array([[ -1.10164032e+01, -1.64301095e-05],
[ -2.06326947e+00, -1.35863187e-01],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00],
[ -inf, 0.00000000e+00]])
р>
Код: Выделить всё
[2, 14],[3,1], [1, 503],[1, 122],[1, 101],[1, 610],[1, 2120],[3, 85],[3, 91],[2, 167],[2, 553],[2, 144]
Что может быть не так? Данные соответствуют всем предположениям логистической модели (оба предиктора независимы, выходные данные являются двоичными, отсутствуют пропущенные точки данных).
Подробнее здесь: https://stackoverflow.com/questions/519 ... ys-predict