import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer, load_diabetes
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from xgboost import XGBClassifier, XGBRegressor
import time
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, f1_score

# 1. Data
# Load the breast-cancer dataset as a feature matrix `x` and label vector `y`.
datasets = load_breast_cancer()
x = datasets.data
y = datasets.target
# print(x.shape, y.shape) # (569, 30) (569,)
# print(type(x))          # <class 'numpy.ndarray'>
print(np.unique(y, return_counts=True)) # (array([0, 1]), array([212, 357], dtype=int64))

# Stratified 80/20 split so both partitions keep the original class ratio.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, shuffle=True, random_state=123, train_size=0.8, stratify=y
)
print(pd.Series(y_train).value_counts())
# 1    285
# 0    170

# Scale BEFORE oversampling: SMOTE picks nearest neighbours by distance, so
# running it on raw features lets the large-magnitude columns dominate the
# neighbour search. Fitting the scaler here also means its statistics come
# from real training rows only; the test set is transformed, never fitted.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

print("#========================== SMOTE 적용 후 ============================ ")
# Oversample the minority class on the training split only (the test split is
# left untouched), using 3 nearest neighbours and a fixed seed.
smote = SMOTE(random_state=123, k_neighbors=3)
x_train, y_train = smote.fit_resample(x_train, y_train)

print(pd.Series(y_train).value_counts())
# 1    285
# 0    285

# Shuffled, seeded, stratified 5-fold splitter handed to the grid search.
n_splits = 5
kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=123)

# Single-candidate grid: every hyperparameter is pinned to exactly one value,
# so the search evaluates one configuration across the folds.
# NOTE(review): the colsample_* values of 0 fall outside the (0, 1] range
# given in the XGBoost parameter docs -- confirm this is intended.
parameters = {
    'n_estimators': [500],
    'learning_rate': [0.1],
    'max_depth': [3],
    'gamma': [0],
    'min_child_weight': [0],
    'subsample': [0.2],
    'colsample_bytree': [0],
    'colsample_bylevel': [0],
    'colsample_bynode': [0],
    'reg_alpha': [0],
    'reg_lambda': [1],
}

# 2. Model
# Seeded XGBoost classifier wrapped in a grid search that uses every CPU core.
xgb = XGBClassifier(random_state=123)
model = GridSearchCV(
    estimator=xgb,
    param_grid=parameters,
    cv=kfold,
    n_jobs=-1,
)

# 3. Training
# NOTE(review): x_train was resampled with SMOTE before this point, so each
# validation fold can contain synthetic rows -- CV scores may be optimistic.
model.fit(x_train, y_train)

# 4. Evaluation and prediction
# Predict once and derive both metrics from the same predictions. The search
# was built without a `scoring` argument, so model.score() would only repeat
# the prediction pass to compute this same accuracy.
y_predict = model.predict(x_test)
score = accuracy_score(y_test, y_predict)
print('acc : ', score)
print('f1_score : ', f1_score(y_test, y_predict))

# Recorded results from earlier runs:
# Before applying SMOTE
# acc :  0.9649122807017544
# f1_score :  0.9726027397260274

# After applying SMOTE
# acc :  0.9912280701754386
# f1_score :  0.993006993006993