import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

#1. Data
x, y = load_iris(return_X_y=True)

# default test_size=0.25; shuffle with a fixed seed for reproducibility
x_train, x_test, y_train, y_test = train_test_split(
    x, y, shuffle=True, random_state=337,
)

n_splits = 5
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=337)
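
# Assumption (not in the original): plain KFold ignores class labels; for
# classification, StratifiedKFold keeps the class ratio of y in every fold.
# A minimal alternative-splitter sketch; the kfold above is still used below.
from sklearn.model_selection import StratifiedKFold
skfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=337)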

#2. Model
model = SVC()   # RBF kernel with the defaults C=1.0, gamma='scale'

#3, 4. Train, evaluate, predict (sklearn has no separate compile step)

score = cross_val_score(model, x_train, y_train, cv=kfold)  # default scoring: accuracy
print('cross_val_score :', score,
      '\nCV mean score :', round(np.mean(score), 4))
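
# A hedged sketch (not in the original): cross_validate evaluates several
# metrics in one pass; 'f1_macro' is an illustrative extra metric here.
from sklearn.model_selection import cross_validate
cv_results = cross_validate(model, x_train, y_train, cv=kfold,
                            scoring=['accuracy', 'f1_macro'])
print('cross_validate mean accuracy :', round(np.mean(cv_results['test_accuracy']), 4))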

# NOTE: cross_val_predict here splits x_test itself into folds and refits the
# model on those small folds; x_train is never used for these predictions.
# A conventional holdout check follows at the end of the script.
y_predict = cross_val_predict(model, x_test, y_test, cv=kfold)
print('cross_val_predict ACC :', accuracy_score(y_test, y_predict))

# cross_val_score : [1.         0.91304348 0.90909091 1.         1.        ]
# CV mean score : 0.9644
# cross_val_predict ACC : 0.9473684210526315
# This lands below the CV mean because each out-of-fold prediction comes from
# a model fitted on only ~30 test samples, not on the training data at all.
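
# A minimal sketch (assumption, not in the original): the conventional holdout
# check: fit on the full training split, then score the untouched test split,
# so the model actually learns from x_train before predicting x_test.
model.fit(x_train, y_train)
print('holdout ACC :', accuracy_score(y_test, model.predict(x_test)))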