import numpy as np
from sklearn.datasets import load_diabetes, load_breast_cancer, load_iris, load_digits, load_wine, fetch_covtype, fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, accuracy_score
import warnings
warnings.filterwarnings('ignore')
import time

x, y = load_iris(return_X_y=True)

x_train, x_test, y_train, y_test = train_test_split(
    x,y, train_size=0.8, random_state = 337, shuffle = True,
)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
from catboost import CatBoostClassifier, CatBoostRegressor
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')
import time

from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
from catboost import CatBoostClassifier

x, y = load_iris(return_X_y=True)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=337, shuffle=True
)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

search_space = {
    'learning_rate': hp.uniform('learning_rate', 0.001, 0.2),
    'depth': hp.quniform('depth', 3, 16, 1),
    'one_hot_max_size': hp.quniform('one_hot_max_size', 24, 64, 1),
    'min_data_in_leaf': hp.quniform('min_data_in_leaf', 10, 200, 1),
    'bagging_temperature': hp.uniform('bagging_temperature', 0.5, 1),
    'random_strength': hp.uniform('random_strength', 0.5, 1),
    'l2_leaf_reg': hp.uniform('l2_leaf_reg', 0.001, 10)
}

def lgb_hamsu(search_space):
    params = {
        'iterations': 10,
        'learning_rate': search_space['learning_rate'],
        'depth': int(search_space['depth']),
        'l2_leaf_reg': search_space['l2_leaf_reg'],
        'bagging_temperature': search_space['bagging_temperature'],
        'random_strength': search_space['random_strength'],
        'one_hot_max_size': int(search_space['one_hot_max_size']),
        'min_data_in_leaf': int(search_space['min_data_in_leaf']),
        'task_type': 'CPU',
        'logging_level': 'Silent'
    }
    
    
    model = CatBoostClassifier(**params)

    model.fit(
        x_train, y_train,
        eval_set=(x_test, y_test),
        verbose=0,
        early_stopping_rounds=50
    )

    y_predict = model.predict(x_test)
    result = -(accuracy_score(y_test, y_predict))

    return result

trial_val = Trials()

best = fmin(
    fn=lgb_hamsu,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trial_val,
    rstate=np.random.default_rng(seed=10)
)
print('best', best)

results_val = pd.DataFrame(trial_val.vals)
results_df = pd.DataFrame(trial_val.results)

results = pd.concat([results_df, results_val], axis=1)
results = results.drop(['status'], axis=1)

min_loss_idx = results['loss'].idxmin()
min_loss_row = results.loc[min_loss_idx]
print(results)
print(results.min())

# loss                   -0.966667
# bagging_temperature     0.500120
# depth                   3.000000
# l2_leaf_reg             0.151843
# learning_rate           0.002697
# min_data_in_leaf       16.000000
# one_hot_max_size       25.000000
# random_strength         0.509109
# dtype: float64

m63_HyperOpt08_catboost_california