import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
# 1. Data
# Load the iris dataset and show its feature names (the pandas counterpart
# of `feature_names` is `columns`).
datasets = load_iris()
print(datasets.feature_names)

# Feature matrix and label vector.
x = datasets.data
y = datasets['target']

# Build a DataFrame whose columns are labelled with the feature names,
# then append the labels as a new column.
df = pd.DataFrame(data=x, columns=datasets.feature_names)
df['Target(Y)'] = y
print(df)

print("==================== 상관계수 히트 맵 =====================")
# Pairwise correlation between every column. Correlation should never be
# trusted blindly; look at each feature's correlation with Y first.
print(df.corr())
# Sample output of the call above:
# sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) Target(Y)
# sepal length (cm) 1.000000 -0.117570 0.871754 0.817941 0.782561
# sepal width (cm) -0.117570 1.000000 -0.428440 -0.366126 -0.426658
# petal length (cm) 0.871754 -0.428440 1.000000 0.962865 0.949035
# petal width (cm) 0.817941 -0.366126 0.962865 1.000000 0.956547
# Target(Y) 0.782561 -0.426658 0.949035 0.956547 1.000000
import matplotlib.pyplot as plt
import seaborn as sns
# Optional: uncomment to enlarge the fonts used in the figure.
# sns.set(font_scale=1.2)

# Render the correlation matrix of `df` as a heatmap with square cells,
# the coefficient written inside each cell, and a colour bar.
sns.heatmap(data=df.corr(), square=True, annot=True, cbar=True)
plt.show()
# NOTE: the script originally ended with a bare identifier (apparently a
# file/lesson label, "m30 correlation coefficient 2"). It was evaluated as
# an expression after the plot window closed and raised NameError, so it
# has been removed.