import os
import re

import numpy as np
import pandas as pd  # NOTE(review): unused in the visible code — confirm before removing
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, LSTM, Flatten, Dropout  # NOTE(review): LSTM/Flatten unused here
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# 1. Data

# IUPAC-style nucleotide/ambiguity codes mapped to integer tokens (0 is padding).
# NOTE(review): the original chained str.replace() calls encoded n/s/m as the
# two-character strings '10'/'11'/'12', which [int(i) for i in data] then split
# into TWO tokens each (e.g. 'n' -> [1, 0], indistinguishable from 'a' followed
# by a padding zero). Mapping each symbol to a single integer fixes that.
_BASE_CODES = {
    'a': 1, 't': 2, 'c': 3, 'g': 4, 'y': 5, 'w': 6,
    'r': 7, 'k': 8, 'v': 9, 'n': 10, 's': 11, 'm': 12,
}


def bring(path, maxlen=None):
    """Load every ``.txt`` sequence file in *path* into one integer matrix.

    Each file is read, newlines/spaces and digits (position numbers) are
    stripped, and the remaining lowercase symbols are encoded as integers via
    ``_BASE_CODES``. Every sequence is pre-truncated (keeping the last
    *maxlen* symbols) and pre-padded with zeros, matching the original
    ``pad_sequences(padding='pre', truncating='pre')`` behavior.

    Parameters
    ----------
    path : str
        Directory containing the ``.txt`` sequence files.
    maxlen : int, optional
        Target sequence length. Defaults to the module-level ``maxlen`` so
        existing ``bring(path)`` callers keep working.

    Returns
    -------
    numpy.ndarray
        Shape ``(n_files, maxlen)``, integer dtype. Empty directories yield
        shape ``(0, maxlen)`` instead of crashing as the original did.

    Raises
    ------
    KeyError
        On a character outside ``_BASE_CODES`` (the original raised
        ``ValueError`` via ``int()`` in the same situation).
    """
    if maxlen is None:
        # Backward-compatible fall-through to the module-level constant.
        maxlen = globals()['maxlen']
    rows = []
    # sorted() makes row order deterministic; os.listdir order is unspecified.
    for filename in sorted(os.listdir(path)):
        if not filename.endswith('.txt'):
            continue
        with open(os.path.join(path, filename), 'r') as f:
            seq = f.read().replace('\n', '').replace(' ', '')
        seq = re.sub(r"[0-9]", "", seq)  # drop digits embedded in the file
        codes = [_BASE_CODES[ch] for ch in seq]
        codes = codes[-maxlen:]  # pre-truncate: keep the LAST maxlen symbols
        row = np.zeros(maxlen, dtype=np.int64)
        if codes:
            row[-len(codes):] = codes  # pre-pad: zeros on the left
        rows.append(row)
    if not rows:
        return np.zeros((0, maxlen), dtype=np.int64)
    return np.stack(rows, axis=0)

# Fixed length every sample is pre-padded/pre-truncated to inside bring().
maxlen = 800

# Each species directory gets the class label equal to its position in this
# list (homo_sapiens=0 ... Sus_scrofa=8), matching the original per-variable
# labels. The nine copy-pasted load blocks are replaced by one loop.
_SPECIES_DIRS = [
    './_data/pp/homo_sapiens/',
    './_data/pp/Culex/',
    './_data/pp/Haemagogus/',
    './_data/pp/Ovis_aries/',
    './_data/pp/Mus_musculus/',
    './_data/pp/Sciuridae/',
    './_data/pp/Canis_lupus/',
    './_data/pp/Vulpes_vulpes/',
    './_data/pp/Sus_scrofa/',
]

_x_parts = []
_y_parts = []
for _label, _dir in enumerate(_SPECIES_DIRS):
    _part = bring(_dir)
    _x_parts.append(_part)
    # One integer class label per loaded sequence.
    _y_parts.append(np.full(_part.shape[0], _label, dtype=np.int64))

# x: (total_sequences, maxlen); y: (total_sequences,) integer labels 0..8.
x = np.concatenate(_x_parts, axis=0)
y = np.concatenate(_y_parts, axis=0)

# One-hot encode the integer class labels (9 classes -> 9-wide rows).
y = to_categorical(y)

# 70/30 shuffled split; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7, random_state=123, shuffle=True)

# 2. Model

# Fully-connected classifier over the padded integer sequences.
# Fix: the original never specified an input shape, so model.summary() was
# called on an unbuilt model and Keras raises "model has not yet been built".
# input_shape=(maxlen,) matches the (n, maxlen) matrices produced by bring().
# NOTE(review): the Dense(64) layers without an activation are linear, and
# consecutive linear layers collapse mathematically — confirm this is intended
# before changing it (left as-is to preserve behavior).
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(maxlen,)))
model.add(Dropout(0.1))
model.add(Dense(64))
model.add(Dropout(0.1))
model.add(Dense(64))
model.add(Dropout(0.1))
model.add(Dense(64))
model.add(Dropout(0.1))
model.add(Dense(64))
model.add(Dropout(0.1))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(64))
model.add(Dropout(0.1))
model.add(Dense(64))
model.add(Dropout(0.1))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(64, activation='relu'))
model.add(Dense(len(y[0]), activation='softmax'))  # one output unit per class
model.summary()

# 3. Compile & train

# Multi-class setup: categorical_crossentropy pairs with the softmax output
# and the one-hot labels produced by to_categorical above.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],  # fix: the documented API takes a LIST of metrics, not a bare string
)

# Stop when validation accuracy plateaus and roll back to the best weights.
es = EarlyStopping(
    monitor='val_acc',
    patience=100,
    mode='auto',
    restore_best_weights=True,
)

# NOTE(review): batch_size=3 is unusually small — confirm it is intentional
# (left unchanged to preserve training behavior).
model.fit(
    x_train,
    y_train,
    epochs=1000,
    batch_size=3,
    validation_split=0.2,
    callbacks=[es],
)