딥러닝 (Seq to Seq)

짬그브·2025년 4월 14일

seq to seq 예제 1

import torch
import torch.nn as nn

# Encode both strings as lists of Unicode code points; each character's
# code point is used directly as its token id.
x = list(map(ord, 'hello'))
y = list(map(ord, 'hola'))
print(x)
'''
[104, 101, 108, 108, 111]
'''

print(y)
'''
[104, 111, 108, 97]
'''

# Number of rows in the embedding table and width of the output layer.
# Every token id above is below 256.
vocab_size = 256

# 1-D LongTensors of token ids: the source and the target sequence.
x_data = torch.LongTensor(x)
y_data = torch.LongTensor(y)

class Seq2SeqNet(nn.Module):
    """Minimal GRU encoder-decoder for a single (unbatched) token sequence.

    One embedding table is shared by the encoder and the decoder.  The
    encoder reads the whole source sequence; its final hidden state seeds
    the decoder, which is run one step at a time with teacher forcing.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create the shared embedding, both GRUs and the output projection.

        Args:
            vocab_size: number of distinct token ids (embedding rows and
                output logits).
            hidden_size: embedding width and GRU hidden width.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.encoder = nn.GRU(hidden_size, hidden_size)
        self.decoder = nn.GRU(hidden_size, hidden_size)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def init_state(self, batch_size=1):
        """Return an all-zero GRU state of shape (1, batch_size, hidden_size).

        The state is created with the dtype and device of the model weights
        so the module keeps working after ``.to(device)``.
        """
        weight = self.embedding.weight
        return torch.zeros(1, batch_size, self.hidden_size,
                           dtype=weight.dtype, device=weight.device)

    def forward(self, inputs, targets):
        """Return logits of shape (len(targets), vocab_size).

        Args:
            inputs: 1-D LongTensor of source token ids.
            targets: 1-D LongTensor of target token ids, used for teacher
                forcing (step ``i + 1`` is fed ``targets[i]``).
        """
        initstate = self.init_state()
        # (src_len,) -> (src_len, 1, hidden): sequence-first, batch of one.
        embedded = self.embedding(inputs).unsqueeze(dim=1)
        _, encoder_state = self.encoder(embedded, initstate)

        decoder_state = encoder_state
        # Token id 0 is used as the "begin" symbol for the first step.
        decoder_input = torch.zeros(1, dtype=torch.long, device=inputs.device)

        outputs = []
        for i in range(targets.size(0)):
            step_input = self.embedding(decoder_input).unsqueeze(dim=1)
            decoder_output, decoder_state = self.decoder(step_input, decoder_state)
            outputs.append(self.fc(decoder_output))
            # Teacher forcing: feed the ground-truth token to the next step.
            decoder_input = targets[i:i + 1]

        if not outputs:
            # Empty target: return an empty logits matrix instead of
            # crashing inside torch.stack.
            return torch.zeros(0, self.fc.out_features,
                               dtype=self.fc.weight.dtype,
                               device=self.fc.weight.device)

        # Each step yields (1, 1, vocab).  Reshape explicitly rather than
        # calling a bare squeeze(), which would also drop the time axis
        # when the target has a single token.
        return torch.stack(outputs).view(len(outputs), -1)

# Build the model with a hidden size of 16, the loss and the optimizer.
seq2seq = Seq2SeqNet(vocab_size, 16)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(seq2seq.parameters(), lr=1e-3)

# Fit the single ('hello' -> 'hola') pair for 1000 steps.
for epoch in range(1000):
    # Forward pass; y_data is passed both as the teacher-forcing input
    # and, below, as the classification target.
    hypothesis = seq2seq(x_data, y_data)
    loss = loss_func(hypothesis, y_data)
    # Clear stale gradients, back-propagate, then update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Every 50 steps, report the loss and the current greedy prediction.
    if epoch % 50 == 0:
        print(f'epoch{epoch+1}, loss:{loss.item():.4f}')

        # Index of the highest-scoring class at each step, converted back
        # to characters with chr().
        _, top = hypothesis.data.topk(k=1, dim=1)
        print([chr(c) for c in top.squeeze().numpy().tolist()])

        '''
        epoch1, loss:5.6597
        ['\x18', '\x18', '/', 'ñ']
        epoch51, loss:4.0978
        ['h', 'o', 'l', 'l']
        epoch101, loss:1.9054
        ['h', 'o', 'l', 'l']
        epoch151, loss:0.9075
        ['h', 'o', 'l', 'a']
        epoch201, loss:0.5561
        ['h', 'o', 'l', 'a']
        epoch251, loss:0.3881
        ['h', 'o', 'l', 'a']
        epoch301, loss:0.2807
        ['h', 'o', 'l', 'a']
        epoch351, loss:0.2125
        ['h', 'o', 'l', 'a']
        epoch401, loss:0.1683
        ['h', 'o', 'l', 'a']
        epoch451, loss:0.1379
        ['h', 'o', 'l', 'a']
        epoch501, loss:0.1158
        ['h', 'o', 'l', 'a']
        epoch551, loss:0.0992
        ['h', 'o', 'l', 'a']
        epoch601, loss:0.0861
        ['h', 'o', 'l', 'a']
        epoch651, loss:0.0757
        ['h', 'o', 'l', 'a']
        epoch701, loss:0.0672
        ['h', 'o', 'l', 'a']
        epoch751, loss:0.0602
        ['h', 'o', 'l', 'a']
        epoch801, loss:0.0542
        ['h', 'o', 'l', 'a']
        epoch851, loss:0.0492
        ['h', 'o', 'l', 'a']
        epoch901, loss:0.0448
        ['h', 'o', 'l', 'a']
        epoch951, loss:0.0410
        ['h', 'o', 'l', 'a']
                '''

seq to seq 예제 2

import re
import unicodedata
import numpy as np
from collections import Counter
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset

# Maximum number of corpus lines that load_preprocessed_data() reads.
num_samples = 33000

def unicode_to_ascii(s):
  """Return *s* with combining accent marks removed.

  The text is decomposed with NFD so that an accented letter becomes a
  base letter followed by a combining mark (Unicode category 'Mn'); the
  marks are then dropped.  Example: 'déjà diné' -> 'deja dine'.
  """
  decomposed = unicodedata.normalize('NFD', s)
  kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
  return ''.join(kept)


def preprocess_sentence(sent):
  """Lower-case, de-accent and normalise one sentence for tokenisation.

  The substitutions are applied in order:
    1. put a space in front of each of ? . ! , ¿
       (e.g. "he is a boy." -> "he is a boy .");
    2. replace every run of characters other than ASCII letters, '!', '.'
       and '?' with one space -- commas and '¿' are removed here;
    3. collapse consecutive whitespace into a single space.

  Leading or trailing spaces are not stripped.
  """
  substitutions = (
      (r"([?.!,¿])", r" \1"),
      (r"[^a-zA-Z!.?]+", r" "),
      (r"\s+", " "),
  )
  text = unicode_to_ascii(sent.lower())
  for pattern, replacement in substitutions:
    text = re.sub(pattern, replacement, text)
  return text

def load_preprocessed_data():
  """Read up to ``num_samples`` sentence pairs from data/fra.txt.

  Every line must hold exactly three tab-separated fields; the first is
  used as the source sentence, the second as the target sentence and the
  third is ignored.  Both sentences go through preprocess_sentence() and
  are split on whitespace.

  Returns:
    (encoder_input, decoder_input, decoder_target) -- three parallel lists
    of token lists.  The decoder input is the target prefixed with
    '<sos>'; the decoder target is the target suffixed with '<eos>'.
  """
  sources, targets_in, targets_out = [], [], []
  with open("data/fra.txt", "r", encoding='UTF-8') as corpus:
    for line_no, raw_line in enumerate(corpus):
      source_text, target_text, _ = raw_line.strip().split('\t')
      target_text = preprocess_sentence(target_text)

      sources.append(preprocess_sentence(source_text).split())
      targets_in.append(("<sos> " + target_text).split())
      targets_out.append((target_text + " <eos>").split())

      # Stop once num_samples lines have been consumed.
      if line_no == num_samples - 1:
        break

  return sources, targets_in, targets_out
########################################################################

# Preprocessing smoke test: print one English and one French sentence
# before and after preprocess_sentence().
en_sent = u"Have you had dinner?"
fr_sent = u"Avez-vous déjà diné?"

print('전처리 전 영어 문장 :', en_sent)
print('전처리 후 영어 문장 :',preprocess_sentence(en_sent))
print('전처리 전 프랑스어 문장 :', fr_sent)
print('전처리 후 프랑스어 문장 :', preprocess_sentence(fr_sent))
print()

# Load the tokenised corpus: encoder inputs, decoder inputs ('<sos>' first)
# and decoder labels ('<eos>' last), then show the first five of each.
sents_en_in, sents_fra_in, sents_fra_out = load_preprocessed_data()
print('인코더의 입력 :',sents_en_in[:5])
print('디코더의 입력 :',sents_fra_in[:5])
print('디코더의 레이블 :',sents_fra_out[:5])


########################################################################

def build_vocab(sents):
  """Build a word -> integer-id mapping from tokenised sentences.

  Ids 0 and 1 are reserved for '<PAD>' and '<UNK>'.  The remaining words
  get ids starting at 2, in order of decreasing frequency; words with the
  same count keep the order in which they were first seen.

  Args:
    sents: iterable of token lists.

  Returns:
    dict mapping each word to its id.
  """
  frequencies = Counter(word for sent in sents for word in sent)

  word_to_index = {'<PAD>': 0, '<UNK>': 1}
  for word_id, (word, _) in enumerate(frequencies.most_common(), start=2):
    word_to_index[word] = word_id

  return word_to_index
##############################################################################################################

# Source vocabulary from the English sentences; target vocabulary from the
# decoder inputs and labels together so that both '<sos>' and '<eos>' get ids.
src_vocab = build_vocab(sents_en_in)
tar_vocab = build_vocab(sents_fra_in + sents_fra_out)

# Vocabulary sizes (these counts include '<PAD>' and '<UNK>').
src_vocab_size = len(src_vocab)
tar_vocab_size = len(tar_vocab)
print("영어 단어 집합의 크기 : {:d}, 프랑스어 단어 집합의 크기 : {:d}".format(src_vocab_size, tar_vocab_size))

# Reverse maps (id -> word) used to turn id sequences back into text.
index_to_src = {v: k for k, v in src_vocab.items()}
index_to_tar = {v: k for k, v in tar_vocab.items()}
########################################################################


def texts_to_sequences(sents, word_to_index):
  """Convert tokenised sentences into lists of integer ids.

  Words that are missing from ``word_to_index`` are mapped to the id of
  '<UNK>'.  Progress over ``sents`` is shown with tqdm.

  Args:
    sents: sequence of token lists.
    word_to_index: dict mapping word -> id; must contain '<UNK>' whenever
      an unknown word is encountered.

  Returns:
    list of id lists, parallel to ``sents``.
  """
  encoded = []
  for sent in tqdm(sents):
    encoded.append([
        word_to_index[word] if word in word_to_index else word_to_index['<UNK>']
        for word in sent
    ])
  return encoded
  ########################################################################

########################################################################
# Integer-encode the three parallel corpora with their vocabularies.
encoder_input = texts_to_sequences(sents_en_in, src_vocab)
decoder_input = texts_to_sequences(sents_fra_in, tar_vocab)
decoder_target = texts_to_sequences(sents_fra_out, tar_vocab)

# Print the first five samples before and after integer encoding.
# These are encoder inputs, so they contain no <sos> or <eos>.
for i, (item1, item2) in zip(range(5), zip(sents_en_in, encoder_input)):
    print(f"Index: {i}, 정수 인코딩 전: {item1}, 정수 인코딩 후: {item2}")

def pad_sequences(sentences, max_len=None):
    """Right-pad integer sequences with zeros into one 2-D array.

    Args:
        sentences: list of integer sequences (may be empty, and may contain
            empty sequences).
        max_len: width of the result.  When omitted, the length of the
            longest sequence is used (0 for an empty list).  Sequences
            longer than ``max_len`` are truncated on the right.

    Returns:
        ``numpy.ndarray`` of shape ``(len(sentences), max_len)`` with integer
        dtype; unused positions hold 0.
    """
    if max_len is None:
        # default=0 keeps an empty corpus from raising in max().
        max_len = max((len(sentence) for sentence in sentences), default=0)

    features = np.zeros((len(sentences), max_len), dtype=int)
    for index, sentence in enumerate(sentences):
        # Truncate so an explicit max_len shorter than the sentence does
        # not cause a shape mismatch on assignment.
        trimmed = sentence[:max_len]
        if len(trimmed) != 0:
            features[index, :len(trimmed)] = np.array(trimmed)
    return features
    ########################################################################

# Pad every corpus to a rectangular integer array (0 = <PAD>).
encoder_input = pad_sequences(encoder_input)
decoder_input = pad_sequences(decoder_input)
decoder_target = pad_sequences(decoder_target)

print('인코더의 입력의 크기(shape) :',encoder_input.shape)
print('디코더의 입력의 크기(shape) :',decoder_input.shape)
print('디코더의 레이블의 크기(shape) :',decoder_target.shape)


# Draw one random permutation and apply it to all three arrays so the rows
# stay aligned.  Without this step the hold-out split below would simply be
# the tail of the corpus file rather than a random sample.
indices = np.arange(encoder_input.shape[0])
np.random.shuffle(indices)
print('랜덤 시퀀스 :',indices)

encoder_input = encoder_input[indices]
decoder_input = decoder_input[indices]
decoder_target = decoder_target[indices]


# Hold out 10% of the samples that were actually loaded.
n_of_val = int(encoder_input.shape[0] * 0.1)
print('검증 데이터의 개수 :',n_of_val)


# Split on an explicit boundary; slicing with [:-n_of_val] would return an
# empty training set when n_of_val is 0.
n_train = encoder_input.shape[0] - n_of_val

encoder_input_train = encoder_input[:n_train]
decoder_input_train = decoder_input[:n_train]
decoder_target_train = decoder_target[:n_train]

encoder_input_test = encoder_input[n_train:]
decoder_input_test = decoder_input[n_train:]
decoder_target_test = decoder_target[n_train:]


print('훈련 source 데이터의 크기 :',encoder_input_train.shape)
print('훈련 target 데이터의 크기 :',decoder_input_train.shape)
print('훈련 target 레이블의 크기 :',decoder_target_train.shape)
print('테스트 source 데이터의 크기 :',encoder_input_test.shape)
print('테스트 target 데이터의 크기 :',decoder_input_test.shape)
print('테스트 target 레이블의 크기 :',decoder_target_test.shape)


import torch
import torch.nn as nn
import torch.optim as optim

# Embedding width and LSTM hidden width passed to both Encoder and Decoder.
embedding_dim = 256
hidden_units = 256

class Encoder(nn.Module):
    """Embed a padded batch of source ids and summarise it with an LSTM.

    Index 0 is the padding id.  Padding is assumed to be trailing (the
    sequences are right-padded), so the number of non-zero ids in a row is
    that row's true length.
    """

    def __init__(self, src_vocab_size, embedding_dim, hidden_units):
        """
        Args:
            src_vocab_size: number of source token ids (embedding rows).
            embedding_dim: embedding width.
            hidden_units: LSTM hidden-state width.
        """
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(src_vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_units, batch_first=True)

    def forward(self, x):
        """Return the final ``(hidden, cell)`` state for each sequence.

        Args:
            x: LongTensor of shape (batch_size, seq_len), right-padded
                with 0.

        Returns:
            hidden, cell: each of shape (1, batch_size, hidden_units).
        """
        # True length of every row.  Clamp to 1 because packing rejects
        # zero-length sequences (an all-padding row is treated as one pad
        # step).  pack_padded_sequence wants the lengths on the CPU.
        lengths = (x != self.embedding.padding_idx).sum(dim=1).clamp(min=1).cpu()

        # (batch_size, seq_len) -> (batch_size, seq_len, embedding_dim)
        embedded = self.embedding(x)

        # Pack so the LSTM stops at each sequence's last real token.  If the
        # padded tensor were fed directly, the returned state would be taken
        # after the <PAD> steps and would vary with the amount of padding.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False)

        # The encoder's output is only the final hidden and cell state,
        # returned in the original batch order.
        _, (hidden, cell) = self.lstm(packed)
        return hidden, cell
    ########################################################################

class Decoder(nn.Module):
    """Target-side LSTM that turns token ids into vocabulary logits."""

    def __init__(self, tar_vocab_size, embedding_dim, hidden_units):
        """
        Args:
            tar_vocab_size: number of target token ids (embedding rows and
                output logits).
            embedding_dim: embedding width.
            hidden_units: LSTM hidden-state width.
        """
        super().__init__()
        self.embedding = nn.Embedding(tar_vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, tar_vocab_size)

    def forward(self, x, hidden, cell):
        """Run the decoder over ``x`` starting from ``(hidden, cell)``.

        Args:
            x: LongTensor of token ids, shape (batch_size, seq_len).
            hidden: initial hidden state, shape (1, batch_size, hidden_units).
            cell: initial cell state, shape (1, batch_size, hidden_units).

        Returns:
            (logits, hidden, cell) where logits has shape
            (batch_size, seq_len, tar_vocab_size) and the states are the
            LSTM's final states.
        """
        # (batch_size, seq_len) -> (batch_size, seq_len, embedding_dim)
        embedded = self.embedding(x)

        # Continue from the state handed over by the encoder or by the
        # previous decoding step.
        lstm_out, (next_hidden, next_cell) = self.lstm(embedded, (hidden, cell))

        # Project every time step onto the target vocabulary.
        logits = self.fc(lstm_out)
        return logits, next_hidden, next_cell
    ########################################################################

class Seq2Seq(nn.Module):
    """Glue module: encode the source, then decode the target with teacher forcing."""

    def __init__(self, encoder, decoder):
        """Store the encoder and decoder that ``forward`` chains together."""
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg):
        """Return the decoder's output for ``trg`` conditioned on ``src``.

        The encoder's (hidden, cell) pair becomes the decoder's initial
        state.  The decoder's final states are discarded because training
        only needs the output.
        """
        enc_hidden, enc_cell = self.encoder(src)
        logits, _final_hidden, _final_cell = self.decoder(trg, enc_hidden, enc_cell)
        return logits
    ########################################################################

# Assemble the full model from the two halves.
encoder = Encoder(src_vocab_size, embedding_dim, hidden_units)
decoder = Decoder(tar_vocab_size, embedding_dim, hidden_units)
model = Seq2Seq(encoder, decoder)

# ignore_index=0 keeps <PAD> positions out of the loss.
loss_function = nn.CrossEntropyLoss(ignore_index=0)
# Adam with its default hyper-parameters.
optimizer = optim.Adam(model.parameters())

print(model)


def evaluation(model, dataloader, loss_function, device):
    """Compute the mean batch loss and token accuracy over ``dataloader``.

    The model is switched to eval mode and is left in that mode.

    Args:
        model: callable ``model(encoder_inputs, decoder_inputs)`` returning
            logits of shape (batch_size, seq_len, vocab_size).
        dataloader: iterable of ``(encoder_inputs, decoder_inputs,
            decoder_targets)`` tensor triples.
        loss_function: criterion applied to the flattened logits and targets.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)``.  ``mean_loss`` is the average of the
        per-batch losses.  ``accuracy`` is the fraction of non-padding
        target tokens (id != 0) whose argmax prediction is correct.  Either
        value is ``nan`` when there is nothing to average over (no batches,
        or no non-padding targets).
    """
    model.eval()
    total_loss = 0.0
    total_correct = 0
    total_count = 0
    # Counted here instead of using len(dataloader) so that any iterable
    # works and an empty one cannot cause a division by zero.
    num_batches = 0

    with torch.no_grad():
        for encoder_inputs, decoder_inputs, decoder_targets in dataloader:
            encoder_inputs = encoder_inputs.to(device)
            decoder_inputs = decoder_inputs.to(device)
            decoder_targets = decoder_targets.to(device)

            # outputs.shape == (batch_size, seq_len, vocab_size)
            outputs = model(encoder_inputs, decoder_inputs)

            # Flatten to (batch_size * seq_len, vocab_size) and
            # (batch_size * seq_len,).  reshape also accepts non-contiguous
            # tensors, where view would raise.
            loss = loss_function(outputs.reshape(-1, outputs.size(-1)),
                                 decoder_targets.reshape(-1))
            total_loss += loss.item()
            num_batches += 1

            # Accuracy over real tokens only (padding id 0 is masked out).
            mask = decoder_targets != 0
            hits = (outputs.argmax(dim=-1) == decoder_targets) & mask
            total_correct += hits.sum().item()
            total_count += mask.sum().item()

    mean_loss = total_loss / num_batches if num_batches else float('nan')
    accuracy = total_correct / total_count if total_count else float('nan')
    return mean_loss, accuracy


# Convert the train/hold-out arrays to int64 tensors.
encoder_input_train_tensor = torch.tensor(encoder_input_train, dtype=torch.long)
decoder_input_train_tensor = torch.tensor(decoder_input_train, dtype=torch.long)
decoder_target_train_tensor = torch.tensor(decoder_target_train, dtype=torch.long)

encoder_input_test_tensor = torch.tensor(encoder_input_test, dtype=torch.long)
decoder_input_test_tensor = torch.tensor(decoder_input_test, dtype=torch.long)
decoder_target_test_tensor = torch.tensor(decoder_target_test, dtype=torch.long)

# Create the datasets and data loaders.
batch_size = 128

# Each dataset item is an (encoder input, decoder input, decoder target) triple.
# The training loader reshuffles every epoch.
train_dataset = TensorDataset(encoder_input_train_tensor,
                              decoder_input_train_tensor, decoder_target_train_tensor)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# The held-out loader keeps a fixed order.
valid_dataset = TensorDataset(encoder_input_test_tensor,
                              decoder_input_test_tensor, decoder_target_test_tensor)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

num_epochs = 100
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
################################################################################################################################################

# Training loop
# Lowest validation loss seen so far; starts at +inf so epoch 1 always saves.
best_val_loss = float('inf')

for epoch in range(num_epochs):
    # One optimisation pass over the training set.
    model.train()
    for encoder_inputs, decoder_inputs, decoder_targets in train_dataloader:
        encoder_inputs = encoder_inputs.to(device)
        decoder_inputs = decoder_inputs.to(device)
        decoder_targets = decoder_targets.to(device)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        outputs = model(encoder_inputs, decoder_inputs)

        # Flatten to (batch * seq_len, vocab) against (batch * seq_len,).
        loss = loss_function(outputs.view(-1, outputs.size(-1)), decoder_targets.view(-1))
        loss.backward()
        optimizer.step()

    # Re-score both sets with the updated weights; evaluation() puts the
    # model in eval mode and model.train() above restores training mode.
    train_loss, train_acc = evaluation(model, train_dataloader, loss_function, device)
    valid_loss, valid_acc = evaluation(model, valid_dataloader, loss_function, device)

    print(f'Epoch: {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}'
          f' | Valid Loss: {valid_loss:.4f} | Valid Acc: {valid_acc:.4f}')

    # Save a checkpoint whenever the validation loss reaches a new minimum.
    if valid_loss < best_val_loss:
        print(f'Validation loss improved from {best_val_loss:.4f} to {valid_loss:.4f}. 체크포인트를 저장합니다.')
        best_val_loss = valid_loss
        torch.save(model.state_dict(), 'best_model_checkpoint.pth')
    ################################################################################################################################################

# Reload the best checkpoint saved during training.
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs and avoids executing arbitrary pickled
# code (it also silences the torch.load FutureWarning).
# map_location=device lets a checkpoint written on a GPU load on a
# CPU-only machine.
model.load_state_dict(
    torch.load('best_model_checkpoint.pth', map_location=device, weights_only=True))
model.to(device)

# Score the restored weights on the held-out set.
val_loss, val_accuracy = evaluation(model, valid_dataloader, loss_function, device)

print(f'Best model validation loss: {val_loss:.4f}')
print(f'Best model validation accuracy: {val_accuracy:.4f}')


# Show the ids the target vocabulary assigned to the start and end tokens.
print(tar_vocab['<sos>'])
print(tar_vocab['<eos>'])


# Reverse maps (id -> word).  These are built from the same vocabularies as
# the identically named maps created right after the vocabularies were built.
index_to_src = {v: k for k, v in src_vocab.items()}
index_to_tar = {v: k for k, v in tar_vocab.items()}

def seq_to_src(input_seq):
  """Convert a source id sequence back into text.

  Padding ids (0) are skipped.  Every remaining id is looked up in
  ``index_to_src`` and followed by one space, so a non-empty result ends
  with a trailing space.
  """
  words = [index_to_src[token_id] + ' ' for token_id in input_seq if token_id != 0]
  return ''.join(words)



def seq_to_tar(input_seq):
  """Convert a target id sequence back into text.

  Padding (0), '<sos>' and '<eos>' ids are skipped.  Every remaining id is
  looked up in ``index_to_tar`` and followed by one space, so a non-empty
  result ends with a trailing space.
  """
  words = []
  for token_id in input_seq:
    if (token_id == 0 or token_id == tar_vocab['<sos>']
        or token_id == tar_vocab['<eos>']):
      continue
    words.append(index_to_tar[token_id] + ' ')
  return ''.join(words)
########################################################################

# Inspect one held-out sample: encoder input, decoder input and decoder label.
print(encoder_input_test[25])
print(decoder_input_test[25])
print(decoder_target_test[25])


def decode_sequence(input_seq, model, src_vocab_size, tar_vocab_size, max_output_len,
                    int_to_src_token, int_to_tar_token):
    """Greedily translate one integer-encoded source sentence.

    Args:
        input_seq: 1-D sequence of source token ids (may be zero-padded).
        model: object exposing ``encoder(x) -> (hidden, cell)`` and
            ``decoder(x, hidden, cell) -> (logits, hidden, cell)``.
        src_vocab_size: unused; kept for interface compatibility.
        tar_vocab_size: unused; kept for interface compatibility.
        max_output_len: maximum number of tokens to generate.
        int_to_src_token: unused; kept for interface compatibility.
        int_to_tar_token: dict mapping target id -> token string.

    Returns:
        The generated tokens joined by single spaces.  Generation stops at
        '<eos>', which is not included, or after ``max_output_len`` tokens.
    """
    # Resolve the special-token ids from the vocabulary rather than
    # hard-coding them; ids are assigned by frequency rank, so they are not
    # fixed across datasets.  3 and 4 are kept as fallbacks for mappings
    # that lack the entries.
    token_to_int = {token: index for index, token in int_to_tar_token.items()}
    sos_id = token_to_int.get('<sos>', 3)
    eos_id = token_to_int.get('<eos>', 4)

    # Run on whatever device the model's weights live on.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device('cpu')

    decoded_tokens = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        # unsqueeze(0) adds the batch dimension.
        encoder_inputs = torch.tensor(input_seq, dtype=torch.long).unsqueeze(0).to(run_device)
        hidden, cell = model.encoder(encoder_inputs)

        # The first decoder input is the start token, shape (1, 1).
        decoder_input = torch.tensor([[sos_id]], dtype=torch.long, device=run_device)

        for _ in range(max_output_len):
            output, hidden, cell = model.decoder(decoder_input, hidden, cell)
            output_token = output.argmax(dim=-1).item()

            # Stop at the end token.
            if output_token == eos_id:
                break

            decoded_tokens.append(output_token)
            # Feed the prediction back in as the next input.
            decoder_input = torch.tensor([[output_token]], dtype=torch.long,
                                         device=run_device)

    return ' '.join(int_to_tar_token[token] for token in decoded_tokens)
####################################################################################################################


####################################################################################################################
# Translate a few training samples and print source, reference and output.
for seq_index in [3, 50, 100, 300, 1001]:
  input_seq = encoder_input_train[seq_index]
  # Generate at most 20 target tokens.
  translated_text = decode_sequence(input_seq, model, src_vocab_size, tar_vocab_size, 20,
                                    index_to_src, index_to_tar)

  print("입력문장 :",seq_to_src(encoder_input_train[seq_index]))
  # The reference comes from the decoder input; seq_to_tar() drops <sos>.
  print("정답문장 :",seq_to_tar(decoder_input_train[seq_index]))
  print("번역문장 :",translated_text)
  print("-"*50)
  ########################################################################

전처리 전 영어 문장 : Have you had dinner?
전처리 후 영어 문장 : have you had dinner ?
전처리 전 프랑스어 문장 : Avez-vous déjà diné?
전처리 후 프랑스어 문장 : avez vous deja dine ?

인코더의 입력 : [['go', '.'], ['go', '.'], ['go', '.'], ['go', '.'], ['hi', '.']]
디코더의 입력 : [['<sos>', 'va', '!'], ['<sos>', 'marche', '.'], ['<sos>', 'en', 'route', '!'], ['<sos>', 'bouge', '!'], ['<sos>', 'salut', '!']]
디코더의 레이블 : [['va', '!', '<eos>'], ['marche', '.', '<eos>'], ['en', 'route', '!', '<eos>'], ['bouge', '!', '<eos>'], ['salut', '!', '<eos>']]
영어 단어 집합의 크기 : 4486, 프랑스어 단어 집합의 크기 : 7879
100%|██████████| 33000/33000 [00:00<00:00, 1377330.08it/s]
100%|██████████| 33000/33000 [00:00<00:00, 430012.62it/s]
100%|██████████| 33000/33000 [00:00<00:00, 1143485.28it/s]
Index: 0, 정수 인코딩 전: ['go', '.'], 정수 인코딩 후: [27, 2]
Index: 1, 정수 인코딩 전: ['go', '.'], 정수 인코딩 후: [27, 2]
Index: 2, 정수 인코딩 전: ['go', '.'], 정수 인코딩 후: [27, 2]
Index: 3, 정수 인코딩 전: ['go', '.'], 정수 인코딩 후: [27, 2]
Index: 4, 정수 인코딩 전: ['hi', '.'], 정수 인코딩 후: [736, 2]
인코더의 입력의 크기(shape) : (33000, 7)
디코더의 입력의 크기(shape) : (33000, 16)
디코더의 레이블의 크기(shape) : (33000, 16)
랜덤 시퀀스 : [ 1070 28491  7463 ... 18993 12704 11595]
검증 데이터의 개수 : 3300
훈련 source 데이터의 크기 : (29700, 7)
훈련 target 데이터의 크기 : (29700, 16)
훈련 target 레이블의 크기 : (29700, 16)
테스트 source 데이터의 크기 : (3300, 7)
테스트 target 데이터의 크기 : (3300, 16)
테스트 target 레이블의 크기 : (3300, 16)
Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(4486, 256, padding_idx=0)
    (lstm): LSTM(256, 256, batch_first=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(7879, 256, padding_idx=0)
    (lstm): LSTM(256, 256, batch_first=True)
    (fc): Linear(in_features=256, out_features=7879, bias=True)
  )
)
Epoch: 1/100 | Train Loss: 2.9177 | Train Acc: 0.5298 | Valid Loss: 3.2859 | Valid Acc: 0.5009
Validation loss improved from inf to 3.2859. 체크포인트를 저장합니다.
Epoch: 2/100 | Train Loss: 2.2537 | Train Acc: 0.6068 | Valid Loss: 2.7700 | Valid Acc: 0.5642
Validation loss improved from 3.2859 to 2.7700. 체크포인트를 저장합니다.
Epoch: 3/100 | Train Loss: 1.8379 | Train Acc: 0.6486 | Valid Loss: 2.4960 | Valid Acc: 0.5951
Validation loss improved from 2.7700 to 2.4960. 체크포인트를 저장합니다.
Epoch: 4/100 | Train Loss: 1.5281 | Train Acc: 0.6864 | Valid Loss: 2.3431 | Valid Acc: 0.6108
Validation loss improved from 2.4960 to 2.3431. 체크포인트를 저장합니다.
Epoch: 5/100 | Train Loss: 1.2744 | Train Acc: 0.7247 | Valid Loss: 2.2489 | Valid Acc: 0.6217
Validation loss improved from 2.3431 to 2.2489. 체크포인트를 저장합니다.
Epoch: 6/100 | Train Loss: 1.0668 | Train Acc: 0.7609 | Valid Loss: 2.1640 | Valid Acc: 0.6348
Validation loss improved from 2.2489 to 2.1640. 체크포인트를 저장합니다.
Epoch: 7/100 | Train Loss: 0.8854 | Train Acc: 0.7957 | Valid Loss: 2.1130 | Valid Acc: 0.6413
Validation loss improved from 2.1640 to 2.1130. 체크포인트를 저장합니다.
Epoch: 8/100 | Train Loss: 0.7395 | Train Acc: 0.8275 | Valid Loss: 2.0769 | Valid Acc: 0.6460
Validation loss improved from 2.1130 to 2.0769. 체크포인트를 저장합니다.
Epoch: 9/100 | Train Loss: 0.6192 | Train Acc: 0.8524 | Valid Loss: 2.0538 | Valid Acc: 0.6553
Validation loss improved from 2.0769 to 2.0538. 체크포인트를 저장합니다.
Epoch: 10/100 | Train Loss: 0.5284 | Train Acc: 0.8725 | Valid Loss: 2.0402 | Valid Acc: 0.6553
Validation loss improved from 2.0538 to 2.0402. 체크포인트를 저장합니다.
Epoch: 11/100 | Train Loss: 0.4454 | Train Acc: 0.8878 | Valid Loss: 2.0429 | Valid Acc: 0.6589
Epoch: 12/100 | Train Loss: 0.3900 | Train Acc: 0.8958 | Valid Loss: 2.0511 | Valid Acc: 0.6612
Epoch: 13/100 | Train Loss: 0.3476 | Train Acc: 0.9039 | Valid Loss: 2.0516 | Valid Acc: 0.6638
Epoch: 14/100 | Train Loss: 0.3068 | Train Acc: 0.9102 | Valid Loss: 2.0711 | Valid Acc: 0.6632
...

...

...

Epoch: 93/100 | Train Loss: 0.1320 | Train Acc: 0.9288 | Valid Loss: 2.6425 | Valid Acc: 0.6622
Epoch: 94/100 | Train Loss: 0.1313 | Train Acc: 0.9290 | Valid Loss: 2.6218 | Valid Acc: 0.6640
Epoch: 95/100 | Train Loss: 0.1317 | Train Acc: 0.9288 | Valid Loss: 2.6541 | Valid Acc: 0.6624
Epoch: 96/100 | Train Loss: 0.1314 | Train Acc: 0.9287 | Valid Loss: 2.6714 | Valid Acc: 0.6630
Epoch: 97/100 | Train Loss: 0.1317 | Train Acc: 0.9287 | Valid Loss: 2.6623 | Valid Acc: 0.6634
Epoch: 98/100 | Train Loss: 0.1312 | Train Acc: 0.9284 | Valid Loss: 2.6688 | Valid Acc: 0.6616
Epoch: 99/100 | Train Loss: 0.1315 | Train Acc: 0.9282 | Valid Loss: 2.6900 | Valid Acc: 0.6624
C:\Users\hi\PycharmProjects\NL_deepPart\day4_code\Seq2SeqEx2.py:335: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  model.load_state_dict(torch.load('best_model_checkpoint.pth'))
Epoch: 100/100 | Train Loss: 0.1313 | Train Acc: 0.9286 | Valid Loss: 2.6774 | Valid Acc: 0.6611
Best model validation loss: 2.0402
Best model validation accuracy: 0.6553
3
4
[ 25  11  48 105   2   0   0]
[  3  46  42  61 521   2   0   0   0   0   0   0   0   0   0   0]
[ 46  42  61 521   2   4   0   0   0   0   0   0   0   0   0   0]
입력문장 : go . 
정답문장 : bouge ! 
번역문장 : va en route !
--------------------------------------------------
입력문장 : hello ! 
정답문장 : bonjour ! 
번역문장 : bonjour !
--------------------------------------------------
입력문장 : got it ! 
정답문장 : j ai pige ! 
번역문장 : ca a l air !
--------------------------------------------------
입력문장 : go home . 
정답문장 : rentre a la maison . 
번역문장 : rentrez a la maison .
--------------------------------------------------
입력문장 : forget me . 
정답문장 : oublie moi . 
번역문장 : oubliez moi .
--------------------------------------------------

짬그브

+AI to AI+

이전 포스트

RNN , Generation 모델 생성, 사용

다음 포스트

딥러닝 (Seq to Seq)

seq to seq 예제 1

seq to seq 예제 2

RNN , Generation 모델 생성, 사용

강화학습 개요

0개의 댓글