1. Text generation
2. Text classification
sentence = "life is like a box of chocolates you never know what you are going to get".split()
# Make a vocabulary list
vocab = list(set(sentence))
print(vocab)
word2index = {tkn: i for i, tkn in enumerate(vocab, 1)}
word2index['<unk>']=0
print(word2index)
# This dict maps an index back to its word
index2word = {v: k for k, v in word2index.items()}
print(index2word)
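As a quick, illustrative sanity check (not in the original cell), the two dictionaries should round-trip: encoding the sentence with word2index and decoding with index2word must reproduce the original tokens.
encoded_check = [word2index.get(w, word2index['<unk>']) for w in sentence]
decoded_check = [index2word[i] for i in encoded_check]
assert decoded_check == sentence  # round-trip succeeds because every token is in the vocabulary
print(encoded_check)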
# Function to build the input data (X) and the labels (Y)
def build_data(sentence, word2index):
encoded = [word2index[token] for token in sentence] # transforms word to index
input_seq, label_seq = encoded[:-1], encoded[1:] # Split the input sequence and label sequence
input_seq = torch.LongTensor(input_seq).unsqueeze(0)
label_seq = torch.LongTensor(label_seq).unsqueeze(0)
return input_seq, label_seq, encoded
x, y, encoded = build_data(sentence, word2index)
embedding_function = torch.nn.Embedding(num_embeddings=len(word2index), embedding_dim = 5)
embedding_function(x)
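To make the shape change explicit, a small check (assuming the cells above ran as-is): the index tensor of shape (1, 15) becomes (1, 15, 5) after the embedding lookup.
print(x.shape)                      # torch.Size([1, 15]) - a batch with one sequence of word indices
print(embedding_function(x).shape)  # torch.Size([1, 15, 5]) - one 5-dimensional vector per word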
# Hyperparameters
vocab_size = len(word2index) # 16
input_size = 5 # embedding dim
hidden_size = 20 # hidden size of RNN layer
epochs = 200
class RNN_model(torch.nn.Module):
    # vocab_size = number of words in the vocabulary, 16 (determined by the original text)
    # hidden_size = size of the RNN output, 20 (hyperparameter)
    # input_size = dimension of the input embedding, 5 (chosen in the previous cell)
def __init__(self, vocab_size, input_size, hidden_size):
super(RNN_model, self).__init__()
# Embedding layer
self.embedding_layer = torch.nn.Embedding(num_embeddings=vocab_size,embedding_dim=input_size) # give embedding for each word
# RNN layer
self.rnn_layer = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True) # RNN(5, 20, num_layers=1)
# linear layer
self.linear = torch.nn.Linear(hidden_size, vocab_size) # Linear(in_features=20, out_features=16, bias=True)
def forward(self, x):
#1. Embedding layer
        # size of the data: (batch_size, length of the sequence) -> (batch_size, length of the sequence, embedding dimension)
y = self.embedding_layer(x)
# x: tensor([[ 5, 3, 7, 12, 8, 14, 15, 9, 4, 10, 6, 9, 1, 11, 2]]) torch.Size([1, 15])
        # y: torch.Size([1, 15, 5])
#2. RNN layer
        # size of the data: (batch_size, length of the sequence, embedding dimension) -> y: (batch_size, length of the sequence, hidden_size), hidden: (num_layers, batch_size, hidden_size)
y, hidden = self.rnn_layer(y)
        # y: torch.Size([1, 15, 20]) hidden: torch.Size([1, 1, 20])
#3. Linear layer
# size of the data: (batch_size, length of the sequence, hidden_size) -> (batch_size, length of the sequence, vocab_size)
y = self.linear(y)
# y: torch.Size([1, 15, 16])
# Size of the return value: (batch_size*length of the sequence, vocab_size)
return y.view(-1, y.size(2)) # torch.Size([15, 16])
model = RNN_model(vocab_size, input_size, hidden_size)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters())
decode = lambda y: [index2word.get(x) for x in y]
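Before training, a quick illustrative check (not in the original notebook) of what the model returns and how it lines up with the targets expected by CrossEntropyLoss:
with torch.no_grad():
    logits = model(x)
print(logits.shape)      # torch.Size([15, 16]) - one row of vocabulary scores per input position
print(y.view(-1).shape)  # torch.Size([15]) - the matching target indices
print(" ".join(['life'] + decode(logits.argmax(-1).tolist())))  # untrained model: mostly random words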
# Training
print(f'life is like a box of chocolates you never know what you are going to get')
for epoch in range(1, epochs+1):
optimizer.zero_grad()
output = model(x)
loss = loss_function(output, y.view(-1))
loss.backward()
optimizer.step()
# Observe the result
if epoch % 20 == 0:
print(f"Epoch: {epoch}, Loss: {loss}")
pred = output.softmax(-1).argmax(-1).tolist()
print(" ".join(['life']+ decode(pred)))
print()
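After training, the model can also be used to generate text greedily: feed a seed word, predict the next index, append it, and repeat. A minimal sketch; the generate helper below is not part of the original notebook.
def generate(model, seed='life', n_words=15):
    # Greedy next-word generation with the trained RNN (illustrative helper)
    indices = [word2index[seed]]
    with torch.no_grad():
        for _ in range(n_words):
            inp = torch.LongTensor(indices).unsqueeze(0)  # (1, current length)
            logits = model(inp)                           # (current length, vocab_size)
            indices.append(logits[-1].argmax().item())    # last position = next-word prediction
    return " ".join(decode(indices))

print(generate(model))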
def one_hot_encoding(sentence, word2index):
    encoded = torch.zeros(len(sentence), len(word2index))  # one row per token, one column per vocabulary entry
    for idx, word in enumerate(sentence):
        word_index = word2index[word]
        encoded[idx][word_index] = 1
input_seq, label_seq = encoded[:-1], encoded[1:]
return input_seq.to(torch.long), label_seq.to(torch.long), encoded.to(torch.long)
x, y, encoded = one_hot_encoding(sentence, word2index)
print(x,y,encoded)
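An nn.Embedding lookup is mathematically the same as multiplying a one-hot row vector by a weight matrix, which is what the Linear "embedding layer" in the next model does. A small illustrative check (using the tensors from the cell above, with the weights tied by hand):
emb = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=5)
lin = torch.nn.Linear(vocab_size, 5, bias=False)
lin.weight.data.copy_(emb.weight.data.t())     # Linear stores the weight transposed
one_hot_row = encoded[0].to(torch.float)       # one-hot vector of the first word, shape (16,)
idx = encoded[0].argmax().unsqueeze(0)         # index of the first word, shape (1,)
print(torch.allclose(lin(one_hot_row), emb(idx)[0]))  # True: same 5-dimensional vector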
# Hyperparameters
vocab_size = len(word2index) # 16
input_size = 5 # embedding dim
hidden_size = 20 # hidden size of RNN layer
epochs = 200
class RNN_model(torch.nn.Module):
def __init__(self, vocab_size, input_size, hidden_size):
super(RNN_model, self).__init__()
self.vocab_size = vocab_size
self.input_size = input_size
self.hidden_size = hidden_size
#Embedding layer
self.embedding_layer = torch.nn.Linear(vocab_size, input_size)
#RNN layer
self.rnn_layer = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)
#linear layer
self.linear = torch.nn.Linear(hidden_size, vocab_size)
def forward(self, x):
y = self.embedding_layer(x.to(torch.float)).reshape(1,x.shape[0],self.input_size)
y, hidden = self.rnn_layer(y)
y = self.linear(y)
return y.view(-1, y.size(2)) # torch.Size([15, 16])
model = RNN_model(vocab_size, input_size, hidden_size)
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
print(f'life is like a box of chocolates you never know what you are going to get')
for epoch in range(1, epochs+1):
optimizer.zero_grad()
output = model(x)
loss = loss_function(output, y.float())
loss.backward()
optimizer.step()
# Observe the result
if epoch % 20 == 0:
print(f"Epoch: {epoch}, Loss: {loss}")
pred = output.argmax(-1).tolist()
print(" ".join(['life']+ decode(pred)))
TEXT = torchtext.data.Field(sequential=True, batch_first=True, lower=True)
LABEL = torchtext.data.Field(sequential=False, batch_first=True)
trainset, testset = torchtext.datasets.IMDB.splits(TEXT, LABEL)
pos_data = vars(trainset[0]) # vars: returns a dictionary of the object's attributes
neg_data = vars(trainset[20000])
TEXT.build_vocab(trainset, min_freq=5)
LABEL.build_vocab(trainset)
vocab_size = len(TEXT.vocab)
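The Field builds a Vocab object whose stoi/itos mappings can be inspected; an illustrative peek (the exact sizes and orderings depend on the torchtext version and the min_freq setting):
print(vocab_size)             # number of tokens kept (min_freq=5) plus the special tokens
print(TEXT.vocab.itos[:10])   # typically starts with '<unk>' and '<pad>'
print(LABEL.vocab.stoi)       # label indices start at 1, which is why train() shifts them with sub_(1)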
trainset, valset = trainset.split(split_ratio=0.8)
train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
(trainset, valset, testset), batch_size=64,
    shuffle=True, repeat=False)
batch = next(iter(train_iter))
print(f'Shape of first batch: {batch.text.shape}')
batch = next(iter(train_iter))
print(f'Shape of second batch: {batch.text.shape}')
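Each batch is padded to the length of its longest review, so batch.text.shape[1] differs from batch to batch; BucketIterator groups reviews of similar length to keep that padding small. An illustrative check of the padding (assuming the default '<pad>' token):
pad_index = TEXT.vocab.stoi['<pad>']
print(pad_index)                        # usually 1
print((batch.text == pad_index).sum())  # number of padded positions in this batch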
# Re-create the iterators so the batches consumed above are seen again during training
train_iter, val_iter, test_iter =torchtext.data.BucketIterator.splits(
(trainset, valset, testset), batch_size=64,
shuffle=True, repeat=False)
def train(model, optimizer, train_iter):
for b, batch in enumerate(train_iter):
x, y = batch.text.to(DEVICE), batch.label.to(DEVICE)
y.data.sub_(1) #transforms the labels into (0, 1)
optimizer.zero_grad()
logit = model(x)
loss = torch.nn.functional.cross_entropy(logit, y)
loss.backward()
optimizer.step()
def evaluate(model, val_iter):
corrects, total_loss = 0, 0
for batch in val_iter:
x, y = batch.text.to(DEVICE), batch.label.to(DEVICE)
y.data.sub_(1) #transforms the labels into (0, 1)
logit = model(x)
loss = torch.nn.functional.cross_entropy(logit, y, reduction='sum')
total_loss += loss.item()
corrects += (logit.max(1)[1].view(y.size()).data == y.data).sum()
size = len(val_iter.dataset)
avg_loss = total_loss / size
avg_accuracy = 100.0 * corrects / size
return avg_loss, avg_accuracy
class LSTM(torch.nn.Module):
def __init__(self, vocab_size, input_size, n_labels, hidden_size, num_layers = 1, batch_first=True):
super(LSTM, self).__init__()
self.vocab_size = vocab_size # 46159
self.input_size = input_size # 128
self.hidden_size = hidden_size # 256
self.num_layers = num_layers # 2
# Embedding layer
self.embedding_layer = torch.nn.Embedding(num_embeddings=self.vocab_size,embedding_dim=self.input_size)
# Embedding(46159, 128)
# LSTM layer
self.lstm_layer = torch.nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers =self.num_layers, batch_first=True)
# LSTM(128, 256, num_layers=2, batch_first=True)
# linear layer
self.linear = torch.nn.Linear( hidden_size, n_labels)
# Linear(in_features=256, out_features=2, bias=True)
def forward(self, x):
#1. Embedding layer
        # size of the data: (batch_size, length of the sequence) -> (batch_size, length of the sequence, dimension of embedding)
y = self.embedding_layer(x) # y torch.Size([64, 727, 128])
#Initial hidden state
h_0 = torch.zeros((self.num_layers, y.shape[0], self.hidden_size)).to(DEVICE)# h_0 torch.Size([2, 64, 256])
#Initial cell state
c_0 = torch.zeros((self.num_layers, y.shape[0], self.hidden_size)).to(DEVICE)# c_0 torch.Size([2, 64, 256])
#2. LSTM layer
        # size of the data: (batch_size, length of the sequence, dimension of embedding) -> hidden_states: (batch_size, length of the sequence, hidden_size), h_n, c_n: (num_layers, batch_size, hidden_size)
hidden_states, (h_n , c_n) = self.lstm_layer(y, (h_0, c_0)) # hidden_states torch.Size([64, 727, 256]) h_n torch.Size([2, 64, 256]) c_n torch.Size([2, 64, 256])
        h_t = hidden_states[:,-1,:] # h_t torch.Size([64, 256]) - keep only the last time step's hidden state for classification
#3. Linear layer
# size of the data: (batch_size, hidden_size) -> (batch_size, n_labels)
result = self.linear(h_t) # result torch.Size([64, 2])
#Size of the return value: (batch_size, n_labels)
return result
model = LSTM(vocab_size=vocab_size, input_size=128, num_layers=2, n_labels=2, hidden_size=256).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
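A quick shape check before training, using a fake batch of random token indices (illustrative; not in the original notebook):
_dummy = torch.randint(0, vocab_size, (64, 100)).to(DEVICE)  # pretend batch: 64 reviews of 100 tokens
print(model(_dummy).shape)                                   # torch.Size([64, 2]) - one logit pair per review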
for e in range(1, epochs+1):
train(model, optimizer, train_iter)
val_loss, val_accuracy = evaluate(model, val_iter)
print(f"Epoch: {e}, Loss of validation: {val_loss} Accuracy of validation: {val_accuracy}")
class LSTMCell(nn.Module):
def __init__(self, input_size, hidden_size, bias=True):
super(LSTMCell, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.bias = bias
self.xh = nn.Linear(input_size, hidden_size * 4, bias=bias)
self.hh = nn.Linear(hidden_size, hidden_size * 4, bias=bias)
self.reset_parameters()
def reset_parameters(self):
std = 1.0 / np.sqrt(self.hidden_size)
for w in self.parameters():
w.data.uniform_(-std, std)
def forward(self, x, hx, cx):
        gates = self.xh(x) + self.hh(hx)
input_gate, forget_gate, cell_gate, output_gate = gates.chunk(4, 1)
i_t = torch.sigmoid(input_gate)
f_t = torch.sigmoid(forget_gate)
g_t = torch.tanh(cell_gate)
o_t = torch.sigmoid(output_gate)
ct = cx * f_t + i_t * g_t
ht = o_t * torch.tanh(ct)
return (ht, ct)
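The cell processes a single time step, so it has to be unrolled over the sequence by hand. A minimal usage sketch with random data (all sizes here are arbitrary):
bs, seq_len, in_dim, h_dim = 4, 10, 8, 16
cell = LSTMCell(in_dim, h_dim)
x_seq = torch.randn(bs, seq_len, in_dim)
h, c = torch.zeros(bs, h_dim), torch.zeros(bs, h_dim)  # initial hidden and cell states
for t in range(seq_len):
    h, c = cell(x_seq[:, t, :], h, c)                  # one step: (batch, in_dim) -> (batch, h_dim)
print(h.shape, c.shape)                                # torch.Size([4, 16]) torch.Size([4, 16])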
class GRUCell(nn.Module):
def __init__(self, input_size, hidden_size, bias=True):
super(GRUCell, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.bias = bias
self.x2h = nn.Linear(input_size, 3 * hidden_size, bias=bias)
self.h2h = nn.Linear(hidden_size, 3 * hidden_size, bias=bias)
self.reset_parameters()
def reset_parameters(self):
std = 1.0 / np.sqrt(self.hidden_size)
for w in self.parameters():
w.data.uniform_(-std, std)
def forward(self, x, hx):
x_t = self.x2h(x)
h_t = self.h2h(hx)
x_reset, x_upd, x_new = x_t.chunk(3, 1)
h_reset, h_upd, h_new = h_t.chunk(3, 1)
reset_gate = torch.sigmoid(x_reset + h_reset)
update_gate = torch.sigmoid(x_upd + h_upd)
candidate_hidden_state = torch.tanh(x_new + (reset_gate * h_new))
hy = update_gate * hx + (1 - update_gate) * candidate_hidden_state
return hy
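A single-step usage sketch for the GRU cell, compared against torch.nn.GRUCell, which takes the same arguments (random data; sizes are arbitrary):
cell = GRUCell(input_size=8, hidden_size=16)
x_step = torch.randn(4, 8)                             # one time step for a batch of 4
h_prev = torch.zeros(4, 16)
print(cell(x_step, h_prev).shape)                      # torch.Size([4, 16])
print(torch.nn.GRUCell(8, 16)(x_step, h_prev).shape)   # torch.Size([4, 16])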