# Example sentences:
#   i feel hungry
#   i eat lunch
#   now i feel happy
# Collect the sentences to be processed into a Python list.
sentences = ['i feel hungry', 'i eat lunch', 'now i feel happy']

# Use Python's str.split() to break a sentence into word-level tokens.
word_list = 'i feel hungry'.split()
print(word_list)
# Build the index -> word dictionary, filling in the words one at a time.
# The fill order here is arbitrary; it does not actually matter.
# By convention, <PAD>, <BOS> and <UNK> go at the front of the dictionary.
index_to_word = {}
index_to_word[0] = '<PAD>'  # padding token
index_to_word[1] = '<BOS>'  # beginning-of-sentence marker
index_to_word[2] = '<UNK>'  # unknown (out-of-vocabulary) word
index_to_word[3] = 'i'
index_to_word[4] = 'feel'
index_to_word[5] = 'hungry'
index_to_word[6] = 'eat'
index_to_word[7] = 'lunch'
index_to_word[8] = 'now'
index_to_word[9] = 'happy'
print(index_to_word)

# Invert the mapping so text can be turned into numbers: word -> index.
word_to_index = {word: index for index, word in index_to_word.items()}
print(word_to_index)
# Write a function that, given one sentence together with the
# word_to_index dictionary, converts it into a list of word indices.
# Every encoded sentence is made to start with <BOS>.
def get_encoded_sentence(sentence, word_to_index):
    """Encode a sentence as a list of word indices, prefixed with <BOS>.

    Words missing from `word_to_index` are mapped to the <UNK> index.
    """
    unk = word_to_index['<UNK>']  # hoisted: same fallback for every word
    return [word_to_index['<BOS>']] + [word_to_index.get(word, unk)
                                       for word in sentence.split()]
# Sanity check: encode one sentence with the vocabulary built above.
print(get_encoded_sentence('i eat lunch', word_to_index))  # -> [1, 3, 6, 7]
def get_encoded_sentences(sentences, word_to_index):
    """Encode every sentence in `sentences` into a list of word indices."""
    encoded = []
    for sentence in sentences:
        encoded.append(get_encoded_sentence(sentence, word_to_index))
    return encoded
# With sentences = ['i feel hungry', 'i eat lunch', 'now i feel happy'],
# the encoded result printed below is produced.
encoded_sentences = get_encoded_sentences(sentences, word_to_index)
print(encoded_sentences)  # -> [[1, 3, 4, 5], [1, 3, 6, 7], [1, 8, 3, 4, 9]]
def get_decoded_sentence(encoded_sentence, index_to_word):
    """Decode a list of word indices back into a sentence string.

    The first element is assumed to be <BOS> and is skipped ([1:]);
    indices missing from `index_to_word` decode to '<UNK>'.
    """
    return ' '.join(index_to_word.get(index, '<UNK>')
                    for index in encoded_sentence[1:])
# [1:] skips the leading <BOS> token during decoding.
print(get_decoded_sentence([1, 3, 4, 5], index_to_word))  # -> 'i feel hungry'
def get_decoded_sentences(encoded_sentences, index_to_word):
    """Decode each encoded sentence back to its text form."""
    decoded = []
    for encoded_sentence in encoded_sentences:
        decoded.append(get_decoded_sentence(encoded_sentence, index_to_word))
    return decoded
# With encoded_sentences = [[1, 3, 4, 5], [1, 3, 6, 7], [1, 8, 3, 4, 9]],
# the decoded result printed below is produced.
print(get_decoded_sentences(encoded_sentences, index_to_word))
# -> ['i feel hungry', 'i eat lunch', 'now i feel happy']
# Pad every encoded sentence to the same length (maxlen=5) using the
# <PAD> index, appending the padding at the end ('post').
# BUG FIX: the original passed `raw_inputs` as the input before it was
# ever defined (NameError); the sentences to pad are `encoded_sentences`.
raw_inputs = tf.keras.preprocessing.sequence.pad_sequences(encoded_sentences,
                                                           value=word_to_index['<PAD>'],
                                                           padding='post',
                                                           maxlen=5)
print(raw_inputs)
# Reference: Prof. Sung-Hoon Kim's "Deep Learning for Everyone" course, lecture 12: RNN.
vocab_size = 10      # size of the vocabulary (10 words)
word_vector_dim = 4  # dimensionality of each word-embedding vector

# 1-D CNN text classifier: embedding -> two Conv1D/pooling stages ->
# global max pooling -> small dense head.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, word_vector_dim, input_shape=(None,)))
model.add(tf.keras.layers.Conv1D(16, 7, activation='relu'))
model.add(tf.keras.layers.MaxPooling1D(5))
model.add(tf.keras.layers.Conv1D(16, 7, activation='relu'))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(8, activation='relu'))
# Final output is a 1-dim positive/negative (sigmoid) score.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.summary()
Model: "sequential_2"
Layer (type)                 Output Shape              Param #
=================================================================
embedding_4 (Embedding)      (None, None, 4)           40
conv1d (Conv1D)              (None, None, 16)          464
max_pooling1d (MaxPooling1D) (None, None, 16)          0
conv1d_1 (Conv1D)            (None, None, 16)          1808
global_max_pooling1d (Global (None, 16)                0
dense_4 (Dense)              (None, 8)                 136
dense_5 (Dense)              (None, 1)                 9
=================================================================
Total params: 2,457
Trainable params: 2,457
Non-trainable params: 0
vocab_size = 10      # size of the vocabulary (10 words)
word_vector_dim = 4  # dimensionality of each word-embedding vector

# Simpler baseline classifier: embedding -> global max pooling ->
# small dense head.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, word_vector_dim, input_shape=(None,)))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(8, activation='relu'))
# Final output is a 1-dim positive/negative (sigmoid) score.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.summary()
Model: "sequential_3"
Layer (type)                 Output Shape              Param #
=================================================================
embedding_5 (Embedding)      (None, None, 4)           40
global_max_pooling1d_1 (Glob (None, 4)                 0
dense_6 (Dense)              (None, 8)                 40
dense_7 (Dense)              (None, 1)                 9
=================================================================
Total params: 89
Trainable params: 89
Non-trainable params: 0
# TODO: work through how each model's parameter counts above are derived.