[1] Download the dataset
import requests

url = 'https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json'
filename = 'sarcasm.json'

# Bound the wait so a stalled connection cannot hang the script, and raise on
# HTTP error statuses so an error page is never saved to disk in place of the
# dataset (which would only surface later as a JSON parse failure).
response = requests.get(url, timeout=30)
response.raise_for_status()

# Binary mode: the payload is written exactly as received.
with open(filename, 'wb') as f:
    f.write(response.content)
import json

# Parse the dataset file from the current working directory.
with open('./sarcasm.json', 'r') as f:
    dataset = json.load(f)

# Build two parallel lists from the records: the headline text and its
# sarcasm label, in the order the records appear in the file.
sentences = [record['headline'] for record in dataset]
labels = [record['is_sarcastic'] for record in dataset]
[2] Split the dataset
# The first `training_size` examples are used for training; everything after
# that index is held out for testing. No shuffling is performed here.
training_size = 20000

training_sentences, testing_sentences = (
    sentences[:training_size],
    sentences[training_size:],
)
training_labels, testing_labels = (
    labels[:training_size],
    labels[training_size:],
)
[3] Data Preprocessing
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# The vocabulary is fitted on the training sentences only; the test sentences
# are encoded with that same tokenizer, using '<OOV>' as the out-of-vocabulary
# token.
tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Both splits share the same padding settings: pad and truncate at the end of
# each sequence, to a fixed length of 120.
pad_kwargs = dict(padding='post', truncating='post', maxlen=120)

train_sequences = tokenizer.texts_to_sequences(training_sentences)
test_sequences = tokenizer.texts_to_sequences(testing_sentences)

padded_train = pad_sequences(train_sequences, **pad_kwargs)
padded_test = pad_sequences(test_sequences, **pad_kwargs)

# Convert the label lists to NumPy arrays for training.
train_labels = np.array(training_labels)
test_labels = np.array(testing_labels)
[4] Build and Compile the model
import tensorflow as tf

# Stack the layers one at a time, in the same order as a list-constructed
# Sequential model: embedding -> 1D convolution -> global max pooling ->
# small dense layer -> single sigmoid output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(10000, 16, input_length=120))
model.add(tf.keras.layers.Conv1D(filters=128, kernel_size=5, activation='relu'))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# A single sigmoid output is paired with binary cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()
[5] Train the model
# Fit for 10 epochs, evaluating on the held-out split after each epoch; the
# returned object is kept so its per-epoch metrics can be plotted.
history = model.fit(
    padded_train,
    train_labels,
    validation_data=(padded_test, test_labels),
    epochs=10,
)
import matplotlib.pyplot as plt
# Plot Utility
def plot_graphs(history, string):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences.
        string: Name of the metric to plot. Both ``string`` and
            ``'val_' + string`` are looked up in ``history.history``.
    """
    series_names = [string, 'val_'+string]

    # Draw the training curve first, then the validation curve, so the legend
    # entries below line up with the plotted lines.
    for series in series_names:
        plt.plot(history.history[series])

    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend(series_names)
    plt.show()
# Show one figure per tracked metric: accuracy first, then loss.
for metric in ('accuracy', 'loss'):
    plot_graphs(history, metric)