Commit 73f0c98 by susanli2016, authored Jun 18, 2018 (1 parent: 438ee89)
Showing 1 changed file with 103 additions and 0 deletions.

project_tests.py (new file, +103 lines)

import numpy as np
from keras.losses import sparse_categorical_crossentropy
from keras.models import Sequential
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical


def _test_model(model, input_shape, output_sequence_length, french_vocab_size):
    """Check a compiled model's input shape, output shape, and loss function."""
    if isinstance(model, Sequential):
        # Older Keras Sequential objects wrap an inner functional Model;
        # unwrap it so the checks below see the underlying graph.
        model = model.model

    assert model.input_shape == (None, *input_shape[1:]),\
        'Wrong input shape. Found input shape {} using parameter input_shape={}'.format(model.input_shape, input_shape)

    assert model.output_shape == (None, output_sequence_length, french_vocab_size),\
        'Wrong output shape. Found output shape {} using parameters output_sequence_length={} and french_vocab_size={}'\
        .format(model.output_shape, output_sequence_length, french_vocab_size)

    assert len(model.loss_functions) > 0,\
        'No loss function set. Apply the `compile` function to the model.'

    assert sparse_categorical_crossentropy in model.loss_functions,\
        'Not using `sparse_categorical_crossentropy` function for loss.'


def test_tokenize(tokenize):
    sentences = [
        'The quick brown fox jumps over the lazy dog .',
        'By Jove , my quick study of lexicography won a prize .',
        'This is a short sentence .']
    tokenized_sentences, tokenizer = tokenize(sentences)
    assert tokenized_sentences == tokenizer.texts_to_sequences(sentences),\
        'The tokenized sentences returned do not match the output of the returned tokenizer\'s `texts_to_sequences`.'
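

# A minimal sketch of a tokenize() implementation that would pass
# test_tokenize. This is an illustrative assumption, not part of this
# commit; the project's own tokenize() may differ.
def _example_tokenize(x):
    # Fit a Keras Tokenizer on the sentences and return the
    # integer-encoded sequences along with the fitted tokenizer.
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(x)
    return tokenizer.texts_to_sequences(x), tokenizer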


def test_pad(pad):
    tokens = [
        [i for i in range(4)],
        [i for i in range(6)],
        [i for i in range(3)]]
    padded_tokens = pad(tokens)
    padding_id = padded_tokens[0][-1]
    true_padded_tokens = np.array([
        [i for i in range(4)] + [padding_id]*2,
        [i for i in range(6)],
        [i for i in range(3)] + [padding_id]*3])
    assert isinstance(padded_tokens, np.ndarray),\
        'Pad returned the wrong type. Found {} type, expected numpy array type.'.format(type(padded_tokens))
    assert np.all(padded_tokens == true_padded_tokens), 'Pad returned the wrong results.'

    padded_tokens_using_length = pad(tokens, 9)
    assert np.all(padded_tokens_using_length == np.concatenate((true_padded_tokens, np.full((3, 3), padding_id)), axis=1)),\
        'Using the length argument returned incorrect results.'
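

# A minimal sketch of a pad() implementation that would pass test_pad,
# assuming zero post-padding. This is an illustrative assumption, not
# part of this commit; the project's own pad() may differ.
def _example_pad(x, length=None):
    # Keeping the import local so the sketch stays self-contained.
    from keras.preprocessing.sequence import pad_sequences
    # Pad every sequence with trailing zeros to `length` tokens
    # (default: the length of the longest sequence in `x`).
    return pad_sequences(x, maxlen=length, padding='post')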


def test_simple_model(simple_model):
    input_shape = (137861, 21, 1)
    output_sequence_length = 21
    english_vocab_size = 199
    french_vocab_size = 344

    model = simple_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size)
    _test_model(model, input_shape, output_sequence_length, french_vocab_size)
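

# A minimal sketch of a simple_model() that would pass _test_model: a
# single GRU over the source sequence with a time-distributed softmax
# over the French vocabulary. It relies on the input and output sequence
# lengths being equal (both 21 here). Layer sizes and optimizer settings
# are illustrative assumptions, not part of this commit.
def _example_simple_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    from keras.layers import Dense, GRU, Input, TimeDistributed
    from keras.models import Model
    from keras.optimizers import Adam

    inputs = Input(shape=input_shape[1:])                     # (21, 1)
    rnn = GRU(64, return_sequences=True)(inputs)              # (21, 64)
    logits = TimeDistributed(Dense(french_vocab_size, activation='softmax'))(rnn)
    model = Model(inputs, logits)
    model.compile(loss=sparse_categorical_crossentropy, optimizer=Adam(0.001))
    return model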


def test_embed_model(embed_model):
    input_shape = (137861, 21)
    output_sequence_length = 21
    english_vocab_size = 199
    french_vocab_size = 344

    model = embed_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size)
    _test_model(model, input_shape, output_sequence_length, french_vocab_size)
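

# A minimal sketch of an embed_model() that would pass _test_model: an
# Embedding over the English word ids feeding a GRU, again ending in a
# time-distributed softmax. The embedding and GRU sizes are illustrative
# assumptions, not part of this commit.
def _example_embed_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    from keras.layers import Dense, Embedding, GRU, Input, TimeDistributed
    from keras.models import Model
    from keras.optimizers import Adam

    inputs = Input(shape=input_shape[1:])                     # (21,) word ids
    embedded = Embedding(english_vocab_size, 64)(inputs)      # (21, 64)
    rnn = GRU(64, return_sequences=True)(embedded)
    logits = TimeDistributed(Dense(french_vocab_size, activation='softmax'))(rnn)
    model = Model(inputs, logits)
    model.compile(loss=sparse_categorical_crossentropy, optimizer=Adam(0.001))
    return model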


def test_encdec_model(encdec_model):
    input_shape = (137861, 15, 1)
    output_sequence_length = 21
    english_vocab_size = 199
    french_vocab_size = 344

    model = encdec_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size)
    _test_model(model, input_shape, output_sequence_length, french_vocab_size)
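

# A minimal sketch of an encdec_model() that would pass _test_model: the
# encoder GRU compresses the 15-step source into a single state vector,
# which RepeatVector expands to the 21-step target length for the decoder
# GRU. Sizes are illustrative assumptions, not part of this commit.
def _example_encdec_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    from keras.layers import Dense, GRU, Input, RepeatVector, TimeDistributed
    from keras.models import Model
    from keras.optimizers import Adam

    inputs = Input(shape=input_shape[1:])                     # (15, 1)
    encoded = GRU(64)(inputs)                                 # (64,)
    repeated = RepeatVector(output_sequence_length)(encoded)  # (21, 64)
    decoded = GRU(64, return_sequences=True)(repeated)
    logits = TimeDistributed(Dense(french_vocab_size, activation='softmax'))(decoded)
    model = Model(inputs, logits)
    model.compile(loss=sparse_categorical_crossentropy, optimizer=Adam(0.001))
    return model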


def test_bd_model(bd_model):
    input_shape = (137861, 21, 1)
    output_sequence_length = 21
    english_vocab_size = 199
    french_vocab_size = 344

    model = bd_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size)
    _test_model(model, input_shape, output_sequence_length, french_vocab_size)
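

# A minimal sketch of a bd_model() that would pass _test_model: a
# bidirectional GRU, so each timestep sees both left and right context.
# Sizes are illustrative assumptions, not part of this commit.
def _example_bd_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    from keras.layers import Bidirectional, Dense, GRU, Input, TimeDistributed
    from keras.models import Model
    from keras.optimizers import Adam

    inputs = Input(shape=input_shape[1:])                        # (21, 1)
    rnn = Bidirectional(GRU(64, return_sequences=True))(inputs)  # (21, 128)
    logits = TimeDistributed(Dense(french_vocab_size, activation='softmax'))(rnn)
    model = Model(inputs, logits)
    model.compile(loss=sparse_categorical_crossentropy, optimizer=Adam(0.001))
    return model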


def test_model_final(model_final):
    input_shape = (137861, 15)
    output_sequence_length = 21
    english_vocab_size = 199
    french_vocab_size = 344

    model = model_final(input_shape, output_sequence_length, english_vocab_size, french_vocab_size)
    _test_model(model, input_shape, output_sequence_length, french_vocab_size)
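

# A minimal sketch of a model_final() that would pass _test_model,
# combining the pieces above: an embedding over word ids, an
# encoder-decoder bridge via RepeatVector to map 15 input steps to 21
# output steps, and a time-distributed softmax. Sizes are illustrative
# assumptions, not part of this commit.
def _example_model_final(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    from keras.layers import Dense, Embedding, GRU, Input, RepeatVector, TimeDistributed
    from keras.models import Model
    from keras.optimizers import Adam

    inputs = Input(shape=input_shape[1:])                     # (15,) word ids
    embedded = Embedding(english_vocab_size, 64)(inputs)      # (15, 64)
    encoded = GRU(64)(embedded)                               # (64,)
    repeated = RepeatVector(output_sequence_length)(encoded)  # (21, 64)
    decoded = GRU(64, return_sequences=True)(repeated)
    logits = TimeDistributed(Dense(french_vocab_size, activation='softmax'))(decoded)
    model = Model(inputs, logits)
    model.compile(loss=sparse_categorical_crossentropy, optimizer=Adam(0.001))
    return model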
