Advanced TensorFlow - A complete guide

Easy lookup of syntax

Amulya Reddy Konda
3 min read · Apr 18, 2020

Getting started

# Getting started
tf.data.Dataset
.from_tensor_slices
.element_spec
.TFRecordDataset
.list_files # [paths, ...] for data
.range
.zip # zip to bind x, y
.shuffle(buffer_size=100)
.repeat() or .repeat(3).shuffle()
.batch(5, drop_remainder=True)
# shuffle and repeat don't report the end of the dataset
.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=bs))
.batch
.prefetch
it = iter(batched_dataset)
for batch in batched_dataset.take(5): # first 5 batches
    print([arr.numpy() for arr in batch])
for elem in dataset: # iteration
    print(elem.numpy())
it = iter(dataset) # iteration
print(next(it).numpy())
tf.random.uniform(shape)
np.expand_dims # broadcasting
np.repeat
3D tensor - tensor[:,:,:]
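To see how these pieces chain together, here is a minimal sketch; the toy NumPy arrays and the buffer/batch sizes are made-up values for illustration:

import numpy as np
import tensorflow as tf

# Hypothetical toy data: 100 examples with 8 features each.
x = np.random.rand(100, 8).astype(np.float32)
y = np.random.randint(0, 2, size=100)

dataset = tf.data.Dataset.from_tensor_slices((x, y))  # bind features to labels
dataset = dataset.shuffle(buffer_size=100).repeat(3).batch(5, drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)  # overlap prep and compute

for features, labels in dataset.take(2):  # first 2 batches
    print(features.shape, labels.numpy())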

Padding

# Padding
dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x))
dataset.padded_batch(5, padded_shapes=(None,)) # batch size = 5
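A runnable version of the same idea, padding variable-length vectors to a common length per batch (the range of 8 is an arbitrary choice):

import tensorflow as tf

# Each element is a vector whose length equals its value: [], [1], [2, 2], ...
dataset = tf.data.Dataset.range(8)
dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x))

# padded_batch pads every element up to the longest element in its batch.
for batch in dataset.padded_batch(4, padded_shapes=(None,)):
    print(batch.numpy())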

Training Workflows

# Training Workflows
dataset.repeat(3).batch(128) # repeats the dataset thrice; batches may cross epoch boundaries
dataset.batch(128).repeat(3) # batches first, so each epoch ends with the same (possibly short) batch
for epoch in range(epochs):
    for batch in dataset.batch(128):
        print(batch.shape)
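The ordering matters; a small sketch makes the difference visible (range(5) and a batch size of 3 are arbitrary):

import tensorflow as tf

dataset = tf.data.Dataset.range(5)

# repeat().batch(): batches can span epoch boundaries.
print([b.numpy() for b in dataset.repeat(2).batch(3)])
# -> [0 1 2], [3 4 0], [1 2 3], [4]

# batch().repeat(): epochs stay separate; the short final batch repeats.
print([b.numpy() for b in dataset.batch(3).repeat(2)])
# -> [0 1 2], [3 4], [0 1 2], [3 4]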

Data normalization

# Data normalization
dataset.map(function)
tf.io.read_file(filename)
tf.image.decode_jpeg
.convert_image_dtype
.resize
img /= 255.0
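Put together as a map function; the path glob and the 192x192 target size below are assumptions for illustration:

import tensorflow as tf

def preprocess_image(path):
    img = tf.io.read_file(path)                 # raw bytes
    img = tf.image.decode_jpeg(img, channels=3) # uint8 HWC tensor
    img = tf.image.resize(img, [192, 192])      # float32 after resize
    img /= 255.0                                # scale to [0, 1]
    return img

paths = tf.data.Dataset.list_files("images/*.jpg")  # hypothetical glob
images = paths.map(preprocess_image,
                   num_parallel_calls=tf.data.experimental.AUTOTUNE)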

Keras API

# Keras API
tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])
.compile(optimizer='adam',
         loss=tf.keras.losses.SparseCategoricalCrossentropy(),
         metrics=['accuracy'])
.fit(dataset.repeat(), epochs=2, steps_per_epoch=20)
.evaluate
.predict
# labels can be passed in a predict dataset too, but they are ignored
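End to end, the same calls look like this; MNIST is an assumed example dataset:

import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # normalize pixels

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=2)
model.evaluate(x_test, y_test)
preds = model.predict(x_test[:5])  # no labels needed for predict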

Estimator API

# Estimator API
# pass input_fn & the framework takes care of consuming its elements
import tensorflow_datasets as tfds

def train_input_fn():
    dataset = tf.data.experimental.make_csv_dataset(
        file, batch_size=32, label_name='survived', na_value='?', num_epochs=2)
    dataset = (dataset.cache().repeat().shuffle(500)  # shuffle needs a buffer size
               .prefetch(tf.data.experimental.AUTOTUNE))
    return dataset

embark = tf.feature_column.categorical_column_with_hash_bucket('em_town', 32)
cls = tf.feature_column.categorical_column_with_vocabulary_list('class', ['first', 'second', 'third'])
age = tf.feature_column.numeric_column('age')
import tempfile
model_dir = tempfile.mkdtemp()
model = tf.estimator.LinearClassifier(
    model_dir=model_dir, feature_columns=[embark, cls, age], n_classes=2)
model.train(input_fn=train_input_fn, steps=100)
.evaluate(input_fn=train_input_fn, steps=10)
.predict(train_input_fn)
# packing all columns
def pack(features, label):
    return tf.stack(list(features.values()), axis=-1), label

dataset.map(pack)
classifier = tf.estimator.LinearClassifier
.DNNClassifier
.DNNLinearCombinedClassifier
# Premade Estimators
classifier.train(
    input_fn=lambda: input_fn(train, train_y, training=True), steps=5000)
classifier.evaluate(
    input_fn=lambda: input_fn(test, test_y, training=False))
classifier.predict(input_fn=lambda: input_fn(predict_x))
tf.feature_column.indicator_column(categorical_col)
tf.keras.applications.MobileNetV2(input_shape=(h, w, c), include_top=False)
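A compact premade-Estimator sketch tying these pieces together; the column names and hidden_units sizes are illustrative assumptions, and train_input_fn is the function defined above:

import tensorflow as tf

age = tf.feature_column.numeric_column('age')
cls = tf.feature_column.categorical_column_with_vocabulary_list(
    'class', ['first', 'second', 'third'])

classifier = tf.estimator.DNNClassifier(
    feature_columns=[age, tf.feature_column.indicator_column(cls)],
    hidden_units=[32, 16],  # illustrative layer sizes
    n_classes=2)

classifier.train(input_fn=train_input_fn, steps=100)
print(classifier.evaluate(input_fn=train_input_fn, steps=10))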

Text data

# Text data
tokenizer = tfds.features.text.Tokenizer()
vocab_set = set()
for text_tensor, _ in all_labelled_data:
    tokens = tokenizer.tokenize(text_tensor.numpy())
    vocab_set.update(tokens)
encoder = tfds.features.text.TokenTextEncoder(vocab_set)
encoder.encode(example_text)

def encode(text_tensor, label):
    encoded_text = encoder.encode(text_tensor.numpy())
    return encoded_text, label

def encode_map_fn(text, label):
    return tf.py_function(encode, inp=[text, label],
                          Tout=(tf.int64, tf.int64))

text_data.padded_batch(BATCH_SIZE, padded_shapes=([-1], []))
# Process unicode strings (text + emoji or Chinese text!)
tf.constant(u'')                         # string scalar
tf.constant(u''.encode('UTF-16-BE'))     # string scalar in another encoding
tf.constant([ord(char) for char in u'']) # int32 vector of code points
tf.strings.unicode_decode(text_utf8, input_encoding='UTF-8')
.unicode_encode(text_chars, output_encoding='UTF-8')
.unicode_transcode(text_utf8,
                   input_encoding='UTF8',
                   output_encoding='UTF-16-BE')
# .unicode_decode on multiple strings (e.g. u'Hello', u'Gøødnight', u'😊') returns a ragged tensor
# Convert a RaggedTensor with .to_tensor() / .to_sparse() and back
tf.strings.unicode_encode(
    tf.RaggedTensor.from_tensor(batch_chars_padded, padding=-1),
    output_encoding='UTF-8')
# pandas DataFrame cleanup
dataset.dropna()
col = dataset.pop('col_name')
# convert numerical to categorical if needed
# classes (0,1,2) => add three more 0/1 columns (one-hot)
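Here is a runnable sketch of the tokenize/encode pipeline above, using the tfds text API as it existed at the time of writing; the two sample sentences are made up:

import tensorflow as tf
import tensorflow_datasets as tfds

labels = tf.constant([0, 1], dtype=tf.int64)
texts = tf.data.Dataset.from_tensor_slices(
    (["a cat sat", "dogs run fast"], labels))

tokenizer = tfds.features.text.Tokenizer()
vocab_set = set()
for text_tensor, _ in texts:
    vocab_set.update(tokenizer.tokenize(text_tensor.numpy()))

encoder = tfds.features.text.TokenTextEncoder(vocab_set)

def encode(text_tensor, label):
    return encoder.encode(text_tensor.numpy()), label

def encode_map_fn(text, label):
    encoded, label = tf.py_function(
        encode, inp=[text, label], Tout=(tf.int64, tf.int64))
    encoded.set_shape([None])  # py_function drops shape info
    label.set_shape([])
    return encoded, label

batches = texts.map(encode_map_fn).padded_batch(2, padded_shapes=([-1], []))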

Visualization

sns.pairplot
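sns.pairplot gives a quick grid of pairwise scatter plots; a tiny example with an assumed DataFrame:

import pandas as pd
import seaborn as sns

# Hypothetical tabular data.
df = pd.DataFrame({'mpg': [18, 15, 36, 25],
                   'weight': [3504, 3693, 2130, 2670],
                   'horsepower': [130, 165, 69, 88]})
sns.pairplot(df, diag_kind='kde')  # scatter for pairs, KDE on the diagonal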

Early stopping

# Early stopping - when validation loss is increasing while training loss
# is decreasing, the model is overfitting; hence use early stopping
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
model.fit(..., callbacks=[early_stop, PrintDot()])

Feature columns

# Feature columns
from tensorflow import feature_column
col = feature_column.numeric_column('col_name')
feature_column.bucketized_column(col, boundaries=[20, 30, 40, 50, 80])
# it returns one-hot encoded values
'''
[[0,0,0,0,1],
 [0,0,0,1,0], ...]
'''
categ_col = feature_column.categorical_column_with_vocabulary_list(w, ['cl1', ''])
one_hot = feature_column.indicator_column(categ_col)
feature_column.embedding_column(categ_col, dimension=8) # w = column name
.categorical_column_with_hash_bucket(w, hash_bucket_size=1000)
.indicator_column(categ_col)
# Feature crossing - cross of two/more columns
crossed_feature = feature_column.crossed_column([col1, col2], hash_bucket_size=1000)
.indicator_column(crossed_feature)
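Feature columns plug into Keras through a DenseFeatures layer; the 'age' and 'thal' column names below are illustrative assumptions:

import tensorflow as tf
from tensorflow import feature_column

age = feature_column.numeric_column('age')
age_buckets = feature_column.bucketized_column(age, boundaries=[30, 40, 50])
thal = feature_column.categorical_column_with_vocabulary_list(
    'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)

feature_layer = tf.keras.layers.DenseFeatures([age_buckets, thal_one_hot])
example = {'age': tf.constant([[35], [62]]),
           'thal': tf.constant([['normal'], ['fixed']])}
print(feature_layer(example))  # bucketized + one-hot dense output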

Save and Restore models

# Save and Restore models
ckpt_path = "checkpoints/cp-{epoch:04d}.ckpt"
ckpt_dir = os.path.dirname(ckpt_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    ckpt_path, save_weights_only=True, verbose=1, period=5)
model.load_weights(ckpt_path)
.save_weights(ckpt_path.format(epoch=0))
.fit(..., callbacks=[cp_callback])
model.save('model.h5')
model = tf.keras.models.load_model('model.h5')
tf.train.latest_checkpoint(ckpt_dir) # latest_checkpoint expects the directory
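A minimal save/restore round trip; the tiny model and random data are stand-ins:

import os
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='adam', loss='mse')

ckpt_path = "checkpoints/cp-{epoch:04d}.ckpt"
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    ckpt_path, save_weights_only=True, verbose=1)

x = tf.random.uniform((32, 4))
y = tf.random.uniform((32, 1))
model.fit(x, y, epochs=2, callbacks=[cp_callback])

latest = tf.train.latest_checkpoint(os.path.dirname(ckpt_path))
model.load_weights(latest)  # restore the most recent weights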

Transfer Learning

# Transfer Learning
# Feature Generation / Feature Extraction
base_model.trainable = False # freeze base
new_l = tf.keras.layers.GlobalAveragePooling2D()
pred_layer = keras.layers.Dense(1) # classification head
tf.keras.Sequential([base_model, new_l, pred_layer])
# Fine Tuning
# Ex: have a cat-dog classifier but need to train it on machine parts
base_model.trainable = True
fine_tune_at = 100
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
fine_tune_epochs = 10
total_epochs = initial_epochs + fine_tune_epochs
model.fit(train_batches, epochs=total_epochs,
          initial_epoch=initial_epochs, validation_data=validation_batches)
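A minimal feature-extraction sketch on top of MobileNetV2; the 160x160 input size and the binary head are assumptions for illustration:

import tensorflow as tf

base_model = tf.keras.applications.MobileNetV2(
    input_shape=(160, 160, 3), include_top=False, weights='imagenet')
base_model.trainable = False  # freeze the convolutional base

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1),  # binary classification head (logit)
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])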

tf hub

# tf hub
import tensorflow_hub as hub
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub.KerasLayer(embedding, input_shape=[],
               dtype=tf.string, trainable=True)
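Dropped into a model, the hub layer maps raw strings straight to embeddings; the dense layer sizes below are illustrative choices:

import tensorflow as tf
import tensorflow_hub as hub

embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[],
                           dtype=tf.string, trainable=True)

model = tf.keras.Sequential([
    hub_layer,                                 # string -> 20-dim embedding
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1),                  # binary logit
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])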

Example

# Example (TF 1.x low-level API)
num_classes = 10

x = tf.placeholder(tf.float32, [None,img_size_flat])
y_true = tf.placeholder(tf.float32, [None,num_classes])
y_true_class = tf.placeholder(tf.int64, [None])

weights = tf.Variable(tf.zeros([img_size_flat,num_classes]))
biases = tf.Variable(tf.zeros([num_classes]))

logits = tf.matmul(x, weights) + biases
# logits = tf.contrib.layers.fully_connected(img_flat,62,tf.nn.relu)
# x = tf.nn.conv2d(x,W,strides=[1,strides,strides,1],padding='SAME')
# x = tf.nn.bias_add(x, b)
# x = tf.nn.relu(x)
# x = tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
# x = tf.layers.dropout(0.2)
# nn = tf.layers.dense(x, 3, activation=tf.nn.sigmoid)

y_pred = tf.nn.softmax(logits)

y_pred_class = tf.argmax(y_pred, axis=1)

cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y_true)
cost = tf.reduce_mean(cross_entropy_loss)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

correct_prediction = tf.equal(y_pred_class, y_true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(epochs):
        # _, loss_value = sess.run([train_op, loss], feed_dict={x: images28, y_true: labels})
        output = sess.run(y_pred_class, feed_dict={x: [np.zeros(img_size_flat)]})
    saver = tf.train.Saver()
    saver.save(sess, 'save_model/my-test-model')
    print(output)
