Advanced TensorFlow - A complete guide

Easy lookup of syntax

Amulya Reddy Konda
3 min read · Apr 18, 2020

Getting started

# Getting started
tf.data.Dataset
.from_tensor_slices
.element_spec
.TFRecordDataset
.list_files # [paths, ...] for data
.range
.zip # zip to bind x, y
.shuffle(buffer_size=100)
.repeat() or .repeat(3).shuffle()
.batch(5, drop_remainder=True)
# shuffle and repeat don't report the end of the dataset
.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=bs))
.batch
.prefetch
it = iter(batched_dataset)
for batch in batched_dataset.take(5): # first 5 batches
    print([arr.numpy() for arr in batch])
for elem in dataset: # iteration
    print(elem.numpy())
it = iter(dataset) # iteration
print(next(it).numpy())
tf.random.uniform(shape)
np.expand_dims # broadcasting
np.repeat
3D tensor - tensor[:,:,:]
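To see how these pieces chain together, here is a minimal sketch; the toy NumPy arrays and the buffer/batch sizes are made-up values for illustration:

import numpy as np
import tensorflow as tf

# Hypothetical toy data: 100 examples with 8 features each.
x = np.random.rand(100, 8).astype(np.float32)
y = np.random.randint(0, 2, size=100)

dataset = tf.data.Dataset.from_tensor_slices((x, y))  # bind features to labels
dataset = dataset.shuffle(buffer_size=100).repeat(3).batch(5, drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)  # overlap prep and compute

for features, labels in dataset.take(2):  # first 2 batches
    print(features.shape, labels.numpy())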

Padding

# Padding
dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x))
dataset.padded_batch(5, padded_shapes=(None,)) # batch size = 5
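A runnable version of the same idea, padding variable-length vectors to a common length per batch (the range of 8 is an arbitrary choice):

import tensorflow as tf

# Each element is a vector whose length equals its value: [], [1], [2, 2], ...
dataset = tf.data.Dataset.range(8)
dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x))

# padded_batch pads every element up to the longest element in its batch.
for batch in dataset.padded_batch(4, padded_shapes=(None,)):
    print(batch.numpy())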

Training Workflows

# Training Workflows
dataset.repeat(3).batch(128) # repeats the dataset thrice; batches may cross epoch boundaries
dataset.batch(128).repeat(3) # batches first, so each epoch ends with the same (possibly short) batch
for epoch in range(epochs):
    for batch in dataset.batch(128):
        print(batch.shape)
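The ordering matters; a small sketch makes the difference visible (range(5) and a batch size of 3 are arbitrary):

import tensorflow as tf

dataset = tf.data.Dataset.range(5)

# repeat().batch(): batches can span epoch boundaries.
print([b.numpy() for b in dataset.repeat(2).batch(3)])
# -> [0 1 2], [3 4 0], [1 2 3], [4]

# batch().repeat(): epochs stay separate; the short final batch repeats.
print([b.numpy() for b in dataset.batch(3).repeat(2)])
# -> [0 1 2], [3 4], [0 1 2], [3 4]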

Data normalization

# Data normalization
dataset.map(function)
tf.io.read_file(filename)
tf.image.decode_jpeg
.convert_image_dtype
.resize
img /= 255.0
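Put together as a map function; the path glob and the 192x192 target size below are assumptions for illustration:

import tensorflow as tf

def preprocess_image(path):
    img = tf.io.read_file(path)                 # raw bytes
    img = tf.image.decode_jpeg(img, channels=3) # uint8 HWC tensor
    img = tf.image.resize(img, [192, 192])      # float32 after resize
    img /= 255.0                                # scale to [0, 1]
    return img

paths = tf.data.Dataset.list_files("images/*.jpg")  # hypothetical glob
images = paths.map(preprocess_image,
                   num_parallel_calls=tf.data.experimental.AUTOTUNE)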

Keras API

# Keras API
tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])
.compile(optimizer='adam',
         loss=tf.keras.losses.SparseCategoricalCrossentropy(),
         metrics=['accuracy'])
.fit(dataset.repeat(), epochs=2, steps_per_epoch=20)
.evaluate
.predict
# labels can be passed in a predict dataset too, but they are ignored
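End to end, the same calls look like this; MNIST is an assumed example dataset:

import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # normalize pixels

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=2)
model.evaluate(x_test, y_test)
preds = model.predict(x_test[:5])  # no labels needed for predict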

Estimator API

# Estimator API
# pass input_fn & the framework takes care of consuming its elements
import tensorflow_datasets as tfds

def train_input_fn():
    dataset = tf.data.experimental.make_csv_dataset(
        file, batch_size=32, label_name='survived', na_value='?', num_epochs=2)
    dataset = (dataset.cache().repeat().shuffle(500)  # shuffle needs a buffer size
               .prefetch(tf.data.experimental.AUTOTUNE))
    return dataset

embark = tf.feature_column.categorical_column_with_hash_bucket('em_town', 32)
cls = tf.feature_column.categorical_column_with_vocabulary_list('class', ['first', 'second', 'third'])
age = tf.feature_column.numeric_column('age')
import tempfile
model_dir = tempfile.mkdtemp()
model = tf.estimator.LinearClassifier(
    model_dir=model_dir, feature_columns=[embark, cls, age], n_classes=2)
model.train(input_fn=train_input_fn, steps=100)
.evaluate(input_fn=train_input_fn, steps=10)
.predict(train_input_fn)
# packing all columns
def pack(features, label):
    return tf.stack(list(features.values()), axis=-1), label

dataset.map(pack)
classifier = tf.estimator.LinearClassifier
.DNNClassifier
.DNNLinearCombinedClassifier
# Premade Estimators
classifier.train(
    input_fn=lambda: input_fn(train, train_y, training=True), steps=5000)
classifier.evaluate(
    input_fn=lambda: input_fn(test, test_y, training=False))
classifier.predict(input_fn=lambda: input_fn(predict_x))
tf.feature_column.indicator_column(categorical_col)
tf.keras.applications.MobileNetV2(input_shape=(h, w, c), include_top=False)
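A compact premade-Estimator sketch tying these pieces together; the column names and hidden_units sizes are illustrative assumptions, and train_input_fn is the function defined above:

import tensorflow as tf

age = tf.feature_column.numeric_column('age')
cls = tf.feature_column.categorical_column_with_vocabulary_list(
    'class', ['first', 'second', 'third'])

classifier = tf.estimator.DNNClassifier(
    feature_columns=[age, tf.feature_column.indicator_column(cls)],
    hidden_units=[32, 16],  # illustrative layer sizes
    n_classes=2)

classifier.train(input_fn=train_input_fn, steps=100)
print(classifier.evaluate(input_fn=train_input_fn, steps=10))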

Text data

# Text data
tokenizer = tfds.features.text.Tokenizer()
vocab_set = set()
for text_tensor, _ in all_labelled_data:
    tokens = tokenizer.tokenize(text_tensor.numpy())
    vocab_set.update(tokens)
encoder = tfds.features.text.TokenTextEncoder(vocab_set)
encoder.encode(example_text)

def encode(text_tensor, label):
    encoded_text = encoder.encode(text_tensor.numpy())
    return encoded_text, label

def encode_map_fn(text, label):
    return tf.py_function(encode, inp=[text, label],
                          Tout=(tf.int64, tf.int64))

text_data.padded_batch(BATCH_SIZE, padded_shapes=([-1], []))
# Process unicode strings (text + emoji or Chinese text!)
tf.constant(u'')                         # string scalar
tf.constant(u''.encode('UTF-16-BE'))     # string scalar in another encoding
tf.constant([ord(char) for char in u'']) # int32 vector of code points
tf.strings.unicode_decode(text_utf8, input_encoding='UTF-8')
.unicode_encode(text_chars, output_encoding='UTF-8')
.unicode_transcode(text_utf8,
                   input_encoding='UTF8',
                   output_encoding='UTF-16-BE')
# .unicode_decode on multiple strings (e.g. u'Hello', u'Gøødnight', u'😊') returns a ragged tensor
# Convert a RaggedTensor with .to_tensor() / .to_sparse() and back
tf.strings.unicode_encode(
    tf.RaggedTensor.from_tensor(batch_chars_padded, padding=-1),
    output_encoding='UTF-8')
# pandas DataFrame cleanup
dataset.dropna()
col = dataset.pop('col_name')
# convert numerical to categorical if needed
# classes (0,1,2) => add three more 0/1 columns (one-hot)
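Here is a runnable sketch of the tokenize/encode pipeline above, using the tfds text API as it existed at the time of writing; the two sample sentences are made up:

import tensorflow as tf
import tensorflow_datasets as tfds

labels = tf.constant([0, 1], dtype=tf.int64)
texts = tf.data.Dataset.from_tensor_slices(
    (["a cat sat", "dogs run fast"], labels))

tokenizer = tfds.features.text.Tokenizer()
vocab_set = set()
for text_tensor, _ in texts:
    vocab_set.update(tokenizer.tokenize(text_tensor.numpy()))

encoder = tfds.features.text.TokenTextEncoder(vocab_set)

def encode(text_tensor, label):
    return encoder.encode(text_tensor.numpy()), label

def encode_map_fn(text, label):
    encoded, label = tf.py_function(
        encode, inp=[text, label], Tout=(tf.int64, tf.int64))
    encoded.set_shape([None])  # py_function drops shape info
    label.set_shape([])
    return encoded, label

batches = texts.map(encode_map_fn).padded_batch(2, padded_shapes=([-1], []))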

Visualization

sns.pairplot
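sns.pairplot gives a quick grid of pairwise scatter plots; a tiny example with an assumed DataFrame:

import pandas as pd
import seaborn as sns

# Hypothetical tabular data.
df = pd.DataFrame({'mpg': [18, 15, 36, 25],
                   'weight': [3504, 3693, 2130, 2670],
                   'horsepower': [130, 165, 69, 88]})
sns.pairplot(df, diag_kind='kde')  # scatter for pairs, KDE on the diagonal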

Early stopping

# Early stopping - when validation loss is increasing while training loss
# is decreasing, the model is overfitting; hence use early stopping
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
model.fit(..., callbacks=[early_stop, PrintDot()])

Feature columns

# Feature columns
from tensorflow import feature_column
col = feature_column.numeric_column('col_name')
feature_column.bucketized_column(col, boundaries=[20, 30, 40, 50, 80])
# it returns one-hot encoded values
'''
[[0,0,0,0,1],
 [0,0,0,1,0], ...]
'''
categ_col = feature_column.categorical_column_with_vocabulary_list(w, ['cl1', ''])
one_hot = feature_column.indicator_column(categ_col)
feature_column.embedding_column(categ_col, dimension=8) # w = column name
.categorical_column_with_hash_bucket(w, hash_bucket_size=1000)
.indicator_column(categ_col)
# Feature crossing - cross of two/more columns
crossed_feature = feature_column.crossed_column([col1, col2], hash_bucket_size=1000)
.indicator_column(crossed_feature)
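Feature columns plug into Keras through a DenseFeatures layer; the 'age' and 'thal' column names below are illustrative assumptions:

import tensorflow as tf
from tensorflow import feature_column

age = feature_column.numeric_column('age')
age_buckets = feature_column.bucketized_column(age, boundaries=[30, 40, 50])
thal = feature_column.categorical_column_with_vocabulary_list(
    'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)

feature_layer = tf.keras.layers.DenseFeatures([age_buckets, thal_one_hot])
example = {'age': tf.constant([[35], [62]]),
           'thal': tf.constant([['normal'], ['fixed']])}
print(feature_layer(example))  # bucketized + one-hot dense output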

Save and Restore models

# Save and Restore models
ckpt_path = "checkpoints/cp-{epoch:04d}.ckpt"
ckpt_dir = os.path.dirname(ckpt_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    ckpt_path, save_weights_only=True, verbose=1, period=5)
model.load_weights(ckpt_path)
.save_weights(ckpt_path.format(epoch=0))
.fit(..., callbacks=[cp_callback])
model.save('model.h5')
model = tf.keras.models.load_model('model.h5')
tf.train.latest_checkpoint(ckpt_dir) # latest_checkpoint expects the directory
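A minimal save/restore round trip; the tiny model and random data are stand-ins:

import os
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='adam', loss='mse')

ckpt_path = "checkpoints/cp-{epoch:04d}.ckpt"
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    ckpt_path, save_weights_only=True, verbose=1)

x = tf.random.uniform((32, 4))
y = tf.random.uniform((32, 1))
model.fit(x, y, epochs=2, callbacks=[cp_callback])

latest = tf.train.latest_checkpoint(os.path.dirname(ckpt_path))
model.load_weights(latest)  # restore the most recent weights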

Transfer Learning

# Transfer Learning
# Feature Generation / Feature Extraction
base_model.trainable = False # freeze base
new_l = tf.keras.layers.GlobalAveragePooling2D()
pred_layer = keras.layers.Dense(1) # classification head
tf.keras.Sequential([base_model, new_l, pred_layer])
# Fine Tuning
# Ex: have a cat-dog classifier but need to train it on machine parts
base_model.trainable = True
fine_tune_at = 100
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
fine_tune_epochs = 10
total_epochs = initial_epochs + fine_tune_epochs
model.fit(train_batches, epochs=total_epochs,
          initial_epoch=initial_epochs, validation_data=validation_batches)
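A minimal feature-extraction sketch on top of MobileNetV2; the 160x160 input size and the binary head are assumptions for illustration:

import tensorflow as tf

base_model = tf.keras.applications.MobileNetV2(
    input_shape=(160, 160, 3), include_top=False, weights='imagenet')
base_model.trainable = False  # freeze the convolutional base

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1),  # binary classification head (logit)
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])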

tf hub

# tf hub
import tensorflow_hub as hub
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub.KerasLayer(embedding, input_shape=[],
               dtype=tf.string, trainable=True)
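Dropped into a model, the hub layer maps raw strings straight to embeddings; the dense layer sizes below are illustrative choices:

import tensorflow as tf
import tensorflow_hub as hub

embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[],
                           dtype=tf.string, trainable=True)

model = tf.keras.Sequential([
    hub_layer,                                 # string -> 20-dim embedding
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1),                  # binary logit
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])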

Example

# Example (TF 1.x low-level API)
num_classes = 10

x = tf.placeholder(tf.float32, [None,img_size_flat])
y_true = tf.placeholder(tf.float32, [None,num_classes])
y_true_class = tf.placeholder(tf.int64, [None])

weights = tf.Variable(tf.zeros([img_size_flat,num_classes]))
biases = tf.Variable(tf.zeros([num_classes]))

logits = tf.matmul(x, weights) + biases
# logits = tf.contrib.layers.fully_connected(img_flat,62,tf.nn.relu)
# x = tf.nn.conv2d(x,W,strides=[1,strides,strides,1],padding='SAME')
# x = tf.nn.bias_add(x, b)
# x = tf.nn.relu(x)
# x = tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
# x = tf.layers.dropout(0.2)
# nn = tf.layers.dense(x, 3, activation=tf.nn.sigmoid)

y_pred = tf.nn.softmax(logits)

y_pred_class = tf.argmax(y_pred, axis=1)

cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y_true)
cost = tf.reduce_mean(cross_entropy_loss)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

correct_prediction = tf.equal(y_pred_class, y_true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(epochs):
        # _, loss_value = sess.run([train_op, loss], feed_dict={x: images28, y_true: labels})
        output = sess.run(y_pred_class, feed_dict={x: [np.zeros(img_size_flat)]})
    saver = tf.train.Saver()
    saver.save(sess, 'save_model/my-test-model')
    print(output)
