# skip_thoughts/train.py

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Changes by Babylon Partners
#   - Added
#       --run_dir
#       --pretrained_word_emb_file
#       --word_emb_trainable
#       --skipgram_encoder
#       --decoder
#       --normalise_decoder_losses
#       --skipgram_prefactor
#       --sequence_prefactor
# ==============================================================================
"""Train the skip-thoughts model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cPickle
import json
import os
import time

import tensorflow as tf

from skip_thoughts import configuration
from skip_thoughts import experiments
from skip_thoughts import skip_thoughts_model

# Command-line flags. Adjacent string literals in the help texts are
# concatenated verbatim, so every fragment that is followed by another one
# must end with an explicit separator (space or punctuation).
FLAGS = tf.flags.FLAGS

# Data
tf.flags.DEFINE_string("input_file_pattern", None,
                       "File pattern of sharded TFRecord files containing "
                       "tf.Example protos.")

# Training dir
tf.flags.DEFINE_string("run_dir", None,
                       "Directory where all of the runs are.")
tf.flags.DEFINE_string("train_dir", None,
                       "Directory for training. Overwrites autogenerated "
                       "train_dir.")

# Vocabulary config
tf.flags.DEFINE_integer("vocab_size", 20000,
                        "Size of the vocabulary")


# Word embedding config
tf.flags.DEFINE_integer("word_dim", 620,
                        "Dimensionality of the word embeddings")
tf.flags.DEFINE_string("pretrained_word_emb_file", None,
                       "File containing pre-trained word embeddings, "
                       "such as word2vec")
tf.flags.DEFINE_bool("word_emb_trainable", False,
                     "Whether pre-trained word embeddings are "
                     "jointly trainable with the model.")

# Encoder config
tf.flags.DEFINE_integer("encoder_dim", 2400,
                        "The number of units to use in encoder and decoder "
                        "rnn cells.")
tf.flags.DEFINE_bool("skipgram_encoder", False,
                     "Whether to use a skipgram encoder (sum of embeddings) "
                     "instead of sequence encoder (RNN)")

# Decoder config
tf.flags.DEFINE_string("decoder", None,
                       "Decoder specification in SEQxSKGy format, "
                       "where x, y can be 0, 1, 2, and 3. "
                       "SEQ stands for sequence (recurrent) decoder "
                       "and SKG stands for bag-of-words (BOW) decoder. "
                       "0 - no decoder of this type is present; "
                       "1 - decoder for the current sentence (Autoencoder); "
                       "2 - decoders for the previous and next sentences "
                       "(Skip-Thought/FastSent style); "
                       "3 - decoders for previous, current, and next sentences "
                       "(Skip-Thought + Autoencoder). "
                       "Note that it is possible to combine SEQ and SKG.")
tf.flags.DEFINE_bool("share_weights_logits", True,
                     "Whether to tie the weights in the output layer of the "
                     "decoder")
tf.flags.DEFINE_bool("normalise_decoder_losses", False,
                     "Whether to normalise the losses of the decoders. If "
                     "True, this divides each sequence loss by the number "
                     "of sequence decoders, and the skipgram "
                     "decoder losses by the number of skipgram decoders.")
tf.flags.DEFINE_float("skipgram_prefactor", 1.,
                      "Constant to multiply each skipgram loss with.")
tf.flags.DEFINE_float("sequence_prefactor", 1.,
                      "Constant to multiply each SEQ loss with.")

# Training config
tf.flags.DEFINE_integer("number_of_steps", 500000,
                        "The number of steps to take.")
tf.flags.DEFINE_float("gpu_fraction", 1.0,
                      "What fraction of the gpu to use")
tf.flags.DEFINE_integer("batch_size", 128,
                        "Batch size")

tf.logging.set_verbosity(tf.logging.INFO)


def _setup_learning_rate(config, global_step):
  """Builds the learning-rate tensor used by the optimizer.

  When `config.learning_rate_decay_factor` is greater than zero the rate
  decays exponentially (non-staircase) with the global step; otherwise a
  constant tensor holding `config.learning_rate` is used.

  Args:
    config: Object exposing `learning_rate`, `learning_rate_decay_factor`
      and `learning_rate_decay_steps`.
    global_step: Tensor; the global step.

  Returns:
    Tensor; the (possibly decaying) learning rate.
  """
  decay_factor = config.learning_rate_decay_factor

  # Guard clause: no positive decay factor means a fixed learning rate.
  if not decay_factor > 0:
    return tf.constant(config.learning_rate)

  return tf.train.exponential_decay(
    learning_rate=float(config.learning_rate),
    global_step=global_step,
    decay_steps=config.learning_rate_decay_steps,
    decay_rate=decay_factor,
    staircase=False)


def write_config(train_dir, flags):
  """Dumps the flags of a run into `train_dir`.

  Three files are written:
    flags.pkl: pickle of the `flags` object itself.
    config.pkl: pickle of `flags.__flags`.
    config.json: JSON dump of `flags.__flags`.

  Args:
    train_dir: Path of an existing directory to write the files into.
    flags: Flags object. It must be picklable and expose a `__flags`
      attribute that is both picklable and JSON-serialisable.

  Returns:
    Always -1. The value carries no meaning; it is kept only so that
    existing callers observe the same return value as before.
  """
  flags_path = os.path.join(train_dir, 'flags.pkl')
  configs_pkl_path = os.path.join(train_dir, 'config.pkl')
  configs_json_path = os.path.join(train_dir, 'config.json')

  # No name mangling happens here: this is a module-level function, so
  # `flags.__flags` is a plain attribute lookup.
  flag_values = flags.__flags

  # Pickle streams must be written in binary mode; text mode may translate
  # newlines on some platforms and yield files that cannot be loaded back.
  tf.logging.info("Writing out flags to {p}.".format(
    p=flags_path))
  with open(flags_path, 'wb') as f:
    cPickle.dump(flags, f)

  tf.logging.info("Writing out config dict to {p}.".format(
    p=configs_pkl_path))
  with open(configs_pkl_path, 'wb') as f:
    cPickle.dump(flag_values, f)

  tf.logging.info("Writing out config json to {p}.".format(
    p=configs_json_path))
  with open(configs_json_path, 'w') as f:
    json.dump(flag_values, f)

  return -1


def main(unused_argv):
  """Builds the skip-thoughts training graph from FLAGS and trains it.

  If --train_dir is not given, a fresh `run_<timestamp>` directory is created
  under --run_dir and the flags are written into it; otherwise the given
  directory is used as-is and no flags are written.

  Args:
    unused_argv: Ignored.

  Raises:
    ValueError: If --input_file_pattern, --run_dir or --decoder is unset
      or empty.
  """
  # Required flags, checked in this order.
  required_flags = (
    ("input_file_pattern", "--input_file_pattern is required."),
    ("run_dir", "--run_dir is required."),
    ("decoder", "--decoder is required."),
  )
  for flag_name, error_message in required_flags:
    if not getattr(FLAGS, flag_name):
      raise ValueError(error_message)

  # Resolve the directory that checkpoints and summaries go to.
  train_dir = FLAGS.train_dir
  if train_dir:
    tf.logging.info("Specified --train_dir {d}; Not autocreating.".format(
      d=train_dir))
  else:
    run_name = 'run_{t}'.format(t=time.time())
    train_dir = os.path.join(FLAGS.run_dir, run_name)
    tf.logging.info("No specified --train_dir. Creating {d}.".format(
      d=train_dir))
    os.makedirs(train_dir)

    write_config(train_dir=train_dir, flags=FLAGS)

  # Assemble the model configuration from the flags and the decoder spec.
  decoder_config = experiments.get_decoder_config(flags=FLAGS)
  model_kwargs = {
    "input_file_pattern": FLAGS.input_file_pattern,
    "vocab_size": FLAGS.vocab_size,
    "batch_size": FLAGS.batch_size,
    "word_embedding_dim": FLAGS.word_dim,
    "pretrained_word_emb_file": FLAGS.pretrained_word_emb_file,
    "word_emb_trainable": FLAGS.word_emb_trainable,
    "encoder_dim": FLAGS.encoder_dim,
    "skipgram_encoder": FLAGS.skipgram_encoder,
    "share_weights_logits": FLAGS.share_weights_logits,
    "normalise_decoder_losses": FLAGS.normalise_decoder_losses,
    "skipgram_prefactor": FLAGS.skipgram_prefactor,
    "sequence_prefactor": FLAGS.sequence_prefactor,
  }
  # The decoder switches are forwarded under the same names they carry on
  # the decoder config object.
  for decoder_field in ("sequence_decoder_pre",
                        "sequence_decoder_cur",
                        "sequence_decoder_post",
                        "skipgram_decoder_pre",
                        "skipgram_decoder_cur",
                        "skipgram_decoder_post"):
    model_kwargs[decoder_field] = getattr(decoder_config, decoder_field)
  model_config = configuration.model_config(**model_kwargs)

  training_config = configuration.training_config(
    number_of_steps=FLAGS.number_of_steps)

  tf.logging.info("Building training graph.")
  slim_learning = tf.contrib.slim.learning
  graph = tf.Graph()
  with graph.as_default():
    tf.set_random_seed(1234)
    model = skip_thoughts_model.SkipThoughtsModel(
      model_config, mode="train")
    model.build()

    optimizer = tf.train.AdamOptimizer(
      _setup_learning_rate(training_config, model.global_step))

    train_op = slim_learning.create_train_op(
      total_loss=model.total_loss,
      optimizer=optimizer,
      global_step=model.global_step,
      clip_gradient_norm=training_config.clip_gradient_norm,
      summarize_gradients=True,
      check_numerics=True)

    saver = tf.train.Saver()

  # Cap the share of GPU memory this process may claim.
  session_config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(
      per_process_gpu_memory_fraction=FLAGS.gpu_fraction))

  slim_learning.train(
    train_op=train_op,
    logdir=train_dir,
    graph=graph,
    global_step=model.global_step,
    number_of_steps=training_config.number_of_steps,
    session_config=session_config,
    save_summaries_secs=training_config.save_summaries_secs,
    saver=saver,
    save_interval_secs=training_config.save_model_secs)


if __name__ == "__main__":
  # Script entry point: hand `main` to the TensorFlow app runner explicitly
  # (the runner falls back to `__main__.main` when no function is passed).
  tf.app.run(main=main)