TensorFlow | Stock Price Prediction With TensorFlow Estimator

The code snippets in this post are written for TensorFlow 2.0. This post is intended to provide information on TensorFlow canned (pre-made) estimators; using this model in live trading is not advised.

TensorFlow provides many pre-made estimators that can be used for model training, evaluation and inference. In this post we will use DNNRegressor to predict the stock close price.

  • Steps for end to end model training, evaluation and prediction with TensorFlow pre-made estimators
    • Get the data available in CSV
    • Split data in train and evaluation sets
    • Create dataset with data available in CSV and define features and label
    • Create train and evaluate function
    • Create Serving input function for prediction
    • Define Run Config
    • Define model
    • Define Train and Eval spec
    • Create Exporter
    • Execute train_and_evaluate
    • Load the trained model for prediction and get predictions

  • Download the training and evaluation CSV files from the links below
  • Create dataset with data available in CSV and define features and label. Column name "close_price" is the label that our model will predict after training. Other columns are features.
  • 
    # Column order of every CSV record; "close_price" is the label, the rest are features.
    CSV_COLUMN_NAMES = [
        'close_price',
        'open_price',
        'prev_close',
        'prev_low',
        'prev_high',
        'prev_wap',
    ]
    # One float default per column, passed to tf.io.decode_csv as record_defaults.
    CSV_DEFAULTS = [[280.0] for _ in CSV_COLUMN_NAMES]
    
    # Turns a single CSV record into a (features, label) pair
    def parse_row(row):
        """Decode one CSV line and split it into a feature dict and the label.

        The decoded columns are keyed by CSV_COLUMN_NAMES; the "close_price"
        entry is removed from the dict and returned separately as the label.
        """
        decoded = tf.io.decode_csv(records=row, record_defaults=CSV_DEFAULTS)
        by_name = dict(zip(CSV_COLUMN_NAMES, decoded))
        target = by_name.pop("close_price")
        return by_name, target
    
    # Builds a dataset of (features, label) pairs from a CSV file
    def read_dataset(csv_path):
        """Read csv_path line by line, skip the header row and parse each record."""
        lines = tf.data.TextLineDataset(filenames=csv_path)
        records = lines.skip(count=1)  # the first line is the header
        return records.map(map_func=parse_row)
    
    

  • Create train and evaluate function
  • 
    def train_input_fn(csv_path, batch_size = 64):
        """Return the training dataset: shuffled, repeated and batched.

        Records are shuffled with a 1000-element buffer and repeated with
        count=None (no fixed number of epochs), then grouped into batches.
        """
        records = read_dataset(csv_path)
        shuffled = records.shuffle(buffer_size=1000)
        repeated = shuffled.repeat(count=None)
        return repeated.batch(batch_size=batch_size)
    
    # ".repeat" and ".shuffle" is not required for evaluation
    def eval_input_fn(csv_path, batch_size = 64):
        """Return the evaluation dataset: a single batched pass, no shuffle or repeat."""
        return read_dataset(csv_path).batch(batch_size=batch_size)
    
    

  • Create Serving input function for prediction and create feature columns. Serving input function is required for getting prediction with exported model. Exported model would be in ".pb" format.
  • 
    # Model inputs: every CSV column except the "close_price" label.
    FEATURE_NAMES = [
        'open_price',
        'prev_close',
        'prev_low',
        'prev_high',
        'prev_wap',
    ]
    # Declares the type of each feature; here all of them are numeric.
    feature_cols = [
        tf.feature_column.numeric_column(key=feature_name)
        for feature_name in FEATURE_NAMES
    ]
    print(feature_cols)
    
    def serving_input_receiver_fn():
        """Describe the inputs that the exported model accepts at prediction time.

        One float32 placeholder of shape [None] is created per feature, and the
        same dict is used both as receiver tensors and as model features.
        """
        names = ('open_price', 'prev_close', 'prev_low', 'prev_high', 'prev_wap')
        placeholders = {
            name: tf.compat.v1.placeholder(dtype=tf.float32, shape=[None])
            for name in names
        }
        return tf.estimator.export.ServingInputReceiver(
            features=placeholders,
            receiver_tensors=placeholders,
        )
    
    

  • Define Run Config
  • 
    # Checkpoints and the exported model are written under this directory
    OUTDIR = "trained"
    # Run config: fixed random seed, and a checkpoint saved every 100 steps
    config = tf.estimator.RunConfig(model_dir=OUTDIR, tf_random_seed=1, save_checkpoints_steps=100)
    
    

  • Define model
  • 
    # DNNRegressor with three hidden layers of 64 neurons each;
    # batch normalization and dropout are both switched off.
    model = tf.estimator.DNNRegressor(
        feature_columns=feature_cols,
        hidden_units=[64, 64, 64],
        config=config,
        dropout=None,
        batch_norm=False,
    )
    
    

  • Define Train and Eval spec
  • 
    # Training spec: reads the training CSV and stops after 500 steps
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: train_input_fn("./data_train.csv"), max_steps=500)
    
    # The exporter has to exist before EvalSpec is built; otherwise the
    # `exporters = exporter` argument below fails with a NameError.
    # It saves the final model inside the "exporter" directory.
    exporter = tf.estimator.FinalExporter(name = "exporter",
      serving_input_receiver_fn = serving_input_receiver_fn)
    # provide path for evaluation csv
    eval_spec = tf.estimator.EvalSpec(
        input_fn = lambda: eval_input_fn("./data_eval.csv"),
        steps = None,          # no step limit: evaluate the whole evaluation dataset
        start_delay_secs = 1,  # wait 1 second before the first evaluation
        throttle_secs = 1,     # leave at least 1 second between evaluations
        exporters = exporter,
    )
    
    
    

  • Create Exporter
  • 
    # Exports the final model as a SavedModel inside the "exporter" directory
    exporter = tf.estimator.FinalExporter(
        name="exporter",
        serving_input_receiver_fn=serving_input_receiver_fn,
    )
    
    

  • Execute train_and_evaluate
  • 
    # Run training and evaluation with the model and the specs defined above
    tf.estimator.train_and_evaluate(estimator=model, train_spec=train_spec, eval_spec=eval_spec)
    
    

  • Load the trained model for prediction and get predictions. Save the below code in "get_predictions.py" in current directory and execute the code
  • 
    
    import tensorflow as tf 
    # Replace {dir_name} with the folder created inside ./trained/export/exporter
    loaded = tf.saved_model.load("./trained/export/exporter/{dir_name}")
    infer = loaded.signatures["predict"]
    # Comma-separated feature values, in the order
    # open_price, prev_close, prev_low, prev_high, prev_wap,
    # for which the label "close_price" is predicted.
    x = "265.0,265.35,265.0,272.0,268.1108707587025"
    # Convert every field from text to float
    x = [float(field) for field in x.split(',')]
    print(x[0], x[1], x[2], x[3], x[4])
    predicted_value = infer(open_price=tf.constant(x[0], dtype=tf.float32),
                prev_close=tf.constant(x[1], dtype=tf.float32),
                prev_low=tf.constant(x[2], dtype=tf.float32),
                prev_high=tf.constant(x[3], dtype=tf.float32),
                prev_wap=tf.constant(x[4], dtype=tf.float32))
    print(predicted_value)
    
    
    

  • Complete code for training and evaluation
  •  
    
    import tensorflow as tf
    
    print(tf.__version__)
    # Column order of every CSV record; "close_price" is the label column.
    CSV_COLUMN_NAMES = [
        'close_price',
        'open_price',
        'prev_close',
        'prev_low',
        'prev_high',
        'prev_wap',
    ]
    # One float default per column, used as record_defaults when decoding.
    CSV_DEFAULTS = [[280.0] for _ in CSV_COLUMN_NAMES]
    
    def parse_row(row):
        """Decode one CSV line into (features dict, label).

        Columns are keyed by CSV_COLUMN_NAMES; "close_price" is taken out of
        the dict and returned as the label.
        """
        values = tf.io.decode_csv(records=row, record_defaults=CSV_DEFAULTS)
        record = dict(zip(CSV_COLUMN_NAMES, values))
        close_price = record.pop("close_price")
        return record, close_price
    
    
    def read_dataset(csv_path):
        """Return a dataset of parsed (features, label) records from csv_path."""
        text_lines = tf.data.TextLineDataset(filenames=csv_path)
        without_header = text_lines.skip(count=1)  # drop the header line
        return without_header.map(map_func=parse_row)
    
    
    def train_input_fn(csv_path, batch_size = 64):
        """Training input: shuffle (buffer of 1000), repeat with count=None, then batch."""
        training_data = read_dataset(csv_path)
        training_data = training_data.shuffle(buffer_size=1000)
        training_data = training_data.repeat(count=None)
        training_data = training_data.batch(batch_size=batch_size)
        return training_data
    
    def eval_input_fn(csv_path, batch_size = 64):
        """Evaluation input: the parsed records batched once, without shuffle or repeat."""
        evaluation_data = read_dataset(csv_path)
        return evaluation_data.batch(batch_size=batch_size)
    
    
    # Names of the model inputs (all CSV columns except the label)
    FEATURE_NAMES = [
        'open_price',
        'prev_close',
        'prev_low',
        'prev_high',
        'prev_wap',
    ]
    # Every feature is numeric, so each gets a numeric feature column
    feature_cols = [
        tf.feature_column.numeric_column(key=column_name)
        for column_name in FEATURE_NAMES
    ]
    print(feature_cols)
    
    def serving_input_receiver_fn():
        """Build the serving input receiver for the exported model.

        Creates one float32 placeholder of shape [None] per feature and passes
        the same dict as both the features and the receiver tensors.
        """
        inputs = {}
        for name in ('open_price', 'prev_close', 'prev_low', 'prev_high', 'prev_wap'):
            inputs[name] = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None])
        return tf.estimator.export.ServingInputReceiver(features=inputs, receiver_tensors=inputs)
    
    
    
    # Output directory passed to the estimator as model_dir
    OUTDIR = "trained"
    # Fixed random seed; a checkpoint is saved every 100 steps
    config = tf.estimator.RunConfig(model_dir=OUTDIR, tf_random_seed=1, save_checkpoints_steps=100)
    
    # DNNRegressor with three hidden layers of 64 neurons each, matching the
    # model described in the step-by-step walkthrough (this listing previously
    # used [64, 128, 64]). Batch normalization and dropout are disabled.
    model = tf.estimator.DNNRegressor(
      hidden_units = [64, 64, 64],
      feature_columns = feature_cols,
      config = config,
      batch_norm=False,
      dropout=None
    )
    print(model)
    
    # Training spec: input comes from the training CSV, training stops at step 500
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: train_input_fn("./data_train.csv"), max_steps=500)
    
    # Exports the final model under the "exporter" name, using the serving input function
    exporter = tf.estimator.FinalExporter(
        name="exporter",
        serving_input_receiver_fn=serving_input_receiver_fn,
    )
    
    
    # Evaluation spec: input comes from the evaluation CSV, no step limit,
    # 1-second start delay and throttle, and the exporter defined above is attached.
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: eval_input_fn("./data_eval.csv"),
        exporters=exporter,
        steps=None,
        start_delay_secs=1,
        throttle_secs=1,
    )
    
    
    
    
    # Train, evaluate and export, then print whatever train_and_evaluate returns
    print(tf.estimator.train_and_evaluate(estimator=model, train_spec=train_spec, eval_spec=eval_spec))
    
    
    
      
      

  • Complete code for prediction
  • 
    
    import tensorflow as tf 
    # Replace the last path component with the folder created inside ./trained/export/exporter
    loaded = tf.saved_model.load("./trained/export/exporter/1582614351")
    infer = loaded.signatures["predict"]
    # Feature values (open_price, prev_close, prev_low, prev_high, prev_wap)
    # for which the label "close_price" is predicted
    x = "265.0,265.35,265.0,272.0,268.1108707587025"
    # Convert every comma-separated field to float
    x = [float(field) for field in x.split(',')]
    print(x[0], x[1], x[2], x[3], x[4])
    # Each value is passed as a float32 constant under its feature name
    predicted_value = infer(
        open_price=tf.constant(x[0], dtype=tf.float32),
        prev_close=tf.constant(x[1], dtype=tf.float32),
        prev_low=tf.constant(x[2], dtype=tf.float32),
        prev_high=tf.constant(x[3], dtype=tf.float32),
        prev_wap=tf.constant(x[4], dtype=tf.float32),
    )
    print(predicted_value)
    
    

    Category: TensorFlow