diff --git a/In_Sample_Out_Sample.ipynb b/In_Sample_Out_Sample.ipynb
new file mode 100644
index 0000000..6ce9706
--- /dev/null
+++ b/In_Sample_Out_Sample.ipynb
@@ -0,0 +1,942 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "# In-Sample Evaluation and Out-of-Sample Evaluation"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "In-sample: the data you already have and use to fit the model.  \n",
+ "Out-of-sample: the data you do not have yet and want to forecast or estimate.  \n",
+ "\n",
+ "Evaluation is a way to numerically determine how well the model fits a dataset.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Two important measures to determine the fit of a model: \n",
+ "Mean Squared Error (MSE) \n",
+ "R-squared (R^2)"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import warnings\n",
+ "warnings.filterwarnings(\"ignore\")\n",
+ "\n",
+ "# fix_yahoo_finance is used to fetch data \n",
+ "import fix_yahoo_finance as yf\n",
+ "yf.pdr_override()"
+ ],
+ "outputs": [],
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Inputs: ticker and date range for the price history\n",
+ "symbol, start, end = 'AMD', '2014-01-01', '2018-08-27'\n",
+ "\n",
+ "# Fetch the price history for the ticker over the requested range\n",
+ "dataset = yf.download(symbol, start=start, end=end)\n",
+ "\n",
+ "# Preview the first rows and the available columns\n",
+ "dataset.head()"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[*********************100%***********************] 1 of 1 downloaded\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "execution_count": 2,
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Open | \n",
+ " High | \n",
+ " Low | \n",
+ " Close | \n",
+ " Adj Close | \n",
+ " Volume | \n",
+ "
\n",
+ " \n",
+ " Date | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2014-01-02 | \n",
+ " 3.85 | \n",
+ " 3.98 | \n",
+ " 3.84 | \n",
+ " 3.95 | \n",
+ " 3.95 | \n",
+ " 20548400 | \n",
+ "
\n",
+ " \n",
+ " 2014-01-03 | \n",
+ " 3.98 | \n",
+ " 4.00 | \n",
+ " 3.88 | \n",
+ " 4.00 | \n",
+ " 4.00 | \n",
+ " 22887200 | \n",
+ "
\n",
+ " \n",
+ " 2014-01-06 | \n",
+ " 4.01 | \n",
+ " 4.18 | \n",
+ " 3.99 | \n",
+ " 4.13 | \n",
+ " 4.13 | \n",
+ " 42398300 | \n",
+ "
\n",
+ " \n",
+ " 2014-01-07 | \n",
+ " 4.19 | \n",
+ " 4.25 | \n",
+ " 4.11 | \n",
+ " 4.18 | \n",
+ " 4.18 | \n",
+ " 42932100 | \n",
+ "
\n",
+ " \n",
+ " 2014-01-08 | \n",
+ " 4.23 | \n",
+ " 4.26 | \n",
+ " 4.14 | \n",
+ " 4.18 | \n",
+ " 4.18 | \n",
+ " 30678700 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Open High Low Close Adj Close Volume\n",
+ "Date \n",
+ "2014-01-02 3.85 3.98 3.84 3.95 3.95 20548400\n",
+ "2014-01-03 3.98 4.00 3.88 4.00 4.00 22887200\n",
+ "2014-01-06 4.01 4.18 3.99 4.13 4.13 42398300\n",
+ "2014-01-07 4.19 4.25 4.11 4.18 4.18 42932100\n",
+ "2014-01-08 4.23 4.26 4.14 4.18 4.18 30678700"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "dataset.shape"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 3,
+ "data": {
+ "text/plain": [
+ "(1172, 6)"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Build (n_samples, 1) column vectors for scikit-learn.\n",
+ "# reshape(-1, 1) lets NumPy infer the row count, so this cell keeps working\n",
+ "# when the date range (and therefore the number of rows) changes.\n",
+ "X = np.array(dataset['Open']).reshape(-1, 1)\n",
+ "Y = np.array(dataset['Adj Close']).reshape(-1, 1)"
+ ],
+ "outputs": [],
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.linear_model import LinearRegression\n",
+ "\n",
+ "# Linear regression of Y (Adj Close) on X (Open); fit() returns the estimator itself\n",
+ "lm = LinearRegression().fit(X, Y)\n",
+ "\n",
+ "# R^2 scored on the same data the model was fitted on (in-sample)\n",
+ "lm.score(X, Y)"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 5,
+ "data": {
+ "text/plain": [
+ "0.9976829341182026"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# In-sample predictions: the model is applied to the same X it was fitted on\n",
+ "Yhat = lm.predict(X)\n",
+ "\n",
+ "# Show the first four predicted values\n",
+ "Yhat[:4]"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 6,
+ "data": {
+ "text/plain": [
+ "array([[3.85101685],\n",
+ " [3.98104974],\n",
+ " [4.01105733],\n",
+ " [4.19110287]])"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.metrics import mean_squared_error\n",
+ "\n",
+ "# In-sample MSE between the observed Adj Close values (Y) and the fitted values (Yhat)\n",
+ "mean_squared_error(y_true=Y, y_pred=Yhat)"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 7,
+ "data": {
+ "text/plain": [
+ "0.055301235619033565"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "R-squared (R^2)\n",
+ "\n",
+ "The coefficient of determination, or R-squared.  \n",
+ "It measures how closely the data fit the regression line.  \n",
+ "R-squared is the percentage of the variation of the target variable (Y) that is explained by the linear model.  \n",
+ "It compares the regression model to a simple baseline model that always predicts the mean of the data points.  \n",
+ "\n",
+ "R^2 = (1 - (MSE of regression line/MSE of the average of the data))\n",
+ "\n",
+ "Coefficient of Determination (R^2) \n",
+ "The blue line represents the regression line.\n",
+ "\n",
+ "R-squared = Explained variation / Total variation\n",
+ "\n",
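+ "For a least-squares fit with an intercept, evaluated on its own training data, R-squared is always between 0 and 100%. On out-of-sample data it can be negative.\n",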
+ "\n",
+ "An MSE close to zero indicates a good fit: the numerator in the R^2 formula is small, so R^2 is close to 1."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# R^2 (coefficient of determination) of the fitted regression\n",
+ "lm.score(X, Y)"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 8,
+ "data": {
+ "text/plain": [
+ "0.9976829341182026"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Drop every price/volume column except Adj Close\n",
+ "df = dataset.drop(columns=['Open', 'High', 'Low', 'Close', 'Volume'])"
+ ],
+ "outputs": [],
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.head()"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 10,
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Adj Close | \n",
+ "
\n",
+ " \n",
+ " Date | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2014-01-02 | \n",
+ " 3.95 | \n",
+ "
\n",
+ " \n",
+ " 2014-01-03 | \n",
+ " 4.00 | \n",
+ "
\n",
+ " \n",
+ " 2014-01-06 | \n",
+ " 4.13 | \n",
+ "
\n",
+ " \n",
+ " 2014-01-07 | \n",
+ " 4.18 | \n",
+ "
\n",
+ " \n",
+ " 2014-01-08 | \n",
+ " 4.18 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Adj Close\n",
+ "Date \n",
+ "2014-01-02 3.95\n",
+ "2014-01-03 4.00\n",
+ "2014-01-06 4.13\n",
+ "2014-01-07 4.18\n",
+ "2014-01-08 4.18"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Explicit figure/axes so the chart carries its own title and axis labels;\n",
+ "# plt.show() also keeps the Line2D repr out of the cell output.\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.plot(df.index, df['Adj Close'])\n",
+ "ax.set(title=symbol + ' Adjusted Close', xlabel='Date', ylabel='Adj Close')\n",
+ "plt.show()"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 11,
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "image/png": [
+ "\n"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Out-of-Sample Data \n",
+ "\n",
+ "Create new data (forecast)"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "from fbprophet import Prophet"
+ ],
+ "outputs": [],
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Move the Date index into a column and drop the unused price/volume columns\n",
+ "data = dataset.reset_index().drop(columns=['Open', 'High', 'Low', 'Close', 'Volume'])\n",
+ "\n",
+ "# Rename to the 'ds' (date) / 'y' (value) column names that are passed to Prophet\n",
+ "prophet_names = {'Date': 'ds', 'Adj Close': 'y'}\n",
+ "new_df = data.rename(columns=prophet_names)\n",
+ "new_df.head()"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 22,
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ds | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2014-01-02 | \n",
+ " 3.95 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2014-01-03 | \n",
+ " 4.00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2014-01-06 | \n",
+ " 4.13 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2014-01-07 | \n",
+ " 4.18 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2014-01-08 | \n",
+ " 4.18 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ds y\n",
+ "0 2014-01-02 3.95\n",
+ "1 2014-01-03 4.00\n",
+ "2 2014-01-06 4.13\n",
+ "3 2014-01-07 4.18\n",
+ "4 2014-01-08 4.18"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 22,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Create the Prophet model with daily seasonality switched on,\n",
+ "# then fit it to the ds/y frame\n",
+ "m = Prophet(daily_seasonality=True)\n",
+ "m.fit(df=new_df)"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 26,
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 26,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Extend the training dates by 365 calendar days, then keep weekdays only.\n",
+ "# The price history has no Saturday/Sunday rows, so Prophet's weekly seasonality\n",
+ "# is not estimated for those days and weekend forecasts would be unreliable.\n",
+ "Out_of_Sample = (\n",
+ "    m.make_future_dataframe(periods=365)\n",
+ "    .loc[lambda future: future['ds'].dt.dayofweek < 5]\n",
+ "    .reset_index(drop=True)\n",
+ ")\n",
+ "Out_of_Sample.head()"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 27,
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ds | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2014-01-02 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2014-01-03 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2014-01-06 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2014-01-07 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2014-01-08 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ds\n",
+ "0 2014-01-02\n",
+ "1 2014-01-03\n",
+ "2 2014-01-06\n",
+ "3 2014-01-07\n",
+ "4 2014-01-08"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 27,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "Out_of_Sample.tail()"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 28,
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ds | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1532 | \n",
+ " 2019-08-23 | \n",
+ "
\n",
+ " \n",
+ " 1533 | \n",
+ " 2019-08-24 | \n",
+ "
\n",
+ " \n",
+ " 1534 | \n",
+ " 2019-08-25 | \n",
+ "
\n",
+ " \n",
+ " 1535 | \n",
+ " 2019-08-26 | \n",
+ "
\n",
+ " \n",
+ " 1536 | \n",
+ " 2019-08-27 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ds\n",
+ "1532 2019-08-23\n",
+ "1533 2019-08-24\n",
+ "1534 2019-08-25\n",
+ "1535 2019-08-26\n",
+ "1536 2019-08-27"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 28,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Predict for every date in Out_of_Sample (history plus the added future dates)\n",
+ "forecast = m.predict(Out_of_Sample)\n",
+ "\n",
+ "# Last rows: date, point forecast, and its lower/upper bounds\n",
+ "forecast.tail()[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]"
+ ],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "execution_count": 30,
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ds | \n",
+ " yhat | \n",
+ " yhat_lower | \n",
+ " yhat_upper | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1532 | \n",
+ " 2019-08-23 | \n",
+ " 22.208154 | \n",
+ " 16.870245 | \n",
+ " 28.318758 | \n",
+ "
\n",
+ " \n",
+ " 1533 | \n",
+ " 2019-08-24 | \n",
+ " 22.089846 | \n",
+ " 16.581817 | \n",
+ " 28.007437 | \n",
+ "
\n",
+ " \n",
+ " 1534 | \n",
+ " 2019-08-25 | \n",
+ " 22.078294 | \n",
+ " 16.562700 | \n",
+ " 28.063231 | \n",
+ "
\n",
+ " \n",
+ " 1535 | \n",
+ " 2019-08-26 | \n",
+ " 22.232008 | \n",
+ " 16.780503 | \n",
+ " 28.273691 | \n",
+ "
\n",
+ " \n",
+ " 1536 | \n",
+ " 2019-08-27 | \n",
+ " 22.153669 | \n",
+ " 16.880195 | \n",
+ " 28.122720 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ds yhat yhat_lower yhat_upper\n",
+ "1532 2019-08-23 22.208154 16.870245 28.318758\n",
+ "1533 2019-08-24 22.089846 16.581817 28.007437\n",
+ "1534 2019-08-25 22.078294 16.562700 28.063231\n",
+ "1535 2019-08-26 22.232008 16.780503 28.273691\n",
+ "1536 2019-08-27 22.153669 16.880195 28.122720"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "execution_count": 30,
+ "metadata": {
+ "collapsed": false,
+ "outputHidden": false,
+ "inputHidden": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "fig1 = m.plot(forecast)"
+ ],
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "