Content - 7bc84714c0cb8f9f8ce606dbd7f6528e9f761546 - a1a7a8b/TS/TimeSeriesModel.py

TimeSeriesModel.py
# Copyright (C) 2016 Antoine Carme <Antoine.Carme@Laposte.net>
# All rights reserved.

# This file is part of the Python Automatic Forecasting (PyAF) library and is made available under
# the terms of the 3 Clause BSD license

import pandas as pd
import numpy as np

from . import PredictionIntervals as predint

from . import Plots as tsplot
from . import Perf as tsperf
from . import Utils as tsutil

class cTimeSeriesModel:
    
    def __init__(self, transf, trend, cycle, autoreg):
        self.mTransformation = transf;
        self.mTrend = trend;
        self.mCycle = cycle;
        self.mAR = autoreg;
        self.mFitPerformances = {}
        self.mForecastPerformances = {}
        self.mTestPerformances = {}
        self.mOutName = self.mAR.mOutName;
        self.mOriginalSignal = self.mTransformation.mOriginalSignal;
        self.mTimeInfo = self.mTrend.mTimeInfo;
        self.mTime = self.mTimeInfo.mTime;
        self.mSignal = self.mTimeInfo.mSignal;
        self.mTrainingVersionInfo = self.getVersions();

    def signal_info(self):
        lSignal = self.mTrend.mSignalFrame[self.mOriginalSignal];
        lStr1 = "SignalVariable='" + self.mOriginalSignal +"'";
        lStr1 += " Min=" + str(np.min(lSignal)) + " Max="  + str(np.max(lSignal)) + " ";
        lStr1 += " Mean=" + str(np.mean(lSignal)) + " StdDev="  + str(np.std(lSignal));
        lSignal = self.mTrend.mSignalFrame[self.mSignal];
        lStr2 = "TransformedSignalVariable='" + self.mSignal +"'";
        lStr2 += " Min=" + str(np.min(lSignal)) + " Max="  + str(np.max(lSignal)) + " ";
        lStr2 += " Mean=" + str(np.mean(lSignal)) + " StdDev="  + str(np.std(lSignal));
        return (lStr1 , lStr2);

    def get_model_category(self):
        lModelCategory = (self.mTransformation.__class__.__name__,
                          self.mTrend.__class__.__name__,
                          self.mCycle.__class__.__name__,
                          self.mAR.__class__.__name__)
        lModelCategory = self.mTransformation.mFormula + "_" + self.mTrend.mFormula + "_" + self.mCycle.mFormula + "_" + self.mAR.mFormula
        return str(lModelCategory)
        
    def getComplexity(self):
        lComplexity = 32 * self.mTransformation.mComplexity +  16 * self.mTrend.mComplexity + 4 * self.mCycle.mComplexity + 1 * self.mAR.mComplexity;
        return lComplexity;     

    def updatePerfs(self, compute_all_indicators = False):
        self.mModelFrame = pd.DataFrame();
        lSignal = self.mTrend.mSignalFrame[self.mSignal]
        N = lSignal.shape[0];
        self.mTrend.mTimeInfo.addVars(self.mModelFrame);
        df = self.forecastOneStepAhead(self.mModelFrame , perf_mode = True);
        self.mModelFrame = df.head(N);
        # print(self.mModelFrame.columns);
        lPrefix = self.mSignal + "_";
        lForecastColumnName = str(self.mOriginalSignal) + "_Forecast";
        lFitPerf = tsperf.cPerf();
        lForecastPerf = tsperf.cPerf();
        lTestPerf = tsperf.cPerf();
        # self.mModelFrame.to_csv(self.mOutName + "_model_perf.csv");
        (lFrameFit, lFrameForecast, lFrameTest) = self.mTrend.mSplit.cutFrame(self.mModelFrame);


        if(compute_all_indicators):
            lFitPerf.compute(lFrameFit[self.mOriginalSignal] , lFrameFit[lForecastColumnName] ,
                             self.mOutName + '_Fit')
            lForecastPerf.compute(lFrameForecast[self.mOriginalSignal] ,
                                  lFrameForecast[lForecastColumnName],
                                  self.mOutName + '_Forecast')
            if(lFrameTest.shape[0] > 0):
                lTestPerf.compute(lFrameTest[self.mOriginalSignal] , lFrameTest[lForecastColumnName],
                                  self.mOutName + '_Test')            
            pass
        else:
            lFitPerf.computeCriterion(lFrameFit[self.mOriginalSignal] , lFrameFit[lForecastColumnName] ,
                                      self.mTimeInfo.mOptions.mModelSelection_Criterion,
                                      self.mOutName + '_Fit')
            lForecastPerf.computeCriterion(lFrameForecast[self.mOriginalSignal] , lFrameForecast[lForecastColumnName],
                                           self.mTimeInfo.mOptions.mModelSelection_Criterion,
                                           self.mOutName + '_Forecast')
            if(lFrameTest.shape[0] > 0):
                lTestPerf.computeCriterion(lFrameTest[self.mOriginalSignal] , lFrameTest[lForecastColumnName],
                                           self.mTimeInfo.mOptions.mModelSelection_Criterion,
                                           self.mOutName + '_Test')
            
        self.mFitPerf = lFitPerf
        self.mForecastPerf = lForecastPerf;
        self.mTestPerf = lTestPerf;
        # print("PERF_COMPUTATION" , self.mOutName, self.mFitPerf.mMAPE);
        
    def computePredictionIntervals(self):
        # prediction intervals
        if(self.mTimeInfo.mOptions.mAddPredictionIntervals):
            self.mPredictionIntervalsEstimator = predint.cPredictionIntervalsEstimator();
            self.mPredictionIntervalsEstimator.mModel = self;        
            self.mPredictionIntervalsEstimator.computePerformances();
        pass

    def getFormula(self):
        lFormula = self.mTrend.mFormula + " + ";
        lFormula += self.mCycle.mFormula + " + ";
        lFormula += self.mAR.mFormula;
        return lFormula;


    def getInfo(self):
        logger = tsutil.get_pyaf_logger();
        logger.info("TIME_DETAIL " + self.mTrend.mTimeInfo.info());
        sig_info = self.signal_info()
        logger.info("SIGNAL_DETAIL_ORIG " + sig_info[0]);
        logger.info("SIGNAL_DETAIL_TRANSFORMED " + sig_info[1]);
        logger.info("BEST_TRANSOFORMATION_TYPE '" + self.mTransformation.get_name("") + "'");
        logger.info("BEST_DECOMPOSITION  '" + self.mOutName + "' [" + self.getFormula() + "]");
        logger.info("TREND_DETAIL '" + self.mTrend.mOutName + "' [" + self.mTrend.mFormula + "]");
        logger.info("CYCLE_DETAIL '"+ self.mCycle.mOutName + "' [" + self.mCycle.mFormula + "]");
        logger.info("AUTOREG_DETAIL '" + self.mAR.mOutName + "' [" + self.mAR.mFormula + "]");
        logger.info("MODEL_MAPE MAPE_Fit=" + str(self.mFitPerf.mMAPE) + " MAPE_Forecast=" + str(self.mForecastPerf.mMAPE)  + " MAPE_Test=" + str(self.mTestPerf.mMAPE) );
        logger.info("MODEL_SMAPE SMAPE_Fit=" + str(self.mFitPerf.mSMAPE) + " SMAPE_Forecast=" + str(self.mForecastPerf.mSMAPE)  + " SMAPE_Test=" + str(self.mTestPerf.mSMAPE) );
        logger.info("MODEL_MASE MASE_Fit=" + str(self.mFitPerf.mMASE) + " MASE_Forecast=" + str(self.mForecastPerf.mMASE)  + " MASE_Test=" + str(self.mTestPerf.mMASE) );
        logger.info("MODEL_L1 L1_Fit=" + str(self.mFitPerf.mL1) + " L1_Forecast=" + str(self.mForecastPerf.mL1)  + " L1_Test=" + str(self.mTestPerf.mL1) );
        logger.info("MODEL_L2 L2_Fit=" + str(self.mFitPerf.mL2) + " L2_Forecast=" + str(self.mForecastPerf.mL2)  + " L2_Test=" + str(self.mTestPerf.mL2) );
        logger.info("MODEL_COMPLEXITY " + str(self.getComplexity()) );
        logger.info("AR_MODEL_DETAIL_START");
        self.mAR.dumpCoefficients();
        logger.info("AR_MODEL_DETAIL_END");



    def forecastOneStepAhead(self , df , perf_mode = False):
        assert(self.mTime in df.columns)
        assert(self.mOriginalSignal in df.columns)
        lPrefix = self.mSignal + "_";
        df1 = df;
        # df1.to_csv("before.csv");
        # add new line with new time value, row_number and nromalized time
        # add signal tranformed column
        df1 = self.mTransformation.transformDataset(df1, self.mOriginalSignal);
        # df1.to_csv("after_transformation.csv");
        #print("Transformation update : " , df1.columns);

        df1 = self.mTimeInfo.transformDataset(df1);
        # df1.to_csv("after_time.csv");
        # print("TimeInfo update : " , df1.columns);
        # compute the trend based on the transformed column and compute trend residue
        df1 = self.mTrend.transformDataset(df1);
        #print("Trend update : " , df1.columns);
        # df1.to_csv("after_trend.csv");
        # compute the cycle and its residue based on the trend residue
        df1 = self.mCycle.transformDataset(df1);
        # df1.to_csv("after_cycle.csv");
        #print("Cycle update : " , df1.columns);
        # compute the AR componnet and its residue based on the cycle residue
        df1 = self.mAR.transformDataset(df1);
        # df1.to_csv("after_ar.csv");
        #print("AR update : " , df1.columns);
        # compute the forecast and its residue (forecast = trend  + cycle + AR)
        df2 = df1;
        lTrendColumn = df2[self.mTrend.mOutName]
        lCycleColumn = df2[self.mCycle.mOutName]
        lARColumn = df2[self.mAR.mOutName]
        lSignal = df2[self.mSignal]
        if(not perf_mode):
            df2[lPrefix + 'Trend'] =  lTrendColumn;
            df2[lPrefix + 'Trend_residue'] =  lSignal - lTrendColumn;
            df2[lPrefix + 'Cycle'] =  lCycleColumn;
            df2[lPrefix + 'Cycle_residue'] = df2[lPrefix + 'Trend_residue'] - lCycleColumn;
            df2[lPrefix + 'AR'] =  lARColumn ;
            df2[lPrefix + 'AR_residue'] = df2[lPrefix + 'Cycle_residue'] - lARColumn;

        lPrefix2 = str(self.mOriginalSignal) + "_";
        # print("TimeSeriesModel_forecast_invert");
        df2[lPrefix + 'TransformedForecast'] =  lTrendColumn + lCycleColumn + lARColumn ;
        df2[lPrefix2 + 'Forecast'] = self.mTransformation.invert(df2[lPrefix + 'TransformedForecast']);

        if(not perf_mode):
            df2[lPrefix + 'TransformedResidue'] =  lSignal - df2[lPrefix + 'TransformedForecast']
            lOriginalSignal = df2[self.mOriginalSignal]
            df2[lPrefix2 + 'Residue'] =  lOriginalSignal - df2[lPrefix2 + 'Forecast']
        df2.reset_index(drop=True, inplace=True);
        return df2;


    def forecast(self , df , iHorizon):
        N0 = df.shape[0];
        df1 = self.forecastOneStepAhead(df)
        lForecastColumnName = str(self.mOriginalSignal) + "_Forecast";
        for h in range(0 , iHorizon - 1):
            # print(df1.info());
            N = df1.shape[0];
            # replace the signal with the forecast in the last line  of the dataset
            lPos = df1.index[N - 1];
            lSignal = df1.loc[lPos , lForecastColumnName];
            df1.loc[lPos , self.mOriginalSignal] = lSignal;
            df1 = df1[[self.mTime , self.mOriginalSignal]];
            df1 = self.forecastOneStepAhead(df1)

        assert((N0 + iHorizon) == df1.shape[0])
        N1 = df1.shape[0];
        lPrefix = self.mSignal + "_";
        for h in range(0 , iHorizon):
            df1.loc[N1 - 1 - h, self.mSignal] = np.nan;
            df1.loc[N1 - 1 - h, self.mOriginalSignal] = np.nan;
            df1.loc[N1 - 1 - h, self.mTrend.mOutName + '_residue'] =  np.nan;
            df1.loc[N1 - 1 - h, self.mCycle.mOutName + '_residue'] =  np.nan;
            df1.loc[N1 - 1 - h, self.mAR.mOutName + '_residue'] =  np.nan;
            df1.loc[N1 - 1 - h, lPrefix + 'Trend_residue'] =  np.nan;
            df1.loc[N1 - 1 - h, lPrefix + 'Cycle_residue'] = np.nan;
            df1.loc[N1 - 1 - h, lPrefix + 'AR_residue'] = np.nan;
            df1.loc[N1 - 1 - h, str(self.mOriginalSignal) + '_Residue'] = np.nan;
            df1.loc[N1 - 1 - h, lPrefix + 'TransformedResidue'] = np.nan;
            pass
        # print(df.head())
        # print(df1.head())
        if(self.mTimeInfo.mOptions.mAddPredictionIntervals):
            df1 = self.addPredictionIntervals(df, df1, iHorizon);
        if(self.mTimeInfo.mOptions.mForecastRectifier is not None):
            df1 = self.applyForecastRectifier(df1)
        return df1

    def applyForecastRectifier(self, df):
        df1 = df;
        if(self.mTimeInfo.mOptions.mForecastRectifier == "relu"):
            lForecastColumnName = str(self.mOriginalSignal) + "_Forecast";
            df1[lForecastColumnName] = df1[lForecastColumnName].apply(lambda x : max(x, 0))
        return df1

    def addPredictionIntervals(self, iInputDS, iForecastFrame, iHorizon):
        lSignalColumn = self.mOriginalSignal;

        N = iInputDS.shape[0];
        lForecastColumn = str(lSignalColumn) + "_Forecast";
        lLowerBoundName = lForecastColumn + '_Lower_Bound'
        lUpperBoundName = lForecastColumn + '_Upper_Bound';
        iForecastFrame[lLowerBoundName] = np.nan; 
        iForecastFrame[lUpperBoundName] = np.nan; 

        lConfidence = 1.96 ; # 0.95
        # the prediction intervals are only computed for the training horizon
        lHorizon = min(iHorizon , self.mTimeInfo.mHorizon);
        for h in range(0 , lHorizon):
            lHorizonName = lForecastColumn + "_" + str(h + 1);
            lWidth = lConfidence * self.mPredictionIntervalsEstimator.mForecastPerformances[lHorizonName].mL2;
            iForecastFrame.loc[N + h , lLowerBoundName] = iForecastFrame.loc[N + h , lForecastColumn] - lWidth;
            iForecastFrame.loc[N + h , lUpperBoundName] = iForecastFrame.loc[N + h , lForecastColumn] + lWidth;
            
        return iForecastFrame;


    def plotForecasts(self, df):
        lPrefix = self.mSignal + "_";
        lTime = self.mTimeInfo.mNormalizedTimeColumn;            
        tsplot.decomp_plot(df,
                           lTime, lPrefix + 'Signal',
                           lPrefix + 'Forecast' , lPrefix + 'Residue');

    def to_json(self):
        dict1 = {};
        d1 = { "Time" : self.mTimeInfo.to_json(),
               "Signal" : self.mOriginalSignal,
               "Training_Signal_Length" : self.mModelFrame.shape[0]};
        dict1["Dataset"] = d1;
        lTransformation = self.mTransformation.mFormula;
        d2 = { "Best_Decomposition" : self.mOutName,
               "Signal_Transoformation" : lTransformation,
               "Trend" : self.mTrend.mFormula,
               "Cycle" : self.mCycle.mFormula,
               "AR_Model" : self.mAR.mFormula,
               };
        dict1["Model"] = d2;
        d3 = {"MAPE" : str(self.mForecastPerf.mMAPE),
              "MASE" : str(self.mForecastPerf.mMASE),
              "MAE" : str(self.mForecastPerf.mL1),
              "RMSE" : str(self.mForecastPerf.mL2),
              "COMPLEXITY" : str(self.getComplexity())};
        dict1["Model_Performance"] = d3;
        return dict1;


    def getForecastDatasetForPlots(self):
        lInput = self.mTrend.mSignalFrame;
        lOutput = self.forecast(lInput ,  self.mTimeInfo.mHorizon);
        return lOutput

    def plotResidues(self, name = None, format = 'png'):
        df = self.getForecastDatasetForPlots();
        lTime = self.mTimeInfo.mTime; # NormalizedTimeColumn;
        lPrefix = self.mSignal + "_";
        lPrefix2 = str(self.mOriginalSignal) + "_";
        if(name is not None):
            tsplot.decomp_plot(df, lTime, self.mSignal, lPrefix + 'Trend' , lPrefix + 'Trend_residue', name = name + "_trend" , format=format);
            tsplot.decomp_plot(df, lTime, lPrefix + 'Trend_residue' , lPrefix + 'Cycle', lPrefix + 'Cycle_residue', name = name + "_cycle" , format=format);
            tsplot.decomp_plot(df, lTime, lPrefix + 'Cycle_residue' , lPrefix + 'AR' , lPrefix + 'AR_residue', name = name + "_AR" , format=format);
            tsplot.decomp_plot(df, lTime, self.mSignal, lPrefix + 'TransformedForecast' , lPrefix + 'TransformedResidue', name = name + "_transformed_forecast" , format=format);
            tsplot.decomp_plot(df, lTime, self.mOriginalSignal, lPrefix2 + 'Forecast' , lPrefix2 + 'Residue', name = name + "_forecast" , format=format);
        else:
            tsplot.decomp_plot(df, lTime, self.mSignal, lPrefix + 'Trend' , lPrefix + 'Trend_residue');
            tsplot.decomp_plot(df, lTime, lPrefix + 'Trend_residue' , lPrefix + 'Cycle', lPrefix + 'Cycle_residue');
            tsplot.decomp_plot(df, lTime, lPrefix + 'Cycle_residue' , lPrefix + 'AR' , lPrefix + 'AR_residue');
            tsplot.decomp_plot(df, lTime, self.mSignal, lPrefix + 'TransformedForecast' , lPrefix + 'TransformedResidue');
            tsplot.decomp_plot(df, lTime, self.mOriginalSignal, lPrefix2 + 'Forecast' , lPrefix2 + 'Residue');
        
    def standardPlots(self, name = None, format = 'png'):
        self.plotResidues(name = name, format=format);
        lInput = self.mTrend.mSignalFrame;
        lOutput = self.forecast(lInput ,  self.mTimeInfo.mHorizon);
        # print(lOutput.columns)
        lPrefix = str(self.mOriginalSignal) + "_";
        lForecastColumn = lPrefix + 'Forecast';
        lTime = self.mTimeInfo.mTime;            
        lOutput.set_index(lTime, inplace=True, drop=False);
        # print(lOutput[lTime].dtype);
        tsplot.prediction_interval_plot(lOutput,
                                        lTime, self.mOriginalSignal,
                                        lForecastColumn  ,
                                        lForecastColumn + '_Lower_Bound',
                                        lForecastColumn + '_Upper_Bound',
                                        name = name,
                                        format= format);
        #lOutput.plot()
        
    def getPlotsAsDict(self):
        lDict = {};
        df = self.getForecastDatasetForPlots();
        lTime = self.mTime;
        lSignalColumn = self.mOriginalSignal;
        lPrefix = self.mSignal + "_";
        lPrefix2 = str(self.mOriginalSignal) + "_";
        lDict["Trend"] = tsplot.decomp_plot_as_png_base64(df, lTime, self.mSignal, lPrefix + 'Trend' , lPrefix + 'Trend_residue', name = "trend");
        lDict["Cycle"] = tsplot.decomp_plot_as_png_base64(df, lTime, lPrefix + 'Trend_residue' , lPrefix + 'Cycle', lPrefix + 'Cycle_residue', name = "cycle");
        lDict["AR"] = tsplot.decomp_plot_as_png_base64(df, lTime, lPrefix + 'Cycle_residue' , lPrefix + 'AR' , lPrefix + 'AR_residue', name = "AR");
        lDict["Forecast"] = tsplot.decomp_plot_as_png_base64(df, lTime, lSignalColumn, lPrefix2 + 'Forecast' , lPrefix2 + 'Residue', name = "forecast");

        lInput = self.mModelFrame[[self.mTime, self.mOriginalSignal]];
        lOutput = self.forecast(lInput ,  self.mTimeInfo.mHorizon);
        # print(lOutput.columns)
        lPrefix = str(self.mOriginalSignal) + "_";
        lForecastColumn = lPrefix + 'Forecast';
        lTime = self.mTimeInfo.mTime;
        lOutput.set_index(lTime, inplace=True, drop=False);
        lDict["Prediction_Intervals"] = tsplot.prediction_interval_plot_as_png_base64(lOutput,
                                                                                      lTime, self.mOriginalSignal,
                                                                                      lForecastColumn  ,
                                                                                      lForecastColumn + '_Lower_Bound',
                                                                                      lForecastColumn + '_Upper_Bound',
                                                                                      name = "prediction_intervals");
        return lDict;


    def getVersions(self):
        
        import os, platform, pyaf
        lVersionDict = {};
        lVersionDict["PyAF_version"] = pyaf.__version__;
        lVersionDict["system_platform"] = platform.platform();
        lVersionDict["system_uname"] = platform.uname();
        lVersionDict["system_processor"] = platform.processor();
        lVersionDict["python_implementation"] = platform.python_implementation();
        lVersionDict["python_version"] = platform.python_version();

        import sklearn
        lVersionDict["sklearn_version"] = sklearn.__version__;

        import pandas as pd
        lVersionDict["pandas_version"] = pd.__version__;
    
        import numpy as np
        lVersionDict["numpy_version"] = np.__version__;
    
        import scipy as sc
        lVersionDict["scipy_version"] = sc.__version__;

        import matplotlib
        lVersionDict["matplotlib_version"] = matplotlib.__version__

        import pydot
        lVersionDict["pydot_version"] = pydot.__version__

        import sqlalchemy
        lVersionDict["sqlalchemy_version"] = sqlalchemy.__version__

        # print([(k, lVersionDict[k]) for k in sorted(lVersionDict)]);
        return lVersionDict;