Source code for etna.models.deadline_ma

import warnings
from enum import Enum
from typing import Dict
from typing import List

import numpy as np
import pandas as pd

from etna.models.base import NonPredictionIntervalContextRequiredAbstractModel
from etna.models.mixins import NonPredictionIntervalContextRequiredModelMixin
from etna.models.mixins import PerSegmentModelMixin


class SeasonalityMode(Enum):
    """Seasonality modes supported by DeadlineMovingAverageModel."""

    month = "month"
    year = "year"

    @classmethod
    def _missing_(cls, value):
        """Reject any value that does not match one of the declared members.

        Raises
        ------
        NotImplementedError:
            always; the message lists the allowed seasonality values
        """
        allowed_values = ", ".join([repr(member.value) for member in cls])
        message = f"{value} is not a valid {cls.__name__}. Only {allowed_values} seasonality allowed"
        raise NotImplementedError(message)
class _DeadlineMovingAverageModel:
    """Moving average model that uses exact previous dates to predict."""

    # ``pd.infer_freq`` reports hourly data as "H" before pandas 2.2 and as "h" starting from it,
    # so both spellings have to be treated as the hourly frequency.
    _HOURLY_ALIASES = frozenset({"H", "h"})

    def __init__(self, window: int = 3, seasonality: str = "month"):
        """
        Initialize deadline moving average model.

        Length of the context is equal to the number of ``window`` months or years, depending on the ``seasonality``.

        Parameters
        ----------
        window: int
            Number of values taken for forecast for each point.
        seasonality: str
            Only allowed monthly or annual seasonality.
        """
        self.name = "target"
        self.window = window
        self.seasonality = SeasonalityMode(seasonality)
        self.freqs_available = {"H", "h", "D"}
        # Frequency string inferred in ``fit``; ``None`` means the model is not fitted yet.
        self._freq = None

    def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_DeadlineMovingAverageModel":
        """
        Fit DeadlineMovingAverageModel model.

        Fitting only infers and remembers the frequency of the ``timestamp`` column.

        Parameters
        ----------
        df: pd.DataFrame
            Data to fit on
        regressors:
            List of the columns with regressors(ignored in this model)

        Raises
        ------
        ValueError
            If freq of dataframe is not supported

        Returns
        -------
        :
            Fitted model
        """
        freq = pd.infer_freq(df["timestamp"])
        if freq not in self.freqs_available:
            raise ValueError(f"{freq} is not supported! Use daily or hourly frequency!")

        # Any column besides timestamp/target is ignored, so tell the user about it.
        if set(df.columns) != {"timestamp", "target"}:
            warnings.warn(
                message=f"{type(self).__name__} does not work with any exogenous series or features. "
                f"It uses only target series for predict."
            )

        self._freq = freq
        return self

    @staticmethod
    def _seasonal_offset(seasonality: SeasonalityMode, periods: int) -> pd.DateOffset:
        """Build an offset of ``periods`` months or years, depending on ``seasonality``.

        Raises
        ------
        NotImplementedError:
            if ``seasonality`` is neither monthly nor annual
        """
        if seasonality is SeasonalityMode.month:
            return pd.DateOffset(months=periods)
        if seasonality is SeasonalityMode.year:
            return pd.DateOffset(years=periods)
        raise NotImplementedError(f"{seasonality} seasonality is not supported!")

    @staticmethod
    def _get_context_beginning(
        df: pd.DataFrame, prediction_size: int, seasonality: SeasonalityMode, window: int
    ) -> pd.Timestamp:
        """
        Get timestamp where context begins.

        Parameters
        ----------
        df:
            Time series in a long format.
        prediction_size:
            Number of last timestamps to leave after making prediction.
            Previous timestamps will be used as a context for models that require it.
        seasonality:
            Seasonality.
        window:
            Number of values taken for forecast of each point.

        Returns
        -------
        :
            Timestamp with beginning of the context.

        Raises
        ------
        ValueError:
            if context isn't big enough
        """
        not_enough_context_message = (
            "Given context isn't big enough, try to decrease context_size, prediction_size "
            "or increase length of given dataframe!"
        )

        df_history = df.iloc[:-prediction_size]
        history_timestamps = df_history["timestamp"]
        future_timestamps = df["timestamp"].iloc[-prediction_size:]

        # if we have len(history_timestamps) == 0, then len(df) <= prediction_size
        if len(history_timestamps) == 0:
            raise ValueError(not_enough_context_message)

        # The earliest value needed is the one ``window`` periods before the first predicted timestamp.
        first_index = future_timestamps.iloc[0] - _DeadlineMovingAverageModel._seasonal_offset(
            seasonality=seasonality, periods=window
        )

        if first_index < history_timestamps.iloc[0]:
            raise ValueError(not_enough_context_message)

        return first_index

    def _make_predictions(self, result_template: pd.Series, context: pd.Series, prediction_size: int) -> np.ndarray:
        """Make predictions using ``result_template`` as a base and ``context`` as a context.

        The last ``prediction_size`` entries of ``result_template`` are filled in place, in chronological
        order. When ``context`` is the same object as ``result_template``, already computed predictions
        are therefore visible to the later ones.
        """
        index = result_template.index
        start_idx = len(result_template) - prediction_size

        # Offsets depend only on the model settings, so build them once instead of per timestamp.
        offsets = [
            self._seasonal_offset(seasonality=self.seasonality, periods=w) for w in range(1, self.window + 1)
        ]

        for timestamp in index[start_idx:]:
            for offset in offsets:
                result_template.loc[timestamp] += context.loc[timestamp - offset]
            result_template.loc[timestamp] = result_template.loc[timestamp] / self.window

        result_values = result_template.values[-prediction_size:]
        return result_values

    def forecast(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
        """Compute autoregressive forecasts.

        Parameters
        ----------
        df:
            Features dataframe.
        prediction_size:
            Number of last timestamps to leave after making prediction.
            Previous timestamps will be used as a context for models that require it.

        Returns
        -------
        :
            Array with predictions.

        Raises
        ------
        ValueError:
            if context isn't big enough
        ValueError:
            if forecast context contains NaNs
        """
        context_beginning = self._get_context_beginning(
            df=df, prediction_size=prediction_size, seasonality=self.seasonality, window=self.window
        )

        df = df.set_index("timestamp")
        df_history = df.iloc[:-prediction_size]
        history = df_history["target"]
        history = history[history.index >= context_beginning]
        if np.any(history.isnull()):
            raise ValueError("There are NaNs in a forecast context, forecast method requires context to be filled!")

        # Known history followed by zero placeholders for the points to be predicted.
        index = pd.date_range(start=context_beginning, end=df.index[-1], freq=self._freq)
        result_template = np.append(history.values, np.zeros(prediction_size))
        result_template = pd.Series(result_template, index=index)

        # The template is its own context, which makes the forecast autoregressive.
        result_values = self._make_predictions(
            result_template=result_template, context=result_template, prediction_size=prediction_size
        )
        return result_values

    def predict(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
        """Compute predictions using true target data as context.

        Parameters
        ----------
        df:
            Features dataframe.
        prediction_size:
            Number of last timestamps to leave after making prediction.
            Previous timestamps will be used as a context for models that require it.

        Returns
        -------
        :
            Array with predictions.

        Raises
        ------
        ValueError:
            if context isn't big enough
        ValueError:
            if there are NaNs in a target column on timestamps that are required to make predictions
        """
        context_beginning = self._get_context_beginning(
            df=df, prediction_size=prediction_size, seasonality=self.seasonality, window=self.window
        )

        df = df.set_index("timestamp")
        context = df["target"]
        context = context[context.index >= context_beginning]
        # Same NaN check as in ``forecast``; ``isnull`` also copes with non-float dtypes.
        if np.any(context.isnull()):
            raise ValueError("There are NaNs in a target column, predict method requires target to be filled!")

        index = pd.date_range(start=df.index[-prediction_size], end=df.index[-1], freq=self._freq)
        result_template = pd.Series(np.zeros(prediction_size), index=index)

        result_values = self._make_predictions(
            result_template=result_template, context=context, prediction_size=prediction_size
        )
        return result_values

    @property
    def context_size(self) -> int:
        """Upper bound to context size of the model.

        Raises
        ------
        ValueError:
            if the model is not fitted
        """
        if self._freq is None:
            raise ValueError("Model is not fitted! Fit the model before trying the find out context size!")

        # Longest possible period in days: a leap year or a 31-day month.
        cur_value = None
        if self.seasonality is SeasonalityMode.year:
            cur_value = 366
        elif self.seasonality is SeasonalityMode.month:
            cur_value = 31

        if self._freq in self._HOURLY_ALIASES:
            cur_value *= 24

        cur_value *= self.window
        return cur_value
class DeadlineMovingAverageModel(
    PerSegmentModelMixin,
    NonPredictionIntervalContextRequiredModelMixin,
    NonPredictionIntervalContextRequiredAbstractModel,
):
    """Moving average model that uses exact previous dates to predict."""

    def __init__(self, window: int = 3, seasonality: str = "month"):
        """
        Initialize deadline moving average model.

        Length of the context is equal to the number of ``window`` months or years, depending on the ``seasonality``.

        Parameters
        ----------
        window: int
            Number of values taken for forecast for each point.
        seasonality: str
            Only allowed monthly or annual seasonality.
        """
        self.window = window
        self.seasonality = seasonality
        # The base model is built from the same settings and handed over to the parent initializer.
        base_model = _DeadlineMovingAverageModel(window=window, seasonality=seasonality)
        super().__init__(base_model=base_model)

    @property
    def context_size(self) -> int:
        """Upper bound to context size of the model.

        The value is read from the first of the internal models returned by ``get_model``.
        """
        internal_models = self.get_model()
        first_model = next(iter(internal_models.values()))
        return first_model.context_size

    def get_model(self) -> Dict[str, "DeadlineMovingAverageModel"]:
        """Get internal model.

        Returns
        -------
        :
            Internal model, as returned by ``_get_model``
        """
        internal_models = self._get_model()
        return internal_models
# Public names of this module: only DeadlineMovingAverageModel is exported.
__all__ = ["DeadlineMovingAverageModel"]