Source code for etna.transforms.math.lags

from typing import List
from typing import Optional
from typing import Union

import pandas as pd

from etna.transforms.base import FutureMixin
from etna.transforms.base import Transform


class LagTransform(Transform, FutureMixin):
    """Generates series of lags from given dataframe.

    For every requested lag the ``in_column`` of each segment is shifted by that
    number of rows and appended to the dataframe as a new feature column.
    """

    def __init__(self, in_column: str, lags: Union[List[int], int], out_column: Optional[str] = None):
        """Create instance of LagTransform.

        Parameters
        ----------
        in_column:
            name of processed column
        lags:
            int value or list of values for lags computation; if int, generate range of lags from 1 to given value
        out_column:
            base for the name of created columns;

            * if set the final name is '{out_column}_{lag_number}';

            * if don't set, name will be ``transform.__repr__()``,
              repr will be made for transform that creates exactly this column

        Raises
        ------
        ValueError:
            if lags value contains non-positive values
        """
        if isinstance(lags, int):
            if lags < 1:
                raise ValueError(f"{type(self).__name__} works only with positive lags values, {lags} given")
            # A single int N is shorthand for the lags 1, 2, ..., N.
            self.lags = list(range(1, lags + 1))
        else:
            if any(lag_value < 1 for lag_value in lags):
                raise ValueError(f"{type(self).__name__} works only with positive lags values")
            self.lags = lags

        self.in_column = in_column
        self.out_column = out_column

    def _get_column_name(self, lag: int) -> str:
        """Build the name of the output column that holds the given lag.

        Parameters
        ----------
        lag:
            lag value the column is created for

        Returns
        -------
        result: str
            '{out_column}_{lag}' if ``out_column`` is set, otherwise the repr of a
            LagTransform that produces exactly this single lag
        """
        if self.out_column is None:
            # The repr of a one-lag transform is used as the column name.
            temp_transform = LagTransform(in_column=self.in_column, out_column=self.out_column, lags=[lag])
            return repr(temp_transform)
        return f"{self.out_column}_{lag}"

    def fit(self, df: pd.DataFrame) -> "LagTransform":
        """Fit method does nothing and is kept for compatibility.

        Parameters
        ----------
        df:
            dataframe with data.

        Returns
        -------
        result: LagTransform
        """
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add lags to the dataset.

        Parameters
        ----------
        df:
            dataframe with data to transform; its columns must be a two-level
            MultiIndex whose first level is named "segment" and whose second
            level contains ``in_column``

        Returns
        -------
        result: pd.Dataframe
            new dataframe with the original columns plus one column per segment
            and lag, columns sorted; the input dataframe is not modified
        """
        features = df.loc[:, pd.IndexSlice[:, self.in_column]]
        # Take segment labels from the selected columns themselves so that every
        # shifted series keeps the label of the segment it was computed from,
        # even when the columns of ``df`` are not sorted by segment.
        segment_labels = features.columns.get_level_values("segment")
        n_segments = len(segment_labels)

        lagged_frames = []
        for lag in self.lags:
            lagged = features.shift(lag)
            # Reuse the level names of ``df`` so that concatenation does not
            # drop them because of a name mismatch between the frames.
            lagged.columns = pd.MultiIndex.from_arrays(
                [segment_labels, [self._get_column_name(lag)] * n_segments],
                names=df.columns.names,
            )
            lagged_frames.append(lagged)

        # ``pd.concat`` already returns a new frame, so ``df`` needs no explicit copy.
        result = pd.concat([df] + lagged_frames, axis=1)
        return result.sort_index(axis=1)