Source code for etna.transforms.timestamp.fourier
import math
from typing import Optional
from typing import Sequence
import numpy as np
import pandas as pd
from etna.transforms.base import FutureMixin
from etna.transforms.base import Transform
class FourierTransform(Transform, FutureMixin):
    """Adds fourier features to the dataset.

    Notes
    -----
    To understand how transform works we recommend:
    `Fourier series <https://otexts.com/fpp2/useful-predictors.html#fourier-series>`_.

    * Parameter ``period`` is responsible for the seasonality we want to capture.
    * Parameters ``order`` and ``mods`` define which harmonics will be used.

    Parameter ``order`` is a more user-friendly version of ``mods``.
    For example, ``order=2`` can be represented as ``mods=[1, 2, 3, 4]`` if ``period`` > 4,
    as ``mods=[1, 2, 3]`` if 3 < ``period`` <= 4 and as ``mods=[1, 2]`` if 2 < ``period`` <= 3.
    """

    def __init__(
        self,
        period: float,
        order: Optional[int] = None,
        mods: Optional[Sequence[int]] = None,
        out_column: Optional[str] = None,
    ):
        """Create instance of FourierTransform.

        Parameters
        ----------
        period:
            the period of the seasonality to capture in frequency units of time series;
            ``period`` should be >= 2
        order:
            upper order of Fourier components to include;
            ``order`` should be >= 1 and <= ceil(period/2)
        mods:
            alternative and precise way of defining which harmonics will be used,
            for example ``mods=[1, 3, 4]`` means that sin of the first order
            and sin and cos of the second order will be used;
            ``mods`` should be non-empty and every mod should be >= 1 and < period
        out_column:
            * if set, name of added column, the final name will be '{out_column}_{mod}';
            * if not set, name will be ``transform.__repr__()``,
              repr will be made for transform that creates exactly this column

        Raises
        ------
        ValueError:
            if period < 2
        ValueError:
            if both or none of order, mods is set
        ValueError:
            if order is < 1 or > ceil(period/2)
        ValueError:
            if mods is empty
        ValueError:
            if at least one mod is < 1 or >= period
        """
        if period < 2:
            raise ValueError("Period should be at least 2")
        self.period = period
        self.mods: Sequence[int]

        if order is not None and mods is None:
            if order < 1 or order > math.ceil(period / 2):
                raise ValueError("Order should be within [1, ceil(period/2)] range")
            # Each order contributes a sin (odd mod) and a cos (even mod) term;
            # mods that are not below the period are dropped.
            self.mods = [mod for mod in range(1, 2 * order + 1) if mod < period]
        elif mods is not None and order is None:
            # ``len`` is used instead of truthiness so that array-like inputs work too.
            # Without this guard ``min``/``max`` below would fail with an unrelated
            # "arg is an empty sequence" message.
            if len(mods) == 0:
                raise ValueError("At least one mod should be provided")
            if min(mods) < 1 or max(mods) >= period:
                raise ValueError("Every mod should be within [1, int(period)) range")
            self.mods = mods
        else:
            raise ValueError("There should be exactly one option set: order or mods")

        # NOTE(review): ``order`` is never stored, only the derived ``mods`` is kept.
        # Presumably this keeps the repr-based column naming reconstructible from
        # ``period`` and ``mods`` alone - confirm against the base class ``__repr__``.
        self.order = None
        self.out_column = out_column

    def fit(self, df: pd.DataFrame) -> "FourierTransform":
        """Fit method does nothing and is kept for compatibility.

        Parameters
        ----------
        df:
            dataframe with data.

        Returns
        -------
        result: FourierTransform
        """
        return self

    def _get_column_name(self, mod: int) -> str:
        """Build the name of the output column for a single ``mod``.

        If ``out_column`` is not set, the name is the repr of a transform
        created with the same ``period`` and only this ``mod``.
        """
        if self.out_column is None:
            return f"{FourierTransform(period=self.period, mods=[mod]).__repr__()}"
        return f"{self.out_column}_{mod}"

    @staticmethod
    def _construct_answer(df: pd.DataFrame, features: pd.DataFrame) -> pd.DataFrame:
        """Attach the same ``features`` to every segment of ``df``.

        For each unique value of the ``"segment"`` column level the segment's
        sub-frame is joined with ``features``, the segment name is put back as
        the first column level, and all pieces are concatenated column-wise.
        """
        dataframes = []
        for seg in df.columns.get_level_values("segment").unique():
            tmp = df[seg].join(features)
            # Rebuild two-level columns: prepend the segment name to each column.
            _idx = tmp.columns.to_frame()
            _idx.insert(0, "segment", seg)
            tmp.columns = pd.MultiIndex.from_frame(_idx)
            dataframes.append(tmp)
        result = pd.concat(dataframes, axis=1).sort_index(axis=1)
        result.columns.names = ["segment", "feature"]
        return result

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add harmonics to the dataset.

        Parameters
        ----------
        df:
            dataframe with data to transform.

        Returns
        -------
        result: pd.DataFrame
            transformed dataframe
        """
        # Position of each row measured in periods, counted from the first row of ``df``.
        elapsed = np.arange(len(df.index)) / self.period

        # Collect all harmonics first and build the frame in one go: inserting
        # columns one by one fragments the frame when there are many mods.
        harmonics = {}
        for mod in self.mods:
            order = (mod + 1) // 2
            # Even mods are cos terms, expressed as sin shifted by pi / 2.
            phase = np.pi / 2 if mod % 2 == 0 else 0.0
            harmonics[self._get_column_name(mod)] = np.sin(2 * np.pi * order * elapsed + phase)
        features = pd.DataFrame(harmonics, index=df.index)

        return self._construct_answer(df, features)