Source code for etna.transforms.decomposition.change_points_segmentation
from typing import List
from typing import Optional
import pandas as pd
from etna.transforms.base import FutureMixin
from etna.transforms.base import PerSegmentWrapper
from etna.transforms.base import Transform
from etna.transforms.decomposition.base_change_points import BaseChangePointsModelAdapter
from etna.transforms.decomposition.base_change_points import TTimestampInterval
class _OneSegmentChangePointsSegmentationTransform(Transform):
    """Label every timestamp of one segment with the number of its change-point interval."""

    def __init__(self, in_column: str, out_column: str, change_point_model: BaseChangePointsModelAdapter):
        """Init _OneSegmentChangePointsSegmentationTransform.

        Parameters
        ----------
        in_column:
            name of column to apply transform to
        out_column:
            result column name
        change_point_model:
            model to get change points
        """
        self.in_column = in_column
        self.out_column = out_column
        # Populated by ``fit``; ``None`` means the transform has not been fitted yet.
        self.intervals: Optional[List[TTimestampInterval]] = None
        self.change_point_model = change_point_model

    def _fill_per_interval(self, series: pd.Series) -> pd.Series:
        """Map each index label of ``series`` to the number of the fitted interval that contains it.

        Parameters
        ----------
        series:
            series whose index is sliced by the fitted interval bounds

        Returns
        -------
        :
            categorical series of interval numbers sharing the index of ``series``

        Raises
        ------
        ValueError
            If the transform is not fitted, or if some index label is not covered by any fitted interval.
        """
        if self.intervals is None:
            raise ValueError("Transform is not fitted! Fit the Transform before calling transform method.")
        # Explicit dtype: without it the default dtype of a data-less Series depends on the
        # pandas version (and older versions emit a warning). Float keeps NaN available as
        # the "not labelled yet" marker.
        result_series = pd.Series(index=series.index, dtype="float64")
        for k, interval in enumerate(self.intervals):
            # NOTE(review): with label-based slicing both bounds are inclusive, so if consecutive
            # intervals share a boundary label, the later interval overwrites the earlier one.
            tmp_series = series[interval[0] : interval[1]]
            if tmp_series.empty:
                continue
            result_series[tmp_series.index] = k
        # Fail with an explicit message instead of an opaque NaN-to-integer cast error below.
        if result_series.isna().any():
            raise ValueError("Some timestamps are not covered by any of the fitted change point intervals.")
        return result_series.astype(int).astype("category")

    def fit(self, df: pd.DataFrame) -> "_OneSegmentChangePointsSegmentationTransform":
        """Fit _OneSegmentChangePointsSegmentationTransform: find change points in ``df`` and build intervals.

        Parameters
        ----------
        df:
            one segment dataframe indexed with timestamp

        Returns
        -------
        :
            instance with trained change points

        Raises
        ------
        ValueError
            Presumably raised by ``change_point_model`` when the series contains NaNs in the middle;
            this method performs no validation of its own.
        """
        self.intervals = self.change_point_model.get_change_points_intervals(df=df, in_column=self.in_column)
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Split df to intervals.

        The new column is written into ``df`` itself; the same object is returned.

        Parameters
        ----------
        df:
            one segment dataframe

        Returns
        -------
        df:
            df with new column ``out_column`` holding the interval number of each row

        Raises
        ------
        ValueError
            If the transform is not fitted.
        """
        series = df[self.in_column]
        result_series = self._fill_per_interval(series=series)
        df.loc[:, self.out_column] = result_series
        return df
class ChangePointsSegmentationTransform(PerSegmentWrapper, FutureMixin):
    """Transform that labels timestamps with the number of the change-point interval they belong to.

    The actual work is delegated to ``_OneSegmentChangePointsSegmentationTransform``,
    which is handed to ``PerSegmentWrapper``.

    Warning
    -------
    This transform can suffer from look-ahead bias. For transforming data at some timestamp
    it uses information from the whole train part.
    """

    def __init__(
        self,
        in_column: str,
        change_point_model: BaseChangePointsModelAdapter,
        out_column: Optional[str] = None,
    ):
        """Init ChangePointsSegmentationTransform.

        Parameters
        ----------
        in_column:
            name of column to fit change point model
        out_column:
            result column name. If not given use ``self.__repr__()``
        change_point_model:
            model to get change points
        """
        # Store every constructor argument before ``repr(self)`` is evaluated below.
        # NOTE(review): presumably ``repr`` reads these attributes - its implementation
        # is not visible in this file, so the original ordering is preserved.
        self.in_column = in_column
        self.change_point_model = change_point_model
        self.out_column = out_column
        if out_column is None:
            self.out_column = repr(self)

        one_segment_transform = _OneSegmentChangePointsSegmentationTransform(
            in_column=self.in_column,
            out_column=self.out_column,
            change_point_model=self.change_point_model,
        )
        super().__init__(transform=one_segment_transform)