1,553 changes: 1,553 additions & 0 deletions docs/examples/tutorial-multiseries.ipynb

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions timecopilot/forecaster.py
@@ -113,10 +113,7 @@ def _call_models(
# to cross validation
# (the initial model)
res_df_model = res_df_model.drop(columns=["y"])
res_df = res_df.merge(
res_df_model,
on=merge_on,
)
res_df = res_df.merge(res_df_model, on=merge_on, how="left")
return res_df

def forecast(
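For context, a minimal sketch of why `how="left"` matters when per-model results are collected. The frames below are made up for illustration; in `_call_models` the actual join columns come from `merge_on`.

import pandas as pd

res_df = pd.DataFrame({"unique_id": ["A", "A"], "ds": [1, 2], "ModelA": [10.0, 11.0]})
# a model that returned fewer rows than the base frame
res_df_model = pd.DataFrame({"unique_id": ["A"], "ds": [1], "ModelB": [9.5]})

inner = res_df.merge(res_df_model, on=["unique_id", "ds"])             # 1 row: ds=2 silently dropped
left = res_df.merge(res_df_model, on=["unique_id", "ds"], how="left")  # 2 rows: ModelB is NaN for ds=2
print(len(inner), len(left))  # 1 2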
109 changes: 107 additions & 2 deletions timecopilot/models/utils/gluonts_forecaster.py
@@ -13,6 +13,90 @@

from .forecaster import Forecaster, QuantileConverter

_COARSE_FREQ_PREFIXES = ("B", "D", "W", "M", "Q", "A", "Y")


def _maybe_align_for_gluonts(
df: pd.DataFrame,
freq: str,
*,
id_col: str = "unique_id",
ts_col: str = "ds",
coarse_only: bool = True,
verbose: bool = True,
):
"""
GluonTS requires a regular grid. If timestamps are consistently "end-of-bin"
(e.g., 23:59:59 for daily close), GluonTS will anchor to the bin boundary
(midnight) and your output ds can look "shifted".

This function:
- builds an internal copy aligned to the freq grid (floor to bin boundary)
- stores the most common within-bin offset per series (anchor)
- returns (df_gluonts, anchor) so the offset can be restored in the outputs

Returns:
df_gluonts: df aligned to grid (or original df if no action)
anchor: Series[timedelta] indexed by unique_id, or None if not applied
"""
f = str(freq)

if coarse_only and not f.startswith(_COARSE_FREQ_PREFIXES):
return df, None

offset = pd.tseries.frequencies.to_offset(f)

g = df.sort_values([id_col, ts_col]).copy(deep=False)
ds = pd.to_datetime(g[ts_col])

# grid boundary for each timestamp; dt.floor only supports fixed offsets
# (e.g. "D"), so fall back to period start for non-fixed ones such as
# "W", "M", "Q" or "Y"
try:
    base = ds.dt.floor(offset)
except ValueError:
    base = ds.dt.to_period(f).dt.start_time
within = ds - base  # timedelta inside the bin (time-of-day for daily)

# compute the dominant within-bin offset per series, guarded against noise
anchor = within.groupby(g[id_col]).apply(_compute_anchor_with_guard).dropna()

if anchor.empty:
return df, None

# apply only if at least one series has a non-zero dominant offset
if not (anchor != pd.Timedelta(0)).any():
return df, None

if verbose:
print(
f"[gluonts-align] Applied internal time alignment for freq='{f}': "
f"removed dominant within-bin offset(s) "
f"{', '.join(str(o) for o in anchor.value_counts().index)} "
f"from {anchor.shape[0]}/{g[id_col].nunique()} series; "
f"input data unchanged, offsets restored in forecasts."
)

df_gluonts = df.copy(deep=False)
aligned_ds = pd.to_datetime(df_gluonts[ts_col])
try:
    df_gluonts[ts_col] = aligned_ds.dt.floor(offset)
except ValueError:
    df_gluonts[ts_col] = aligned_ds.dt.to_period(f).dt.start_time

return df_gluonts, anchor


def _compute_anchor_with_guard(
within: pd.Series, min_frac: float = 0.8, min_count: int = 10
):
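"""Return the dominant non-zero within-bin offset of ``within``, or None
when that offset is zero, occurs fewer than ``min_count`` times, or covers
less than ``min_frac`` of the rows."""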
vc = within.value_counts()
top_offset = vc.index[0]
top_count = vc.iloc[0]
total = vc.sum()

if top_offset == pd.Timedelta(0):
return None

if top_count < min_count:
return None

if top_count / total < min_frac:
return None

return top_offset


def fix_freq(freq: str) -> str:
# see https://github.com/awslabs/gluonts/pull/2462/files
@@ -166,28 +250,49 @@ def forecast(
df = maybe_convert_col_to_float32(df, "y")
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)

df_gluonts, anchor = _maybe_align_for_gluonts(
df,
fix_freq(freq),
coarse_only=True,  # set False to also handle hourly/minutely end-stamps
verbose=True,
)

gluonts_dataset = PandasDataset.from_long_dataframe(
df_gluonts.copy(deep=False),
target="y",
item_id="unique_id",
timestamp="ds",
freq=fix_freq(freq),
)

with self.get_predictor(prediction_length=h) as predictor:
fcsts = predictor.predict(
gluonts_dataset,
num_samples=self.num_samples,
)

fcsts_list = list(fcsts) # materialize iterator once

fcst_df = self.gluonts_fcsts_to_df(
fcsts,
freq=freq,
fcsts_list,
freq=fix_freq(freq),
model_name=self.alias,
quantiles=qc.quantiles,
)

if anchor is not None:
    # restore the dominant within-bin offset; series without one keep the grid stamp
    fcst_df["ds"] = pd.to_datetime(fcst_df["ds"]) + fcst_df["unique_id"].map(
        anchor
    ).fillna(pd.Timedelta(0))

if qc.quantiles is not None:
fcst_df = qc.maybe_convert_quantiles_to_level(
fcst_df,
models=[self.alias],
)
print("ayo")
print(fcst_df)

return fcst_df
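
For reference, a standalone sketch of the align-then-restore round trip the new helper performs, using pandas only and made-up daily-close data; no GluonTS predictor is called, and the column names simply mirror the defaults above.

import pandas as pd

# daily history stamped at the 23:59:59 close rather than midnight
ds = pd.date_range("2024-01-01 23:59:59", periods=30, freq="D")
history = pd.DataFrame({"unique_id": "ABC", "ds": ds, "y": range(30)})

offset = pd.tseries.frequencies.to_offset("D")
base = history["ds"].dt.floor(offset)           # midnight grid that GluonTS sees
anchor = (history["ds"] - base).mode().iloc[0]  # dominant within-bin offset: 23:59:59

aligned = history.assign(ds=base)               # fed to the model instead of `history`
# ... fit / predict on `aligned`; forecasts come back on the midnight grid ...
fcst_ds = pd.date_range(base.iloc[-1] + offset, periods=7, freq="D")
print((fcst_ds + anchor)[:2])                   # restored 23:59:59 stamps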