import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
[docs]
def annual_mean(ts: pd.Series) -> float:
"""
Calculates the mean of annual means from a time series.
The annual mean is computed for each year using daily values.
Parameters
----------
ts : pd.Series
Time series of daily values, indexed by datetime.
Returns
-------
float
Mean of the annual mean values across all years.
"""
if ts.empty:
return float('nan')
# Group by year and calculate mean surface area for each year
annual_means = ts.groupby(ts.index.year).mean()
# Return the mean of these annual means
return annual_means.mean()
[docs]
def annual_std(ts: pd.Series) -> float:
"""
Calculates the mean annual standard deviation from a time series.
Standard deviation is computed for each year using daily values.
Parameters
----------
ts : pd.Series
Time series of daily values, indexed by datetime.
Returns
-------
float
Mean standard deviation across all years.
"""
if ts.empty:
return float('nan')
# Group by year and compute standard deviation for each year
annual_std_values = ts.groupby(ts.index.year).std()
# Return the mean standard deviation across years
return annual_std_values.mean()
# Skewness
[docs]
def skewness(ts: pd.Series) -> float:
"""
Calculates skewness of the given time series.
Parameters
----------
ts : pd.Series
Time series, indexed by datetime.
Returns
-------
float
Skewness of the time series (unitless).
"""
if ts.empty:
return float('nan')
return skew(ts.dropna())
# Kurtosis
[docs]
def kurtosis_val(ts: pd.Series) -> float:
"""
Calculates kurtosis of the given time series.
Parameters
----------
ts : pd.Series
Time series, indexed by datetime.
Returns
-------
float
Kurtosis of the time series (excess kurtosis, unitless).
"""
if ts.empty:
return float('nan')
return kurtosis(ts.dropna(), fisher=True)
# COV
[docs]
def coefficient_of_variation(ts: pd.Series) -> float:
"""
Calculates coefficient of variation (CV) of the given time series.
Parameters
----------
ts : pd.Series
Time series, indexed by datetime.
Returns
-------
float
Coefficient of variation (std/mean, unitless).
"""
if ts.empty:
return float('nan')
mean_val = ts.mean()
if mean_val == 0:
return float('nan')
return ts.std() / mean_val
[docs]
def max_days_above_90th(ts: pd.Series) -> float:
"""
Calculates the maximum number of days per year where the daily values
exceed the 90th percentile threshold (computed over the entire time series).
Parameters
----------
ts : pd.Series
Time series of daily values, indexed by datetime.
Returns
-------
float
Maximum count of days above the 90th percentile across years.
"""
if ts.empty:
return float('nan')
# Compute global 90th percentile threshold
threshold = np.nanpercentile(ts, 90)
# Boolean series: True if value > threshold
above_threshold = ts > threshold
# Count per year
annual_counts = above_threshold.groupby(ts.index.year).sum()
# Return maximum count across years
return float(annual_counts.max()) if not annual_counts.empty else float('nan')
[docs]
def max_annual_persistence(timeseries, threshold=1/np.e, min_periods=30):
"""
Compute the persistence (decorrelation time) of high values in a time series annually.
Parameters
----------
timeseries : pd.Series
A datetime-indexed series of daily values.
threshold : float, optional
Autocorrelation cutoff (default=1/e ~ 0.367).
min_periods : int, optional
Minimum number of days required in a year to compute autocorrelation.
Returns
-------
int
Maximum persistence (days) across all years.
"""
results = {}
# group by year
for year, group in timeseries.groupby(timeseries.index.year):
if len(group) < min_periods:
continue
# normalize (remove mean, divide std)
x = (group - group.mean()) / group.std()
n = len(x)
# compute autocorrelation using np.correlate
acf = np.correlate(x, x, mode='full') / n
acf = acf[n-1:] / acf[n-1] # keep positive lags, normalize at lag 0 = 1
# find first lag where acf < threshold
persistence = np.argmax(acf < threshold)
if persistence == 0: # if acf never drops below threshold
persistence = len(acf) - 1
results[year] = persistence
if not results:
return float('nan')
return max(results.values())