#!/usr/bin/env python3
"""
Compute intensity–duration–frequency (IDF) curves for capacity factor (CF) droughts.

This script applies a rolling average to a daily CF time series to detect
the N most intense low-generation events (droughts) for each duration D.

Methodology follows:
- Moving averages for durations from dmin to dmax (e.g., 1–90 days)
- Events are ranked by severity (lowest intensity = most severe)
- Return period T is computed as T = (nyears + 1) / rank
- Dates are assigned to the **center** of each event window

#######################################################################
## Usage: python compute_idf.py daily_CF.txt 1 90 10 idf_events.txt
#######################################################################

# Command-line arguments
#
# input_file
#     Input daily time series in plain-text format with two whitespace-
#     separated columns:
#
#         YYYY-MM-DD    value
#
#     where 'value' is the daily capacity factor (or any other variable
#     to be analysed). No header is expected.
#
# dmin
#     Minimum event duration (days) used to construct the IDF curves.
#
# dmax
#     Maximum event duration (days) used to construct the IDF curves.
#
# N
#     Number of events retained for each duration. Events are ranked by
#     severity (lowest intensity values) and assigned return periods using
#     the Weibull plotting position.
#
# output_file
#     Output text file containing the identified events with columns:
#
#         duration
#         value
#         date
#         severity
#         return_period
#
#     The output is written as a tab-separated text file.

"""

import sys
import pandas as pd
import numpy as np
from datetime import timedelta

# --- Parse arguments ---
if len(sys.argv) != 6:
    print("Usage: compute_idf.py input_file dmin dmax N output_file")
    sys.exit(1)

input_file = sys.argv[1]
dmin = int(sys.argv[2])
dmax = int(sys.argv[3])
N = int(sys.argv[4])
output_file = sys.argv[5]

# --- Read input series ---
df = pd.read_csv(input_file, sep=r"\s+", header=None, names=["date", "cf"])
df["date"] = pd.to_datetime(df["date"])
df.set_index("date", inplace=True)
df.sort_index(inplace=True)

# --- Compute number of years (based on date range) ---
start_date = df.index.min()
end_date = df.index.max()
nyears = round((end_date - start_date).days / 365.25)

# --- Store all events ---
all_events = []

for D in range(dmin, dmax + 1):
    # Shift to assign the rolling mean to the center of the window
    cf_rolling = df["cf"].rolling(window=D, min_periods=D).mean().shift(-D // 2)

    # Create a working copy to mask events
    available = cf_rolling.copy()

    events = []

    for i in range(N):
        if available.isna().all():
            break  # no more data available

        idx_min = available.idxmin()
        val_min = available.loc[idx_min]

        if pd.isna(val_min):
            break  # all remaining values are NaN

        # Save the event
        rank = len(events) + 1
        return_period = (nyears + 1) / rank
        events.append({
            "duration": D,
            "value": val_min,
            "date": idx_min.date().isoformat(),
            "severity": rank,
            "return_period": return_period
        })

        # Mask the D-day window centered at idx_min
        start = idx_min - timedelta(days=(D // 2))
        end = start + timedelta(days=D - 1)
        available.loc[start:end] = np.nan

    all_events.extend(events)

# --- Write output ---
df_out = pd.DataFrame(all_events)

# Round return period to 1 decimal (but keep as numeric)
df_out["return_period"] = df_out["return_period"].round(1)

# Write header manually, then rows with per-column formatting
with open(output_file, "w") as f:
    f.write("duration\tvalue\tdate\tseverity\treturn_period\n")
    for _, row in df_out.iterrows():
        f.write(f"{row['duration']}\t{row['value']:.6f}\t{row['date']}\t{row['severity']}\t{row['return_period']:.1f}\n")