import pandas as pd

from miscore import Model, processes, Universe
from miscore.tools import cea
from miscore.tools.cea.data import population_norms

# Birth process
birth = processes.Birth(
    name="birth",
    year=1975
)

# OC process
oc = processes.OC(
    name="oc",
    life_table=processes.oc.data.us_2009.life_table,
)

# EC process
ec = processes.EC.from_data(processes.ec.data.us)

# EC screening process
ec_screening = processes.EC_screening.from_data(processes.ec_screening.data.example)

# Universes
no_screening = Universe(
    name="no_screening",
    processes=[
        birth,
        oc,
        ec
    ]
)

screening = Universe(
    name="screening",
    processes=[
        birth,
        oc,
        ec,
        ec_screening
    ]
)

# Model
model = Model(
    universes=[
        no_screening,
        screening
    ]
)

# Run model
result = model.run(
    n=1e5,
    seed=123,
    event_ages=range(0, 101, 1),
    duration_ages=range(0, 101, 1),
    event_years=range(1975, 2075, 1),
    duration_years=range(1975, 2075, 1),
    verbose=True
)

# Specify output labels and discounting
labels = ("costs", "LY", "QALY", "n_hysterectomies")
discount_rates = (0.03, 0.03, 0.03, 0.00)

# We declare a cost-effectiveness dataset, associating tags of interest with benefits and harms.
# The dataset consists of two parts: lifelong costs/utilities and utilities for women aged 45-.
cea_data = {
    "Lifelong": {
        "conditions": [],
        "durations": {
            # Total number of Life Years
            "life": (0, 1, 1, 0),
        },

        "events": {
            # Costs of treatment
            "ec_clinical": (35763, 0, 0, 1),
            "ec_screening_hysterectomy": (15276, 0, 0, 1),

            # Count number of deaths in population
            ".*death": "deaths"
        },
    },
    "Menopause": {
        "conditions": [("age", "<", 45)],
        "durations": {
            # Disutilities due to surgically induced menopause
            ("ec_clinical_month1", "ec_screening_hysterectomy_month1"): (0, 0, -0.44, 0),
            ("ec_clinical_months2-3", "ec_screening_hysterectomy_months2-3"): (0, 0, -0.26, 0),
            ("ec_clinical_cont", "ec_screening_hysterectomy_cont"): (0, 0, -0.12, 0),
        },
        "events": {}
    },
}

# Retrieve the events and durations tables obtained by simulation
durations = result.durations
events = result.events

ce_result = cea.cost_effectiveness_analysis(durations=durations,
                                            events=events,
                                            cea_data=cea_data,
                                            cea_labels=labels,
                                            division_factor=100000,
                                            discount_by="age",
                                            discount_start=40,
                                            discount_rate=discount_rates,
                                            group_by=["universe"],
                                            use_regex=True,
                                            verbose=True
                                            )

print(ce_result.cea_output)

# Now we include a reference strategy. There are two methods:
reference_strategy = (8289.90057, 60.96018, 60.89849, 0.35930, 1.0)
reference_strategy = "no_screening"

ce_result = cea.cost_effectiveness_analysis(durations=durations,
                                            events=events,
                                            cea_data=cea_data,
                                            cea_labels=labels,
                                            division_factor=1000000,
                                            discount_by="age",
                                            discount_start=40,
                                            discount_rate=discount_rates,
                                            group_by=["universe"],
                                            use_regex=True,
                                            reference=reference_strategy
                                            )

print(ce_result.cea_output)

# Now we group the results by universe and year only.
group_by = ["universe", "year"]
ce_result = cea.cost_effectiveness_analysis(durations=durations,
                                            events=events,
                                            cea_data=cea_data,
                                            cea_labels=labels,
                                            division_factor=1000000,
                                            discount_by="age",
                                            discount_start=40,
                                            discount_rate=discount_rates,
                                            group_by=group_by,
                                            use_regex=True)
print(ce_result.cea_output)

# Pivot results by universe to search for efficient universes
universe_table = pd.pivot_table(ce_result.cea_output, aggfunc="sum",
                                columns="label", values="number", index="universe")
print(universe_table)

# Here we search for the dominating strategies
dominating_strategies = cea.find_pareto(universe_table)
efficient_strategies = cea.find_efficient(universe_table)
print(efficient_strategies)

# The scatter_ce function allows us to plot the universes on the cost-QALY plane
cea.scatter_ce(universe_table, x_label="costs", y_label="QALY", tags="index")
cea.show()

# Next, we do an example of a probabilistic sensitivity analysis
labels = ["costs", "LY", "QALY", "n_hysterectomy"]
discount_rates = (0.03, 0.01, 0.01, 0.00)

# PSA-like input
# We declare a cost-effectiveness dataset, associating tags of interest with costs and effects
psa_data = {
    "Lifelong": {
        "conditions": [],
        "durations": {
            # Total number of Life Years
            "life": (0, 1, (1, 0.8) * 1000, 0),
        },
        "events": {
            # Costs of treatment
            "ec_clinical": (("norm", {"loc": 35763, "scale": 5960}), 0, 0, 1),
            "ec_screening_hysterectomy": (("norm", {"loc": 15276, "scale": 2546}), 0, 0, 1),

            # Count number of deaths in population
            ".*death": "deaths"
        },
    },
    "Menopause": {
        "conditions": [("age", "<", 45)],
        "durations": {
            # Disutilities due to surgically induced menopause
            ("ec_clinical_month1", "ec_screening_hysterectomy_month1"): (0, 0, -0.44, 0),
            ("ec_clinical_months2-3", "ec_screening_hysterectomy_months2-3"): (0, 0, -0.26, 0),
            ("ec_clinical_cont", "ec_screening_hysterectomy_cont"):
                (0, 0, ("uniform", {"loc": -0.12, "scale": 0.10}), 0),
        },
        "events": {}
    },
}

# Calculate costs and effects for each PSA draw
ce_result = cea.cost_effectiveness_analysis(result=result,
                                            cea_data=psa_data, cea_labels=labels,
                                            division_factor=1e5, discount_by="age",
                                            discount_start=40,
                                            discount_rate=discount_rates,
                                            group_by=["universe"],
                                            psa_n_samples=2000,
                                            psa_seed=42195)
print(ce_result.cea_output)

# Alternatively, we may perform a Univariate Sensitivity Analysis, in which values
# are changed one by one.
ce_result = cea.cost_effectiveness_analysis(result=result,
                                            cea_data=psa_data, cea_labels=labels,
                                            division_factor=1e5, discount_by="age",
                                            discount_start=40,
                                            discount_rate=discount_rates,
                                            reference="no_screening",
                                            group_by=["universe"],
                                            perform_usa=True)
# The output is now grouped by `usa_set` rather than `psa_set`
print(ce_result.cea_output)

# The `usa_values` DataFrame shows which input corresponds to which `usa_set`
print(ce_result.usa_values)

# Optionally correct our cost-effectiveness input data for population norm values
cea_data['Lifelong']['durations']['ec_clinical'] = (0, 0, -0.1, 0)
ce_input = population_norms.add_population_norms(
    data_quantities=cea_data,
    country="nl",
    sex="total",
    data_labels=processes.ec_screening.data.cea.labels,
    disutility_tags=['ec_clinical'],
    disutility_method="nominal"
)
print(ce_input[('Lifelong', 63)]['conditions'])
print(ce_input[('Lifelong', 63)]['durations'])