"""
This module contains population norm utility values by age group and sex, as reported by
'Szende A, Janssen B, Cabases J. Self-reported population health: an international perspective
based on EQ-5D. Dordrecht: Springer; 2014.' - doi: https://doi.org/10.1007/978-94-007-7596-1

For Switzerland, values are taken from 'General Population Reference Values for the French Version
of the EuroQol EQ-5D Health Utility Instrument Perneger, Thomas V. et al. Value in Health, Volume
13, Issue 5, 631 - 635' - doi: https://doi.org/10.1111/j.1524-4733.2010.00727.x.
Values are not supplied for individuals under 18, who are assumed to have health utilities equal
to the 18-30 category.

The values can be retrieved with the :func:`~miscore.tools.cea.add_population_norms` function.
"""
from copy import deepcopy
from importlib.resources import files
import random
from typing import Dict, Iterable, List, Literal
import warnings

import numpy as np
import pandas as pd
from scipy import stats

data = pd.read_csv(
    files('miscore.tools.cea.data').joinpath("population_norms.csv")
)


def add_population_norms(
        data_quantities: Dict[str, Dict],
        data_labels: List,
        country: str = None,
        sex: str = None,
        norm_data: pd.DataFrame = None,
        sex_as_condition: bool = None,
        method: Literal["EQ5D", "VAS"] = "EQ5D",
        disutility_tags: Iterable = None,
        disutility_method: Literal["nominal", "nominal_censored", "proportional"] = "nominal",
        n_psa_draws: int = None,
        return_usa_draws: bool = False,
        usa_quantiles: List = [0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975],
        psa_seed: int = None):
    """
    add_population_norms adds population norm utility values to CEA datasets. This allows the
    analysis to offset particular health state utilities against population reference values by age
    and sex category. Health utilities are read from the `population_norms.csv` file in the
    MISCore CEA data folder. Currently, values by age and sex are included for Switzerland,
    Netherlands, UK, and United States.

    :param data_quantities: CEA data set.
    :param data_labels: CEA data_labels for `data_quantities`.
    :param country: Country code of country, as specified in `population_norms.csv`.
    :param sex: String to denote Sex pertaining to dataset, can be 'male' or 'female'. Can also
        be 'total' to return a population average, although this is currently not available for
        Switzerland.
    :param norm_data: If not using the norm utilities integrated in `population_norms.csv`,
        DataFrame with population norm health utilities. Requisite columns are 'Agemin', 'Agemax',
        'Yearmin', 'Yearmax', 'Method', and 'Utility'.
    :param sex_as_condition: Boolean whether to include sex as a condition in data_quantities.
        Ensure that your `durations` and `events` DataFrame has a `sex` column with `male` and
        `female` categories.
    :param method: Method of data utility, currently supports 'EQ5D' or 'VAS'.
    :param disutility_tags: Tags in data_quantities for which QALY inputs are given as disutility,
        relative to the `life` utility, which may need to be adjusted to reflect the category
        norm utility. Often, large tuples of tags can relate to a single disutility
        value. In this case, also one single element of the tuple can be supplied to identify
        the disutility tag. Ensure that in this case no singular tags reoccur in multiple
        tuples.
    :param disutility_method: String to denote the method of how to adjust the given
        disutility_tags. Options are "nominal" (i.e. disutility of -0.3 is adjusted to
        -0.2 for a norm utility of 0.9 compared to 1.0), "nominal_censored"
        (i.e. nominal method but health-state specific disutilities cannot exceed the
        population norm disutility), or "proportional" (i.e. disutility of -0.3 is adjusted to
        -0.27 for a norm utility of 0.9 compared to 1.0). Defaults to "nominal".
    :param n_psa_draws: Int value to define if you want to draw random values from the
        distributions of the disutility and the norm utility values, consistently with the
        disutility_tags argument.
    :param return_usa_draws: Whether to return quantile draws from the supplied distributions
        of the disutilities and the norm utilities.
    :param usa_quantiles: Quantiles to use for the USA draws, if applicable.
    :param psa_seed: Seed to use for the PSA draws, if applicable.
    :return: New CEA dataset which uses `conditions` to include population norms.
    """
    assert sex in [None, 'male', 'female', 'total'], \
        "Invalid entry for 'sex', enter 'male', 'female' or 'total'."
    assert sex or sex_as_condition, "Need to supply either `sex` or `sex_as_condition`."
    if country:
        assert country in list(np.unique(data['Country'])), "Country code currently not" \
                                                            "supported, check" \
                                                            "'population_norms.csv'."
    assert (country is not None) or (norm_data is not None), "Either a country code or a" \
                                                             "DataFrame with input data must be" \
                                                             "supplied."
    assert "QALY" in data_labels, "'QALY' not in the data_labels of the supplied CEA dataset."

    utility_index = data_labels.index("QALY")
    # If no data was supplied by the user, get the proper country- and sex-specific data
    # from the data stored in population_norms.py
    if norm_data is None:
        norm_data = data[
            (data['Country'] == country)
            & (data['Method'] == method)
        ]
    # If `sex` is not added as a condition in the output CE data, filter our data also by sex.
    if sex_as_condition:
        norm_data = norm_data[norm_data['Sex'] != 'total']
    else:
        norm_data = norm_data[norm_data['Sex'] == sex]

    assert disutility_method in ['proportional', 'nominal', 'nominal_censored'], \
        "Invalid entry for 'disutility_method', enter 'proportional', 'nominal' or " \
        "'nominal_censored'."

    # Define an inner function which applies the discounting to the `durations` or
    # `events` tags
    def apply_utility(ce_durations: Dict, ce_events: Dict, utility: float,
                      disutility_method: str, SE: float = None,
                      seed: int = None, psa_draws: Dict = None):
        # Deepcopy to prevent referencing issues
        new_durations = deepcopy(ce_durations)
        new_events = deepcopy(ce_events)
        # Get the old base utility value from the durations part of the ce input file

        # Convert `life` entry to (mutable) list, set new utility value, and reset to tuple
        if "life" in new_durations:
            old_utility = ce_durations["life"][utility_index]
            new_durations["life"] = list(new_durations["life"])
        else:
            old_utility = ce_durations[("life",)][utility_index]
            new_durations["life"] = list(new_durations[("life",)])
            del new_durations[("life",)]
        if n_psa_draws is None:
            new_durations["life"][utility_index] = utility
            new_durations["life"] = tuple(new_durations["life"])
        else:
            np.random.seed(seed)
            utility = np.clip(stats.norm.rvs(
                utility,
                SE,
                n_psa_draws
            ), 0., 1.)
            new_durations["life"][utility_index] = utility

        # For any tag in disutility_tags, adjust the QALY disutility to be relative to the new
        # norm utility value.
        if disutility_tags is not None:
            for new_ in (new_durations, new_events):
                for k, v in new_.items():
                    new_[k] = list(new_[k])
                    if k in disutility_tags:
                        # Disutility of events cannot be adjusted nominally.
                        assert not (disutility_method != "proportional" and k in
                                    new_events and new_events[k][utility_index] != 0), \
                            ("Disutility of events cannot be adjusted nominally. See "
                             "documentation for more information")
                        if k in psa_draws and (n_psa_draws is not None or return_usa_draws):
                            # If a no distribution is specified for tag `k`, it will not appear in
                            # the psa_draws dictionary. In this case, the disutility will be
                            # constant and only the norm utility draws will be substituted with
                            # psa_draws.
                            if disutility_method == "proportional":
                                new_[k][utility_index] = psa_draws[k] * (utility / old_utility)
                            else:
                                new_[k][utility_index] = psa_draws[k] + (old_utility - utility)
                        else:
                            if disutility_method == "proportional":
                                new_[k][utility_index] *= (utility / old_utility)
                            else:
                                new_[k][utility_index] += (old_utility - utility)

                        if disutility_method == "nominal_censored":
                            new_[k][utility_index] = np.minimum(
                                new_[k][utility_index], 0.)
                    new_[k] = tuple(new_[k])
        return new_durations, new_events

    new_data_quantities = {}
    # For every part of the data_quantities entry (which may contain numerous datasets),
    # apply the adjustment to norm utilities
    psa_draws = {}
    psa_seed_generator = random.Random(psa_seed)

    # Generate usa draws
    if return_usa_draws:
        quantile_draws = {}
        # For every disutility key in each dataset, draw the quantile values per the specified
        # distribution and quantiles. If a the second entry is as
        for key, dataset in data_quantities.items():
            for tag_type in ("durations", "events"):
                if tag_type not in dataset:
                    continue
                for d_tag in disutility_tags:
                    if d_tag in dataset[tag_type]:
                        if hasattr(dataset[tag_type][d_tag][utility_index], "__iter__"):
                            if isinstance(dataset[tag_type][d_tag][utility_index][0], str):
                                quantile_draws[(key, tag_type, d_tag)] = (eval(
                                    f"stats.{dataset[tag_type][d_tag][utility_index][0]}("
                                    f"**dataset[{tag_type!r}][d_tag][utility_index][1])"
                                    f".isf({usa_quantiles})"
                                ), eval(
                                    f"stats.{dataset[tag_type][d_tag][utility_index][0]}("
                                    f"**dataset[{tag_type!r}][d_tag][utility_index][1])"
                                    f".mean()"
                                ))
                            else:
                                quantile_draws[(key, d_tag)] = (np.quantile(
                                    dataset[tag_type][d_tag][utility_index][0],
                                    usa_quantiles
                                ), np.mean(dataset[tag_type][d_tag][utility_index][0]))
        for index, cat in norm_data.iterrows():
            quantile_draws[("Norm_Utility", index, 'life')] = (
                np.clip(stats.norm(cat["Utility"], cat["SE"]).isf(usa_quantiles), 0, 1),
                cat["Utility"]
            )

        usa_i = 0
        usa_draws = {}
        norm_draws = {}
        usa_len = len(usa_quantiles) * len(quantile_draws)
        for key, dataset in data_quantities.items():
            if "conditions" not in dataset:
                dataset['conditions'] = {}
            if "durations" not in dataset:
                new_data_quantities[key] = dataset
                continue
            if "events" not in dataset:
                new_data_quantities[key] = dataset
                continue
            if ("life",) not in dataset["durations"] and "life" not in dataset["durations"]:
                new_data_quantities[key] = dataset
                continue
            usa_draws[key] = {}
            norm_draws[key] = {}
            for usa_draw, quantile_values in quantile_draws.items():
                if usa_draw[0] == "Norm_Utility":
                    norm_draws[key][usa_draw[1]] = np.ones(usa_len) * quantile_values[1]
                    norm_draws[key][usa_draw[1]][usa_i: (usa_i + len(usa_quantiles))] = \
                        quantile_values[0]
                    usa_i += len(usa_quantiles)
                else:
                    usa_draws[key][usa_draw[2]] = np.ones(usa_len) * quantile_values[1]
                    usa_draws[key][usa_draw[2]][usa_i: (usa_i + len(usa_quantiles))] = \
                        quantile_values[0]
                    usa_i += len(usa_quantiles)
            # For every entry in the set of age- and sex-specific norm utility values, create a
            # copy of the dataset which is adjusted for a specific age and sex group, including
            # the proper conditions` list so that they are only applied to the specific group.
            for index, cat in norm_data.iterrows():
                durations, events = apply_utility(ce_durations=dataset["durations"],
                                                  ce_events=dataset["events"],
                                                  utility=norm_draws[key][index],
                                                  disutility_method=disutility_method,
                                                  psa_draws=usa_draws[key])
                new_data_quantities[(key, index)] = {
                    "conditions": [
                        *dataset["conditions"],
                        ("age", ">=", cat['Agemin']),
                        ("age", "<", cat['Agemax']),
                        ("year", ">=", cat['Yearmin']),
                        ("year", "<", cat['Yearmax']),
                    ],
                    "durations": durations,
                    "events": events
                }
                if sex_as_condition:
                    new_data_quantities[(key, index)]["conditions"]. \
                        append(("sex", "==", cat['Sex']))
        usa_summary = pd.DataFrame(
            {k: v[0] for k, v in quantile_draws.items()}
        ).melt()
        usa_summary.index.names = ["usa_set"]
        usa_summary.columns = (["variable", "norm_utility_index", "usa_tag", "usa_value"])
        usa_summary["usa_quantile"] = np.tile(usa_quantiles, len(quantile_draws))
        return new_data_quantities, usa_summary

    # If not performing usa, draw psa values or correct single values
    for key, dataset in data_quantities.items():
        if "conditions" not in dataset:
            dataset['conditions'] = {}
        if "durations" not in dataset:
            new_data_quantities[key] = dataset
            continue
        if "events" not in dataset:
            new_data_quantities[key] = dataset
            continue
        if ("life",) not in dataset["durations"] and "life" not in dataset["durations"]:
            new_data_quantities[key] = dataset
            continue
        psa_draws[key] = {}
        if n_psa_draws is not None:
            if disutility_tags is not None:
                for tag_type in ("durations", "events"):
                    for k, v in dataset[tag_type].items():
                        if hasattr(v[utility_index], "__iter__"):
                            seed = psa_seed_generator.getrandbits(32)  # noqa: F841
                            # User specified a probability distribution
                            if isinstance(v[utility_index][0], str):
                                psa_draws[key][k] = eval(f"stats.{v[utility_index][0]}.rvs("
                                                         f"size=n_psa_draws, random_state=seed,"
                                                         f" **v[utility_index][1])")
                            # User specified list of custom PSA values
                            else:
                                psa_draws[key][k] = np.array(v[utility_index])
                        else:
                            warnings.warn(f"No distribution specified for tag `{k}` in "
                                          f"dataset `{key}`.")
        # For every entry in the set of age- and sex-specific norm utility values, create a copy
        # of the dataset which is adjusted for a specific age and sex group, including the proper
        # conditions` list so that they are only applied to the specific group.
        for index, cat in norm_data.iterrows():
            _conditions = [
                ("age", ">=", cat['Agemin']),
                ("age", "<", cat['Agemax']),
                ("year", ">=", cat['Yearmin']),
                ("year", "<", cat['Yearmax'])
            ]
            seed = psa_seed_generator.getrandbits(32)
            durations, events = apply_utility(ce_durations=dataset["durations"],
                                              ce_events=dataset["events"],
                                              utility=cat['Utility'],
                                              disutility_method=disutility_method,
                                              SE=cat['SE'],
                                              psa_draws=psa_draws[key],
                                              seed=seed)
            new_data_quantities[(key, index)] = {
                "conditions": [
                    *dataset["conditions"],
                    *[_c for _c in _conditions if not np.isnan(_c[2])]
                ],
                "durations": durations,
                "events": events
            }
            if sex_as_condition:
                new_data_quantities[(key, index)]["conditions"]. \
                    append(("sex", "==", cat['Sex']))
    return new_data_quantities