""" This module contains population norm utility values by age group and sex, as reported by 'Szende A, Janssen B, Cabases J. Self-reported population health: an international perspective based on EQ-5D. Dordrecht: Springer; 2014.' - doi: https://doi.org/10.1007/978-94-007-7596-1 For Switzerland, values are taken from 'General Population Reference Values for the French Version of the EuroQol EQ-5D Health Utility Instrument Perneger, Thomas V. et al. Value in Health, Volume 13, Issue 5, 631 - 635' - doi: https://doi.org/10.1111/j.1524-4733.2010.00727.x. Values are not supplied for individuals under 18, who are assumed to have health utilities equal to the 18-30 category. The values can be retrieved with the :func:`~miscore.tools.cea.add_population_norms` function. """ from copy import deepcopy from importlib.resources import files import random from typing import Dict, Iterable, List, Literal import warnings import numpy as np import pandas as pd from scipy import stats data = pd.read_csv( files('miscore.tools.cea.data').joinpath("population_norms.csv") ) def add_population_norms( data_quantities: Dict[str, Dict], data_labels: List, country: str = None, sex: str = None, norm_data: pd.DataFrame = None, sex_as_condition: bool = None, method: Literal["EQ5D", "VAS"] = "EQ5D", disutility_tags: Iterable = None, disutility_method: Literal["nominal", "nominal_censored", "proportional"] = "nominal", n_psa_draws: int = None, return_usa_draws: bool = False, usa_quantiles: List = [0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975], psa_seed: int = None): """ add_population_norms adds population norm utility values to CEA datasets. This allows the analysis to offset particular health state utilities against population reference values by age and sex category. Health utilities are read from the `population_norms.csv` file in the MISCore CEA data folder. Currently, values by age and sex are included for Switzerland, Netherlands, UK, and United States. :param data_quantities: CEA data set. :param data_labels: CEA data_labels for `data_quantities`. :param country: Country code of country, as specified in `population_norms.csv`. :param sex: String to denote Sex pertaining to dataset, can be 'male' or 'female'. Can also be 'total' to return a population average, although this is currently not available for Switzerland. :param norm_data: If not using the norm utilities integrated in `population_norms.csv`, DataFrame with population norm health utilities. Requisite columns are 'Agemin', 'Agemax', 'Yearmin', 'Yearmax', 'Method', and 'Utility'. :param sex_as_condition: Boolean whether to include sex as a condition in data_quantities. Ensure that your `durations` and `events` DataFrame has a `sex` column with `male` and `female` categories. :param method: Method of data utility, currently supports 'EQ5D' or 'VAS'. :param disutility_tags: Tags in data_quantities for which QALY inputs are given as disutility, relative to the `life` utility, which may need to be adjusted to reflect the category norm utility. Often, large tuples of tags can relate to a single disutility value. In this case, also one single element of the tuple can be supplied to identify the disutility tag. Ensure that in this case no singular tags reoccur in multiple tuples. :param disutility_method: String to denote the method of how to adjust the given disutility_tags. Options are "nominal" (i.e. disutility of -0.3 is adjusted to -0.2 for a norm utility of 0.9 compared to 1.0), "nominal_censored" (i.e. nominal method but health-state specific disutilities cannot exceed the population norm disutility), or "proportional" (i.e. disutility of -0.3 is adjusted to -0.27 for a norm utility of 0.9 compared to 1.0). Defaults to "nominal". :param n_psa_draws: Int value to define if you want to draw random values from the distributions of the disutility and the norm utility values, consistently with the disutility_tags argument. :param return_usa_draws: Whether to return quantile draws from the supplied distributions of the disutilities and the norm utilities. :param usa_quantiles: Quantiles to use for the USA draws, if applicable. :param psa_seed: Seed to use for the PSA draws, if applicable. :return: New CEA dataset which uses `conditions` to include population norms. """ assert sex in [None, 'male', 'female', 'total'], \ "Invalid entry for 'sex', enter 'male', 'female' or 'total'." assert sex or sex_as_condition, "Need to supply either `sex` or `sex_as_condition`." if country: assert country in list(np.unique(data['Country'])), "Country code currently not" \ "supported, check" \ "'population_norms.csv'." assert (country is not None) or (norm_data is not None), "Either a country code or a" \ "DataFrame with input data must be" \ "supplied." assert "QALY" in data_labels, "'QALY' not in the data_labels of the supplied CEA dataset." utility_index = data_labels.index("QALY") # If no data was supplied by the user, get the proper country- and sex-specific data # from the data stored in population_norms.py if norm_data is None: norm_data = data[ (data['Country'] == country) & (data['Method'] == method) ] # If `sex` is not added as a condition in the output CE data, filter our data also by sex. if sex_as_condition: norm_data = norm_data[norm_data['Sex'] != 'total'] else: norm_data = norm_data[norm_data['Sex'] == sex] assert disutility_method in ['proportional', 'nominal', 'nominal_censored'], \ "Invalid entry for 'disutility_method', enter 'proportional', 'nominal' or " \ "'nominal_censored'." # Define an inner function which applies the discounting to the `durations` or # `events` tags def apply_utility(ce_durations: Dict, ce_events: Dict, utility: float, disutility_method: str, SE: float = None, seed: int = None, psa_draws: Dict = None): # Deepcopy to prevent referencing issues new_durations = deepcopy(ce_durations) new_events = deepcopy(ce_events) # Get the old base utility value from the durations part of the ce input file # Convert `life` entry to (mutable) list, set new utility value, and reset to tuple if "life" in new_durations: old_utility = ce_durations["life"][utility_index] new_durations["life"] = list(new_durations["life"]) else: old_utility = ce_durations[("life",)][utility_index] new_durations["life"] = list(new_durations[("life",)]) del new_durations[("life",)] if n_psa_draws is None: new_durations["life"][utility_index] = utility new_durations["life"] = tuple(new_durations["life"]) else: np.random.seed(seed) utility = np.clip(stats.norm.rvs( utility, SE, n_psa_draws ), 0., 1.) new_durations["life"][utility_index] = utility # For any tag in disutility_tags, adjust the QALY disutility to be relative to the new # norm utility value. if disutility_tags is not None: for new_ in (new_durations, new_events): for k, v in new_.items(): new_[k] = list(new_[k]) if k in disutility_tags: # Disutility of events cannot be adjusted nominally. assert not (disutility_method != "proportional" and k in new_events and new_events[k][utility_index] != 0), \ ("Disutility of events cannot be adjusted nominally. See " "documentation for more information") if k in psa_draws and (n_psa_draws is not None or return_usa_draws): # If a no distribution is specified for tag `k`, it will not appear in # the psa_draws dictionary. In this case, the disutility will be # constant and only the norm utility draws will be substituted with # psa_draws. if disutility_method == "proportional": new_[k][utility_index] = psa_draws[k] * (utility / old_utility) else: new_[k][utility_index] = psa_draws[k] + (old_utility - utility) else: if disutility_method == "proportional": new_[k][utility_index] *= (utility / old_utility) else: new_[k][utility_index] += (old_utility - utility) if disutility_method == "nominal_censored": new_[k][utility_index] = np.minimum( new_[k][utility_index], 0.) new_[k] = tuple(new_[k]) return new_durations, new_events new_data_quantities = {} # For every part of the data_quantities entry (which may contain numerous datasets), # apply the adjustment to norm utilities psa_draws = {} psa_seed_generator = random.Random(psa_seed) # Generate usa draws if return_usa_draws: quantile_draws = {} # For every disutility key in each dataset, draw the quantile values per the specified # distribution and quantiles. If a the second entry is as for key, dataset in data_quantities.items(): for tag_type in ("durations", "events"): if tag_type not in dataset: continue for d_tag in disutility_tags: if d_tag in dataset[tag_type]: if hasattr(dataset[tag_type][d_tag][utility_index], "__iter__"): if isinstance(dataset[tag_type][d_tag][utility_index][0], str): quantile_draws[(key, tag_type, d_tag)] = (eval( f"stats.{dataset[tag_type][d_tag][utility_index][0]}(" f"**dataset[{tag_type!r}][d_tag][utility_index][1])" f".isf({usa_quantiles})" ), eval( f"stats.{dataset[tag_type][d_tag][utility_index][0]}(" f"**dataset[{tag_type!r}][d_tag][utility_index][1])" f".mean()" )) else: quantile_draws[(key, d_tag)] = (np.quantile( dataset[tag_type][d_tag][utility_index][0], usa_quantiles ), np.mean(dataset[tag_type][d_tag][utility_index][0])) for index, cat in norm_data.iterrows(): quantile_draws[("Norm_Utility", index, 'life')] = ( np.clip(stats.norm(cat["Utility"], cat["SE"]).isf(usa_quantiles), 0, 1), cat["Utility"] ) usa_i = 0 usa_draws = {} norm_draws = {} usa_len = len(usa_quantiles) * len(quantile_draws) for key, dataset in data_quantities.items(): if "conditions" not in dataset: dataset['conditions'] = {} if "durations" not in dataset: new_data_quantities[key] = dataset continue if "events" not in dataset: new_data_quantities[key] = dataset continue if ("life",) not in dataset["durations"] and "life" not in dataset["durations"]: new_data_quantities[key] = dataset continue usa_draws[key] = {} norm_draws[key] = {} for usa_draw, quantile_values in quantile_draws.items(): if usa_draw[0] == "Norm_Utility": norm_draws[key][usa_draw[1]] = np.ones(usa_len) * quantile_values[1] norm_draws[key][usa_draw[1]][usa_i: (usa_i + len(usa_quantiles))] = \ quantile_values[0] usa_i += len(usa_quantiles) else: usa_draws[key][usa_draw[2]] = np.ones(usa_len) * quantile_values[1] usa_draws[key][usa_draw[2]][usa_i: (usa_i + len(usa_quantiles))] = \ quantile_values[0] usa_i += len(usa_quantiles) # For every entry in the set of age- and sex-specific norm utility values, create a # copy of the dataset which is adjusted for a specific age and sex group, including # the proper conditions` list so that they are only applied to the specific group. for index, cat in norm_data.iterrows(): durations, events = apply_utility(ce_durations=dataset["durations"], ce_events=dataset["events"], utility=norm_draws[key][index], disutility_method=disutility_method, psa_draws=usa_draws[key]) new_data_quantities[(key, index)] = { "conditions": [ *dataset["conditions"], ("age", ">=", cat['Agemin']), ("age", "<", cat['Agemax']), ("year", ">=", cat['Yearmin']), ("year", "<", cat['Yearmax']), ], "durations": durations, "events": events } if sex_as_condition: new_data_quantities[(key, index)]["conditions"]. \ append(("sex", "==", cat['Sex'])) usa_summary = pd.DataFrame( {k: v[0] for k, v in quantile_draws.items()} ).melt() usa_summary.index.names = ["usa_set"] usa_summary.columns = (["variable", "norm_utility_index", "usa_tag", "usa_value"]) usa_summary["usa_quantile"] = np.tile(usa_quantiles, len(quantile_draws)) return new_data_quantities, usa_summary # If not performing usa, draw psa values or correct single values for key, dataset in data_quantities.items(): if "conditions" not in dataset: dataset['conditions'] = {} if "durations" not in dataset: new_data_quantities[key] = dataset continue if "events" not in dataset: new_data_quantities[key] = dataset continue if ("life",) not in dataset["durations"] and "life" not in dataset["durations"]: new_data_quantities[key] = dataset continue psa_draws[key] = {} if n_psa_draws is not None: if disutility_tags is not None: for tag_type in ("durations", "events"): for k, v in dataset[tag_type].items(): if hasattr(v[utility_index], "__iter__"): seed = psa_seed_generator.getrandbits(32) # noqa: F841 # User specified a probability distribution if isinstance(v[utility_index][0], str): psa_draws[key][k] = eval(f"stats.{v[utility_index][0]}.rvs(" f"size=n_psa_draws, random_state=seed," f" **v[utility_index][1])") # User specified list of custom PSA values else: psa_draws[key][k] = np.array(v[utility_index]) else: warnings.warn(f"No distribution specified for tag `{k}` in " f"dataset `{key}`.") # For every entry in the set of age- and sex-specific norm utility values, create a copy # of the dataset which is adjusted for a specific age and sex group, including the proper # conditions` list so that they are only applied to the specific group. for index, cat in norm_data.iterrows(): _conditions = [ ("age", ">=", cat['Agemin']), ("age", "<", cat['Agemax']), ("year", ">=", cat['Yearmin']), ("year", "<", cat['Yearmax']) ] seed = psa_seed_generator.getrandbits(32) durations, events = apply_utility(ce_durations=dataset["durations"], ce_events=dataset["events"], utility=cat['Utility'], disutility_method=disutility_method, SE=cat['SE'], psa_draws=psa_draws[key], seed=seed) new_data_quantities[(key, index)] = { "conditions": [ *dataset["conditions"], *[_c for _c in _conditions if not np.isnan(_c[2])] ], "durations": durations, "events": events } if sex_as_condition: new_data_quantities[(key, index)]["conditions"]. \ append(("sex", "==", cat['Sex'])) return new_data_quantities