Source code for vivarium_public_health.metrics.stratification

"""
==================
Results Stratifier
==================

This module contains tools for stratifying observed quantities
by specified characteristics through the vivarium results interface.
"""

import pandas as pd
from vivarium import Component
from vivarium.framework.engine import Builder


[docs]class ResultsStratifier(Component): ##################### # Lifecycle methods # ##################### # noinspection PyAttributeOutsideInit
[docs] def setup(self, builder: Builder) -> None: self.age_bins = self.get_age_bins(builder) self.start_year = builder.configuration.time.start.year self.end_year = builder.configuration.time.end.year self.register_stratifications(builder)
################# # Setup methods # #################
[docs] def register_stratifications(self, builder: Builder) -> None: builder.results.register_stratification( "age_group", self.age_bins["age_group_name"].to_list(), self.map_age_groups, is_vectorized=True, requires_columns=["age"], ) builder.results.register_stratification( "current_year", [str(year) for year in range(self.start_year, self.end_year + 1)], self.map_year, is_vectorized=True, requires_columns=["current_time"], ) # TODO [MIC-4232]: simulants occasionally have event year of end_year_year+1 if the end time plus step size # lands in the next year. possible solution detailed in ticket # builder.results.register_stratification( # "event_year", # [str(year) for year in range(self.start_year, self.end_year + 1)], # self.map_year, # is_vectorized=True, # requires_columns=["event_time"], # ) # TODO [MIC-3892]: simulants occasionally have entrance year of start_year-1 if the start time minus step size # lands in the previous year. possible solution detailed in ticket # builder.results.register_stratification( # "entrance_year", # [str(year) for year in range(self.start_year, self.end_year + 1)], # self.map_year, # is_vectorized=True, # requires_columns=["entrance_time"], # ) # TODO [MIC-4083]: Known bug with this registration # builder.results.register_stratification( # "exit_year", # [str(year) for year in range(self.start_year, self.end_year + 1)] + ["nan"], # self.map_year, # is_vectorized=True, # requires_columns=["exit_time"], # ) builder.results.register_stratification( "sex", ["Female", "Male"], requires_columns=["sex"] )
########### # Mappers # ###########
[docs] def map_age_groups(self, pop: pd.DataFrame) -> pd.Series: """Map age with age group name strings Parameters ---------- pop A DataFrame with one column, an age to be mapped to an age group name string Returns ------ pandas.Series A pd.Series with age group name string corresponding to the pop passed into the function """ bins = self.age_bins["age_start"].to_list() + [self.age_bins["age_end"].iloc[-1]] labels = self.age_bins["age_group_name"].to_list() age_group = pd.cut(pop.squeeze(axis=1), bins, labels=labels).rename("age_group") return age_group
[docs] @staticmethod def map_year(pop: pd.DataFrame) -> pd.Series: """Map datetime with year Parameters ---------- pop A DataFrame with one column, a datetime to be mapped to year Returns ------ pandas.Series A pd.Series with years corresponding to the pop passed into the function """ return pop.squeeze(axis=1).dt.year.apply(str)
[docs] @staticmethod def get_age_bins(builder: Builder) -> pd.DataFrame: raw_age_bins = builder.data.load("population.age_bins") age_start = builder.configuration.population.initialization_age_min exit_age = builder.configuration.population.untracking_age age_start_mask = age_start < raw_age_bins["age_end"] exit_age_mask = raw_age_bins["age_start"] < exit_age if exit_age else True age_bins = raw_age_bins.loc[age_start_mask & exit_age_mask, :].copy() age_bins["age_group_name"] = ( age_bins["age_group_name"].str.replace(" ", "_").str.lower() ) return age_bins