Source code for vivarium.framework.lookup.manager

"""
=============
Lookup Tables
=============

Simulations tend to require a large quantity of data to run.  :mod:`vivarium`
provides the :class:`Lookup Table <vivarium.framework.lookup.table.LookupTable>`
abstraction to ensure that accurate data can be retrieved when it's needed. It's
a callable object that takes in a population index and returns data specific to
the individuals represented by that index. See the
:ref:`lookup concept note <lookup_concept>` for more.

"""
from datetime import datetime, timedelta
from numbers import Number
from typing import TYPE_CHECKING, List, Tuple, Union

import pandas as pd

from vivarium.framework.lookup.table import (
    CategoricalTable,
    InterpolatedTable,
    LookupTable,
    LookupTableData,
    ScalarTable,
)
from vivarium.manager import Manager

if TYPE_CHECKING:
    from vivarium.framework.engine import Builder


class LookupTableManager(Manager):
    """Builds and keeps track of the lookup tables used by a simulation.

    Notes
    -----
    This class is not intended to be touched by client code. During setup,
    request tables through ``lookup`` on the builder, which hands back
    references to LookupTable objects.

    """

    CONFIGURATION_DEFAULTS = {
        "interpolation": {
            "order": 0,
            "validate": True,
            "extrapolate": True,
        }
    }

    @property
    def name(self) -> str:
        """The fixed name this manager is known by."""
        return "lookup_table_manager"

    def setup(self, builder: "Builder") -> None:
        """Cache the builder services and interpolation settings.

        Parameters
        ----------
        builder
            Source of the population view factory, the clock, the
            ``interpolation`` configuration block and the lifecycle
            constraint hook.
        """
        self.tables = {}
        self._pop_view_builder = builder.population.get_view
        self.clock = builder.time.clock()

        interpolation_config = builder.configuration.interpolation
        self._interpolation_order = interpolation_config.order
        self._extrapolate = interpolation_config.extrapolate
        self._validate = interpolation_config.validate

        lifecycle = builder.lifecycle
        self._add_constraint = lifecycle.add_constraint
        # Register ``build_table`` with the lifecycle as allowed during setup.
        lifecycle.add_constraint(self.build_table, allow_during=["setup"])

    def build_table(
        self,
        data: LookupTableData,
        key_columns: Union[List[str], Tuple[str, ...]],
        parameter_columns: Union[List[str], Tuple[str, ...]],
        value_columns: Union[List[str], Tuple[str, ...]],
    ) -> LookupTable:
        """Construct a lookup table from input data.

        The new table's ``call`` attribute is registered with the lifecycle
        as restricted during ``initialization``, ``setup`` and
        ``post_setup`` before the table is handed back.
        """
        new_table = self._build_table(
            data, key_columns, parameter_columns, value_columns
        )
        restricted_phases = ["initialization", "setup", "post_setup"]
        self._add_constraint(new_table.call, restrict_during=restricted_phases)
        return new_table

    def _build_table(
        self,
        data: LookupTableData,
        key_columns: Union[List[str], Tuple[str, ...]],
        parameter_columns: Union[List[str], Tuple[str, ...]],
        value_columns: Union[List[str], Tuple[str, ...]],
    ) -> LookupTable:
        """Validate the inputs (if enabled), then create and record a table."""
        # Tables are not given explicit names by callers; a running number
        # gives each one a generic identity that helps with introspection.
        table_number = len(self.tables)

        if self._validate:
            validate_build_table_parameters(
                data, key_columns, parameter_columns, value_columns
            )

        # Note datetime catches pandas timestamps
        scalar_like = (Number, datetime, timedelta, list, tuple)
        if isinstance(data, scalar_like):
            table_type = ScalarTable
        else:
            table_type = InterpolatedTable if parameter_columns else CategoricalTable

        table_arguments = {
            "table_number": table_number,
            "data": data,
            "population_view_builder": self._pop_view_builder,
            "key_columns": key_columns,
            "parameter_columns": parameter_columns,
            "value_columns": value_columns,
            "interpolation_order": self._interpolation_order,
            "clock": self.clock,
            "extrapolate": self._extrapolate,
            "validate": self._validate,
        }
        table = table_type(**table_arguments)

        self.tables[table_number] = table
        return table

    def __repr__(self) -> str:
        return "LookupTableManager()"
class LookupTableInterface:
    """The lookup table management system.

    A simulation typically needs a large amount of data in order to run.
    ``vivarium`` offers the
    :class:`Lookup Table <vivarium.framework.lookup.table.LookupTable>`
    abstraction so that accurate data can be retrieved at the moment it is
    needed. More detail is available :ref:`here <lookup_concept>`.

    """

    def __init__(self, manager: LookupTableManager):
        self._manager = manager

    def build_table(
        self,
        data: LookupTableData,
        key_columns: Union[List[str], Tuple[str, ...]] = (),
        parameter_columns: Union[List[str], Tuple[str, ...]] = (),
        value_columns: Union[List[str], Tuple[str, ...]] = (),
    ) -> LookupTable:
        """Construct a LookupTable from input data.

        When ``data`` is a :class:`pandas.DataFrame`, an interpolation
        function of the order given in the simulation
        :term:`configuration <Configuration>` is calculated for every
        permutation of the set of ``key_columns``. The ``parameter_columns``
        act as the parameters of those interpolation functions, which
        estimate all remaining columns in the table.

        When ``data`` is a number, time, list, or tuple, a scalar table is
        constructed instead. The values in ``data`` become the values of the
        table's columns, which are named according to ``value_columns``.

        Parameters
        ----------
        data
            The source data used to build the resulting
            :class:`Lookup Table <vivarium.framework.lookup.table.LookupTable>`.
        key_columns
            Columns used to select between interpolation functions. These
            should be the non-continuous variables in the data, for example
            'sex' in data about a population.
        parameter_columns
            Columns holding the parameters to the interpolation functions.
            These should be the continuous variables, for example 'age' in
            data about a population.
        value_columns
            The data columns that will be in the resulting LookupTable:
            the columns to be interpolated over when interpolating, or the
            names of the columns in the scalar table.

        Returns
        -------
        LookupTable

        """
        build = self._manager.build_table
        return build(data, key_columns, parameter_columns, value_columns)
def validate_build_table_parameters(
    data: LookupTableData,
    key_columns: Union[List[str], Tuple[str, ...]],
    parameter_columns: Union[List[str], Tuple[str, ...]],
    value_columns: Union[List[str], Tuple[str, ...]],
) -> None:
    """Makes sure the data format agrees with the provided column layout.

    Parameters
    ----------
    data
        The candidate lookup table data: a number, datetime, timedelta,
        list, tuple, or :class:`pandas.DataFrame`.
    key_columns
        Names of the key columns. Not allowed with list or tuple data.
    parameter_columns
        Names of the parameter columns. Not allowed with list or tuple
        data. For a DataFrame, each parameter ``p`` is expected to appear
        in the data as the bin edge columns ``p_start`` and ``p_end``.
    value_columns
        Names of the value columns. Required for list or tuple data, where
        there must be exactly one name per value.

    Raises
    ------
    ValueError
        If ``data`` is ``None``, an empty DataFrame, or an empty list or
        tuple; if list or tuple data comes without value columns, with a
        mismatched number of value columns, or with key or parameter
        columns; if a DataFrame comes with neither key nor parameter
        columns, with overlapping key, parameter, or value columns, or
        without all of the specified columns.
    TypeError
        If ``data`` is not one of the allowable types.
    """
    is_sequence = isinstance(data, (list, tuple))
    is_frame = isinstance(data, pd.DataFrame)

    if data is None or (is_frame and data.empty) or (is_sequence and not data):
        raise ValueError("Must supply some data")

    acceptable_types = (Number, datetime, timedelta, list, tuple, pd.DataFrame)
    if not isinstance(data, acceptable_types):
        raise TypeError(
            f"The only allowable types for data are {acceptable_types}. "
            f"You passed {type(data)}."
        )

    if is_sequence:
        if not value_columns:
            raise ValueError(
                "To invoke scalar view with multiple values, you must supply value_columns"
            )
        if len(value_columns) != len(data):
            # The trailing space keeps the two sentences from running together.
            raise ValueError(
                "The number of value columns must match the number of values. "
                f"You supplied values: {data} and value_columns: {value_columns}"
            )
        if key_columns:
            raise ValueError(
                f"key_columns are not allowed for scalar view: Provided {key_columns}."
            )
        if parameter_columns:
            raise ValueError(
                "parameter_columns are not allowed for scalar view: "
                f"Provided {parameter_columns}."
            )

    if is_frame:
        if not key_columns and not parameter_columns:
            raise ValueError(
                "Must supply either key_columns or parameter_columns with a DataFrame."
            )

        # Every parameter contributes a pair of bin edge columns.
        bin_edge_columns = [
            f"{parameter}_{edge}"
            for parameter in parameter_columns
            for edge in ("start", "end")
        ]
        all_parameter_columns = set(parameter_columns) | set(bin_edge_columns)

        if set(key_columns).intersection(all_parameter_columns):
            raise ValueError(
                f"There should be no overlap between key columns: {key_columns} "
                f"and parameter columns: {parameter_columns}."
            )

        lookup_columns = set(key_columns) | all_parameter_columns
        if set(value_columns).intersection(lookup_columns):
            raise ValueError(
                f"There should be no overlap between value columns: {value_columns} "
                f"and key or parameter columns: {lookup_columns}."
            )

        # Only the bin edges, not the bare parameter names, must exist in the data.
        specified_columns = set(key_columns) | set(bin_edge_columns) | set(value_columns)
        if specified_columns.difference(data.columns):
            raise ValueError(
                f"The columns supplied: {specified_columns} must all be "
                f"present in the passed data: {data.columns}"
            )