"""
======================
The Population Manager
======================
The manager and :ref:`builder <builder_concept>` interface for the
:ref:`population management system <population_concept>`.
"""
from types import MethodType
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Tuple, Union
import pandas as pd
from vivarium import Component
from vivarium.framework.population.exceptions import PopulationError
from vivarium.framework.population.population_view import PopulationView
from vivarium.manager import Manager
class SimulantData(NamedTuple):
    """Data to help components initialize simulants.

    Any time simulants are added to the simulation, each initializer is called
    with this structure containing information relevant to their
    initialization.

    """

    #: The index representing the new simulants being added to the simulation.
    index: pd.Index
    #: A dictionary of extra data passed in by the component creating the
    #: population.
    user_data: Dict[str, Any]
    #: The time when the simulants enter the simulation.
    creation_time: pd.Timestamp
    #: The span of time over which the simulants are created. Useful for,
    #: e.g., distributing ages over the window.
    creation_window: pd.Timedelta
class InitializerComponentSet:
    """Set of unique components with population initializers.

    Tracks, by component name, which columns each registered initializer
    produces, and rejects a second initializer for the same component or a
    second producer for the same column.

    """

    def __init__(self) -> None:
        # Maps component name -> the columns its initializer produces.
        self._components: Dict[str, List[str]] = {}
        # Maps column name -> name of the component that produces it.
        self._columns_produced: Dict[str, str] = {}

    def add(self, initializer: Callable, columns_produced: List[str]) -> None:
        """Adds an initializer and columns to the set, enforcing uniqueness.

        The set is only modified if every check passes; a rejected
        registration leaves it exactly as it was.

        Parameters
        ----------
        initializer
            The population initializer to add to the set.
        columns_produced
            The columns the initializer produces.

        Raises
        ------
        TypeError
            If the initializer is not an object method.
        AttributeError
            If the object bound to the method does not have a name attribute.
        PopulationError
            If the component bound to the method already has an initializer
            registered or if the columns produced are duplicates of columns
            another initializer produces.

        """
        if not isinstance(initializer, MethodType):
            raise TypeError(
                "Population initializers must be methods of vivarium Components "
                "or the simulation's PopulationManager. "
                f"You provided {initializer} which is of type {type(initializer)}."
            )
        component = initializer.__self__
        # TODO: consider if we can initialize the tracked column with a component instead
        # TODO: raise error once all active Component implementations have been refactored
        # if not (isinstance(component, Component) or isinstance(component, PopulationManager)):
        #     raise AttributeError(
        #         "Population initializers must be methods of vivarium Components "
        #         "or the simulation's PopulationManager. "
        #         f"You provided {initializer} which is bound to {component} that "
        #         f"is of type {type(component)} which does not inherit from "
        #         "Component."
        #     )
        if not hasattr(component, "name"):
            raise AttributeError(
                "Population initializers must be methods of named simulation components. "
                f"You provided {initializer} which is bound to {component} that has no "
                f"name attribute."
            )

        component_name = component.name
        if component_name in self._components:
            raise PopulationError(
                f"Component {component_name} has multiple population initializers. "
                "This is not allowed."
            )

        # Validate every column before recording any of them, so a conflict
        # on a later column cannot leave earlier ones half-registered.
        claimed = {}
        for column in columns_produced:
            if column in self._columns_produced:
                owner = self._columns_produced[column]
            elif column in claimed:
                # The same column was listed twice in this one registration.
                owner = component_name
            else:
                claimed[column] = component_name
                continue
            raise PopulationError(
                f"Component {component_name} and component "
                f"{owner} have both registered initializers "
                f"for column {column}."
            )

        self._columns_produced.update(claimed)
        self._components[component_name] = columns_produced

    def __repr__(self) -> str:
        return repr(self._components)

    def __str__(self) -> str:
        return str(self._components)
class PopulationManager(Manager):
    """Manages the state of the simulated population."""

    # TODO: Move the configuration for initial population creation to
    # user components.
    CONFIGURATION_DEFAULTS = {
        "population": {
            "population_size": 100,
        },
    }

    def __init__(self):
        # The state table. Stays ``None`` until simulants are first created.
        self._population: Optional[pd.DataFrame] = None
        self._initializer_components = InitializerComponentSet()
        # Flags that are only True while ``_create_simulants`` is running.
        self.creating_initial_population = False
        self.adding_simulants = False
        # Id passed to the most recently created population view.
        self._last_id = -1

    ############################
    # Normal Component Methods #
    ############################

    @property
    def name(self):
        """The name of this component."""
        return "population_manager"

    def setup(self, builder):
        """Registers the population manager with other vivarium systems.

        Parameters
        ----------
        builder
            The simulation builder. This method reads its ``time``,
            ``resources`` and ``lifecycle`` interfaces.

        """
        self.clock = builder.time.clock()
        self.step_size = builder.time.step_size()
        self.resources = builder.resources
        # Kept so ``get_view`` can constrain each view it hands out.
        self._add_constraint = builder.lifecycle.add_constraint

        builder.lifecycle.add_constraint(
            self.get_view,
            allow_during=[
                "setup",
                "post_setup",
                "population_creation",
                "simulation_end",
                "report",
            ],
        )
        builder.lifecycle.add_constraint(self.get_simulant_creator, allow_during=["setup"])
        builder.lifecycle.add_constraint(
            self.register_simulant_initializer, allow_during=["setup"]
        )

        # The manager itself owns the ``tracked`` column.
        self.register_simulant_initializer(
            self.on_initialize_simulants, creates_columns=["tracked"]
        )
        self._view = self.get_view(["tracked"])

    def on_initialize_simulants(self, pop_data: SimulantData):
        """Adds a ``tracked`` column to the state table for new simulants."""
        status = pd.Series(True, index=pop_data.index)
        self._view.update(status)

    @property
    def columns(self) -> List[str]:
        """The columns that currently exist in the state table.

        Returns an empty list if no population has been created yet.
        """
        if self._population is None:
            return []
        return list(self._population.columns)

    def __repr__(self):
        return "PopulationManager()"

    ###########################
    # Builder API and helpers #
    ###########################

    def get_view(
        self, columns: Union[List[str], Tuple[str]], query: Optional[str] = None
    ) -> PopulationView:
        """Get a time-varying view of the population state table.

        The requested population view can be used to view the current state or
        to update the state with new values.

        If ``columns`` is non-empty and does not include 'tracked', the query
        string 'tracked == True' will be added to the provided query argument
        (unless the query already mentions 'tracked'). This allows components
        to ignore untracked simulants by default. If the columns argument is
        empty, the population view will have access to the entire state table.

        Parameters
        ----------
        columns
            A subset of the state table columns that will be available in the
            returned view. If empty, this view will have access to the entire
            state table.
        query
            A filter on the population state.  This filters out particular
            simulants (rows in the state table) based on their current state.
            The query should be provided in a way that is understood by the
            :meth:`pandas.DataFrame.query` method and may reference state
            table columns not requested in the ``columns`` argument.

        Returns
        -------
        PopulationView
            A filtered view of the requested columns of the population state
            table.

        """
        view = self._get_view(columns, query)
        self._add_constraint(
            view.get, restrict_during=["initialization", "setup", "post_setup"]
        )
        self._add_constraint(
            view.update,
            restrict_during=[
                "initialization",
                "setup",
                "post_setup",
                "simulation_end",
                "report",
            ],
        )
        return view

    def _get_view(
        self, columns: Union[List[str], Tuple[str]], query: Optional[str] = None
    ) -> PopulationView:
        """Builds a view without lifecycle constraints, applying the default
        ``tracked`` filter when appropriate."""
        if columns and "tracked" not in columns:
            if query is None:
                query = "tracked == True"
            elif "tracked" not in query:
                # NOTE: this is a substring test on the query text, so any
                # query mentioning "tracked" is left untouched.
                query += " and tracked == True"
        self._last_id += 1
        return PopulationView(self, self._last_id, columns, query)

    def register_simulant_initializer(
        self,
        initializer: Callable,
        creates_columns: List[str] = (),
        requires_columns: List[str] = (),
        requires_values: List[str] = (),
        requires_streams: List[str] = (),
    ):
        """Marks a source of initial state information for new simulants.

        Parameters
        ----------
        initializer
            A callable that adds or updates initial state information about
            new simulants.
        creates_columns
            A list of the state table columns that the given initializer
            provides the initial state information for.
        requires_columns
            A list of the state table columns that already need to be present
            and populated in the state table before the provided initializer
            is called.
        requires_values
            A list of the value pipelines that need to be properly sourced
            before the provided initializer is called.
        requires_streams
            A list of the randomness streams necessary to initialize the
            simulant attributes.

        """
        self._initializer_components.add(initializer, creates_columns)
        dependencies = (
            [f"column.{name}" for name in requires_columns]
            + [f"value.{name}" for name in requires_values]
            + [f"stream.{name}" for name in requires_streams]
        )
        # Compare as a list so that a tuple ``("tracked",)`` is also recognized
        # as the tracked initializer and is not made to depend on itself.
        if list(creates_columns) != ["tracked"]:
            # The population view itself uses the tracked column, so include
            # to be safe.
            dependencies += ["column.tracked"]
        self.resources.add_resources(
            "column", list(creates_columns), initializer, dependencies
        )

    def get_simulant_creator(self) -> Callable[[int, Optional[Dict[str, Any]]], pd.Index]:
        """Gets a function that can generate new simulants.

        Returns
        -------
        Callable
            The simulant creator function. The creator function takes the
            number of simulants to be created as its first argument and a dict
            population configuration that will be available to simulant
            initializers as its second argument. It generates the new rows in
            the population state table and then calls each initializer
            registered with the population system with a data
            object containing the state table index of the new simulants, the
            configuration info passed to the creator, the current simulation
            time, and the size of the next time step.

        """
        return self._create_simulants

    def _create_simulants(
        self, count: int, population_configuration: Optional[Dict[str, Any]] = None
    ) -> pd.Index:
        """Appends ``count`` rows to the state table and runs every initializer.

        Parameters
        ----------
        count
            The number of simulants to add.
        population_configuration
            Extra data handed to each initializer as ``user_data``. ``None``
            (or any falsy value) is replaced with an empty dict.

        Returns
        -------
        pandas.Index
            The state table index of the newly added simulants.

        """
        population_configuration = population_configuration or {}
        if self._population is None:
            self.creating_initial_population = True
            self._population = pd.DataFrame()

        try:
            # Extend the existing 0..n-1 index by ``count`` new labels.
            new_index = range(len(self._population) + count)
            new_population = self._population.reindex(new_index)
            index = new_population.index.difference(self._population.index)
            self._population = new_population

            self.adding_simulants = True
            # Iterating the resource manager yields the registered
            # initializers (presumably in dependency order -- defined by the
            # resource system, not visible here).
            for initializer in self.resources:
                initializer(
                    SimulantData(
                        index, population_configuration, self.clock(), self.step_size()
                    )
                )
        finally:
            # Always clear the flags, even if an initializer raised, so the
            # manager does not stay stuck in the "adding simulants" state.
            self.creating_initial_population = False
            self.adding_simulants = False

        return index

    ###############
    # Context API #
    ###############

    def get_population(self, untracked: bool) -> pd.DataFrame:
        """Provides a copy of the full population state table.

        Parameters
        ----------
        untracked
            Whether to include untracked simulants in the returned population.

        Returns
        -------
        pandas.DataFrame
            A copy of the population table. An empty frame is returned if no
            population has been created yet.

        """
        pop = self._population.copy() if self._population is not None else pd.DataFrame()
        if not untracked and "tracked" in pop.columns:
            pop = pop[pop.tracked]
        return pop
class PopulationInterface:
    """Provides access to the system for reading and updating the population.

    The most important aspect of the simulation state is the ``population
    table`` or ``state table``.  It is a table with a row for every
    individual or cohort (referred to as a simulant) being simulated and a
    column for each of the attributes of the simulant being modeled.  All
    access to the state table is mediated by
    :class:`population views <vivarium.framework.population.population_view.PopulationView>`,
    which may be requested from this system during setup time.

    The population system itself manages a single attribute of simulants
    called ``tracked``. This attribute allows global control of which
    simulants are available to read and update in the state table by
    default.

    For example, in a simulation of childhood illness, we might not
    need information about individuals or cohorts once they reach five years
    of age, and so we can have them "age out" of the simulation at five years
    old by setting the ``tracked`` attribute to ``False``.

    """

    def __init__(self, manager: PopulationManager):
        # Every method on this interface delegates to the manager.
        self._manager = manager

    def get_view(
        self, columns: Union[List[str], Tuple[str]], query: Optional[str] = None
    ) -> PopulationView:
        """Get a time-varying view of the population state table.

        The requested population view can be used to view the current state or
        to update the state with new values.

        If ``columns`` is non-empty and does not include 'tracked', the query
        string 'tracked == True' will be added to the provided query argument
        (unless the query already mentions 'tracked'). This allows components
        to ignore untracked simulants by default. If the columns argument is
        empty, the population view will have access to the entire state table.

        Parameters
        ----------
        columns
            A subset of the state table columns that will be available in the
            returned view. If empty, this view will have access to the entire
            state table.
        query
            A filter on the population state.  This filters out particular
            simulants (rows in the state table) based on their current state.
            The query should be provided in a way that is understood by the
            :meth:`pandas.DataFrame.query` method and may reference state
            table columns not requested in the ``columns`` argument.

        Returns
        -------
        PopulationView
            A filtered view of the requested columns of the population state
            table.

        """
        return self._manager.get_view(columns, query)

    def get_simulant_creator(self) -> Callable[[int, Optional[Dict[str, Any]]], pd.Index]:
        """Gets a function that can generate new simulants.

        Returns
        -------
        Callable
            The simulant creator function. The creator function takes the
            number of simulants to be created as its first argument and a dict
            population configuration that will be available to simulant
            initializers as its second argument. It generates the new rows in
            the population state table and then calls each initializer
            registered with the population system with a data
            object containing the state table index of the new simulants, the
            configuration info passed to the creator, the current simulation
            time, and the size of the next time step.

        """
        return self._manager.get_simulant_creator()

    def initializes_simulants(
        self,
        initializer: Callable[[SimulantData], None],
        creates_columns: List[str] = (),
        requires_columns: List[str] = (),
        requires_values: List[str] = (),
        requires_streams: List[str] = (),
    ):
        """Marks a source of initial state information for new simulants.

        Parameters
        ----------
        initializer
            A callable that adds or updates initial state information about
            new simulants.
        creates_columns
            A list of the state table columns that the given initializer
            provides the initial state information for.
        requires_columns
            A list of the state table columns that already need to be present
            and populated in the state table before the provided initializer
            is called.
        requires_values
            A list of the value pipelines that need to be properly sourced
            before the provided initializer is called.
        requires_streams
            A list of the randomness streams necessary to initialize the
            simulant attributes.

        """
        self._manager.register_simulant_initializer(
            initializer, creates_columns, requires_columns, requires_values, requires_streams
        )