"""
Classes that describe results of simulations of models
.. codeauthor:: David Zwicker <david.zwicker@ds.mpg.de>
"""
from __future__ import annotations
import collections
import inspect
import itertools
import logging
import warnings
from pathlib import Path
from typing import Any, Iterator, List
import numpy as np
from tqdm.auto import tqdm
from ..model.base import ModelBase
from ..storage import Location, StorageGroup, StorageID, open_storage, storage_actions
from ..storage.access_modes import ModeType
from ..storage.attributes import Attrs
from ..storage.utils import encode_class
class MockModel(ModelBase):
    """helper class storing parameter values when the original model is not present

    Instances only carry the parameters of a model; they cannot be called to
    actually run a simulation.
    """

    def __init__(self, parameters: dict[str, Any] | None = None):
        """
        Args:
            parameters (dict): A dictionary of parameters
        """
        # The parameters are parsed without a validity check (`check_validity=False`)
        # NOTE(review): `ModelBase.__init__` is not invoked here — presumably on
        # purpose since the original model definition is unavailable; confirm
        self.parameters = self._parse_parameters(parameters, check_validity=False)

    def __call__(self):
        """always raises, since a mock model has no implementation to run

        Raises:
            RuntimeError: on every call
        """
        raise RuntimeError(f"{self.__class__.__name__} cannot be called")

    def __repr__(self) -> str:
        """return the class name together with the stored parameters"""
        return f"{self.__class__.__name__}({self.parameters})"
class Result:
    """describes the result of a single model run together with auxiliary information

    Besides storing the final outcome of the model in
    :attr:`~modelrunner.run.results.Result.result`, the class also stores information
    about the original model in :attr:`~modelrunner.run.results.Result.model`,
    additional information in :attr:`~modelrunner.run.results.Result.info`, and
    potentially arbitrary objects that were added during the model run in
    :attr:`~modelrunner.run.results.Result.storage`.

    .. note::
        The result is represented as a hierarchical structure when saved using the
        :mod:`~modelrunner.storage`. The actual result is stored in the `result`
        group, whereas the model information can be found in `_model` group.
        Additional information is stored in the root attribute. Thus, the full
        :class:`Result` can be read using :code:`storage[loc]`, where `loc` denotes
        the result location. If only the actual result is needed,
        :code:`storage[loc + "/result"]` can be read.
    """

    _format_version = 3
    """int: number indicating the version of the file format"""

    model: ModelBase
    """:class:`ModelBase`: Model that was run. This is a
    :class:`~modelrunner.run.results.MockModel` instance if details are not available"""

    result: Any
    """the final outcome of the model"""

    storage: StorageGroup | None
    """:class:`StorageGroup`: Storage that might contain additional information, e.g.,
    stored during the model run"""

    info: dict[str, Any] | None
    """dict: Additional information for this result"""

    def __init__(
        self,
        model: ModelBase,
        result: Any,
        *,
        storage: StorageGroup | None = None,
        info: dict[str, Any] | None = None,
    ):
        """
        Args:
            model (:class:`ModelBase`):
                The model from which the result was obtained
            result:
                The actual result
            storage:
                A storage containing additional data from the model run
            info (dict):
                Additional information for this result

        Raises:
            TypeError: If `model` is not an instance of :class:`ModelBase`
        """
        if not isinstance(model, ModelBase):
            raise TypeError("The model should be of type `ModelBase`")
        self.result = result
        self.model = model
        self.storage = storage
        # a missing `info` is replaced by an empty dictionary, so the attribute can
        # always be used like a mapping
        self.info: Attrs = {} if info is None else info

    @property
    def data(self):
        """direct access to the underlying state data

        Deprecated alias of :attr:`result`; accessing it emits a
        :class:`DeprecationWarning`.
        """
        # deprecated on 2024-04-13
        warnings.warn(
            "`.data` attribute was renamed to `.result`",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the code accessing `.data`
        )
        return self.result

    @classmethod
    def from_data(
        cls,
        model_data: dict[str, Any],
        result,
        *,
        model: ModelBase | None = None,
        storage: StorageGroup | None = None,
        info: dict[str, Any] | None = None,
    ) -> Result:
        """create result from data

        Args:
            model_data (dict):
                The data identifying the model. The keys `parameters`, `name`, and
                `description` are used; missing keys are tolerated.
            result:
                The actual result data
            model (:class:`ModelBase`):
                The model from which the result was obtained. This can be a model
                class or an instance; in both cases a new instance of the class is
                created from the parameters in `model_data`. If omitted, a
                :class:`MockModel` is used.
            storage:
                A storage containing additional data from the model run
            info (dict):
                Additional information for this result

        Returns:
            :class:`Result`: The result object
        """
        # determine the class that is used to re-create the model
        if model is None:
            model_cls: type[ModelBase] = MockModel
        else:
            model_cls = model if inspect.isclass(model) else model.__class__

        if not model_data:
            warnings.warn("Model data not found")

        # re-create the model from the stored information
        model = model_cls(model_data.get("parameters", {}))
        model.name = model_data.get("name")
        model.description = model_data.get("description")
        return cls(model, result, storage=storage, info=info)

    @property
    def parameters(self) -> dict[str, Any]:
        """dict: the parameters of the model that produced this result"""
        return self.model.parameters

    @classmethod
    def from_file(
        cls,
        storage: StorageID,
        loc: Location = None,
        *,
        model: ModelBase | None = None,
    ) -> Result:
        """load object from a file

        This function loads the results from a hierarchical storage. It also attempts
        to read information about the model that was used to create this result and
        additional data that might have been stored in a
        :attr:`~modelrunner.results.Result.storage` while the model was running.

        Args:
            storage:
                Path or instance describing the storage from which the result is
                read. Files given as a path are first checked for older format
                versions.
            loc:
                The location where the result is stored in the storage. This should
                rarely be modified.
            model (:class:`~modelrunner.model.ModelBase`):
                The model which led to this result

        Returns:
            :class:`Result`: The loaded result

        Raises:
            RuntimeError: If the format version of the stored data is not supported
        """
        if isinstance(storage, (str, Path)) and (isinstance(loc, str) or loc is None):
            # check whether the file was written with an old format version
            from .compatibility.triage import result_check_load_old_version

            result = result_check_load_old_version(Path(storage), loc=loc, model=model)
            if result is not None:
                return result  # Result created from old version

        # assume that file was written with latest format version
        # NOTE(review): `loc` is only used to read the attributes below, whereas
        # `_model`, `result`, and `storage` are looked up relative to the storage
        # root. `to_file` writes all of them relative to `loc`, so reading with a
        # non-default `loc` looks inconsistent — TODO confirm intended behavior
        with open_storage(storage, mode="read") as storage_obj:
            attrs = storage_obj.read_attrs(loc)
            format_version = attrs.pop("format_version", None)
            if format_version != cls._format_version:
                raise RuntimeError(f"Cannot read format version {format_version}")

            # current version of storing results
            if "storage" in storage_obj:
                data_storage = open_storage(storage, loc="storage", mode="read")
            else:
                data_storage = None

            return cls.from_data(
                model_data=storage_obj.get("_model", {}),
                result=storage_obj.read_item("result", use_class=False),
                model=model,
                storage=data_storage,
                info=attrs.pop("info", {}),  # load additional info
            )

    def to_file(
        self, storage: StorageID, loc: Location = None, *, mode: ModeType = "insert"
    ) -> None:
        """write the results to a file

        Note that this does only write the actual `results` but omits additional data
        that might have been stored in a storage that is associated with the results.

        Args:
            storage (:class:`StorageBase` or :class:`StorageGroup`):
                The storage where the group is defined. If this is a
                :class:`StorageGroup` itself, `loc` is interpreted relative to that
                group
            loc (str or list of str):
                Denotes the location (path) of the group within the storage
            mode (str or :class:`~modelrunner.storage.access_modes.ModeType`):
                The file mode with which the storage is accessed, which determines the
                allowed operations. Common options are "read", "full", "append", and
                "truncate".
        """
        with open_storage(storage, loc=loc, mode=mode) as storage_obj:
            # collect attributes from the result
            attrs: Attrs = {
                "format_version": self._format_version,
                "__class__": encode_class(self.__class__),
            }
            if self.info:
                # empty info is not written at all
                attrs["info"] = self.info

            # write the actual data
            storage_obj.write_attrs([], attrs=attrs)
            storage_obj.write_object("_model", dict(self.model._state_attributes))
            storage_obj.write_object("result", self.result)
def _write_result(store, loc, result):
    """hook for the storage layer that writes `result` at location `loc` of `store`"""
    return result.to_file(store, loc)


# register the hooks that are used for reading and writing `Result` items
storage_actions.register("read_item", Result, Result.from_file)
storage_actions.register("write_item", Result, _write_result)
class ResultCollection(List[Result]):
    """represents a collection of results"""

    @classmethod
    def from_folder(
        cls,
        folder: str | Path,
        pattern: str = "*.*",
        model: ModelBase | None = None,
        *,
        strict: bool = False,
        progress: bool = False,
    ):
        """create results collection from a folder

        Args:
            folder (str):
                Path to the folder that is scanned
            pattern (str):
                Filename pattern that is used to detect result files
            model (:class:`~modelrunner.model.ModelBase`):
                Base class from which models are initialized
            strict (bool):
                Whether to raise an exception or just emit a warning when a file cannot
                be read
            progress (bool):
                Flag indicating whether a progress bar is shown

        Returns:
            :class:`ResultCollection`: All results that could be loaded
        """
        logger = logging.getLogger(cls.__name__)

        folder = Path(folder)
        if not folder.is_dir():
            # only warn here; the loop below then simply does not find any files
            logger.warning(f"{folder} is not a directory")

        # iterate over all files and load them as a Result
        results = []
        # the matches are collected in a list so the progress bar knows the total
        for path in tqdm(list(folder.glob(pattern)), disable=not progress):
            if path.is_file():
                try:
                    result = Result.from_file(path, model=model)
                except Exception as err:
                    if strict:
                        # add the offending file name to the original error
                        err.args = (str(err) + f"\nError reading file `{path}`",)
                        raise
                    else:
                        logger.warning(f"Error reading file `{path}`")
                else:
                    results.append(result)

        # raise a warning if no results were detected
        if not results:
            if pattern == "*.*":
                logger.warning("Did not find any files")
            else:
                logger.warning(
                    f"Did not find any files. Is pattern `{pattern}` too restrictive?"
                )

        return cls(results)

    def __repr__(self):
        return f"{self.__class__.__name__}(<{len(self)} Results>)"

    __str__ = __repr__

    def __add__(self, other: ResultCollection) -> ResultCollection:  # type: ignore
        """concatenate two result collections

        Operands that are not a :class:`ResultCollection` are rejected by returning
        `NotImplemented`, so Python raises a :class:`TypeError` instead of the
        operation silently evaluating to `None`.
        """
        if isinstance(other, ResultCollection):
            return ResultCollection(super().__add__(other))
        return NotImplemented

    @staticmethod
    def _hashable(value: Any) -> Any:
        """convert a list into a tuple; all other values are returned unchanged"""
        return tuple(value) if isinstance(value, list) else value

    @classmethod
    def _matches(cls, result: Result, requirements: dict[str, Any]) -> bool:
        """check whether the parameters of `result` agree with all `requirements`

        Lists are compared as tuples on both sides, so the tuple-valued entries
        reported by :attr:`parameters` match list-valued model parameters.
        """
        return all(
            cls._hashable(result.parameters[key]) == cls._hashable(value)
            for key, value in requirements.items()
        )

    @property
    def same_model(self) -> bool:
        """bool: flag determining whether all results are from the same model

        Two results count as being from the same model if the model classes and the
        names of the parameters agree.
        """
        if len(self) < 2:
            return True

        model_cls = self[0].model.__class__
        keys = self[0].model.parameters.keys()
        return all(
            res.model.__class__ == model_cls and res.model.parameters.keys() == keys
            for res in self
        )

    @property
    def parameters(self) -> dict[str, set[Any]]:
        """dict: the parameter values in this result collection

        Note that parameters that are lists in the individual models are turned into
        tuples, so they can be handled efficiently, e.g., in sets.
        """
        params = collections.defaultdict(set)
        for result in self:
            for k, v in result.model.parameters.items():
                # work around to make lists hashable
                params[k].add(self._hashable(v))
        return dict(params)

    @property
    def constant_parameters(self) -> dict[str, Any]:
        """dict: the parameters that are constant in this result collection"""
        return {
            k: next(iter(v))  # get the single item from the set
            for k, v in self.parameters.items()
            if len(v) == 1
        }

    @property
    def varying_parameters(self) -> dict[str, list[Any]]:
        """dict: the parameters that vary in this result collection"""
        return {k: sorted(v) for k, v in self.parameters.items() if len(v) > 1}

    def get(self, **kwargs) -> Result:
        """return a single result with the given parameters

        Warning:
            If there are multiple results compatible with the specified parameters,
            only the first one is returned.

        Args:
            **kwargs: Specify parameter values of result that is returned

        Returns:
            :class:`Result`: A single result from the collection

        Raises:
            ValueError: If no result matches the given parameters
        """
        # return the first result that matches the requirements
        for item in self:
            if self._matches(item, kwargs):
                return item
        raise ValueError("Result not contained in collection")

    def filtered(self, **kwargs) -> ResultCollection:
        r"""return a subset of the results

        Args:
            **kwargs: Specify parameter values of results that are retained

        Returns:
            :class:`ResultCollection`: The filtered collection
        """
        return self.__class__(item for item in self if self._matches(item, kwargs))

    def groupby(self, *args) -> Iterator[tuple[dict[str, Any], ResultCollection]]:
        r"""group results according to the given variables

        Args:
            *args: Specify parameters according to which the results are sorted

        Returns:
            generator that allows iterating over the groups. Each iteration returns a
            dictionary with the current parameters and the associated
            :class:`ResultCollection`.
        """
        # collect the parameter values once instead of once per grouping variable
        all_parameters = self.parameters
        group_values = [all_parameters[name] for name in args]

        # test all combinations of values and skip those without any results
        for group_value in itertools.product(*group_values):
            group_parameters = dict(zip(args, group_value))
            subset = self.filtered(**group_parameters)
            if len(subset) > 0:
                yield group_parameters, subset

    def sorted(self, *args, reverse: bool = False) -> ResultCollection:
        r"""return a sorted version of the results

        Args:
            *args: Specify parameters according to which the results are sorted
            reverse (bool): If True, sort in descending order

        Returns:
            :class:`ResultCollection`: The sorted collection
        """

        def sort_func(item):
            """helper function for ordering the results"""
            return [item.parameters[name] for name in args]

        return self.__class__(sorted(self, key=sort_func, reverse=reverse))

    def remove_duplicates(self) -> ResultCollection:
        """remove duplicates in the result collection

        Results are considered duplicates if their parameters are equal; the first
        occurrence is kept.

        Returns:
            :class:`ResultCollection`: Collection without duplicated results
        """
        # we cannot use a set for `seen`, since parameters might not always be hashable
        unique_results, seen = [], []
        for result in self:
            if result.parameters not in seen:
                unique_results.append(result)
                seen.append(result.parameters)
        return self.__class__(unique_results)

    @property
    def dataframe(self):
        """create a pandas dataframe summarizing the data"""
        # deprecated on 2023-10-19
        warnings.warn(
            "Property `dataframe` deprecated; use method `as_dataframe`",
            stacklevel=2,  # attribute the warning to the code accessing the property
        )
        return self.as_dataframe()

    def as_dataframe(self, *, enforce_same_model: bool = True):
        """create a pandas dataframe summarizing the data

        Args:
            enforce_same_model (bool):
                If True, forces all model results to derive from the same model

        Returns:
            :class:`pandas.DataFrame`: Table with one row per result

        Raises:
            RuntimeError: If `enforce_same_model` is set but the results stem from
                different models, or if a result is neither a scalar nor a dict
        """
        import pandas as pd

        if enforce_same_model and not self.same_model:
            raise RuntimeError("Results are not from the same model")

        def get_data(result):
            """helper function to extract the data"""
            df_data = result.parameters.copy()

            # try obtaining the name of the result; a parameter called `name` takes
            # precedence since `setdefault` does not overwrite existing entries
            if result.info.get("name"):
                df_data.setdefault("name", result.info["name"])
            elif hasattr(result.model, "name"):
                df_data.setdefault("name", result.model.name)

            # try interpreting the result data in a format understood by pandas
            data = result.result
            if np.isscalar(data):
                df_data["result"] = data
            elif isinstance(data, dict):
                # entries that are neither scalars nor array-like are skipped
                for key, value in data.items():
                    if np.isscalar(value):
                        df_data[key] = value
                    elif isinstance(value, (list, tuple, np.ndarray)):
                        df_data[key] = np.asarray(value)
            else:
                raise RuntimeError("Do not know how to interpret result")
            return df_data

        return pd.DataFrame([get_data(result) for result in self])