Source code for modelrunner.run.compatibility.triage

"""
Contains code necessary for deciding which format version was used to write a file

.. codeauthor:: David Zwicker <david.zwicker@ds.mpg.de>
"""

from __future__ import annotations

import json
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Any, Mapping, Union

from ...model import ModelBase
from ..results import Result

if TYPE_CHECKING:
    from zarr.storage import BaseStore


# Anything accepted as a storage location: a path given as a string or as a
# :class:`~pathlib.Path`, or a zarr `BaseStore` object (imported above for type
# checking only, hence the quoted forward reference).
Store = Union[str, Path, "BaseStore"]


def guess_format(path: Path) -> str:
    """infer the storage format of a file from its name

    Only the last extension of the path is inspected (ignoring case); the file
    itself is never opened.

    Args:
        path (str or :class:`~pathlib.Path`):
            Path pointing to a file

    Returns:
        str: One of `"json"`, `"yaml"`, `"hdf"`, or `"zarr"`. Paths with an
        unknown or missing extension are reported as `"zarr"`.
    """
    formats_by_extension = {
        ".json": "json",
        ".yml": "yaml",
        ".yaml": "yaml",
        ".h5": "hdf",
        ".hdf": "hdf",
        ".hdf5": "hdf",
    }
    extension = Path(path).suffix.lower()
    # fall back to the default storage method based on zarr
    return formats_by_extension.get(extension, "zarr")
def normalize_zarr_store(store: Store, mode: str = "a") -> Store | None:
    """determine best file format for zarr storage

    In particular, we use a :class:`~zarr.storage.ZipStore` when a path pointing
    to an existing file is given.

    Args:
        store: User-provided store
        mode (str): The mode with which the file will be opened

    Returns:
        The store to use, or `None` if no store could be determined. Objects that
        are neither a `str` nor a :class:`~pathlib.Path` are returned unchanged. A
        path is turned into a :class:`~zarr.storage.ZipStore` if it points to an
        existing file; `None` is returned if the path is not an existing file or
        if opening it raises :class:`zipfile.BadZipFile`.
    """
    if not isinstance(store, (str, Path)):
        # already a store object, so there is nothing to normalize
        return store

    file_path = Path(store)
    if not file_path.is_file():
        return None

    # Import lazily: only this branch needs zarr, so store objects and
    # non-existing paths can be handled without loading (or installing) it.
    import zipfile

    import zarr

    try:
        return zarr.storage.ZipStore(file_path, mode=mode)
    except zipfile.BadZipFile:
        # the file exists but is not a zip archive
        return None
def _find_version(data: Mapping[str, Any], label: str) -> int | None:
    """try finding version information in different places in `data`

    The version is looked up on `data` itself and then, in this order, on the
    entries `data[label]`, `data["state"]`, and `data["result"]`. The first value
    that is not `None` wins. Anything that is not a container (for instance the
    `None` obtained from an empty document, or a scalar entry) simply carries no
    version information.

    Args:
        data (dict): A mapping that contains attribute information
        label (str): The label of the data that should be loaded

    Returns:
        int: The format version or None if it could not be found. Versions stored
        as strings are decoded with :func:`json.loads`.
    """

    def read_version(item) -> str | None:
        """try reading the version attribute from a particular item"""
        if hasattr(item, "attrs"):
            # objects exposing `.attrs` keep their metadata there
            return read_version(item.attrs)
        try:
            for key in ("__version__", "format_version"):
                if key in item:
                    return item[key]  # type: ignore
            for key in ("__attrs__", "attributes"):
                if key in item:
                    return read_version(item[key])
        except TypeError:
            # `item` does not support key lookup (None, numbers, lists, ...)
            return None
        return None

    def get_entry(key: str):
        """return `data[key]`, or None if `data` has no such entry"""
        try:
            return data[key] if key in data else None
        except TypeError:
            # `data` itself is not a mapping, e.g. an empty document
            return None

    format_version = read_version(data)
    for key in (label, "state", "result"):
        if format_version is not None:
            break
        format_version = read_version(get_entry(key))

    if isinstance(format_version, str):
        # versions stored as text are JSON-encoded
        return json.loads(format_version)  # type: ignore
    return format_version


def _get_format_version(path: Path, label: str) -> int | None:
    """determine format version of the file in `path`

    Args:
        path (str or :class:`~pathlib.Path`):
            The path to the resource to be loaded
        label (str):
            Label of the item to be loaded

    Returns:
        int: The format version, or None if the file holds no version information

    Raises:
        RuntimeError: If `path` cannot be opened as a zarr store or if the
            storage format is not supported
    """
    fmt = guess_format(path)

    if fmt == "json":
        with open(path) as fp:
            return _find_version(json.load(fp), label)

    if fmt == "yaml":
        import yaml

        with open(path) as fp:
            return _find_version(yaml.safe_load(fp), label)

    if fmt == "hdf":
        import h5py

        with h5py.File(path, mode="r") as root:
            return _find_version(root, label)

    if fmt == "zarr":
        import zarr

        store = normalize_zarr_store(path, mode="r")
        if store is None:
            raise RuntimeError(f"Cannot open `{path}` as a zarr store")
        with zarr.open_group(store, mode="r") as root:
            format_version = _find_version(root, label)
            if format_version is None and label != "data":
                # fall back to the default label
                format_version = _find_version(root, "data")
            return format_version

    raise RuntimeError(f"Unsupported storage format `{fmt}`")
def result_check_load_old_version(
    path: Path, loc: str | None, *, model: ModelBase | None = None
) -> Result | None:
    """check whether the resource can be loaded with an older version of the package

    Args:
        path (str or :class:`~pathlib.Path`):
            The path to the resource to be loaded
        loc (str):
            Label, key, or location of the item to be loaded. `None` selects the
            default label `"data"`.
        model (:class:`~modelrunner.model.ModelBase`, optional):
            Optional model that was used to write this result

    Returns:
        :class:`~modelrunner.result.Result`:
            The loaded result or `None` if we cannot load it with the old versions.
            `None` is returned when the format version cannot be determined and
            when it is an integer other than 0, 1, or 2. A file without any
            version information is loaded as format version 0.

    Raises:
        RuntimeError: If the stored format version is not an integer
    """
    label = "data" if loc is None else loc

    try:
        format_version = _get_format_version(path, label)
    except RuntimeError:
        return None  # could not determine format version

    logger = logging.getLogger("modelrunner.compatiblity")

    # Compare with `==` instead of testing set membership: membership would hash
    # the value and fail with a TypeError for unhashable versions (e.g. a list),
    # hiding the RuntimeError raised for unsupported versions below.
    if format_version is None or format_version == 0:
        # load result written with format version 0
        from .version0 import result_from_file_v0

        logger.info("Load data with format version 0")
        return result_from_file_v0(path, model=model)

    if format_version == 1:
        # load result written with format version 1
        from .version1 import result_from_file_v1

        logger.info("Load data with format version 1")
        return result_from_file_v1(path, label=label, model=model)

    if format_version == 2:
        # load result written with format version 2
        from .version2 import result_from_file_v2

        logger.info("Load data with format version 2")
        return result_from_file_v2(path, label=label, model=model)

    if not isinstance(format_version, int):
        raise RuntimeError(f"Unsupported format version {format_version}")

    # an integer version that none of the old loaders handles
    return None