Source code for NekUpload.validate.geometry

import os
import h5py
from types import MappingProxyType

from .hdf5_definitions import HDF5GroupDefinition,HDF5DatasetDefinition
from .exceptions import GeometryFileException,HDF5SchemaExtraDefinitionException,HDF5SchemaExistenceException
from .exceptions import HDF5SchemaMissingDatasetException,HDF5SchemaInconsistentException,HDF5SchemaMissingDefinitionException
from NekUpload.utils import parsing

class ValidateGeometry:
    """Class responsible for all geometry validation checks."""

    def __init__(self, file_path: str):
        """Class initialiser

        Args:
            file_path (str): Path to file
        """
        self.file = file_path
        # Keep the bare file name alongside the full path.
        self.file_name = os.path.basename(self.file)

    def check_schema(self) -> bool:
        """Checks whether file conforms to HDF5 geometry schema

        Opens the file read-only with h5py and runs
        ``GeometrySchemaHDF5Validator.validate`` on it. Schema exceptions
        raised by the validator propagate unchanged.

        Raises:
            GeometryFileException: An ``OSError`` was raised while opening or
                validating the file (reported as the file not existing or not
                being in HDF5 format). The original ``OSError`` is attached as
                the cause.

        Returns:
            bool: Passed (always True when no exception is raised)
        """
        try:
            with h5py.File(self.file, 'r') as f:
                # The validator is kept on the instance so callers can inspect
                # what it collected; note the file handle it holds is only
                # open inside this ``with`` block.
                self.schema_checker = GeometrySchemaHDF5Validator(f)
                self.schema_checker.validate()
        except OSError as e:
            # Chain explicitly so the underlying I/O error stays visible in tracebacks.
            raise GeometryFileException(self.file,f"Geometry file either does not exist or is not in HDF5 format {e}") from e

        return True
class GeometrySchemaHDF5Validator:
    """Schema validator for HDF5 geometry .nekg files.

    Checks that the expected groups and datasets are present, that MAPS and
    MESH datasets are defined in matching pairs with the same length, that 3D
    elements have enough 2D elements to be built from, and that the file holds
    no groups or datasets outside the schema.

    Exceptions raised directly by this class:
        HDF5SchemaExtraDefinitionException: An unexpected group or dataset exists.
        HDF5SchemaMissingDatasetException: Only one of a MAPS/MESH pair exists.
        HDF5SchemaInconsistentException: A MAPS/MESH pair differs in length.
        HDF5SchemaMissingDefinitionException: A 3D element type is present
            without enough QUAD/TRI elements.

    The ``validate`` methods of the group and dataset definitions are defined
    elsewhere; whatever they raise propagates unchanged.
    """

    # Placeholder used in expected shapes below.
    # NOTE(review): presumably read by HDF5DatasetDefinition as "no constraint
    # on this dimension" - confirm against hdf5_definitions.
    NO_DIM_CONSTRAINTS = -1

    # Immutable dictionaries describe the structure every geometry file should
    # have. Each definition is keyed by a short descriptor, which is later used
    # to pair MAPS and MESH datasets and to count elements.
    BASE_GROUPS: MappingProxyType[str,HDF5GroupDefinition] = MappingProxyType({
        "NEKTAR": HDF5GroupDefinition("NEKTAR"),
        "GEOMETRY": HDF5GroupDefinition("NEKTAR/GEOMETRY",attributes=["FORMAT_VERSION"]),
        "MAPS": HDF5GroupDefinition("NEKTAR/GEOMETRY/MAPS"),
        "MESH": HDF5GroupDefinition("NEKTAR/GEOMETRY/MESH"),
    })

    DATASETS_MANDATORY_MAPS: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        "VERT": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/VERT",(NO_DIM_CONSTRAINTS,)),
        "DOMAIN": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/DOMAIN",(NO_DIM_CONSTRAINTS,)),
        "COMPOSITE": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/COMPOSITE",(NO_DIM_CONSTRAINTS,)),
    })

    DATASETS_MANDATORY_MESH: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        "CURVE_NODES": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/CURVE_NODES",(NO_DIM_CONSTRAINTS,3)),
        "VERT": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/VERT",(NO_DIM_CONSTRAINTS,3)),
        "DOMAIN": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/DOMAIN",(NO_DIM_CONSTRAINTS,)),
        "COMPOSITE": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/COMPOSITE",(NO_DIM_CONSTRAINTS,)),
    })

    DATASETS_1D_MAPS: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        "SEG": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/SEG",(NO_DIM_CONSTRAINTS,)),
        "CURVE_EDGE": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/CURVE_EDGE",(NO_DIM_CONSTRAINTS,)),
    })

    DATASETS_1D_MESH: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        "SEG": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/SEG",(NO_DIM_CONSTRAINTS,2)),
        "CURVE_EDGE": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/CURVE_EDGE",(NO_DIM_CONSTRAINTS,3)),
    })

    DATASETS_2D_MAPS: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        "TRI": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/TRI",(NO_DIM_CONSTRAINTS,)),
        "QUAD": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/QUAD",(NO_DIM_CONSTRAINTS,)),
        "CURVE_FACE": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/CURVE_FACE",(NO_DIM_CONSTRAINTS,)),
    })

    DATASETS_2D_MESH: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        "TRI": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/TRI",(NO_DIM_CONSTRAINTS,3)),
        "QUAD": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/QUAD",(NO_DIM_CONSTRAINTS,4)),
        "CURVE_FACE": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/CURVE_FACE",(NO_DIM_CONSTRAINTS,3)),
    })

    DATASETS_3D_MAPS: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        "HEX": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/HEX",(NO_DIM_CONSTRAINTS,)),
        "TET": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/TET",(NO_DIM_CONSTRAINTS,)),
        "PYR": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/PYR",(NO_DIM_CONSTRAINTS,)),
        "PRISM": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MAPS/PRISM",(NO_DIM_CONSTRAINTS,)),
    })

    DATASETS_3D_MESH: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        "HEX": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/HEX",(NO_DIM_CONSTRAINTS,6)),
        "TET": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/TET",(NO_DIM_CONSTRAINTS,4)),
        "PYR": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/PYR",(NO_DIM_CONSTRAINTS,5)),
        "PRISM": HDF5DatasetDefinition("NEKTAR/GEOMETRY/MESH/PRISM",(NO_DIM_CONSTRAINTS,5)),
    })

    # Union of every MAPS / MESH definition, keyed by the same descriptors.
    DATASETS_MAPS: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        **DATASETS_MANDATORY_MAPS,
        **DATASETS_1D_MAPS,
        **DATASETS_2D_MAPS,
        **DATASETS_3D_MAPS,
    })

    DATASETS_MESH: MappingProxyType[str,HDF5DatasetDefinition] = MappingProxyType({
        **DATASETS_MANDATORY_MESH,
        **DATASETS_1D_MESH,
        **DATASETS_2D_MESH,
        **DATASETS_3D_MESH,
    })

    def __init__(self,f: h5py.File):
        """Class initialiser

        Args:
            f (h5py.File): Opened HDF5 file
        """
        self.file: h5py.File = f
        # Descriptor keys (e.g. "VERT", "QUAD") of datasets found so far.
        self.datasets_present: set[str] = set()
        # Descriptor key -> number of rows in the matching MESH dataset.
        self.element_number: dict[str, int] = {}

    def validate(self) -> bool:
        """Check whether the provided file conforms to the geometry HDF5 schema

        Runs, in order: group checks, dataset checks, MAPS/MESH consistency,
        element construction checks, and finally a scan for groups or datasets
        that are not part of the schema. The first failing check raises.

        Returns:
            bool: Valid (always True when no exception is raised)
        """
        schema = GeometrySchemaHDF5Validator

        #check mandatory groups
        for group in schema.BASE_GROUPS.values():
            group.validate(self.file)

        #check all datasets
        # NOTE(review): the 1D definitions (SEG, CURVE_EDGE) are checked as
        # mandatory here, same as the original - confirm this is intended.
        mandatory_definitions = (
            schema.DATASETS_MANDATORY_MAPS,
            schema.DATASETS_MANDATORY_MESH,
            schema.DATASETS_1D_MAPS,
            schema.DATASETS_1D_MESH,
        )
        optional_definitions = (
            schema.DATASETS_2D_MAPS,
            schema.DATASETS_2D_MESH,
            schema.DATASETS_3D_MAPS,
            schema.DATASETS_3D_MESH,
        )
        for definitions in mandatory_definitions:
            self.datasets_present.update(self._check_mandatory_dataset(definitions))
        for definitions in optional_definitions:
            self.datasets_present.update(self._check_optional_dataset(definitions))

        self._check_consistent_maps_mesh_definition(self.datasets_present,schema.DATASETS_MAPS,schema.DATASETS_MESH)
        self.element_number = self._get_number_of_elements(self.datasets_present,schema.DATASETS_MESH)
        self._check_element_construction(self.element_number)

        #finally check no extra unexpected payload in file
        valid_groups_keys: list[str] = [group.get_path() for group in schema.BASE_GROUPS.values()]
        self._check_only_valid_groups_exist(valid_groups_keys)

        valid_dataset_keys: list[str] = [dataset.get_path() for dataset in schema.DATASETS_MESH.values()] + \
                                        [dataset.get_path() for dataset in schema.DATASETS_MAPS.values()]
        self._check_only_valid_datasets_exist(valid_dataset_keys)

        return True

    def _check_only_valid_groups_exist(self,valid_groups: list[str]):
        """Check that only valid HDF5 groups exist.

        The caller's list is not modified.

        Args:
            valid_groups (list[str]): list of valid group paths

        Raises:
            HDF5SchemaExtraDefinitionException: A group outside ``valid_groups``
                (and other than "") was returned by the group search.
        """
        # "" is accepted as a valid group as well. Work on a copy so the
        # caller's list is left untouched.
        allowed_groups = [*valid_groups, ""]
        # plus one to search for any extra invalid groups
        max_groups = len(allowed_groups) + 1
        allowed_lookup = set(allowed_groups)

        groups = parsing.get_hdf5_groups_with_depth_limit(self.file,3,max_groups=max_groups)
        for group in groups:
            if group not in allowed_lookup:
                raise HDF5SchemaExtraDefinitionException(self.file,f"Encountered unkown group: {group}")

    def _check_only_valid_datasets_exist(self,valid_datasets: list[str]):
        """Check that only valid HDF5 datasets exist.

        Args:
            valid_datasets (list[str]): list of valid dataset paths

        Raises:
            HDF5SchemaExtraDefinitionException: A dataset outside
                ``valid_datasets`` was returned by the dataset search.
        """
        # plus one to search for any extra invalid datasets
        max_datasets = len(valid_datasets) + 1
        allowed_lookup = set(valid_datasets)

        datasets = parsing.get_hdf5_datasets_with_depth_limit(self.file,3,max_datasets=max_datasets)
        for dataset in datasets:
            if dataset not in allowed_lookup:
                raise HDF5SchemaExtraDefinitionException(self.file,f"Encountered unkown dataset: {dataset}")

    def _check_mandatory_dataset(self,mandatory_datasets: MappingProxyType[str,HDF5DatasetDefinition]) -> set[str]:
        """Helper function. Checks mandatory datasets and returns the keys of those that validated.

        Any exception raised by a definition's ``validate`` propagates.

        Args:
            mandatory_datasets (MappingProxyType[str,HDF5DatasetDefinition]): Dictionary of datasets that should be present

        Returns:
            set[str]: set of keys whose ``validate`` call returned a truthy value
        """
        return {key for key,dataset in mandatory_datasets.items() if dataset.validate(self.file)}

    def _check_optional_dataset(self,optional_dataset: MappingProxyType[str,HDF5DatasetDefinition]) -> set[str]:
        """Helper function. Checks optional datasets and returns the keys of those that validated.

        A key is recorded whenever ``validate`` completes without raising (its
        return value is not inspected). ``HDF5SchemaExistenceException`` is
        swallowed because the dataset is optional; any other exception
        propagates.

        Args:
            optional_dataset (MappingProxyType[str,HDF5DatasetDefinition]): Dataset definitions that are optional

        Returns:
            set[str]: set of keys denoting which datasets are present
        """
        datasets_present_key: set[str] = set()

        for key,dataset in optional_dataset.items():
            try:
                dataset.validate(self.file)
            except HDF5SchemaExistenceException:
                #optional, so allow if doesn't exist
                continue
            datasets_present_key.add(key)

        return datasets_present_key

    def _check_consistent_maps_mesh_definition(self,
                                               present_datasets_keys: set[str],
                                               dataset_maps: dict[str,HDF5DatasetDefinition],
                                               dataset_mesh: dict[str,HDF5DatasetDefinition]) -> None:
        """Check that for all present dataset keys, there is a consistent definition between the MAPS and MESH

        Args:
            present_datasets_keys (set[str]): keys denoting datasets that are present
            dataset_maps (dict[str,HDF5DatasetDefinition]): Definitions of MAPS datasets
            dataset_mesh (dict[str,HDF5DatasetDefinition]): Definitions of MESH datasets

        Raises:
            HDF5SchemaMissingDatasetException: Only one dataset of a pair exists.
            HDF5SchemaInconsistentException: A pair differs in length.
        """
        #maps can't be defined without corresponding mesh and vice versa
        #sorted so that the first reported problem is the same on every run
        for key in sorted(present_datasets_keys):
            #curve nodes only exception to above rule
            if key != "CURVE_NODES":
                self._check_pair_of_datasets(dataset_maps.get(key),dataset_mesh.get(key))

    def _check_pair_of_datasets(self,
                                dataset_map: HDF5DatasetDefinition,
                                dataset_mesh: HDF5DatasetDefinition) -> None:
        """Helper function for checking whether a map and mesh dataset have consistent definitions

        Args:
            dataset_map (HDF5DatasetDefinition): MAPS dataset definition
            dataset_mesh (HDF5DatasetDefinition): MESH dataset definition

        Raises:
            HDF5SchemaMissingDatasetException: Exactly one of the two paths exists in the file.
            HDF5SchemaInconsistentException: Both are datasets but their first dimensions differ.
        """
        data_map = self.file.get(dataset_map.get_path())
        data_mesh = self.file.get(dataset_mesh.get_path())

        # Exactly one of the pair missing -> incomplete definition.
        if (data_map is None) != (data_mesh is None):
            raise HDF5SchemaMissingDatasetException(
                self.file,
                f"HDF5 Schema Error: {dataset_map} and {dataset_mesh} should be defined together, "
                "but one exists and other doesn't")

        # Lengths are only compared when both objects are real datasets; this
        # is also False when both are missing.
        if isinstance(data_map, h5py.Dataset) and isinstance(data_mesh, h5py.Dataset):
            shape_map = data_map.shape
            shape_mesh = data_mesh.shape

            if shape_map[0] != shape_mesh[0]:
                raise HDF5SchemaInconsistentException(
                    self.file,
                    f"HDF5 Schema Error: {dataset_map} has shape {shape_map} and {dataset_mesh} "
                    f"has shape {shape_mesh}. Inconsistent lengths {shape_map[0]} != {shape_mesh[0]}")

    def _get_number_of_elements(self,
                                present_datasets_keys: set[str],
                                dataset_mesh: dict[str,HDF5DatasetDefinition]) -> dict[str,int]:
        """For all datasets present in the geometry file, generate a dictionary mapping dataset keys
        to number of elements defined. Assumes consistency between maps and meshes, so meshes will be
        used as it contains CURVE_NODES

        Args:
            present_datasets_keys (set[str]): keys of the datasets that are present
            dataset_mesh (dict[str,HDF5DatasetDefinition]): valid dataset MESH definitions

        Returns:
            dict[str,int]: First dimension of the MESH dataset for each present key
        """
        number_elements: dict[str,int] = {}

        for dataset_key in present_datasets_keys:
            mesh_path = dataset_mesh[dataset_key].get_path()
            # First dimension of the dataset is taken as the element count.
            number_elements[dataset_key] = self.file.get(mesh_path).shape[0]

        return number_elements

    def _check_element_construction(self,num_elements: dict[str,int]) -> None:
        """Make sure element construction is consistent

        3D elements can only be defined if enough 2D elements are present:
        HEX needs at least 6 QUAD; TET at least 4 TRI; PYR at least 4 TRI and
        1 QUAD; PRISM at least 2 TRI and 4 QUAD. A 3D type with a count of
        zero (or absent) is not checked.

        Args:
            num_elements (dict[str,int]): Mapping of HDF5 dataset to number of elements defined in that dataset

        Raises:
            HDF5SchemaMissingDefinitionException: A 3D element type is defined
                without the minimum number of QUAD/TRI elements.
        """
        quads = num_elements.get("QUAD",0)
        tris = num_elements.get("TRI",0)

        if num_elements.get("HEX") and quads < 6:
            raise HDF5SchemaMissingDefinitionException(self.file,f"HDF5 Schema Error: HEX requires quads. There are only {quads} QUADS defined")

        if num_elements.get("TET") and tris < 4:
            raise HDF5SchemaMissingDefinitionException(self.file,f"HDF5 Schema Error: TET requires tris. There are only {tris} TRIS defined")

        if num_elements.get("PYR") and (tris < 4 or quads < 1):
            raise HDF5SchemaMissingDefinitionException(self.file,f"HDF5 Schema Error: PYR requires quads and tris. There are only {tris} TRIS and {quads} QUADS defined")

        if num_elements.get("PRISM") and (tris < 2 or quads < 4):
            raise HDF5SchemaMissingDefinitionException(self.file,f"HDF5 Schema Error: PRISM requires quads and tris. There are only {tris} TRIS and {quads} QUADS defined")