Source code for NekUpload.metadata.extractor
from __future__ import annotations
import h5py
import numpy as np
from dataclasses import dataclass,field
from NekUpload.validate.files import NekSessionFile
[docs]
class HDF5Extractor:
    """Helpers for extracting features from an opened HDF5 file.

    Both helpers are static methods, so they can be called either on the
    class itself or on an instance.
    """

    def __init__(self):
        pass

    @staticmethod
    def extract_attribute(file: h5py.File, group_path: str, attribute: str) -> str | None:
        """Extract a string attribute attached to a group of a HDF5 file.

        This is a best-effort lookup: any failure to locate or read the
        attribute yields ``None`` instead of an exception.

        Args:
            file (h5py.File): An opened HDF5 file
            group_path (str): Path to the HDF5 Group containing the attribute
            attribute (str): Attribute to look for

        Returns:
            str | None: Attribute value with surrounding whitespace stripped,
            or ``None`` when the path is missing, the path is not a group,
            the attribute is missing, or the value is not string-like.
        """
        try:
            group = file[group_path]
            if not isinstance(group, h5py.Group):
                return None

            value = group.attrs[attribute]
            # Byte-valued attributes are decoded so that callers doing
            # ``str(value)`` do not end up with a "b'...'" representation.
            if isinstance(value, bytes):
                value = value.decode("utf-8", errors="replace")

            # Non string-like values have no ``strip`` -> AttributeError -> None
            return value.strip()
        except (KeyError, ValueError, TypeError, AttributeError, OSError, RuntimeError):
            return None

    @staticmethod
    def extract_min_max_coords(
        file: h5py.File,
        dataset_path: str,
        chunk_size: int = 1000,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Read a set of 3d coordinates from the HDF5 file and keep only the minimum and maximum coordinates

        The dataset is read in slices of ``chunk_size`` rows so that it never
        has to be loaded in one piece.

        Args:
            file (h5py.File): HDF5 file containing coordinates
            dataset_path (str): Path to the dataset containing the 3D coordinates
            chunk_size (int, optional): Number of rows read per slice. Defaults to 1000.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1, or if the dataset
                does not have shape (N,3)

        Returns:
            tuple[np.ndarray,np.ndarray]: Component-wise minimum coordinate and
            maximum coordinate. For a dataset with zero rows these are the
            initial values ``+inf`` and ``-inf`` respectively.
        """
        if chunk_size < 1:
            raise ValueError(f"chunk_size must be a positive integer, but got {chunk_size}")

        dataset: h5py.Dataset = file[dataset_path]
        shape = dataset.shape

        # Only a 2D dataset with 3 columns can be reduced into 3-component
        # minimum/maximum vectors below
        if len(shape) != 2 or shape[1] != 3:
            raise ValueError(f"Expected a dataset with shape (N,3), but got {shape}")

        n_rows = shape[0]
        min_coord = np.full(3, np.inf)
        max_coord = np.full(3, -np.inf)

        for chunk_start in range(0, n_rows, chunk_size):
            chunk_end = min(chunk_start + chunk_size, n_rows)
            data_chunk = dataset[chunk_start:chunk_end, :]

            # accumulate in-place into the running results
            np.minimum(min_coord, np.amin(data_chunk, axis=0), out=min_coord)
            np.maximum(max_coord, np.amax(data_chunk, axis=0), out=max_coord)

        return min_coord, max_coord
[docs]
class NekAutoExtractor:
    """Nektar auto extractor of fields in Nektar dataset files
    """

    def __init__(self, session_file: str, geometry_file: str, output_file: list[str] | str):
        """Class initialiser

        Args:
            session_file (str): Session file path
            geometry_file (str): Geometry file path
            output_file (list[str] | str): Output file path, or list of output file paths
        """
        self.session_file = session_file
        self.geometry_file = geometry_file
        self.output_file = output_file

    def _output_paths(self) -> list:
        """Return the configured output file(s) as a list of paths."""
        if isinstance(self.output_file, (list, tuple)):
            return list(self.output_file)
        return [self.output_file]

    def extract_data(self) -> NekAutoExtractData:
        """Extract data from the files

        Reads the Nektar version and git hash from the output file(s), the
        bounding coordinates of the mesh vertices from the geometry file and
        the ``Re`` / ``Kinvis`` parameters from the session file. Values that
        cannot be found keep their defaults in the returned object.

        Returns:
            NekAutoExtractData: Data extracted from the Nektar++ datasets
        """
        results = NekAutoExtractData()
        provenance_group = "NEKTAR/Metadata/Provenance"

        # h5py.File takes a single path, so each output file is opened in turn
        # (read-only) until both provenance values have been found.
        for output_path in self._output_paths():
            with h5py.File(output_path, "r") as f:
                if results.nektar_version is None:
                    if version := HDF5Extractor.extract_attribute(f, provenance_group, "NektarVersion"):
                        results.nektar_version = str(version)

                if results.gitsha is None:
                    if git_hash := HDF5Extractor.extract_attribute(f, provenance_group, "GitSHA1"):
                        results.gitsha = str(git_hash)

            if results.nektar_version is not None and results.gitsha is not None:
                break

        with h5py.File(self.geometry_file, "r") as f:
            min_coords, max_coords = HDF5Extractor.extract_min_max_coords(f, "NEKTAR/GEOMETRY/MESH/VERT")

            #convert from numpy to float
            results.max_coord = [float(n) for n in max_coords]
            results.min_coord = [float(n) for n in min_coords]

        with NekSessionFile(self.session_file) as f:
            params = f.get_parameters()

            # falsy entries (missing, empty) are skipped rather than converted
            if reynolds := params.get("Re", None):
                results.reynolds = float(reynolds)

            if kinvis := params.get("Kinvis", None):
                results.kinvis = float(kinvis)

        return results
[docs]
@dataclass
class NekAutoExtractData:
    """Container for the values produced by ``NekAutoExtractor.extract_data``.

    Every field has a default, so an instance can be created empty and filled
    in field by field.

    Attributes:
        nektar_version (str | None): Nektar version string, ``None`` if not found
        gitsha (str | None): Git SHA1 string, ``None`` if not found
        max_coord (list[float]): Maximum coordinate, empty list by default
        min_coord (list[float]): Minimum coordinate, empty list by default
        reynolds (float | None): Value of the ``Re`` parameter, ``None`` if not found
        kinvis (float | None): Value of the ``Kinvis`` parameter, ``None`` if not found
    """

    nektar_version: str | None = None
    gitsha: str | None = None
    # default_factory gives every instance its own list
    max_coord: list[float] = field(default_factory=list)
    min_coord: list[float] = field(default_factory=list)
    reynolds: float | None = None
    kinvis: float | None = None