# -*- coding: utf-8 -*-
"""
Defining Array Schema
=====================

Array schema are json documents that act as metadata describing the
structure of array-like data. The submodule provides some python
bindings for generating the schema in python.
"""

# %%
# Make a Registry
# ---------------
#
# First make a registry to store a collection of schema you want to
# use. We will also import some example functions included in the
# rmellipse package, as well as the json package to format the
# json documents.

import rmellipse.arrschema as arrschema
import rmellipse.arrschema.examples as examples
import xarray as xr
import numpy as np
import json

REGISTRY = arrschema.ArrayClassRegistry()

# %%
# We will define a basic schema of an array with arbitrary shape
# made of floats called "float_zeros". When the schema is made with
# the arrschema function a uid is automatically added. Then we
# add it to the registry.

# a schema is just a dictionary that specifies
# what the expected structure of a dataset is
float_zeros_schema = arrschema.ArraySchema(
    name='float_zeros', shape=(...,), dims=(...,), dtype=float
)


# you can use that dictionary to generate a
# python class object within the context of a registry
class FloatZeros(arrschema.AnnotatedArray):
    # registry the class is bound to, and the schema it is built from
    registry = REGISTRY
    schema = float_zeros_schema


print(FloatZeros)

# %%
# Validation
# ----------
#
# Requires that the array conforms to the
# AnnotatedArray subclass specification.

# cast an array into the type to check that it conforms
# to the specification
my_data = xr.DataArray(np.zeros((4, 4), dtype=float))
my_data = FloatZeros(my_data)
my_data.validate()

# this will fail because the dtype isn't correct
my_data_fails = xr.DataArray(np.zeros((4, 4), dtype=complex))
try:
    my_data_fails = FloatZeros(my_data_fails)
    my_data_fails.validate()
except arrschema.ValidationError as e:
    print('caught error: \n', e)

# successfully validated datasets store the associated schema in the metadata
print(my_data.attrs['ARRSCHEMA'])

# %%
# Loaders and Savers
# ------------------
#
# Array schema provide a system for organizing and invoking different
# encoding and decoding functions for various schema. Oftentimes
# we work with a single in-memory representation of a particular object,
# but may need to be able to read/write to and from multiple different
# methods of storing that data on disk. We do this by associating
# a loader/saver with a function using a module spec, and related
# file extensions.

# supply the module pathspec to the function
REGISTRY.add_loader(
    'rmellipse.arrschema.examples:load_group_saveable',
    ['.h5', '.hdf5'],
    loader_type='group_saveable',
    schema=FloatZeros.schema,
)

REGISTRY.add_saver(
    'rmellipse.arrschema.examples:save_group_saveable',
    ['.h5', '.hdf5'],
    saver_type='group_saveable',
    schema=FloatZeros.schema,
)

# %%
# Encoding and decoding functions are expected to have function signatures
# that look like ``fun(path, data, *args, **kwargs)``

my_data.save('example.h5', 'my_data_name')
my_data_read = FloatZeros.load('example.h5', group='my_data_name')
print(my_data_read)

# %%
# Conversion
# ----------
#
# Converters can be assigned as well. Converters are functions that take in
# a single data set with an associated schema, and return a new data set
# with an associated schema.

# define a new format we care about
int_zeros_schema = arrschema.ArraySchema(
    name='int_zeros', shape=(...,), dims=(...,), dtype=int
)


class IntZeros(arrschema.AnnotatedArray):
    # same registry as FloatZeros, paired with the integer schema
    registry = REGISTRY
    schema = int_zeros_schema


REGISTRY.add_converter(
    'rmellipse.arrschema.examples:convert_float_to_int',
    input_schema=FloatZeros.schema,
    output_schema=IntZeros.schema,
)

converted = my_data.convert_to(IntZeros)
print(converted)