# Source code for dioptra_builtins.data.tensorflow

# This Software (Dioptra) is being made available as a public service by the
# National Institute of Standards and Technology (NIST), an Agency of the United
# States Department of Commerce. This software was developed in part by employees of
# NIST and in part by NIST contractors. Copyright in portions of this software that
# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant
# to Title 17 United States Code Section 105, works of NIST employees are not
# subject to copyright protection in the United States. However, NIST may hold
# international copyright in software created by its employees and domestic
# copyright (or licensing rights) in portions of software that were assigned or
# licensed to NIST. To the extent that NIST holds copyright in this software, it is
# being made available under the Creative Commons Attribution 4.0 International
# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts
# of the software developed or licensed by NIST.
#
# ACCESS THE FULL CC BY 4.0 LICENSE HERE:
# https://creativecommons.org/licenses/by/4.0/legalcode
"""A task plugin module for preparing Tensorflow-specific dataset iterators.

.. |flow_from_directory| replace:: :py:meth:`tensorflow.keras.preprocessing.image\\
   .ImageDataGenerator.flow_from_directory`
.. |directory_iterator| replace:: :py:class:`~tensorflow.keras.preprocessing.image\\
   .DirectoryIterator`
"""

from __future__ import annotations

from typing import Optional, Tuple

import structlog
from structlog.stdlib import BoundLogger

from dioptra import pyplugs
from dioptra.sdk.exceptions import TensorflowDependencyError
from dioptra.sdk.utilities.decorators import require_package

LOGGER: BoundLogger = structlog.stdlib.get_logger()

try:
    from tensorflow.keras.preprocessing.image import (
        DirectoryIterator,
        ImageDataGenerator,
    )

except ImportError:  # pragma: nocover
    LOGGER.warn(
        "Unable to import one or more optional packages, functionality may be reduced",
        package="tensorflow",
    )


@pyplugs.register
@require_package("tensorflow", exc_type=TensorflowDependencyError)
def create_image_dataset(
    data_dir: str,
    subset: Optional[str],
    image_size: Tuple[int, int, int],
    seed: int,
    rescale: float = 1.0 / 255,
    validation_split: Optional[float] = 0.2,
    batch_size: int = 32,
    label_mode: str = "categorical",
) -> DirectoryIterator:
    """Builds a directory-backed iterator that yields batches of image data.

    An :py:class:`~tensorflow.keras.preprocessing.image.ImageDataGenerator` is
    configured with `rescale` and `validation_split`, and its
    |flow_from_directory| method is then pointed at `data_dir`.

    Args:
        data_dir: The directory containing the image dataset.
        subset: Which partition of the data to load (`"training"` or
            `"validation"`) when `validation_split` is not `None`. Pass `None`
            when `validation_split` is `None`.
        image_size: A tuple of integers `(height, width, channels)`. The first two
            entries are used as the target size that every image is resized to.
            The third entry selects the color mode: `channels=3` gives RGB,
            `channels=4` gives RGBA, and any other value gives grayscale.
        seed: The random seed forwarded to |flow_from_directory|, used for
            shuffling and transformations.
        rescale: The rescaling factor for the pixel values. If `None` or `0`, no
            rescaling is applied, otherwise the data is multiplied by this value
            (after applying all other transformations). The default is
            `1.0 / 255`.
        validation_split: The fraction of the data to set aside for validation.
            If not `None`, the value must be between `0` and `1`. If `None`,
            there is no validation set. The default is `0.2`.
        batch_size: The number of images in each batch produced by the iterator.
            The default is `32`.
        label_mode: Determines how the label arrays for the dataset will be
            returned. The available choices are: `"categorical"`, `"binary"`,
            `"sparse"`, `"input"`, `None`. For the meaning of each choice, see the
            documentation for |flow_from_directory|. The default is
            `"categorical"`.

    Returns:
        A :py:class:`~tensorflow.keras.preprocessing.image.DirectoryIterator`
        object.

    See Also:
        - |flow_from_directory|
        - :py:class:`~tensorflow.keras.preprocessing.image.DirectoryIterator`
    """
    # The channel count decides the color mode; anything other than 3 or 4
    # channels falls back to grayscale.
    channels = image_size[2]
    if channels == 3:
        color_mode: str = "rgb"
    elif channels == 4:
        color_mode = "rgba"
    else:
        color_mode = "grayscale"

    image_flow_factory: ImageDataGenerator = ImageDataGenerator(
        rescale=rescale,
        validation_split=validation_split,
    )

    flow_options = {
        "directory": data_dir,
        # Only (height, width) is passed on; channels were consumed above.
        "target_size": image_size[:2],
        "color_mode": color_mode,
        "class_mode": label_mode,
        "batch_size": batch_size,
        "seed": seed,
        "subset": subset,
    }
    return image_flow_factory.flow_from_directory(**flow_options)


@pyplugs.register
@require_package("tensorflow", exc_type=TensorflowDependencyError)
def get_n_classes_from_directory_iterator(ds: DirectoryIterator) -> int:
    """Counts the unique labels discovered by a |directory_iterator|.

    Args:
        ds: A |directory_iterator| object.

    Returns:
        The number of entries in the iterator's `class_indices` attribute, i.e.
        the number of unique labels in the dataset.
    """
    label_lookup = ds.class_indices
    return len(label_lookup)