Source code for dioptra_builtins.data.tensorflow
# This Software (Dioptra) is being made available as a public service by the
# National Institute of Standards and Technology (NIST), an Agency of the United
# States Department of Commerce. This software was developed in part by employees of
# NIST and in part by NIST contractors. Copyright in portions of this software that
# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant
# to Title 17 United States Code Section 105, works of NIST employees are not
# subject to copyright protection in the United States. However, NIST may hold
# international copyright in software created by its employees and domestic
# copyright (or licensing rights) in portions of software that were assigned or
# licensed to NIST. To the extent that NIST holds copyright in this software, it is
# being made available under the Creative Commons Attribution 4.0 International
# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts
# of the software developed or licensed by NIST.
#
# ACCESS THE FULL CC BY 4.0 LICENSE HERE:
# https://creativecommons.org/licenses/by/4.0/legalcode
"""A task plugin module for preparing Tensorflow-specific dataset iterators.
.. |flow_from_directory| replace:: :py:meth:`tensorflow.keras.preprocessing.image\\
.ImageDataGenerator.flow_from_directory`
.. |directory_iterator| replace:: :py:class:`~tensorflow.keras.preprocessing.image\\
.DirectoryIterator`
"""
from __future__ import annotations
from typing import Optional, Tuple
import structlog
from structlog.stdlib import BoundLogger
from dioptra import pyplugs
from dioptra.sdk.exceptions import TensorflowDependencyError
from dioptra.sdk.utilities.decorators import require_package
LOGGER: BoundLogger = structlog.stdlib.get_logger()
# Tensorflow is an optional dependency: if it cannot be imported, the module still
# loads and a warning is logged instead of propagating the ImportError.
try:
    from tensorflow.keras.preprocessing.image import (
        DirectoryIterator,
        ImageDataGenerator,
    )

except ImportError:  # pragma: nocover
    # Use the canonical `warning` method rather than the legacy `warn` alias.
    LOGGER.warning(
        "Unable to import one or more optional packages, functionality may be reduced",
        package="tensorflow",
    )
@pyplugs.register
@require_package("tensorflow", exc_type=TensorflowDependencyError)
def create_image_dataset(
    data_dir: str,
    subset: Optional[str],
    image_size: Tuple[int, int, int],
    seed: int,
    rescale: float = 1.0 / 255,
    validation_split: Optional[float] = 0.2,
    batch_size: int = 32,
    label_mode: str = "categorical",
) -> DirectoryIterator:
    """Builds an iterator that reads batches of image data from a directory.

    The images are loaded through an
    :py:class:`~tensorflow.keras.preprocessing.image.ImageDataGenerator` configured
    with `rescale` and `validation_split`, and the iterator is obtained by calling
    |flow_from_directory| on it.

    Args:
        data_dir: The directory containing the image dataset.
        subset: The subset of data (`"training"` or `"validation"`) to use if
            `validation_split` is not `None`. If `None`, then `validation_split` must
            also be `None`.
        image_size: A tuple of integers `(height, width, channels)` used to preprocess
            the images so that they all have the same dimensions and number of color
            channels. The first two entries are used as the target size, so images
            with different dimensions will be resized. `channels=3` selects the
            `"rgb"` color mode, `channels=4` selects `"rgba"`, and any other value
            selects `"grayscale"`.
        seed: Sets the random seed used for shuffling and transformations.
        rescale: The rescaling factor for the pixel vectors. If `None` or `0`, no
            rescaling is applied, otherwise multiply the data by the value provided
            (after applying all other transformations). The default is `1.0 / 255`.
        validation_split: The fraction of the data to set aside for validation. If not
            `None`, the value given here must be between `0` and `1`. If `None`, then
            there is no validation set. The default is `0.2`.
        batch_size: The number of samples in each batch produced by the iterator.
            The default is `32`.
        label_mode: Determines how the label arrays for the dataset will be returned.
            The available choices are: `"categorical"`, `"binary"`, `"sparse"`,
            `"input"`, `None`. For information on the meaning of each choice, see
            the documentation for |flow_from_directory|. The default is `"categorical"`.

    Returns:
        A :py:class:`~tensorflow.keras.preprocessing.image.DirectoryIterator` object.

    See Also:
        - |flow_from_directory|
        - :py:class:`~tensorflow.keras.preprocessing.image.DirectoryIterator`
    """
    # Translate the channel count into the color mode name; anything other than
    # 3 or 4 channels falls back to grayscale.
    n_channels: int = image_size[2]

    if n_channels == 3:
        color_mode = "rgb"

    elif n_channels == 4:
        color_mode = "rgba"

    else:
        color_mode = "grayscale"

    # Only (height, width) is handed over as the target size.
    height_and_width: Tuple[int, int] = image_size[:2]

    image_generator: ImageDataGenerator = ImageDataGenerator(
        rescale=rescale,
        validation_split=validation_split,
    )
    batch_iterator: DirectoryIterator = image_generator.flow_from_directory(
        directory=data_dir,
        target_size=height_and_width,
        color_mode=color_mode,
        class_mode=label_mode,
        batch_size=batch_size,
        seed=seed,
        subset=subset,
    )

    return batch_iterator
@pyplugs.register
@require_package("tensorflow", exc_type=TensorflowDependencyError)
def get_n_classes_from_directory_iterator(ds: DirectoryIterator) -> int:
    """Counts the unique labels found by the |directory_iterator|.

    Args:
        ds: A |directory_iterator| object.

    Returns:
        The number of unique labels in the dataset, taken as the number of entries
        in `ds.class_indices`.
    """
    label_to_index = ds.class_indices
    return len(label_to_index)