AutoencoderAlgorithm

  1#!/usr/bin/python3
  2# -*- coding: utf-8 -*-
  3
  4__author__ = 'unknown'
  5__email__ = 'unknown@unknown.com.br'
  6__version__ = '{1}.{0}.{0}'
  7__initial_data__ = '2022/06/01'
  8__last_update__ = '2023/08/03'
  9__credits__ = ['unknown']
 10
 11try:
 12    import os
 13    import sys
 14    import json
 15    import numpy
 16
 17    import tensorflow
 18
 19    from tensorflow.keras.metrics import Mean
 20    from tensorflow.keras.models import Model
 21
 22    from tensorflow.keras.utils import to_categorical
 23
 24except ImportError as error:
 25    print(error)
 26    sys.exit(-1)
 27
 28
 29class AutoencoderAlgorithm(Model):
 30    """
 31    An abstract class for AutoEncoder models.
 32
 33    This class provides a foundation for AutoEncoder models with methods for training,
 34    generating synthetic data, saving and loading models.
 35
 36    Args:
 37        @encoder_model (Model, optional):
 38            The encoder part of the AutoEncoder.
 39        @decoder_model (Model, optional):
 40            The decoder part of the AutoEncoder.
 41        @loss_function (Loss, optional):
 42            The loss function for training.
 43        @file_name_encoder (str, optional):
 44            The file name for saving the encoder model.
 45        @file_name_decoder (str, optional):
 46            The file name for saving the decoder model.
 47        @models_saved_path (str, optional):
 48            The path to save the models.
 49        @latent_mean_distribution (float, optional):
 50            Mean of the latent space distribution.
 51        @latent_stander_deviation (float, optional):
 52            Standard deviation of the latent space distribution.
 53        @latent_dimension (int, optional):
 54            The dimensionality of the latent space.
 55
 56    Attributes:
 57        @_encoder (Model):
 58            The encoder part of the AutoEncoder.
 59        @_decoder (Model):
 60            The decoder part of the AutoEncoder.
 61        @_loss_function (Loss):
 62            Loss function for training.
 63        @_total_loss_tracker (Mean):
 64            Metric for tracking total loss.
 65        @_file_name_encoder (str):
 66            File name for saving the encoder model.
 67        @_file_name_decoder (str):
 68            File name for saving the decoder model.
 69        @_models_saved_path (str):
 70            Path to save the models.
 71        @_encoder_decoder_model (Model):
 72            Combined encoder-decoder model.
 73
 74    Example:
 75        >>> encoder_model = build_encoder(input_shape=(128, 128, 3), latent_dimension=64)
 76        >>> decoder_model = build_decoder(latent_dimension=64, output_shape=(128, 128, 3))
 77        ...     autoencoder = AutoencoderAlgorithm(
 78        ...     encoder_model=encoder_model,
 79        ...     decoder_model=decoder_model,
 80        ...     loss_function=tensorflow.keras.losses.MeanSquaredError(),
 81        ...     file_name_encoder="encoder_model.h5",
 82        ...     file_name_decoder="decoder_model.h5",
 83        ...     models_saved_path="./autoencoder_models/",
 84        ...     latent_mean_distribution=0.0,
 85        ...     latent_stander_deviation=1.0,
 86        ...     latent_dimension=64
 87        ...     )
 88        ...     autoencoder.compile(optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.001))
 89        >>> autoencoder.fit(train_dataset, epochs=50)
 90    """
 91
 92    def __init__(self,
 93                 encoder_model,
 94                 decoder_model,
 95                 loss_function,
 96                 file_name_encoder,
 97                 file_name_decoder,
 98                 models_saved_path,
 99                 latent_mean_distribution,
100                 latent_stander_deviation,
101                 latent_dimension):
102
103        super().__init__()
104        """
105        Initializes an AutoEncoder model with an encoder, decoder, and necessary configurations.
106
107        Args:
108            @encoder_model (Model):
109                The encoder part of the AutoEncoder.
110            @decoder_model (Model):
111                The decoder part of the AutoEncoder.
112            @loss_function (Loss):
113                The loss function used for training.
114            @file_name_encoder (str):
115                The filename for saving the trained encoder model.
116            @file_name_decoder (str):
117                The filename for saving the trained decoder model.
118            @models_saved_path (str):
119                The directory path where models should be saved.
120            @latent_mean_distribution (float):
121                The mean of the latent noise distribution.
122            @latent_standard_deviation (float):
123                The standard deviation of the latent noise distribution.
124            @latent_dimension (int):
125                The number of dimensions in the latent space.
126
127        Attributes:
128            @_encoder (Model):
129                The encoder model.
130            @_decoder (Model):
131                The decoder model.
132            @_loss_function (Loss):
133                The loss function used for optimization.
134            @_total_loss_tracker (Mean):
135                Metric for tracking total loss during training.
136            @_latent_mean_distribution (float):
137                The mean of the latent space distribution.
138            @_latent_standard_deviation (float):
139                The standard deviation of the latent space distribution.
140            @_latent_dimension (int):
141                The dimensionality of the latent space.
142            @_file_name_encoder (str):
143                Name of the file where the encoder model is saved.
144            @_file_name_decoder (str): 
145                Name of the file where the decoder model is saved.
146            @_models_saved_path (str):
147                Path where models are saved.
148            @_encoder_decoder_model (Model):
149                A combined model that links the encoder and decoder.
150
151        """
152        if not isinstance(encoder_model, tensorflow.keras.Model):
153            raise TypeError("encoder_model must be a tf.keras.Model instance.")
154
155        if not isinstance(decoder_model, tensorflow.keras.Model):
156            raise TypeError("decoder_model must be a tf.keras.Model instance.")
157
158        if not isinstance(file_name_encoder, str) or not file_name_encoder:
159            raise ValueError("file_name_encoder must be a non-empty string.")
160
161        if not isinstance(file_name_decoder, str) or not file_name_decoder:
162            raise ValueError("file_name_decoder must be a non-empty string.")
163
164        if not isinstance(models_saved_path, str) or not models_saved_path:
165            raise ValueError("models_saved_path must be a non-empty string.")
166
167        if not isinstance(latent_mean_distribution, (int, float)):
168            raise TypeError("latent_mean_distribution must be a number.")
169
170        if not isinstance(latent_stander_deviation, (int, float)):
171            raise TypeError("latent_stander_deviation must be a number.")
172
173        if latent_stander_deviation <= 0:
174            raise ValueError("latent_stander_deviation must be greater than 0.")
175
176        if not isinstance(latent_dimension, int) or latent_dimension <= 0:
177            raise ValueError("latent_dimension must be a positive integer.")
178
179        # Initialize the encoder and decoder models
180        self._encoder = encoder_model
181        self._decoder = decoder_model
182
183        # Loss function and metric for tracking total loss
184        self._loss_function = loss_function
185        self._total_loss_tracker = Mean(name="loss")
186        self._latent_mean_distribution = latent_mean_distribution
187        self._latent_stander_deviation = latent_stander_deviation
188        self._latent_dimension = latent_dimension
189
190        # File names for saving models
191        self._file_name_encoder = file_name_encoder
192        self._file_name_decoder = file_name_decoder
193
194        # Path for saving models
195        self._models_saved_path = models_saved_path
196
197        # Combined encoder-decoder model
198        self._encoder_decoder_model = Model(self._encoder.input, self._decoder(self._encoder.output))
199
200    @tensorflow.function
201    def train_step(self, batch):
202        """
203        Perform a training step for the AutoEncoder.
204
205        Args:
206            batch: Input data batch.
207
208        Returns:
209            dict: Dictionary containing the loss value.
210        """
211        batch_x, batch_y = batch
212
213        # Use tf.function decorator for improved TensorFlow performance
214
215        with tensorflow.GradientTape() as gradient_ae:
216            # Forward pass: Generate reconstructed data using the encoder-decoder model
217            reconstructed_data = self._encoder_decoder_model(batch_x, training=True)
218
219            # Calculate the mean squared error loss between input batch and reconstructed data
220            update_gradient_loss = tensorflow.reduce_mean(tensorflow.square(batch_y - reconstructed_data))
221
222        # Calculate gradients of the loss with respect to trainable variables
223        gradient_update = gradient_ae.gradient(update_gradient_loss, self._encoder_decoder_model.trainable_variables)
224
225        # Apply gradients using the optimizer
226        self.optimizer.apply_gradients(zip(gradient_update, self._encoder_decoder_model.trainable_variables))
227
228        # Update the total loss metric
229        self._total_loss_tracker.update_state(update_gradient_loss)
230
231        # Return a dictionary containing the current loss value
232        return {"loss": self._total_loss_tracker.result()}
233
234    def get_samples(self, number_samples_per_class):
235        """
236        Generates synthetic data samples for each specified class using the trained decoder.
237        This function creates synthetic samples conditioned on class labels, typically used
238        when working with conditional generative models (like conditional VAEs or conditional GANs).
239
240        Args:
241            number_samples_per_class (dict):
242                A dictionary specifying how many synthetic samples should be generated per class.
243                Expected structure:
244                {
245                    "classes": {class_label: number_of_samples, ...},
246                    "number_classes": total_number_of_classes
247                }
248
249        Returns:
250            dict:
251                A dictionary where each key is a class label and the value is an array of generated samples.
252                Each array contains the synthetic samples generated for the corresponding class.
253        """
254
255        # Initialize an empty dictionary to store generated samples grouped by class label
256        generated_data = {}
257
258        # Loop through each class label and the corresponding number of samples to generate
259        for label_class, number_instances in number_samples_per_class["classes"].items():
260            # Create a batch of one-hot encoded class labels, all set to the current class
261            # Example: if label_class = 1 and number_instances = 3, this creates:
262            # [[0, 1], [0, 1], [0, 1]]
263            label_samples_generated = to_categorical(
264                [label_class] * number_instances,
265                num_classes=number_samples_per_class["number_classes"]
266            )
267
268            # Generate random noise vectors (latent space vectors) for each sample
269            # Shape: (number_instances, latent_dimension)
270            latent_noise = numpy.random.normal(
271                self._latent_mean_distribution,  # Mean of the latent distribution
272                self._latent_stander_deviation,  # Standard deviation of the latent distribution
273                (number_instances, self._latent_dimension)
274            )
275
276            # Use the decoder to generate synthetic samples from the latent space and class labels
277            # Inputs: (latent vectors, class labels)
278            # 'verbose=0' suppresses any output from the decoder's predict method
279            generated_samples = self._decoder.predict([latent_noise, label_samples_generated], verbose=0)
280
281            # Round the output values to the nearest integer
282            # This is useful if the output is binary (like 0/1) or for discrete data types
283            generated_samples = numpy.rint(generated_samples)
284
285            # Store the generated samples in the dictionary under the corresponding class label
286            generated_data[label_class] = generated_samples
287
288        # Return the dictionary containing all generated samples, organized by class
289        return generated_data
290
291
292    def save_model(self, directory, file_name):
293        """
294        Save the encoder and decoder models in both JSON and H5 formats.
295
296        Args:
297            directory (str): Directory where models will be saved.
298            file_name (str): Base file name for saving models.
299        """
300        if not os.path.exists(directory):
301            os.makedirs(directory)
302
303        # Construct file names for encoder and decoder models
304        encoder_file_name = os.path.join(directory, f"fold_{file_name}_encoder")
305        decoder_file_name = os.path.join(directory, f"fold_{file_name}_decoder")
306
307        # Save encoder model
308        self._save_model_to_json(self._encoder, f"{encoder_file_name}.json")
309        self._encoder.save_weights(f"{encoder_file_name}.weights.h5")
310
311        # Save decoder model
312        self._save_model_to_json(self._decoder, f"{decoder_file_name}.json")
313        self._decoder.save_weights(f"{decoder_file_name}.weights.h5")
314
315
316    @staticmethod
317    def _save_model_to_json(model, file_path):
318        """
319        Save model architecture to a JSON file.
320
321        Args:
322            model (Model): Model to save.
323            file_path (str): Path to the JSON file.
324        """
325        with open(file_path, "w") as json_file:
326            json.dump(model.to_json(), json_file)
327
328
329    def load_models(self, directory, file_name):
330        """
331        Load the encoder and decoder models from a directory.
332
333        Args:
334            directory (str): Directory where models are stored.
335            file_name (str): Base file name for loading models.
336        """
337
338        # Construct file names for encoder and decoder models
339        encoder_file_name = "{}_encoder".format(file_name)
340        decoder_file_name = "{}_decoder".format(file_name)
341
342        # Load the encoder and decoder models from the specified directory
343        self._encoder = self._save_neural_network_model(encoder_file_name, directory)
344        self._decoder = self._save_neural_network_model(decoder_file_name, directory)
345
346    @property
347    def decoder(self):
348        return self._decoder
349
350    @property
351    def encoder(self):
352        return self._encoder
353
354    @decoder.setter
355    def decoder(self, decoder):
356        self._decoder = decoder
357
358    @encoder.setter
359    def encoder(self, encoder):
360        self._encoder = encoder
class AutoencoderAlgorithm(keras.src.models.model.Model):
 30class AutoencoderAlgorithm(Model):
 31    """
 32    An abstract class for AutoEncoder models.
 33
 34    This class provides a foundation for AutoEncoder models with methods for training,
 35    generating synthetic data, saving and loading models.
 36
 37    Args:
 38        @encoder_model (Model, optional):
 39            The encoder part of the AutoEncoder.
 40        @decoder_model (Model, optional):
 41            The decoder part of the AutoEncoder.
 42        @loss_function (Loss, optional):
 43            The loss function for training.
 44        @file_name_encoder (str, optional):
 45            The file name for saving the encoder model.
 46        @file_name_decoder (str, optional):
 47            The file name for saving the decoder model.
 48        @models_saved_path (str, optional):
 49            The path to save the models.
 50        @latent_mean_distribution (float, optional):
 51            Mean of the latent space distribution.
 52        @latent_stander_deviation (float, optional):
 53            Standard deviation of the latent space distribution.
 54        @latent_dimension (int, optional):
 55            The dimensionality of the latent space.
 56
 57    Attributes:
 58        @_encoder (Model):
 59            The encoder part of the AutoEncoder.
 60        @_decoder (Model):
 61            The decoder part of the AutoEncoder.
 62        @_loss_function (Loss):
 63            Loss function for training.
 64        @_total_loss_tracker (Mean):
 65            Metric for tracking total loss.
 66        @_file_name_encoder (str):
 67            File name for saving the encoder model.
 68        @_file_name_decoder (str):
 69            File name for saving the decoder model.
 70        @_models_saved_path (str):
 71            Path to save the models.
 72        @_encoder_decoder_model (Model):
 73            Combined encoder-decoder model.
 74
 75    Example:
 76        >>> encoder_model = build_encoder(input_shape=(128, 128, 3), latent_dimension=64)
 77        >>> decoder_model = build_decoder(latent_dimension=64, output_shape=(128, 128, 3))
 78        ...     autoencoder = AutoencoderAlgorithm(
 79        ...     encoder_model=encoder_model,
 80        ...     decoder_model=decoder_model,
 81        ...     loss_function=tensorflow.keras.losses.MeanSquaredError(),
 82        ...     file_name_encoder="encoder_model.h5",
 83        ...     file_name_decoder="decoder_model.h5",
 84        ...     models_saved_path="./autoencoder_models/",
 85        ...     latent_mean_distribution=0.0,
 86        ...     latent_stander_deviation=1.0,
 87        ...     latent_dimension=64
 88        ...     )
 89        ...     autoencoder.compile(optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.001))
 90        >>> autoencoder.fit(train_dataset, epochs=50)
 91    """
 92
 93    def __init__(self,
 94                 encoder_model,
 95                 decoder_model,
 96                 loss_function,
 97                 file_name_encoder,
 98                 file_name_decoder,
 99                 models_saved_path,
100                 latent_mean_distribution,
101                 latent_stander_deviation,
102                 latent_dimension):
103
104        super().__init__()
105        """
106        Initializes an AutoEncoder model with an encoder, decoder, and necessary configurations.
107
108        Args:
109            @encoder_model (Model):
110                The encoder part of the AutoEncoder.
111            @decoder_model (Model):
112                The decoder part of the AutoEncoder.
113            @loss_function (Loss):
114                The loss function used for training.
115            @file_name_encoder (str):
116                The filename for saving the trained encoder model.
117            @file_name_decoder (str):
118                The filename for saving the trained decoder model.
119            @models_saved_path (str):
120                The directory path where models should be saved.
121            @latent_mean_distribution (float):
122                The mean of the latent noise distribution.
123            @latent_standard_deviation (float):
124                The standard deviation of the latent noise distribution.
125            @latent_dimension (int):
126                The number of dimensions in the latent space.
127
128        Attributes:
129            @_encoder (Model):
130                The encoder model.
131            @_decoder (Model):
132                The decoder model.
133            @_loss_function (Loss):
134                The loss function used for optimization.
135            @_total_loss_tracker (Mean):
136                Metric for tracking total loss during training.
137            @_latent_mean_distribution (float):
138                The mean of the latent space distribution.
139            @_latent_standard_deviation (float):
140                The standard deviation of the latent space distribution.
141            @_latent_dimension (int):
142                The dimensionality of the latent space.
143            @_file_name_encoder (str):
144                Name of the file where the encoder model is saved.
145            @_file_name_decoder (str): 
146                Name of the file where the decoder model is saved.
147            @_models_saved_path (str):
148                Path where models are saved.
149            @_encoder_decoder_model (Model):
150                A combined model that links the encoder and decoder.
151
152        """
153        if not isinstance(encoder_model, tensorflow.keras.Model):
154            raise TypeError("encoder_model must be a tf.keras.Model instance.")
155
156        if not isinstance(decoder_model, tensorflow.keras.Model):
157            raise TypeError("decoder_model must be a tf.keras.Model instance.")
158
159        if not isinstance(file_name_encoder, str) or not file_name_encoder:
160            raise ValueError("file_name_encoder must be a non-empty string.")
161
162        if not isinstance(file_name_decoder, str) or not file_name_decoder:
163            raise ValueError("file_name_decoder must be a non-empty string.")
164
165        if not isinstance(models_saved_path, str) or not models_saved_path:
166            raise ValueError("models_saved_path must be a non-empty string.")
167
168        if not isinstance(latent_mean_distribution, (int, float)):
169            raise TypeError("latent_mean_distribution must be a number.")
170
171        if not isinstance(latent_stander_deviation, (int, float)):
172            raise TypeError("latent_stander_deviation must be a number.")
173
174        if latent_stander_deviation <= 0:
175            raise ValueError("latent_stander_deviation must be greater than 0.")
176
177        if not isinstance(latent_dimension, int) or latent_dimension <= 0:
178            raise ValueError("latent_dimension must be a positive integer.")
179
180        # Initialize the encoder and decoder models
181        self._encoder = encoder_model
182        self._decoder = decoder_model
183
184        # Loss function and metric for tracking total loss
185        self._loss_function = loss_function
186        self._total_loss_tracker = Mean(name="loss")
187        self._latent_mean_distribution = latent_mean_distribution
188        self._latent_stander_deviation = latent_stander_deviation
189        self._latent_dimension = latent_dimension
190
191        # File names for saving models
192        self._file_name_encoder = file_name_encoder
193        self._file_name_decoder = file_name_decoder
194
195        # Path for saving models
196        self._models_saved_path = models_saved_path
197
198        # Combined encoder-decoder model
199        self._encoder_decoder_model = Model(self._encoder.input, self._decoder(self._encoder.output))
200
201    @tensorflow.function
202    def train_step(self, batch):
203        """
204        Perform a training step for the AutoEncoder.
205
206        Args:
207            batch: Input data batch.
208
209        Returns:
210            dict: Dictionary containing the loss value.
211        """
212        batch_x, batch_y = batch
213
214        # Use tf.function decorator for improved TensorFlow performance
215
216        with tensorflow.GradientTape() as gradient_ae:
217            # Forward pass: Generate reconstructed data using the encoder-decoder model
218            reconstructed_data = self._encoder_decoder_model(batch_x, training=True)
219
220            # Calculate the mean squared error loss between input batch and reconstructed data
221            update_gradient_loss = tensorflow.reduce_mean(tensorflow.square(batch_y - reconstructed_data))
222
223        # Calculate gradients of the loss with respect to trainable variables
224        gradient_update = gradient_ae.gradient(update_gradient_loss, self._encoder_decoder_model.trainable_variables)
225
226        # Apply gradients using the optimizer
227        self.optimizer.apply_gradients(zip(gradient_update, self._encoder_decoder_model.trainable_variables))
228
229        # Update the total loss metric
230        self._total_loss_tracker.update_state(update_gradient_loss)
231
232        # Return a dictionary containing the current loss value
233        return {"loss": self._total_loss_tracker.result()}
234
235    def get_samples(self, number_samples_per_class):
236        """
237        Generates synthetic data samples for each specified class using the trained decoder.
238        This function creates synthetic samples conditioned on class labels, typically used
239        when working with conditional generative models (like conditional VAEs or conditional GANs).
240
241        Args:
242            number_samples_per_class (dict):
243                A dictionary specifying how many synthetic samples should be generated per class.
244                Expected structure:
245                {
246                    "classes": {class_label: number_of_samples, ...},
247                    "number_classes": total_number_of_classes
248                }
249
250        Returns:
251            dict:
252                A dictionary where each key is a class label and the value is an array of generated samples.
253                Each array contains the synthetic samples generated for the corresponding class.
254        """
255
256        # Initialize an empty dictionary to store generated samples grouped by class label
257        generated_data = {}
258
259        # Loop through each class label and the corresponding number of samples to generate
260        for label_class, number_instances in number_samples_per_class["classes"].items():
261            # Create a batch of one-hot encoded class labels, all set to the current class
262            # Example: if label_class = 1 and number_instances = 3, this creates:
263            # [[0, 1], [0, 1], [0, 1]]
264            label_samples_generated = to_categorical(
265                [label_class] * number_instances,
266                num_classes=number_samples_per_class["number_classes"]
267            )
268
269            # Generate random noise vectors (latent space vectors) for each sample
270            # Shape: (number_instances, latent_dimension)
271            latent_noise = numpy.random.normal(
272                self._latent_mean_distribution,  # Mean of the latent distribution
273                self._latent_stander_deviation,  # Standard deviation of the latent distribution
274                (number_instances, self._latent_dimension)
275            )
276
277            # Use the decoder to generate synthetic samples from the latent space and class labels
278            # Inputs: (latent vectors, class labels)
279            # 'verbose=0' suppresses any output from the decoder's predict method
280            generated_samples = self._decoder.predict([latent_noise, label_samples_generated], verbose=0)
281
282            # Round the output values to the nearest integer
283            # This is useful if the output is binary (like 0/1) or for discrete data types
284            generated_samples = numpy.rint(generated_samples)
285
286            # Store the generated samples in the dictionary under the corresponding class label
287            generated_data[label_class] = generated_samples
288
289        # Return the dictionary containing all generated samples, organized by class
290        return generated_data
291
292
293    def save_model(self, directory, file_name):
294        """
295        Save the encoder and decoder models in both JSON and H5 formats.
296
297        Args:
298            directory (str): Directory where models will be saved.
299            file_name (str): Base file name for saving models.
300        """
301        if not os.path.exists(directory):
302            os.makedirs(directory)
303
304        # Construct file names for encoder and decoder models
305        encoder_file_name = os.path.join(directory, f"fold_{file_name}_encoder")
306        decoder_file_name = os.path.join(directory, f"fold_{file_name}_decoder")
307
308        # Save encoder model
309        self._save_model_to_json(self._encoder, f"{encoder_file_name}.json")
310        self._encoder.save_weights(f"{encoder_file_name}.weights.h5")
311
312        # Save decoder model
313        self._save_model_to_json(self._decoder, f"{decoder_file_name}.json")
314        self._decoder.save_weights(f"{decoder_file_name}.weights.h5")
315
316
317    @staticmethod
318    def _save_model_to_json(model, file_path):
319        """
320        Save model architecture to a JSON file.
321
322        Args:
323            model (Model): Model to save.
324            file_path (str): Path to the JSON file.
325        """
326        with open(file_path, "w") as json_file:
327            json.dump(model.to_json(), json_file)
328
329
330    def load_models(self, directory, file_name):
331        """
332        Load the encoder and decoder models from a directory.
333
334        Args:
335            directory (str): Directory where models are stored.
336            file_name (str): Base file name for loading models.
337        """
338
339        # Construct file names for encoder and decoder models
340        encoder_file_name = "{}_encoder".format(file_name)
341        decoder_file_name = "{}_decoder".format(file_name)
342
343        # Load the encoder and decoder models from the specified directory
344        self._encoder = self._save_neural_network_model(encoder_file_name, directory)
345        self._decoder = self._save_neural_network_model(decoder_file_name, directory)
346
347    @property
348    def decoder(self):
349        return self._decoder
350
351    @property
352    def encoder(self):
353        return self._encoder
354
355    @decoder.setter
356    def decoder(self, decoder):
357        self._decoder = decoder
358
359    @encoder.setter
360    def encoder(self, encoder):
361        self._encoder = encoder

An abstract class for AutoEncoder models.

This class provides a foundation for AutoEncoder models with methods for training, generating synthetic data, saving and loading models.

Args: @encoder_model (Model): The encoder part of the AutoEncoder. @decoder_model (Model): The decoder part of the AutoEncoder. @loss_function (Loss): The loss function for training. @file_name_encoder (str): The file name for saving the encoder model. @file_name_decoder (str): The file name for saving the decoder model. @models_saved_path (str): The path to save the models. @latent_mean_distribution (float): Mean of the latent space distribution. @latent_stander_deviation (float): Standard deviation of the latent space distribution. @latent_dimension (int): The dimensionality of the latent space. All arguments are required; the constructor declares no default values.

Attributes: @_encoder (Model): The encoder part of the AutoEncoder. @_decoder (Model): The decoder part of the AutoEncoder. @_loss_function (Loss): Loss function for training. @_total_loss_tracker (Mean): Metric for tracking total loss. @_file_name_encoder (str): File name for saving the encoder model. @_file_name_decoder (str): File name for saving the decoder model. @_models_saved_path (str): Path to save the models. @_encoder_decoder_model (Model): Combined encoder-decoder model.

Example:

    encoder_model = build_encoder(input_shape=(128, 128, 3), latent_dimension=64)
    decoder_model = build_decoder(latent_dimension=64, output_shape=(128, 128, 3))
    autoencoder = AutoencoderAlgorithm(
        encoder_model=encoder_model,
        decoder_model=decoder_model,
        loss_function=tensorflow.keras.losses.MeanSquaredError(),
        file_name_encoder="encoder_model.h5",
        file_name_decoder="decoder_model.h5",
        models_saved_path="./autoencoder_models/",
        latent_mean_distribution=0.0,
        latent_stander_deviation=1.0,
        latent_dimension=64
    )
    autoencoder.compile(optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.001))
    autoencoder.fit(train_dataset, epochs=50)

AutoencoderAlgorithm( encoder_model, decoder_model, loss_function, file_name_encoder, file_name_decoder, models_saved_path, latent_mean_distribution, latent_stander_deviation, latent_dimension)
 93    def __init__(self,
 94                 encoder_model,
 95                 decoder_model,
 96                 loss_function,
 97                 file_name_encoder,
 98                 file_name_decoder,
 99                 models_saved_path,
100                 latent_mean_distribution,
101                 latent_stander_deviation,
102                 latent_dimension):
103
104        super().__init__()
105        """
106        Initializes an AutoEncoder model with an encoder, decoder, and necessary configurations.
107
108        Args:
109            @encoder_model (Model):
110                The encoder part of the AutoEncoder.
111            @decoder_model (Model):
112                The decoder part of the AutoEncoder.
113            @loss_function (Loss):
114                The loss function used for training.
115            @file_name_encoder (str):
116                The filename for saving the trained encoder model.
117            @file_name_decoder (str):
118                The filename for saving the trained decoder model.
119            @models_saved_path (str):
120                The directory path where models should be saved.
121            @latent_mean_distribution (float):
122                The mean of the latent noise distribution.
123            @latent_standard_deviation (float):
124                The standard deviation of the latent noise distribution.
125            @latent_dimension (int):
126                The number of dimensions in the latent space.
127
128        Attributes:
129            @_encoder (Model):
130                The encoder model.
131            @_decoder (Model):
132                The decoder model.
133            @_loss_function (Loss):
134                The loss function used for optimization.
135            @_total_loss_tracker (Mean):
136                Metric for tracking total loss during training.
137            @_latent_mean_distribution (float):
138                The mean of the latent space distribution.
139            @_latent_standard_deviation (float):
140                The standard deviation of the latent space distribution.
141            @_latent_dimension (int):
142                The dimensionality of the latent space.
143            @_file_name_encoder (str):
144                Name of the file where the encoder model is saved.
145            @_file_name_decoder (str): 
146                Name of the file where the decoder model is saved.
147            @_models_saved_path (str):
148                Path where models are saved.
149            @_encoder_decoder_model (Model):
150                A combined model that links the encoder and decoder.
151
152        """
153        if not isinstance(encoder_model, tensorflow.keras.Model):
154            raise TypeError("encoder_model must be a tf.keras.Model instance.")
155
156        if not isinstance(decoder_model, tensorflow.keras.Model):
157            raise TypeError("decoder_model must be a tf.keras.Model instance.")
158
159        if not isinstance(file_name_encoder, str) or not file_name_encoder:
160            raise ValueError("file_name_encoder must be a non-empty string.")
161
162        if not isinstance(file_name_decoder, str) or not file_name_decoder:
163            raise ValueError("file_name_decoder must be a non-empty string.")
164
165        if not isinstance(models_saved_path, str) or not models_saved_path:
166            raise ValueError("models_saved_path must be a non-empty string.")
167
168        if not isinstance(latent_mean_distribution, (int, float)):
169            raise TypeError("latent_mean_distribution must be a number.")
170
171        if not isinstance(latent_stander_deviation, (int, float)):
172            raise TypeError("latent_stander_deviation must be a number.")
173
174        if latent_stander_deviation <= 0:
175            raise ValueError("latent_stander_deviation must be greater than 0.")
176
177        if not isinstance(latent_dimension, int) or latent_dimension <= 0:
178            raise ValueError("latent_dimension must be a positive integer.")
179
180        # Initialize the encoder and decoder models
181        self._encoder = encoder_model
182        self._decoder = decoder_model
183
184        # Loss function and metric for tracking total loss
185        self._loss_function = loss_function
186        self._total_loss_tracker = Mean(name="loss")
187        self._latent_mean_distribution = latent_mean_distribution
188        self._latent_stander_deviation = latent_stander_deviation
189        self._latent_dimension = latent_dimension
190
191        # File names for saving models
192        self._file_name_encoder = file_name_encoder
193        self._file_name_decoder = file_name_decoder
194
195        # Path for saving models
196        self._models_saved_path = models_saved_path
197
198        # Combined encoder-decoder model
199        self._encoder_decoder_model = Model(self._encoder.input, self._decoder(self._encoder.output))
@tensorflow.function
def train_step(self, batch):
201    @tensorflow.function
202    def train_step(self, batch):
203        """
204        Perform a training step for the AutoEncoder.
205
206        Args:
207            batch: Input data batch.
208
209        Returns:
210            dict: Dictionary containing the loss value.
211        """
212        batch_x, batch_y = batch
213
214        # Use tf.function decorator for improved TensorFlow performance
215
216        with tensorflow.GradientTape() as gradient_ae:
217            # Forward pass: Generate reconstructed data using the encoder-decoder model
218            reconstructed_data = self._encoder_decoder_model(batch_x, training=True)
219
220            # Calculate the mean squared error loss between input batch and reconstructed data
221            update_gradient_loss = tensorflow.reduce_mean(tensorflow.square(batch_y - reconstructed_data))
222
223        # Calculate gradients of the loss with respect to trainable variables
224        gradient_update = gradient_ae.gradient(update_gradient_loss, self._encoder_decoder_model.trainable_variables)
225
226        # Apply gradients using the optimizer
227        self.optimizer.apply_gradients(zip(gradient_update, self._encoder_decoder_model.trainable_variables))
228
229        # Update the total loss metric
230        self._total_loss_tracker.update_state(update_gradient_loss)
231
232        # Return a dictionary containing the current loss value
233        return {"loss": self._total_loss_tracker.result()}

Perform a training step for the AutoEncoder.

Args: batch: Input data batch.

Returns: dict: Dictionary containing the loss value.

def get_samples(self, number_samples_per_class):
235    def get_samples(self, number_samples_per_class):
236        """
237        Generates synthetic data samples for each specified class using the trained decoder.
238        This function creates synthetic samples conditioned on class labels, typically used
239        when working with conditional generative models (like conditional VAEs or conditional GANs).
240
241        Args:
242            number_samples_per_class (dict):
243                A dictionary specifying how many synthetic samples should be generated per class.
244                Expected structure:
245                {
246                    "classes": {class_label: number_of_samples, ...},
247                    "number_classes": total_number_of_classes
248                }
249
250        Returns:
251            dict:
252                A dictionary where each key is a class label and the value is an array of generated samples.
253                Each array contains the synthetic samples generated for the corresponding class.
254        """
255
256        # Initialize an empty dictionary to store generated samples grouped by class label
257        generated_data = {}
258
259        # Loop through each class label and the corresponding number of samples to generate
260        for label_class, number_instances in number_samples_per_class["classes"].items():
261            # Create a batch of one-hot encoded class labels, all set to the current class
262            # Example: if label_class = 1 and number_instances = 3, this creates:
263            # [[0, 1], [0, 1], [0, 1]]
264            label_samples_generated = to_categorical(
265                [label_class] * number_instances,
266                num_classes=number_samples_per_class["number_classes"]
267            )
268
269            # Generate random noise vectors (latent space vectors) for each sample
270            # Shape: (number_instances, latent_dimension)
271            latent_noise = numpy.random.normal(
272                self._latent_mean_distribution,  # Mean of the latent distribution
273                self._latent_stander_deviation,  # Standard deviation of the latent distribution
274                (number_instances, self._latent_dimension)
275            )
276
277            # Use the decoder to generate synthetic samples from the latent space and class labels
278            # Inputs: (latent vectors, class labels)
279            # 'verbose=0' suppresses any output from the decoder's predict method
280            generated_samples = self._decoder.predict([latent_noise, label_samples_generated], verbose=0)
281
282            # Round the output values to the nearest integer
283            # This is useful if the output is binary (like 0/1) or for discrete data types
284            generated_samples = numpy.rint(generated_samples)
285
286            # Store the generated samples in the dictionary under the corresponding class label
287            generated_data[label_class] = generated_samples
288
289        # Return the dictionary containing all generated samples, organized by class
290        return generated_data

Generates synthetic data samples for each specified class using the trained decoder. This function creates synthetic samples conditioned on class labels, typically used when working with conditional generative models (like conditional VAEs or conditional GANs).

Args: number_samples_per_class (dict): A dictionary specifying how many synthetic samples should be generated per class. Expected structure: { "classes": {class_label: number_of_samples, ...}, "number_classes": total_number_of_classes }

Returns: dict: A dictionary where each key is a class label and the value is an array of generated samples. Each array contains the synthetic samples generated for the corresponding class.

def save_model(self, directory, file_name):
293    def save_model(self, directory, file_name):
294        """
295        Save the encoder and decoder models in both JSON and H5 formats.
296
297        Args:
298            directory (str): Directory where models will be saved.
299            file_name (str): Base file name for saving models.
300        """
301        if not os.path.exists(directory):
302            os.makedirs(directory)
303
304        # Construct file names for encoder and decoder models
305        encoder_file_name = os.path.join(directory, f"fold_{file_name}_encoder")
306        decoder_file_name = os.path.join(directory, f"fold_{file_name}_decoder")
307
308        # Save encoder model
309        self._save_model_to_json(self._encoder, f"{encoder_file_name}.json")
310        self._encoder.save_weights(f"{encoder_file_name}.weights.h5")
311
312        # Save decoder model
313        self._save_model_to_json(self._decoder, f"{decoder_file_name}.json")
314        self._decoder.save_weights(f"{decoder_file_name}.weights.h5")

Save the encoder and decoder models in both JSON and H5 formats.

Args: directory (str): Directory where models will be saved. file_name (str): Base file name for saving models.

def load_models(self, directory, file_name):
330    def load_models(self, directory, file_name):
331        """
332        Load the encoder and decoder models from a directory.
333
334        Args:
335            directory (str): Directory where models are stored.
336            file_name (str): Base file name for loading models.
337        """
338
339        # Construct file names for encoder and decoder models
340        encoder_file_name = "{}_encoder".format(file_name)
341        decoder_file_name = "{}_decoder".format(file_name)
342
343        # Load the encoder and decoder models from the specified directory
344        self._encoder = self._save_neural_network_model(encoder_file_name, directory)
345        self._decoder = self._save_neural_network_model(decoder_file_name, directory)

Load the encoder and decoder models from a directory.

Args: directory (str): Directory where models are stored. file_name (str): Base file name for loading models.

decoder
    @property
    def decoder(self):
        """Return the decoder model currently stored in ``self._decoder``."""
        return self._decoder
encoder
    @property
    def encoder(self):
        """Return the encoder model currently stored in ``self._encoder``."""
        return self._encoder