1 분 소요

데이터 증강법

  1. keras 전처리 레이어 사용

  2. ImageDataGenerator 사용

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow.keras import layers

데이터셋 준비

# Load the tf_flowers dataset, carving its single 'train' split into
# 80% training / 10% validation / 10% test subsets.
(train_ds, val_ds, test_ds), metadata = tfds.load(
    'tf_flowers',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    with_info=True,      # also return the dataset metadata object
    as_supervised=True,  # elements are (image, label) pairs (unpacked below)
)

# Number of label classes, read from the dataset metadata.
num_classes = metadata.features['label'].num_classes
# Explicit print instead of a bare notebook echo, so this also works when
# the code is run as a script. The original notebook run displayed: 5
print(num_classes)

# Callable that converts an integer label into its string name.
get_label_name = metadata.features['label'].int2str

# Show the first training example, titled with its label name.
image, label = next(iter(train_ds))
plt.imshow(image)
plt.title(get_label_name(label))
plt.show()

keras 전처리 레이어

  • Resizing: 이미지 크기 조절

  • Rescaling: 픽셀 값 스케일 조정(정규화)

  • RandomFlip: 가로, 세로 반전

  • RandomRotation: 회전

  • RandomCrop: 이미지 잘라내기

# Edge length, in pixels, of the square images produced by the resize step.
IMG_SIZE = 180

# Preprocessing pipeline: resize to IMG_SIZE x IMG_SIZE, then multiply the
# pixel values by 1/255.
resize_and_rescale = tf.keras.Sequential()
resize_and_rescale.add(
    layers.experimental.preprocessing.Resizing(height=IMG_SIZE, width=IMG_SIZE))
resize_and_rescale.add(
    layers.experimental.preprocessing.Rescaling(scale=1./255))

# Run the sample image through the pipeline and display the result.
result = resize_and_rescale(image)
plt.imshow(result)
plt.show()

# Random augmentation pipeline: random horizontal/vertical flips followed by
# random rotations (factor 0.2).
#
# It intentionally contains NO Rescaling layer. prepare() applies this
# pipeline to images that have already passed through resize_and_rescale,
# so a Rescaling(1./255) here would divide the training images by 255 a
# second time while validation/test images are scaled only once.
data_augmentation = tf.keras.Sequential([
  layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
  layers.experimental.preprocessing.RandomRotation(0.2),
])

# Add a leading batch dimension to the single sample image. `sample` is left
# unscaled because later cells reuse it as-is.
sample = tf.expand_dims(image, 0)

# Float copy multiplied by 1/255, used only for this preview (same scaling
# the removed Rescaling layer applied; assumes pixel values in 0-255).
scaled_sample = tf.cast(sample, tf.float32) / 255.0

# Draw a 3x3 grid of independently augmented versions of the sample.
plt.figure(figsize=(10, 10))
for i in range(9):
  # training=True makes the random layers active explicitly, matching the
  # way prepare() calls this pipeline.
  augmented_image = data_augmentation(scaled_sample, training=True)
  ax = plt.subplot(3, 3, i + 1)
  plt.imshow(augmented_image[0])
  plt.axis("off")
plt.show()

# Demo pipeline for random cropping: multiply pixel values by 1/255, then
# cut a random 30x30 window out of the image.
data_crop = tf.keras.Sequential([
  layers.experimental.preprocessing.Rescaling(1./255),
  layers.experimental.preprocessing.RandomCrop(height=30, width=30)
])

# Draw a 3x3 grid of independently cropped versions of `sample`
# (the batched sample image created earlier in the file).
plt.figure(figsize=(10, 10))
for i in range(9):
  # training=True requests the random crop explicitly instead of relying on
  # the layer's default call mode.
  augmented_image = data_crop(sample, training=True)
  ax = plt.subplot(3, 3, i + 1)
  plt.imshow(augmented_image[0])
  # Hide the axes, consistent with the augmentation preview grid.
  plt.axis("off")
plt.show()

데이터 전처리

# Sentinel that lets tf.data pick parallelism / buffer sizes itself; used by
# prepare() for map() and prefetch().
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Number of examples per batch produced by prepare().
batch_size = 32

def prepare(ds, shuffle=False, augment=False):
  """Build the input pipeline for one dataset split.

  Args:
    ds: dataset whose elements are (image, label) pairs.
    shuffle: if true, shuffle with a buffer of 1000 elements before batching.
    augment: if true, run each batch through data_augmentation
      (intended for the training split only).

  Returns:
    The dataset after resize/rescale, optional shuffle, batching, optional
    augmentation, and prefetching.
  """
  def _resize_and_rescale(x, y):
    # Labels pass through untouched.
    return resize_and_rescale(x), y

  def _augment(x, y):
    # training=True keeps the random layers active inside Dataset.map.
    return data_augmentation(x, training=True), y

  # Resize and rescale every split.
  pipeline = ds.map(_resize_and_rescale, num_parallel_calls=AUTOTUNE)

  if shuffle:
    pipeline = pipeline.shuffle(1000)

  # Batch every split.
  pipeline = pipeline.batch(batch_size)

  # Augmentation runs on whole batches, after batching.
  if augment:
    pipeline = pipeline.map(_augment, num_parallel_calls=AUTOTUNE)

  # Use buffered prefetching on every split.
  return pipeline.prefetch(buffer_size=AUTOTUNE)
# Only the training split is shuffled and augmented; the validation and test
# splits are just resized, rescaled, batched and prefetched.
train = prepare(train_ds, shuffle=True, augment=True)
val = prepare(val_ds, shuffle=False, augment=False)
test = prepare(test_ds, shuffle=False, augment=False)

모델 생성

# Small CNN: three Conv2D + MaxPooling2D stages with 16, 32 and 64 filters,
# followed by a dense classifier head.
model = tf.keras.Sequential()
for filters in (16, 32, 64):
  model.add(layers.Conv2D(filters, 3, padding='same', activation='relu'))
  model.add(layers.MaxPooling2D())
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
# The last layer has no activation: it emits one raw logit per class, which
# is why the loss below is created with from_logits=True.
model.add(layers.Dense(num_classes))

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
# history = model.fit(
#   train,
#   validation_data=val,
#   epochs=5
# )
# loss, acc = model.evaluate(test)
# loss, acc

ImageDataGenerator

ImageDataGenerator는

  1. 학습 도중 이미지에 임의 변형 및 정규화를 적용해 주고

  2. 변형된 이미지를 배치 단위로 불러올 수 있는 Generator를 생성해 준다.


  • rotation_range

  • width_shift_range

  • height_shift_range

  • brightness_range

  • zoom_range

  • shear_range

  • horizontal_flip

  • vertical_flip

  • preprocessing_function

  • rescale

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create the data generator.
datagenerator = ImageDataGenerator(
    rescale=1./255,          # multiply pixel values by 1/255
    rotation_range=20,       # range (degrees) for random rotations
    width_shift_range=0.2,   # random horizontal shift, as a fraction of width
    height_shift_range=0.2,  # random vertical shift, as a fraction of height
    horizontal_flip=True,    # randomly flip images horizontally
)
# Add a leading batch dimension so the single image can be passed to flow().
sample = tf.expand_dims(image, 0)
# Explicit print instead of a bare notebook echo.
# The original notebook run displayed: TensorShape([1, 333, 500, 3])
print(sample.shape)

# flow() returns an iterator; each next() yields one transformed batch.
image_result = datagenerator.flow(sample, batch_size=1)
# The original notebook run displayed: (1, 333, 500, 3)
print(next(image_result).shape)

# Show the original image next to one transformed version.
plt.subplot(1, 2, 1)
plt.title('original')
plt.imshow(np.squeeze(sample))
plt.subplot(1, 2, 2)
plt.title('Transforms Image')
plt.imshow(np.squeeze(next(image_result)))
plt.show()

# 모델 훈련

# model.fit_generator(  # 참고: TF 2.1 이후에는 deprecated이며, model.fit()에 제너레이터를 그대로 전달하면 된다.
#     train_generator,
#     steps_per_epoch = len(train_generator),  # steps_per_epoch: 한 epoch 동안 처리할 배치(batch)의 개수
#     epochs = num_epochs,
#     validation_data = validation_generator,
#     validation_steps = len(validation_generator)
# )

댓글남기기