--30.tf.data.Dataset.ipynb--
https://www.tensorflow.org/api_docs/python/tf/data/Dataset
데이터 입력 pipeline 객체
1. 입력데이터로부터 source dataset 생성
1. dataset 에 대한 transformation 수행 (preprocess!)
1. dataset 의 각 element 에 대해 일련의 작업 수행
참조 : TF 입력파이프라인 빌드 https://www.tensorflow.org/guide/data
import tensorflow as tf
tf.data.Dataset.from_tensor_slices
주어진 tensors 로 Dataset 생성
https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices
@staticmethod
from_tensor_slices(
tensors, name=None
) -> 'DatasetV2'
# from_tensor_slices slices its input along the first axis.

# A 2x2 tensor yields two 1-D elements: [1, 2] then [3, 4].
matrix = [[1, 2], [3, 4]]
dataset = tf.data.Dataset.from_tensor_slices(matrix)
dataset
list(dataset.as_numpy_iterator())

# A tuple of tensors is sliced component-wise: elements are (1, 3, 5), (2, 4, 6).
triple = ([1, 2], [3, 4], [5, 6])
dataset = tf.data.Dataset.from_tensor_slices(triple)
list(dataset.as_numpy_iterator())

# A dict of tensors yields dict elements: {'a': 1, 'b': 3} then {'a': 2, 'b': 4}.
mapping = {"a": [1, 2], "b": [3, 4]}
dataset = tf.data.Dataset.from_tensor_slices(mapping)
list(dataset.as_numpy_iterator())
# A (features, labels) pair is sliced in lockstep along the first axis.
features = tf.constant([[1, 3], [2, 1], [3, 3]]) # ==> 3x2 tensor
labels = tf.constant(['A', 'B', 'A']) # ==> 1-D tensor of shape (3,)
print(features)
print(labels)
# Each element pairs one feature row with its label: ([1, 3], 'A'), ([2, 1], 'B'), ...
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
list(dataset.as_numpy_iterator())
tf.data.Dataset.zip()
주어진 dataset(들)을 묶어서 Dataset 생성
https://www.tensorflow.org/api_docs/python/tf/data/Dataset#zip
@staticmethod
zip(
*args, datasets=None, name=None
) -> 'DatasetV2'
# Dataset.zip pairs up elements drawn from each input dataset,
# analogous to Python's built-in zip().
ds_features = tf.data.Dataset.from_tensor_slices(features)
ds_labels = tf.data.Dataset.from_tensor_slices(labels)
dataset = tf.data.Dataset.zip((ds_features, ds_labels))
list(dataset.as_numpy_iterator())
# Slicing a batched pair: the first axis (size 3) is the slicing axis,
# so each element is one (2x2 feature block, 2x1 label block) pair.
feature_blocks = [[[1, 3], [2, 3]],
                  [[2, 1], [1, 2]],
                  [[3, 3], [3, 2]]]
batched_features = tf.constant(feature_blocks, shape=(3, 2, 2))

label_rows = [['A', 'A'],
              ['B', 'B'],
              ['A', 'B']]
# shape=(3, 2, 1) reshapes the 3x2 input so each label sits in its own row.
batched_labels = tf.constant(label_rows, shape=(3, 2, 1))

print(batched_features)
print(batched_labels)
dataset = tf.data.Dataset.from_tensor_slices((batched_features, batched_labels))
list(dataset.as_numpy_iterator())
take(
count, name=None
)
# take(3): keep only the first 3 elements (0, 1, 2) of the range.
dataset = tf.data.Dataset.range(10).take(3)
list(dataset.as_numpy_iterator())
batch(
batch_size,
drop_remainder=False, # batch_size 만큼 묶이지 않는 데이터를 버릴지 여부
num_parallel_calls=None,
deterministic=None,
name=None
)
# batch(3): group consecutive elements into batches of 3; the final
# batch may be smaller (here [6, 7]).
dataset = tf.data.Dataset.range(8).batch(3)
list(dataset.as_numpy_iterator())

# drop_remainder=True discards the incomplete final batch.
dataset = tf.data.Dataset.range(8).batch(3, drop_remainder=True)
list(dataset.as_numpy_iterator())
skip(
count, name=None
)
# skip(7): drop the first 7 elements, keeping 7, 8, 9.
dataset = tf.data.Dataset.range(10).skip(7)
list(dataset.as_numpy_iterator())

# skip and take compose: drop 4 elements, then keep the next 3 (4, 5, 6).
dataset = tf.data.Dataset.range(10)
middle = dataset.skip(4).take(3)
list(middle.as_numpy_iterator())
window(
size, shift=None, stride=1, drop_remainder=False, name=None
)
# window(3): split the dataset into sub-datasets ("windows") of up to
# 3 elements. With the default shift (= size), windows tile the
# sequence: [0, 1, 2], [3, 4, 5], [6].
dataset = tf.data.Dataset.range(7).window(3)
for window in dataset:
    print([element.numpy() for element in window])

# shift=1 advances the window start by one element each time, so the
# trailing windows get progressively shorter.
dataset = tf.data.Dataset.range(7).window(3, shift=1)
for window in dataset:
    print([element.numpy() for element in window])

# drop_remainder=True keeps only full-size windows.
dataset = tf.data.Dataset.range(7).window(3, shift=1, drop_remainder=True)
for window in dataset:
    print([element.numpy() for element in window])

# stride=2 samples every 2nd element inside each window: [0, 2, 4], [1, 3, 5], ...
dataset = tf.data.Dataset.range(7).window(3, shift=1, stride=2, drop_remainder=True)
for window in dataset:
    print([element.numpy() for element in window])
flat_map(
map_func, name=None
)
# flat_map maps each element to a dataset and concatenates the results:
# every row [1, 2, 3], [4, 5, 6], [7, 8, 9] is flattened into scalar
# elements 1..9.
nested = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
dataset = tf.data.Dataset.from_tensor_slices(nested)
dataset = dataset.flat_map(tf.data.Dataset.from_tensor_slices)
list(dataset.as_numpy_iterator())
shuffle(
buffer_size,
seed=None,
reshuffle_each_iteration=None, # bool, (디폴트 True)
name=None
)
# shuffle(10): fill a 10-element buffer and emit elements in random
# order; a buffer as large as the dataset gives a full uniform shuffle.
dataset = tf.data.Dataset.range(10).shuffle(10)
list(dataset.as_numpy_iterator())