import os
# Filter TF C++ logs at level 1 (hide INFO); must be set BEFORE the
# tensorflow import below or it has no effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
import tensorflow as tf
import subprocess


def _lscpu_field(label):
    """Return the value of the first `lscpu` line starting with *label*.

    Runs `lscpu` once per call with a list argv (no shell=True string
    command) and does the "grep" in Python.

    Raises ValueError if the field is not present.
    """
    output = subprocess.check_output(['lscpu']).decode('utf-8')
    for line in output.splitlines():
        if line.startswith(label):
            return line.split(':', 1)[1].strip()
    raise ValueError(f'lscpu field not found: {label!r}')


# CPU: model name, physical cores, and total hardware threads.
cpu_name = _lscpu_field('Model name:')
cpu_core = _lscpu_field('CPU(s):')
cpu_threads_per_core = _lscpu_field('Thread(s) per core:')
print(f"CPU: {cpu_name} {cpu_core}C/{int(cpu_core) * int(cpu_threads_per_core)}T")

# RAM: read /proc/meminfo directly instead of shelling out to `cat | grep`.
with open('/proc/meminfo', encoding='utf-8') as meminfo:
    for line in meminfo:
        if line.startswith('MemTotal:'):
            mem_size = line.split(':', 1)[1].split()[0]  # value is in kB
            break
print(f"RAM: {int(mem_size)/1024/1024:.2f} GB")

# GPU: one line per device TensorFlow can see. device.name looks like
# '/physical_device:GPU:0'; its trailing field is the index nvidia-smi
# expects for -i. (Renamed from `id`, which shadowed the builtin.)
for device in tf.config.list_physical_devices('GPU'):
    gpu_index = device.name.split(':')[-1]
    infos = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=name,memory.total',
         '--format=csv,noheader', '-i', gpu_index],
    ).decode('utf-8').split(',')
    print(f'GPU: {infos[0]}, {int(infos[1].split()[0]) / 1024:.1f} GB')
Example output (Google Colab runtime):
CPU: Intel(R) Xeon(R) CPU @ 2.20GHz 2C/4T
RAM: 12.72 GB
GPU: Tesla T4, 14.7 GB
# Let TensorFlow allocate GPU memory on demand instead of grabbing the whole
# device up front. Uses the stable tf.config.list_physical_devices (the
# experimental alias is deprecated); set_memory_growth itself still lives
# under tf.config.experimental in TF 2.x.
physical_devices = tf.config.list_physical_devices('GPU')
for physical_device in physical_devices:
    tf.config.experimental.set_memory_growth(physical_device, True)
Insert the following before creating the model:
# Enable mixed-precision training: compute in float16, keep variables in
# float32 (policy 'mixed_float16').
# NOTE(review): the tf.keras.mixed_precision.experimental namespace was
# removed in newer TF releases; the stable equivalent is
# tf.keras.mixed_precision.set_global_policy('mixed_float16'). Confirm the
# TF version this runs on before relying on the experimental API.
policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
tf.keras.mixed_precision.experimental.set_policy(policy)
Insert the following after the model has been fully built, then run `fit` on `qat_model` instead of the original model:
import tensorflow_model_optimization as tfmot
# Wrap the existing Keras model for quantization-aware training (QAT).
# `model` is assumed to be defined earlier in the notebook — TODO confirm.
quantize_model = tfmot.quantization.keras.quantize_model
# Train this wrapped model (call .fit on qat_model, not on `model`).
qat_model = quantize_model(model)
`verbose` options for `fit()`: 0 = silent, 1 = progress bar, 2 = one line per epoch.
import os
# Expose only GPU index 1 to CUDA (and hence to TensorFlow).
# NOTE(review): this only takes effect if set before the CUDA runtime
# initializes — i.e. before importing tensorflow / first GPU use. Confirm
# this snippet's placement relative to the tensorflow import.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"