Therefore, substituting the quantization formula $q = r/S - Z$ into each real value:
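Expanding $y = x w^\top$ with $x = S_x (q_x + Z_x)$, $w = S_w (q_w + Z_w)$, and $y = S_y (q_y + Z_y)$, where $Z_w = 0$ because the weights are quantized symmetrically:

$$
S_y (q_y + Z_y) = S_x S_w \,(q_x + Z_x)\, q_w^\top
\quad\Rightarrow\quad
q_y = \frac{S_x S_w}{S_y}\left(q_x q_w^\top + Z_x q_w^\top\right) - Z_y
$$

This is exactly what the code below computes as `SwSx_Sy * (qwqx + Zxqw) - Zy`.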
## Linear Layer without Bias
import torch
import torch.nn as nn

torch.manual_seed(0)
x = torch.rand(1, 2)
m = nn.Linear(2, 4, bias=False)
w = m.weight
y = m(x) # torch.matmul(x, w.t())
print(y)
# Symmetric, signed 8-bit encodings: scale = max|r| / 128, offset = 0
x_scale = x.abs().max() / 128
x_offset = 0
w_scale = w.abs().max() / 128
w_offset = 0
y_scale = y.abs().max() / 128
y_offset = 0
print(x_scale, x_offset)
print(w_scale, w_offset)
print(y_scale, y_offset)
# Quantize: q = round(r / scale - offset)
qx = x / x_scale - x_offset
qx = torch.round(qx)
qw = w / w_scale - w_offset
qw = torch.round(qw)
# Integer matmul plus the requantization term derived above
SwSx_Sy = w_scale * x_scale / y_scale
qwqx = torch.matmul(qx, qw.t())
Zxqw = torch.matmul(torch.ones_like(x) * x_offset, qw.t())
Zy = y_offset
qy = SwSx_Sy * (qwqx + Zxqw) - Zy
print(qy)
dqy = (qy + y_offset) * y_scale
print(dqy)
"""output
tensor(0.0060) 0
tensor(0.0045, grad_fn=<DivBackward0>) 0
tensor(0.0054, grad_fn=<DivBackward0>) 0
tensor([[-0.6886, 0.0105, 0.4238, 0.1126]], grad_fn=<MmBackward0>)
tensor([[-0.6886, 0.0105, 0.4238, 0.1126]], grad_fn=<MmBackward0>)
tensor([[-127.9099, 2.0087, 78.5994, 20.7265]], grad_fn=<SubBackward0>)
tensor([[-0.6881, 0.0108, 0.4228, 0.1115]], grad_fn=<MulBackward0>)
"""
Now let's compute the same Linear layer using aimet-torch's QuantSim.
from aimet_torch.v2.quantsim import QuantizationSimModel
from aimet_common.defs import QuantScheme

sim_linear = QuantizationSimModel(model=nn.Sequential(m),
                                  dummy_input=x,
                                  quant_scheme=QuantScheme.post_training_tf_enhanced,
                                  default_output_bw=8,
                                  default_param_bw=8)
# Match the manual setup: symmetric, signed 8-bit everywhere
sim_linear.model[0].input_quantizers[0].symmetric = True
sim_linear.model[0].param_quantizers["weight"].symmetric = True
sim_linear.model[0].output_quantizers[0].symmetric = True
sim_linear.model[0].input_quantizers[0].signed = True
sim_linear.model[0].param_quantizers["weight"].signed = True
sim_linear.model[0].output_quantizers[0].signed = True

def foo(model, data):
    _ = model(data)

sim_linear.compute_encodings(forward_pass_callback=foo,
                             forward_pass_callback_args=x)
print(sim_linear.model[0].input_quantizers[0].get_scale())
print(sim_linear.model[0].input_quantizers[0].get_offset())
print(sim_linear.model[0].param_quantizers["weight"].get_scale())
print(sim_linear.model[0].param_quantizers["weight"].get_offset())
print(sim_linear.model[0].output_quantizers[0].get_scale())
print(sim_linear.model[0].output_quantizers[0].get_offset())
print(sim_linear.model(x))
"""output
tensor([0.0061])
tensor([0.])
tensor([0.0046])
tensor([0.])
tensor([0.0055])
tensor([0.])
DequantizedTensor([[-0.6872, 0.0109, 0.4254, 0.1145]],
grad_fn=<AliasBackward0>)
"""
from aimet_torch.v2.quantsim import QuantizationSimModel

sim_linear = QuantizationSimModel(model=nn.Sequential(m),
                                  dummy_input=x,
                                  quant_scheme=QuantScheme.post_training_tf_enhanced,
                                  default_output_bw=8,
                                  default_param_bw=8)
sim_linear.model[0].input_quantizers[0].symmetric = True
sim_linear.model[0].input_quantizers[0].signed = True
sim_linear.model[0].param_quantizers["weight"].symmetric = True
sim_linear.model[0].param_quantizers["weight"].signed = True
sim_linear.model[0].output_quantizers[0].symmetric = True
sim_linear.model[0].output_quantizers[0].signed = True

# Pin each quantizer's range to the values used in the manual implementation
x_min = -x.abs().max()
x_max = x.abs().max()
sim_linear.model[0].input_quantizers[0].min = torch.nn.Parameter(torch.ones_like(sim_linear.model[0].input_quantizers[0].min) * x_min)
sim_linear.model[0].input_quantizers[0].max = torch.nn.Parameter(torch.ones_like(sim_linear.model[0].input_quantizers[0].max) * x_max)
w_min = -w.abs().max()
w_max = w.abs().max()
sim_linear.model[0].param_quantizers["weight"].min = torch.nn.Parameter(torch.ones_like(sim_linear.model[0].param_quantizers["weight"].min) * w_min)
sim_linear.model[0].param_quantizers["weight"].max = torch.nn.Parameter(torch.ones_like(sim_linear.model[0].param_quantizers["weight"].max) * w_max)
y_min = -y.abs().max()
y_max = y.abs().max()
sim_linear.model[0].output_quantizers[0].min = torch.nn.Parameter(torch.ones_like(sim_linear.model[0].output_quantizers[0].min) * y_min)
sim_linear.model[0].output_quantizers[0].max = torch.nn.Parameter(torch.ones_like(sim_linear.model[0].output_quantizers[0].max) * y_max)

# The ranges are already set, so the calibration callback does nothing
def foo(model, data):
    pass

sim_linear.compute_encodings(forward_pass_callback=foo,
                             forward_pass_callback_args=x)
print(sim_linear.model[0].input_quantizers[0].get_scale())
print(sim_linear.model[0].input_quantizers[0].get_offset())
print(sim_linear.model[0].param_quantizers["weight"].get_scale())
print(sim_linear.model[0].param_quantizers["weight"].get_offset())
print(sim_linear.model[0].output_quantizers[0].get_scale())
print(sim_linear.model[0].output_quantizers[0].get_offset())
print(sim_linear.model(x))
"""output
tensor([0.0060], grad_fn=<DivBackward0>)
tensor([0.])
tensor([0.0046], grad_fn=<DivBackward0>)
tensor([0.])
tensor([0.0054], grad_fn=<DivBackward0>)
tensor([0.])
DequantizedTensor([[-0.6859, 0.0108, 0.4213, 0.1134]],
grad_fn=<AliasBackward0>)
"""
## What happens if the symmetric/signed settings differ?
from aimet_torch.v2.quantsim import QuantizationSimModel

sim_linear = QuantizationSimModel(model=nn.Sequential(m),
                                  dummy_input=x,
                                  quant_scheme=QuantScheme.post_training_tf_enhanced,
                                  default_output_bw=8,
                                  default_param_bw=8)
# Defaults: only the weight quantizer is symmetric/signed
# print(sim_linear.model[0].input_quantizers[0].symmetric)        # False
# print(sim_linear.model[0].param_quantizers["weight"].symmetric) # True
# print(sim_linear.model[0].output_quantizers[0].symmetric)       # False
# print(sim_linear.model[0].input_quantizers[0].signed)           # False
# print(sim_linear.model[0].param_quantizers["weight"].signed)    # True
# print(sim_linear.model[0].output_quantizers[0].signed)          # False

def foo(model, data):
    _ = model(data)

sim_linear.compute_encodings(forward_pass_callback=foo,
                             forward_pass_callback_args=x)
print(sim_linear.model[0].input_quantizers[0].get_scale(), sim_linear.model[0].input_quantizers[0].get_offset())
print(sim_linear.model[0].param_quantizers["weight"].get_scale(), sim_linear.model[0].param_quantizers["weight"].get_offset())
print(sim_linear.model[0].output_quantizers[0].get_scale(), sim_linear.model[0].output_quantizers[0].get_offset())
print(sim_linear.model(x))
"""output
tensor([0.0030]) tensor([-0.])
tensor([0.0046]) tensor([0.])
tensor([0.0044]) tensor([-158.])
DequantizedTensor([[-0.6901, 0.0087, 0.4236, 0.1136]],
grad_fn=<AliasBackward0>)
"""
By default, the input and output quantizers are asymmetric and unsigned (note the non-zero output offset of -158), while the weight quantizer stays symmetric and signed. Reimplementing this behavior by hand:
## Linear Layer without Bias
## What happens if the symmetric/signed settings differ?
torch.manual_seed(0)
x = torch.rand(1, 2)
m = nn.Linear(2, 4, bias=False)
w = m.weight
y = m(x) # torch.matmul(x, w.t())
print(y)
# Input/output: asymmetric, unsigned 8-bit over [min(r, 0), max(r)]
x_min = min(x.min(), 0)
x_scale = (x.max() - x_min) / 255
x_offset = -0 + x_min / x_scale   # -qmin + min/scale, with qmin = 0
# Weight: symmetric, signed 8-bit as before
w_scale = w.abs().max() / 128
w_offset = 0
y_min = min(y.min(), 0)
y_scale = (y.max() - y_min) / 255
y_offset = -0 + y_min / y_scale
qx = x / x_scale - x_offset
qx = torch.round(qx)
qw = w / w_scale - w_offset
qw = torch.round(qw)
SwSx_Sy = w_scale * x_scale / y_scale
qwqx = torch.matmul(qx, qw.t())
Zxqw = torch.matmul(torch.ones_like(x) * x_offset, qw.t())
Zy = y_offset
qy = SwSx_Sy * (qwqx + Zxqw) - Zy
print(qy)
dqy = (qy + y_offset) * y_scale
print(dqy)
print(x_scale, x_offset)
print(w_scale, w_offset)
print(y_scale, y_offset)
"""output
tensor([[-0.6886, 0.0105, 0.4238, 0.1126]], grad_fn=<MmBackward0>)
tensor([[2.5246e-01, 1.6040e+02, 2.5479e+02, 1.8343e+02]],
grad_fn=<SubBackward0>)
tensor([[-0.6875, 0.0111, 0.4228, 0.1116]], grad_fn=<MulBackward0>)
tensor(0.0030) tensor(0.)
tensor(0.0045, grad_fn=<DivBackward0>) 0
tensor(0.0044, grad_fn=<DivBackward0>) tensor(-157.8554, grad_fn=<AddBackward0>)
"""
Next, let's work through the behavior of a Linear layer with bias.
## Linear Layer with Bias
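The convenient choice, used in the code below, is to quantize the bias on the accumulator grid with $S_b = S_x S_w$ and $Z_b = 0$, so that $q_b$ adds directly onto the integer matmul:

$$
y = x w^\top + b = S_x S_w \left(q_x q_w^\top + Z_x q_w^\top + q_b\right)
\quad\Rightarrow\quad
q_y = \frac{S_x S_w}{S_y}\left(q_x q_w^\top + Z_x q_w^\top + q_b\right) - Z_y
$$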
torch.manual_seed(0)
x = torch.rand(1, 2)
m = nn.Linear(2, 4, bias=True)
w = m.weight
b = m.bias
y = m(x) # torch.matmul(x, w.t()) + b
print(y)
qmin = 0
qmax = 255
x_min = min(x.min(), 0)
x_scale = (x.max() - x_min) / (qmax - qmin)
x_offset = torch.round(-qmin + x_min / x_scale)
w_scale = w.abs().max() / 128
w_offset = 0
y_min = min(y.min(), 0)
y_scale = (y.max() - y_min) / (qmax - qmin)
y_offset = torch.round(-qmin + y_min / y_scale)
# Bias lives on the accumulator grid: S_b = S_x * S_w, Z_b = 0
b_scale = x_scale * w_scale
b_offset = 0
qx = x / x_scale - x_offset
qx = torch.round(qx)
qw = w / w_scale - w_offset
qw = torch.round(qw)
qb = b / b_scale - b_offset
qb = torch.round(qb)
SwSx_Sy = w_scale * x_scale / y_scale
qwqx = torch.matmul(qx, qw.t())
Zxqw = torch.matmul(torch.ones_like(x) * x_offset, qw.t())
Zy = y_offset
qy = SwSx_Sy * (qwqx + qb + Zxqw) - Zy
qy = torch.round(qy)
print(qy)
dqy = (qy + y_offset) * y_scale
print(dqy)
print(x_scale, x_offset)
print(w_scale, w_offset)
print(b_scale, b_offset)
print(y_scale, y_offset)
"""output
tensor([[-0.9023, -0.1285, -0.2518, -0.3557]], grad_fn=<AddmmBackward0>)
tensor([[ 0., 255., 214., 179.]], grad_fn=<RoundBackward0>)
tensor([[-0.9013, -0.1274, -0.2519, -0.3581]], grad_fn=<MulBackward0>)
tensor(0.0030) tensor(0.)
tensor(0.0045, grad_fn=<DivBackward0>) 0
tensor(1.3698e-05, grad_fn=<MulBackward0>) 0
tensor(0.0030, grad_fn=<DivBackward0>) tensor(-297., grad_fn=<RoundBackward0>)
"""
## Linear Layer with Bias
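Checking the same layer with QuantSim: AIMET leaves the bias unquantized by default, so we attach an 8-bit symmetric QuantizeDequantize to it explicitly (per-tensor, hence the (1,) shape).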
from aimet_torch.v2.quantsim import QuantizationSimModel
import aimet_torch.v2.quantization as Q

sim_linear = QuantizationSimModel(model=nn.Sequential(m),
                                  dummy_input=x,
                                  quant_scheme=QuantScheme.post_training_tf_enhanced,
                                  default_output_bw=8,
                                  default_param_bw=8)
# Attach a quantizer to the bias parameter
sim_linear.model[0].param_quantizers["bias"] = Q.affine.QuantizeDequantize((1,),
                                                                           bitwidth=8,
                                                                           symmetric=True)

def foo(model, data):
    _ = model(data)

sim_linear.compute_encodings(forward_pass_callback=foo,
                             forward_pass_callback_args=x)
print(y)
print(dqy)
print(sim_linear.model(x))
print(sim_linear.model[0].input_quantizers[0].get_scale(), sim_linear.model[0].input_quantizers[0].get_offset())
print(sim_linear.model[0].param_quantizers["weight"].get_scale(), sim_linear.model[0].param_quantizers["weight"].get_offset())
print(sim_linear.model[0].param_quantizers["bias"].get_scale(), sim_linear.model[0].param_quantizers["bias"].get_offset())
print(sim_linear.model[0].output_quantizers[0].get_scale(), sim_linear.model[0].output_quantizers[0].get_offset())
"""output
tensor([[-0.9023, -0.1285, -0.2518, -0.3557]], grad_fn=<AddmmBackward0>)
tensor([[-0.9013, -0.1274, -0.2519, -0.3581]], grad_fn=<MulBackward0>)
DequantizedTensor([[-0.8996, -0.1270, -0.2505, -0.3563]],
grad_fn=<AliasBackward0>)
tensor([0.0030]) tensor([-0.])
tensor([0.0046]) tensor([0.])
tensor([0.0053], grad_fn=<DivBackward0>) tensor([0.])
tensor([0.0035]) tensor([-255.])
"""