The torch.autograd package provides automatic differentiation for all operations on Tensors.
This means backpropagation is defined by how the code is written and executed (define-by-run).
It automatically computes the gradients needed for backpropagation.
Setting requires_grad = True starts tracking all operations on the tensor.
To stop tracking, call .detach() to detach the tensor from the computation graph.
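A minimal sketch of starting and stopping tracking; the tensors t and s below are just for illustration and are not used later:

import torch

t = torch.ones(2, requires_grad=True)       # start tracking every operation on t
s = (t * 2).detach()                        # s holds the values of t * 2 but is cut from the graph
print(t.requires_grad, s.requires_grad)     # True False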
a = torch.randn(3,3)
a = a * 3
print(a)
print(a.requires_grad)
tensor([[ 2.3014, -5.3353, -5.4971],
[-0.8475, 0.7712, -1.5907],
[-3.8217, 0.3722, 3.5812]])
False
requires_grad_(...) changes the requires_grad flag of an existing tensor in-place.
grad_fn: records the function that produced the tensor, which autograd uses to compute gradients during backprop.
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b)
print(b.grad_fn)
True
tensor(95.3914, grad_fn=<SumBackward0>)
<SumBackward0 object at 0x000001F722FF2E00>
x = torch.ones(3,3, requires_grad = True)
print(x)
tensor([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]], requires_grad=True)
y = x + 5
print(y)
tensor([[6., 6., 6.],
[6., 6., 6.],
[6., 6., 6.]], grad_fn=<AddBackward0>)
z = y * y
out = z.mean()
print(z, out)
tensor([[36., 36., 36.],
[36., 36., 36.],
[36., 36., 36.]], grad_fn=<MulBackward0>) tensor(36., grad_fn=<MeanBackward0>)
.backward() automatically runs backpropagation through the recorded graph.
The resulting gradients are stored in the .grad attribute.
print(out)
out.backward()
tensor(36., grad_fn=<MeanBackward0>)
.grad: after backward(), the computed derivatives are stored on the leaf tensors that the data passed through.
print(x)
print(x.grad)
tensor([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]], requires_grad=True)
tensor([[1.3333, 1.3333, 1.3333],
[1.3333, 1.3333, 1.3333],
[1.3333, 1.3333, 1.3333]])
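The values 1.3333 follow from the chain rule: out = (1/9) Σ z_i with z_i = (x_i + 5)², so ∂out/∂x_i = 2(x_i + 5)/9 = 12/9 ≈ 1.3333. As a quick sanity check against the autograd result, reusing x from the cells above:

manual = 2 * (x + 5) / 9                 # hand-derived d(out)/dx per element
print(torch.allclose(x.grad, manual))    # True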
x = torch.randn(3, requires_grad = True)
y = x * 2
while y.data.norm() < 1000:
    y = y * 2
print(y)
tensor([1257.8265, 343.4454, 40.0833], grad_fn=<MulBackward0>)
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v, retain_graph=True)
print(x.grad)
tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])
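Because y is not a scalar, backward() needs a vector argument v and computes the vector-Jacobian product rather than a plain gradient. Here y was only scaled: judging by the printed values, the loop leaves y = 1024 · x elementwise, so x.grad = 1024 · v, i.e. 102.4, 1024, 0.1024. A quick check, reusing x.grad and v from above:

print(x.grad / v)    # roughly tensor([1024., 1024., 1024.]), the common scale factor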
torch.no_grad(): useful when a tensor has requires_grad=True but gradients are not needed.
Wrap only the forward pass (e.g. inference) in torch.no_grad(); it is not used where backpropagation is required.
print(x.requires_grad)
print((x ** 2).requires_grad)
with torch.no_grad():
    print((x ** 2).requires_grad)
True
True
False
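In practice torch.no_grad() is usually wrapped around the forward pass at inference/evaluation time. A minimal sketch; the Linear layer and random inputs below are just assumptions for illustration:

model = torch.nn.Linear(4, 2)      # hypothetical layer, not part of the example above
inputs = torch.randn(8, 4)
with torch.no_grad():              # forward pass without building a computation graph
    outputs = model(inputs)
print(outputs.requires_grad)       # False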
detach(): creates a new tensor with the same values but detached from the computation graph, so its requires_grad is False.
print(x)
print(y)
print(x.requires_grad)
print(y.requires_grad)
y = x.detach() # y gets the values of x, but gradient tracking is released
print("-------------------------")
print(x)
print(y)
print(x.requires_grad)
print(y.requires_grad)
print(x.eq(y).all())
tensor([1.2283, 0.3354, 0.0391], requires_grad=True)
tensor([1257.8265, 343.4454, 40.0833], grad_fn=<MulBackward0>)
True
True
-------------------------
tensor([1.2283, 0.3354, 0.0391], requires_grad=True)
tensor([1.2283, 0.3354, 0.0391])
True
False
tensor(True)
print(y)
tensor([-0.1718, 1.2381, 0.4675])
Computation flow: a → b → c → out
Calling backward() computes gradients in reverse, a ← b ← c ← out
The resulting gradient with respect to a is stored in a.grad
a = torch.ones(2,2)
print(a)
tensor([[1., 1.],
[1., 1.]])
a = torch.ones(2,2, requires_grad = True)
print(a)
tensor([[1., 1.],
[1., 1.]], requires_grad=True)
print(a.data)
print(a.grad)
print(a.grad_fn)
tensor([[1., 1.],
[1., 1.]])
None
None
b = a + 2
print(b)
tensor([[3., 3.],
[3., 3.]], grad_fn=<AddBackward0>)
c = b ** 2
print(c)
tensor([[9., 9.],
[9., 9.]], grad_fn=<PowBackward0>)
out = c.sum()
print(out)
tensor(36., grad_fn=<SumBackward0>)
print(out)
out.backward()
tensor(36., grad_fn=<SumBackward0>)
a.grad_fn is None because a is a leaf tensor created directly by the user; it is not the result of an operation.
print(a.data)
print(a.grad)
print(a.grad_fn)
tensor([[1., 1.],
[1., 1.]])
tensor([[6., 6.],
[6., 6.]])
None
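The 6s come from the chain rule: out = Σ (a + 2)², so ∂out/∂a = 2(a + 2) = 2 · 3 = 6 for every element. A quick check, reusing a from above:

print(torch.allclose(a.grad, 2 * (a + 2)))   # True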
print(b.data)
print(b.grad)
print(b.grad_fn)
tensor([[3., 3.],
[3., 3.]])
None
<AddBackward0 object at 0x000001F77AFE4250>
UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. See github.com/pytorch/pytorch/pull/30531 for more information.
  print(b.grad)
print(c.data)
print(c.grad)
print(c.grad_fn)
tensor([[9., 9.],
[9., 9.]])
None
<PowBackward0 object at 0x000001F72BBA9FC0>
(same UserWarning as above: accessing .grad on a non-leaf Tensor)
print(out.data)
print(out.grad)
print(out.grad_fn)
tensor(36.)
None
<SumBackward0 object at 0x000001F77AFA13F0>
(same UserWarning as above: accessing .grad on a non-leaf Tensor)
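The warnings above are expected: b, c, and out are intermediate (non-leaf) tensors, so backward() does not populate their .grad. If those gradients are needed, call .retain_grad() on the tensor before backward(). A minimal sketch repeating the same graph:

a = torch.ones(2, 2, requires_grad=True)
b = a + 2
b.retain_grad()               # ask autograd to keep the gradient of this non-leaf tensor
out = (b ** 2).sum()
out.backward()
print(b.grad)                 # tensor([[6., 6.], [6., 6.]]) and no warning this time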
x = torch.tensor(2.0, requires_grad=True)
print(x)
tensor(2., requires_grad=True)
y = x**2 + 3*x + 5
print(y)
tensor(15., grad_fn=<AddBackward0>)
y.backward()
print(f'dy/dx at x=2: {x.grad.item()}')
dy/dx at x=2: 7.0
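This matches the analytic derivative: dy/dx = 2x + 3, which at x = 2 gives 2 · 2 + 3 = 7.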
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
print(x)
tensor([1., 2., 3.], requires_grad=True)
y = x**2
print(y)
tensor([1., 4., 9.], grad_fn=<PowBackward0>)
y.backward(torch.ones_like(y))
print("dy/dx : ", x.grad)
dy/dx : tensor([2., 4., 6.])
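Again y is a vector, so backward() needs a vector argument; passing torch.ones_like(y) is equivalent to back-propagating from y.sum(), giving dy_i/dx_i = 2x_i, i.e. 2, 4, 6.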
x = torch.tensor(2.0, requires_grad=True)
w = torch.tensor(3.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)
print(x, w, b)
tensor(2., requires_grad=True) tensor(3., requires_grad=True) tensor(1., requires_grad=True)
y = w * x + b
print(y)
tensor(7., grad_fn=<AddBackward0>)
y.backward()
print(x.grad) # dy/dx = 3 → tensor(3.)
print(w.grad) # dy/dw = 2 → tensor(2.)
print(b.grad) # dy/db = 1 → tensor(1.)
tensor(3.)
tensor(2.)
tensor(1.)
x = torch.tensor(2.0, requires_grad=True)
print(x)
tensor(2., requires_grad=True)
y = x**3 + 2*x**2
print(y)
tensor(16., grad_fn=<AddBackward0>)
grad_x = torch.autograd.grad(y, x) # Compute dy/dx
print(grad_x)
(tensor(20.),)
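torch.autograd.grad returns a tuple with one gradient per input tensor and, unlike .backward(), does not accumulate into x.grad. The value matches the analytic derivative dy/dx = 3x² + 4x, which at x = 2 gives 12 + 8 = 20.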