import numpy as np
def print_obj(obj, name):
    """Print a labelled value for the tutorial output.

    Emits ``name`` followed by a colon on its own line, then the string
    form of ``obj``, then one empty line as a separator.
    """
    header = str(name) + ":"
    body = str(obj)
    # The trailing empty field plus print's own newline gives the blank line.
    print(header, body, "", sep="\n")
def check_each(a, b):
    """Compare ``a`` and ``b`` element-wise.

    Returns the result of ``a == b`` cast to a boolean array, so the
    operands must be array-likes whose comparison yields an ndarray.
    """
    matches = a == b
    return matches.astype(bool)
def check_mean(a, b):
    """Return the mean of the element-wise equality ``a == b`` cast to bool.

    Because the mean (the fraction of matching elements) is converted to a
    boolean, the result is True as soon as at least one element matches and
    False only when no element matches.
    """
    match_fraction = np.mean(a == b)
    return match_fraction.astype(bool)
a = np.array(1.)
b = np.array([1., 2., 3.])
c = np.array([[1., 2., 3.], [4., 5., 6.]])
print_obj(a, "a")
print_obj(b, "b")
print_obj(c, "c")
a:
1.0
b:
[1. 2. 3.]
c:
[[1. 2. 3.]
[4. 5. 6.]]
print_obj(a.ndim, "a.ndim")
print_obj(b.ndim, "b.ndim")
print_obj(c.ndim, "c.ndim")
a.ndim:
0
b.ndim:
1
c.ndim:
2
print_obj(a.shape, "a.shape")
print_obj(b.shape, "b.shape")
print_obj(c.shape, "c.shape")
a.shape:
()
b.shape:
(3,)
c.shape:
(2, 3)
d = np.array([[[1., 2., 3.], [4., 5., 6.]], [[7., 8., 9.], [10., 11., 12.]]])
e = np.array([[[[1., 2., 3.], [1., 2., 3.]], [[4., 5., 6.], [4., 5., 6.]]],
[[[7., 8., 9.], [7., 8., 9.]], [[10., 11., 12.], [10., 11., 12.]]]])
print_obj(d, "d")
print_obj(d.ndim, "d.ndim")
print_obj(d.shape, "d.shape")
print_obj(e, "e")
print_obj(e.ndim, "e.ndim")
print_obj(e.shape, "e.shape")
d:
[[[ 1. 2. 3.]
[ 4. 5. 6.]]
[[ 7. 8. 9.]
[10. 11. 12.]]]
d.ndim:
3
d.shape:
(2, 2, 3)
e:
[[[[ 1. 2. 3.]
[ 1. 2. 3.]]
[[ 4. 5. 6.]
[ 4. 5. 6.]]]
[[[ 7. 8. 9.]
[ 7. 8. 9.]]
[[10. 11. 12.]
[10. 11. 12.]]]]
e.ndim:
4
e.shape:
(2, 2, 2, 3)
# Quiz: What is the shape of [[[1], [2], [3]], [[4], [5], [6]]]?
# Answer: (2, 3, 1) 이렇게 이상한 형태를 해야하는 경우가 있다. 어떤 경우인지는 나중에 알아본다.
f = np.array([[[1], [2], [3]], [[4], [5], [6]]])
print_obj(f, "f")
print_obj(f.ndim, "f.ndim")
print_obj(f.shape, "f.shape")
f:
[[[1]
[2]
[3]]
[[4]
[5]
[6]]]
f.ndim:
3
f.shape:
(2, 3, 1)
higher order tensor나 매우 큰 shape(ex. (100,100))의 tensor를 만들때는 element를 항상 type할 수 없다. 이럴 때 쉽게 matrix를 create, define하는 방법
a = np.ones(10) # shape is (10,): a 1-D vector, not (10, 1)
a
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
a = np.zeros((2, 5))
a
array([[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])
a = np.full((2,5), 5)
a
array([[5, 5, 5, 5, 5],
[5, 5, 5, 5, 5]])
a = np.random.random((2, 3, 4)) # random values are drawn from uniform distribution between 0 and 1
a
array([[[0.9142196 , 0.30236846, 0.58074999, 0.38689703],
[0.46415441, 0.47096453, 0.99620226, 0.97122197],
[0.53739628, 0.89200389, 0.58018502, 0.91639591]],
[[0.6118029 , 0.96417403, 0.51340103, 0.64971966],
[0.97797573, 0.32365923, 0.0268915 , 0.31762698],
[0.45924819, 0.77563535, 0.72542822, 0.41534026]]])
# matrix를 arange로 define하면 integer value를 얻는다.
a = np.arange(10)
a
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# float로 변환하고 싶을 때 astype을 쓴다.
a = np.arange(10).astype(float)
a
array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
# n-dimensional array를 define하고 싶을 때 reshape을 쓴다.
a = np.arange(10).reshape((5,2))
a
array([[0, 1],
[2, 3],
[4, 5],
[6, 7],
[8, 9]])
# Quiz: Create a 4-by-3-by-2 tensor filled with 0.0 to 23.0
b = np.arange(24).reshape(4,3,2).astype(float)
b
array([[[ 0., 1.],
[ 2., 3.],
[ 4., 5.]],
[[ 6., 7.],
[ 8., 9.],
[10., 11.]],
[[12., 13.],
[14., 15.],
[16., 17.]],
[[18., 19.],
[20., 21.],
[22., 23.]]])
# Indexing and slicing a vector
a = np.arange(10)
print_obj(a, "a")
print_obj(a[0], "a[0]")
print_obj(a[1], "a[1]")
print_obj(a[-1], "a[-1]")# -를 써서 indexing하는 경우가 많을 것이다.
print_obj(a[-3], "a[-3]")
print_obj(a[0:10], "a[0:10]")
print_obj(a[0:], "a[0:]")
print_obj(a[:10], "a[:10]")
print_obj(a[:], "a[:]")
print_obj(a[7:], "a[7:]")
print_obj(a[:5], "a[:5]")
print_obj(a[2:5], "a[2:5]")
# Quiz: What is a[-4:]?
# Answer: [6,7,8,9]
print_obj(a[-4:], "answer")
# Quiz: What is a[:-8]?
# Answer: [0,1]
print_obj(a[:-8], "answer2")
# 홀수나 짝수 value만 취하고 싶을 때 유용하다.
print_obj(a[0:10:2], "a[0:10:2]") # skipping 1 element at a time
print_obj(a[0:10:3], "a[0:10:3]") # skipping 2 elements at a time
print_obj(a[2:6:3], "a[2:6:3]")
print_obj(a[::-1], "a[::-1]") # negative means going backwards.
print_obj(a[8:5:-1], "a[8:5:-1]")
print_obj(a[8:5], "a[8:5]") # empty result: without a negative step the slice runs in ascending order, so start > stop selects nothing
# Quiz: Create [9, 6, 3] using a.
print_obj(a[9:2:-3], "answer3")
a:
[0 1 2 3 4 5 6 7 8 9]
a[0]:
0
a[1]:
1
a[-1]:
9
a[-3]:
7
a[0:10]:
[0 1 2 3 4 5 6 7 8 9]
a[0:]:
[0 1 2 3 4 5 6 7 8 9]
a[:10]:
[0 1 2 3 4 5 6 7 8 9]
a[:]:
[0 1 2 3 4 5 6 7 8 9]
a[7:]:
[7 8 9]
a[:5]:
[0 1 2 3 4]
a[2:5]:
[2 3 4]
a[-4:]:
[6 7 8 9]
a[:-8]:
[0 1]
a[0:10:2]:
[0 2 4 6 8]
a[0:10:3]:
[0 3 6 9]
a[2:6:3]:
[2 5]
a[::-1]:
[9 8 7 6 5 4 3 2 1 0]
a[8:5:-1]:
[8 7 6]
a[8:5]:
[]
a[9:2:-3]:
[9 6 3]
# Indexing a matrix
a = np.arange(9).reshape((3,3))
print_obj(a, "a")
print_obj(a[0][0], "a[0][0]")
print_obj(a[0,0], "a[0,0]")
print_obj(a[1,1], "a[1,1]")
# Quiz: How to access the last row?
print_obj(a[-1,], "answer")
# Quiz: How to access the second column?
print_obj(a[:,1], "answer2") # you need to have ":" in the row position
# Quiz: How to create [8, 5] using a?
print_obj(a[:0:-1,-1], "answer3")
print_obj(a[1:3:,-1][::-1], "answer3")
a:
[[0 1 2]
[3 4 5]
[6 7 8]]
a[0][0]:
0
a[0,0]:
0
a[1,1]:
4
a[2,:]:
[6 7 8]
a[:,1]:
[1 4 7]
a[:0:-1,2]:
[8 5]
a[:0:-1,2]:
[8 5]
# Indexing and slicing a 3D tensor
a = np.arange(4*3*2).reshape((4, 3, 2))
print_obj(a, "a")
print_obj(a[2, 1, 0], "a[2, 1, 0]")
# Quiz: What would be a[0]?
print_obj(a[0], "a[0]")
# Quiz: What would be a[0, 1]?
print_obj(a[0, 1], "a[0, 1]")
# Quiz: Create [[0, 2, 4], [6, 8, 10]]
print_obj(a[:2,:,0], "a[:2,:,0]")
a:
[[[ 0 1]
[ 2 3]
[ 4 5]]
[[ 6 7]
[ 8 9]
[10 11]]
[[12 13]
[14 15]
[16 17]]
[[18 19]
[20 21]
[22 23]]]
a[2, 1, 0]:
14
a[0]:
[[0 1]
[2 3]
[4 5]]
a[0, 1]:
[2 3]
a[:2,:,0]:
[[ 0 2 4]
[ 6 8 10]]
# Conditional indexing
a = np.arange(3*2).reshape((3,2))
print_obj(a, "a")
idx = a % 2 == 0 # find even number within the matrix
print_obj(idx, "idx")
print_obj(a[idx], "test")
# Quiz: How would you create [3, 4, 5] using a?
idx2 = a >= 3
print_obj(idx2, "idx2")
print_obj(a[idx2], "answer")
a:
[[0 1]
[2 3]
[4 5]]
idx:
[[ True False]
[ True False]
[ True False]]
test:
[0 2 4]
idx2:
[[False False]
[False True]
[ True True]]
answer:
[3 4 5]
# Taking specific elements from a vector
a = np.arange(10)
idx = [0, 2, 3]
print_obj(a[idx], "a[idx]")
a[idx]:
[0 2 3]
# Taking specific elements from a tensor
a = np.arange(24).reshape((6,4))
print_obj(a, "a")
print_obj(a[:,[0, 2, 3]], "a[idx]")
print_obj(a[[0, 2, 3], :], "a[idx]")
#idx = ((0,0,1,5),(1,2,0,3))
#print_obj(a[idx], "tuple indexing")
#idx = np.array([[0,0,1,5],[1,2,0,3]])
#print_obj(a[idx], "ndarray indexing")
a:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]
a[idx]:
[[ 0 2 3]
[ 4 6 7]
[ 8 10 11]
[12 14 15]
[16 18 19]
[20 22 23]]
a[idx]:
[[ 0 1 2 3]
[ 8 9 10 11]
[12 13 14 15]]
# Basic operations
a = np.arange(6).reshape((3, 2))
b = np.ones((3, 2))
print_obj(a, "a")
print_obj(b, "b")
# both a and b is (3,2). shape match
# +, -, *, /
print_obj(a+b, "a+b")
print_obj(a-b, "a-b")
print_obj(a*b, "a*b")
print_obj(a/b, "a/b")
a:
[[0 1]
[2 3]
[4 5]]
b:
[[1. 1.]
[1. 1.]
[1. 1.]]
a+b:
[[1. 2.]
[3. 4.]
[5. 6.]]
a-b:
[[-1. 0.]
[ 1. 2.]
[ 3. 4.]]
a*b:
[[0. 1.]
[2. 3.]
[4. 5.]]
a/b:
[[0. 1.]
[2. 3.]
[4. 5.]]
# Unary operations
a = np.arange(6).reshape((3,2))
print_obj(a, "a")
print_obj(a.sum(), "a.sum()") # scalar value: sum over all elements
print_obj(a.sum(axis=0), "a.sum(axis=0)") # collapses axis 0: one sum per column
print_obj(a.sum(axis=1), "a.sum(axis=1)") # collapses axis 1: one sum per row
print_obj(a.mean(), "a.mean()") # scalar value: mean over all elements
print_obj(a.max(), "a.max()")
print_obj(a.min(), "a.min()")
# Quiz: Given a = np.arange(24).reshape((2,3,4)), what is the mean of the sum w.r.t. the last dimension?
# Summing over the last dimension leaves a (2, 3) matrix; the answer is the mean of that matrix.
a = np.arange(24).reshape((2,3,4))
print_obj(a, "a")
print_obj(a.sum(axis=-1).mean(), "answer")
a:
[[0 1]
[2 3]
[4 5]]
a.sum():
15
a.sum(axis=0):
[6 9]
a.sum(axis=1):
[1 5 9]
a.mean():
2.5
a.max():
5
a.min():
0
a:
[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]]
answer:
46.0
# Vector dot product
a = np.arange(3).astype('float')
b = np.ones(3)
print_obj(a, "a")
print_obj(b, "b")
print_obj(np.dot(a, b), "a dot b")
a:
[0. 1. 2.]
b:
[1. 1. 1.]
a dot b:
3.0
# Matrix dot product, matrix multiplication
a = np.arange(6).reshape((3, 2))
b = np.ones((2, 3))
print_obj(a, "a")
print_obj(b, "b")
print_obj(np.dot(a,b), "a dot b")
print_obj(a@b, "a @ b")
a:
[[0 1]
[2 3]
[4 5]]
b:
[[1. 1. 1.]
[1. 1. 1.]]
a dot b:
[[1. 1. 1.]
[5. 5. 5.]
[9. 9. 9.]]
a @ b:
[[1. 1. 1.]
[5. 5. 5.]
[9. 9. 9.]]
# Tensor dot product, tensor multiplication
# In typical deep learning, the first axis is the batch axis, and the remaining axes are the dimensions of each input sample.
# repeat the same multiplication along the batch axis.
a = np.arange(24).reshape((4, 3, 2))
b = np.ones((4, 2, 3))
print_obj(a, "a")
print_obj(b, "b")
# typically you wouldn't use np.dot in the higher order tensor
print_obj(np.dot(a,b).shape, "a dot b")
print_obj((a@b).shape, "a @ b")
# Quiz: what would happen if a.shape==(4,3,2) and b.shape==(2,3)?
a:
[[[ 0 1]
[ 2 3]
[ 4 5]]
[[ 6 7]
[ 8 9]
[10 11]]
[[12 13]
[14 15]
[16 17]]
[[18 19]
[20 21]
[22 23]]]
b:
[[[1. 1. 1.]
[1. 1. 1.]]
[[1. 1. 1.]
[1. 1. 1.]]
[[1. 1. 1.]
[1. 1. 1.]]
[[1. 1. 1.]
[1. 1. 1.]]]
a dot b:
(4, 3, 4, 3)
a @ b:
(4, 3, 3)
# Reshapes
a = np.arange(24).reshape((2, 3, 4))
print_obj(a, "a")
b = a.reshape((6, 4))
print_obj(b, "b")
# you can skip the remaining axis with -1. -1이어도 무조건 4가 될 수 밖에 없으므로 numpy가 -1로 써도 알아서 4로 똑똑하게 인식한다.
c = a.reshape((6, -1))
print_obj(c, "c")
# Quiz: What would d=a.reshape((6, 4, -1)) look like?
# sometime you need to do this!!
d=a.reshape((6, 4, -1))
print_obj(d.shape, "shape")
print_obj(d, "answer")
a:
[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]]
b:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]
c:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]
shape:
(6, 4, 1)
answer:
[[[ 0]
[ 1]
[ 2]
[ 3]]
[[ 4]
[ 5]
[ 6]
[ 7]]
[[ 8]
[ 9]
[10]
[11]]
[[12]
[13]
[14]
[15]]
[[16]
[17]
[18]
[19]]
[[20]
[21]
[22]
[23]]]
# Adding an extra dimension
# 이유) binary operation을 할 때 same matching shape을 가지거나, 최소한 same enddim을 가져야한다.
a = np.arange(3)
print_obj(a, "a")
# 두 개는 exactly same
print_obj(a.reshape(3,-1), "a.reshape(3,-1)")
print_obj(a[:, None], "a[:, None]")
# Quiz: How to make a = np.ones((3,4)) into shape (3, 1, 1, 4) using reshape and None?
a = np.ones((3,4))
# print_obj(a.reshape(3, -1, 4), "a.reshape(3, -1, -1, 4)")
print_obj(a[:, None, None, :].shape, "shape")
print_obj(a[:, None, None, :], "answer")
a:
[0 1 2]
a.reshape(3,-1):
[[0]
[1]
[2]]
a[:, None]:
[[0]
[1]
[2]]
a.reshape(3, -1, -1, 4):
[[[1. 1. 1. 1.]]
[[1. 1. 1. 1.]]
[[1. 1. 1. 1.]]]
shape:
(3, 1, 1, 4)
answer:
[[[[1. 1. 1. 1.]]]
[[[1. 1. 1. 1.]]]
[[[1. 1. 1. 1.]]]]
# Stack, concatenation
a = np.ones((3,2))
b = np.zeros((3,2))
print_obj(a, "a")
print_obj(b, "b")
print_obj(np.vstack([a, b]), "a,b vstack")
print_obj(np.hstack([a, b]), "a,b hstack")
print_obj(np.hstack([a, b, a]), "a,b hstack")
# Recommend use concatenate operator which is easier to generalize to high order tensor.
print_obj(np.concatenate([a, b], axis=0), "a,b concat axis=0")
print_obj(np.concatenate([a, b], axis=1), "a,b concat axis=1")
# Quiz: Would concatenating two tensors whose shapes are (4, 3, 2) and (5, 4, 2) on axis=2 work?
# Answer: No. All axes other than the concatenation axis must match, but here 4 != 5 and 3 != 4.
a = np.ones((4,3,2))
b = np.zeros((5,4,2))
#print_obj(np.concatenate([a, b], axis=2).shape, "shape")
a:
[[1. 1.]
[1. 1.]
[1. 1.]]
b:
[[0. 0.]
[0. 0.]
[0. 0.]]
a,b vstack:
[[1. 1.]
[1. 1.]
[1. 1.]
[0. 0.]
[0. 0.]
[0. 0.]]
a,b hstack:
[[1. 1. 0. 0.]
[1. 1. 0. 0.]
[1. 1. 0. 0.]]
a,b hstack:
[[1. 1. 0. 0. 1. 1.]
[1. 1. 0. 0. 1. 1.]
[1. 1. 0. 0. 1. 1.]]
a,b concat axis=0:
[[1. 1.]
[1. 1.]
[1. 1.]
[0. 0.]
[0. 0.]
[0. 0.]]
a,b concat axis=1:
[[1. 1. 0. 0.]
[1. 1. 0. 0.]
[1. 1. 0. 0.]]
# Matrix transpose
a = np.arange(6).reshape((3, 2))
print_obj(a, "a")
print_obj(a.T, "a.T")
a:
[[0 1]
[2 3]
[4 5]]
a.T:
[[0 2 4]
[1 3 5]]
# Tensor transpose
a = np.arange(24).reshape((4, 3, 2))
print_obj(a, "a")
b = np.transpose(a, [0, 2, 1])
print_obj(b, "Swap axis 1 and 2")
print_obj(b.shape, "b's shape")
# The first axis goes to the second place and the second axis goes to the first place, leaving the last axis as is.
c = np.transpose(a, [1, 0, 2])
print_obj(c, "Swap axis 0 and 1")
print_obj(c.shape, "c's shape")
a:
[[[ 0 1]
[ 2 3]
[ 4 5]]
[[ 6 7]
[ 8 9]
[10 11]]
[[12 13]
[14 15]
[16 17]]
[[18 19]
[20 21]
[22 23]]]
Swap axis 1 and 2:
[[[ 0 2 4]
[ 1 3 5]]
[[ 6 8 10]
[ 7 9 11]]
[[12 14 16]
[13 15 17]]
[[18 20 22]
[19 21 23]]]
b's shape:
(4, 2, 3)
Swap axis 0 and 1:
[[[ 0 1]
[ 6 7]
[12 13]
[18 19]]
[[ 2 3]
[ 8 9]
[14 15]
[20 21]]
[[ 4 5]
[10 11]
[16 17]
[22 23]]]
c's shape:
(3, 4, 2)
중요
Broadcasting is a very important concept.
Sometimes you want to perform an operation between two arrays that do not have exactly the same shape.
Often this works automatically, because PyTorch or NumPy understands what needs to be done to match the shapes of the two objects. This mechanism is called broadcasting.
Broadcasting은 shape이 자동적으로 match되서 Math 계산이 가능해진 것을 말한다. 이것이 어떻게 실행되냐면, dimensionality을 늘리고 거기에 같은 element를 copy&paste한다.
# Vector and scalar
a = np.arange(3)
b = 2.
print_obj(a, "a")
# b was treated as [2,2,2], 1 dimensional array.
print_obj(a+b, "a+b")
print_obj(a-b, "a-b")
print_obj(a*b, "a*b")
print_obj(a/b, "a/b")
a:
[0 1 2]
a+b:
[2. 3. 4.]
a-b:
[-2. -1. 0.]
a*b:
[0. 2. 4.]
a/b:
[0. 0.5 1. ]
# Matrix and vector
# b is expanded from 1-D to 2-D: it is internally treated as a 2-dimensional matrix, and the addition is then executed element-wise.
# a and b share a matching trailing axis of size 2, so numpy repeats b 3 times along the leading axis to match the entire shape of a.
a = np.arange(6).reshape((3,2))
b = np.arange(2).reshape(2) + 1
print_obj(a, "a")
print_obj(b, "b")
print_obj(a+b, "a+b")
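# Quiz: What would happen if b were np.arange(2).reshape((2, 1))? How about np.arange(2).reshape((1, 2))?
# Answer: (2, 1) fails: a is (3, 2), and the leading axes 3 and 2 differ with neither equal to 1, so the shapes cannot be broadcast (ValueError).
#         (1, 2) works: the size-1 leading axis is repeated 3 times to match a, as shown below.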
b = np.arange(2).reshape((1, 2))
print_obj(b, "b")
print_obj(a+b, "a+b")
a:
[[0 1]
[2 3]
[4 5]]
b:
[1 2]
a+b:
[[1 3]
[3 5]
[5 7]]
b:
[[0 1]]
a+b:
[[0 2]
[2 4]
[4 6]]
# Tensor and matrix
a = np.arange(12).reshape((2,3,2))
b = np.arange(6).reshape((3,2))
print_obj(a, "a")
print_obj(b, "b")
print_obj(a+b, "a+b")
#Quiz: How can we use None to do a+b?
print_obj(a+b[None,:,:], "answer")
a:
[[[ 0 1]
[ 2 3]
[ 4 5]]
[[ 6 7]
[ 8 9]
[10 11]]]
b:
[[0 1]
[2 3]
[4 5]]
a+b:
[[[ 0 2]
[ 4 6]
[ 8 10]]
[[ 6 8]
[10 12]
[14 16]]]
answer:
[[[ 0 2]
[ 4 6]
[ 8 10]]
[[ 6 8]
[10 12]
[14 16]]]
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise."""
    return 1./(1. + np.exp(-x))


def foo(M, W):
    """Rearrange the rows of ``M``, project with ``W`` and reduce to a scalar.

    Exercise: given M of shape (m, n) and W of shape (4n, n), execute the
    following:

    - Take the first half rows of M
    - Take the second half rows of M
    - Take the odd-numbered rows of M (indices 1, 3, 5, ...)
    - Take the even-numbered rows of M (indices 0, 2, 4, ...)
    - Append them horizontally in the listed order to obtain a matrix X of
      shape (m/2, 4n)
    - Linearly transform X with W to obtain Y = X @ W
    - Put Y through the sigmoid function
    - Average over axis 0 and sum the resulting means

    NOTE: the exercise text says "sum of the row-wise mean"; this
    implementation averages along axis 0 (one mean per column of Y), which
    is what the result recorded for the sample inputs in this file
    corresponds to.

    Args:
        M: 2-D array with an even number of rows.
        W: 2-D array whose first dimension is four times the number of
            columns of M.

    Returns:
        A scalar: the sum over columns of the axis-0 means of sigmoid(X @ W).

    Raises:
        ValueError: if M is not 2-D or has an odd number of rows (the four
            row selections would not have equal heights and could not be
            concatenated side by side).
    """
    if M.ndim != 2:
        raise ValueError("M must be 2-D, got shape %s" % (M.shape,))
    rows = M.shape[0]
    if rows % 2 != 0:
        raise ValueError("M must have an even number of rows, got %d" % rows)

    half = rows // 2  # integer division; avoids the float round-trip of int(rows/2)
    pieces = [
        M[:half, :],   # first half rows of M
        M[half:, :],   # second half rows of M
        M[1::2, :],    # rows at odd indices
        M[::2, :],     # rows at even indices
    ]
    X = np.concatenate(pieces, axis=1)
    Y = X @ W
    activated = sigmoid(Y)
    return activated.mean(axis=0).sum()
M = (np.arange(20).reshape((10,2)).astype('float') - 10.) / 10
W = np.arange(16).reshape(8,2).astype('float') / 10.
foo(M,W)
1.0114933115231504
Reference
- AI504: Programming for AI Lecture at KAIST AI