[Python] Numpy

NAEMAMDAEROG·2021년 12월 4일

Numpy

Numerical Python
python에서 대규모 다차원 배열을 다룰 수 있게 도와주는 라이브러리.

import numpy as np

np.array([1, 2, 3, 4, 5])  # array([1, 2, 3, 4, 5])

np.array([3, 1.4, 2, 3, 4])  # array([3., 1.4, 2., 3., 4.])
# 실수가 1개라도 들어가면 모든 수가 다 실수로 바뀐다.

np.array([[1, 2],   # array([[1, 2],
		  [3, 4]])  #        [3, 4]])

배열 데이터 타입 dtype

arr = np.array([1, 2, 3, 4], dtype='float')  # array([1., 2., 3., 4.])
arr.dtype  # dtype('float64')
arr.astype(int)  # array([1, 2, 3, 4])

다양한 배열 만들기

np.zeros(10, dtype=int)  # array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

np.ones((3, 5), dtype=float)
# array([[1., 1., 1., 1., 1.],
#      [1., 1., 1., 1., 1.],
#      [1., 1., 1., 1., 1.]])

np.arange(0, 20, 2)  # np.arange(start, end, step), end 값은 포함되지 않는다.
# array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

np.linspace(0, 1, 5)  # array([0.  , 0.25, 0.5 , 0.75, 1.  ])
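# np.linspace(start, stop, num): 구간 [start, stop]을 양 끝점을 포함하여 num개의 균일한 간격의 값으로 나눈다.
# 실수 간격이 필요할 때는 step을 지정하는 np.arange보다 원소 개수가 정확히 보장되는 linspace가 더 안전하다.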

난수로 채워진 배열 만들기

np.random.random((2,2))
# array([[0.66953571, 0.19642349],
#        [0.99364465, 0.68965631]])
       
np.random.normal(0, 1, (2,2))  # np.random.normal(평균, 표준편차, shape)
# array([[-0.96698477, -1.84170259],
#        [ 1.40256817, -1.06960613]])
       
np.random.randint(0, 10, (2,2))
# array([[3, 5],
#        [4, 6]])
       
np.empty((2,2))  # (값의 초기화를 수행하지 않고) 주어진 형태와 타입을 갖는 새로운 어레이를 반환
# array([[0.96698477, 1.84170259],
#      [1.40256817, 1.06960613]])

배열의 기초

x2 = np.random.randint(10, size=(3,4))
# array([[6, 7, 5, 4],
#      [1, 9, 3, 8],
#      [3, 7, 1, 8]])
       
x2.ndim  # 2, 차원
x2.shape  # (3,4)
x2.size  # 12, 원소가 몇 개 있는지
x2.dtype  # dtype('int64')

찾고 잘라내기

x = np.arange(7)  # array([0, 1, 2, 3, 4, 5, 6])

x[3]  # 3
x[7]  # IndexError: index 7 is out of bounds for axis 0 with size 7
x[0]=10
# array([10,  1,  2,  3,  4,  5,  6])

x[1:4]  # array([1, 2, 3])
x[1:]  # array([1, 2, 3, 4, 5, 6])
x[:4]  # array([10,  1,  2,  3])  (위에서 x[0]=10 으로 바꿨으므로)
x[::2]  # array([10,  2,  4,  6])

모양 바꾸기

reshape : array의 shape를 변경한다.

x = np.arange(8)  # array([0, 1, 2, 3, 4, 5, 6, 7])
x.shape  # (8,)

x2 = x.reshape((2,4))
# array([[0, 1, 2, 3],
#      [4, 5, 6, 7]])

x2.shape  # (2,4)

이어 붙이고 나누고

concatenate : array를 이어 붙인다.
split : axis 축을 기준으로 나눌 수 있다.

x = np.array([0, 1, 2])
y = np.array([3, 4, 5])
np.concatenate([x, y])
# array([0, 1, 2, 3, 4, 5])

matrix = np.arange(4).reshape(2,2)
# array([[0, 1],
#      [2, 3]])

np.concatenate([matrix, matrix], axis=0)
# array([[0, 1],
#      [2, 3],
#      [0, 1],
#      [2, 3]])

np.concatenate([matrix, matrix], axis=1)
# array([[0, 1, 0, 1],
#      [2, 3, 2, 3]])
       
matrix = np.arange(16).reshape(4,4)
# array([[ 0,  1,  2,  3],
#      [ 4,  5,  6,  7],
#      [ 8,  9, 10, 11],
#      [12, 13, 14, 15]])

upper, lower = np.split(matrix, [3], axis=0)
# [[ 0  1  2  3]
# [ 4  5  6  7]
# [ 8  9 10 11]] [[12 13 14 15]]

left, right = np.split(matrix, [3], axis=1)
# left               
# [[ 0  1  2]
#  [ 4  5  6]
#  [ 8  9 10]
#  [12 13 14]]
# right
# [[ 3]
#  [ 7]
#  [11]
#  [15]]

Broadcasting

shape이 다른 array끼리 연산

matrix = np.arange(9).reshape(3,3)
# array([[0, 1, 2],
#       [3, 4, 5],
#       [6, 7, 8]])

matrix + 3
# array([[ 3,  4,  5],
#        [ 6,  7,  8],
#        [ 9, 10, 11]])

matrix + np.array([1,2,3])
# array([[ 1,  3,  5],
#        [ 4,  6,  8],
#        [ 7,  9, 11]])

np.arange(3).reshape(3,1) + np.arange(3)
# array([[0],  +  array([0, 1, 2]) = array([[0, 1, 2],
#        [1],                               [1, 2, 3],
#        [2]])                              [2, 3, 4]])

집계함수

x = np.arange(8).reshape(2,4)
# array([[0, 1, 2, 3],
#        [4, 5, 6, 7]])

np.sum(x)  # 28
np.min(x)  # 0
np.max(x)  # 7
np.mean(x)  # 3.5
np.std(x)  # 2.29128784747792

np.sum(x, axis=0)  # array([ 4,  6,  8, 10])
np.sum(x, axis=1)  # array([ 6, 22])

마스킹 연산

True, False array를 통해 특정 값들을 뽑아내는 방법

x = np.arange(5)
# array([0, 1, 2, 3, 4])

x < 3
# array([ True,  True,  True, False, False])

x < 5
# array([ True,  True,  True,  True,  True])

x[x<3]
# array([0, 1, 2])