[python] 통계 지식 기반 연습 Day 1

Pygmalion Dali·2023년 9월 20일

AI엔지니어 Data Analyst SeSAC data python 새싹 통계 파이썬

python

목록 보기

5/6

# 47. Computing the mean score of two subjects

math_scores = [40, 60, 80]
english_scores = [30, 40, 50]

n_class = 2
n_student = len(math_scores)

# One running total per subject: index 0 holds math, index 1 holds English.
score_sums = [0] * n_class
score_means = []

# Walk both score lists in lockstep and accumulate each subject's total.
for math_score, english_score in zip(math_scores, english_scores):
    score_sums[0] += math_score      # score_sums[0]: sum of the math scores
    score_sums[1] += english_score   # score_sums[1]: sum of the English scores

print(score_sums)

# Turn each subject total into a mean by dividing by the number of students.
for subject_total in score_sums:
    score_means.append(subject_total / n_student)
print(score_means)

# 48. Mean subtraction
# Uses score_means computed in exercise 47 (index 0: math, index 1: English).

math_scores = [40, 60, 80]
english_scores = [30, 40, 50]

n_student = len(math_scores)

# Centre every score on its subject mean, updating the lists in place.
math_scores[:] = [score - score_means[0] for score in math_scores]
english_scores[:] = [score - score_means[1] for score in english_scores]


print(math_scores)
print(english_scores)

# 49. Variance and standard deviation

scores = [10, 20, 30]
n_student = len(scores)

# Totals needed for the shortcut formula below.
score_sum = sum(scores)                                # sum of the scores
score_square_sum = sum(value**2 for value in scores)   # sum of the squared scores

mean = score_sum / n_student
square_of_mean = mean**2                        # (mean)^2
mean_of_square = score_square_sum / n_student   # mean of the squares

# Variance = mean of squares - square of mean; std is its square root.
variance = mean_of_square - square_of_mean
std = variance**0.5

print(variance)
print(std)

# 50. Standardization
#
# standardized value = (original value - mean) / standard deviation

scores = [10, 20, 30]

n_student = len(scores)
score_sum = sum(scores)                                # sum of the elements
score_square_sum = sum(value**2 for value in scores)   # sum of the squared elements

mean = score_sum / n_student                    # mean
square_of_mean = mean**2                        # square of the mean
mean_of_square = score_square_sum / n_student   # mean of the squares

variance = mean_of_square - square_of_mean
std = variance**0.5

# Standardize every element in place: (value - mean) / std.
scores[:] = [(value - mean) / std for value in scores]
print(scores)

# Recompute the mean and standard deviation of the standardized data.
n_student = len(scores)
score_sum, score_square_sum = 0, 0

for standardized in scores:
    score_sum = score_sum + standardized
    score_square_sum = score_square_sum + standardized**2

mean = score_sum / n_student
square_of_mean = mean**2
mean_of_square = score_square_sum / n_student

variance = mean_of_square - square_of_mean
std = variance**0.5

print("mean: ",mean)
print("standard deviation: ", std)

# 51. Variance and standard deviation (4)

math_scores, english_scores = [50, 60, 70], [30, 40, 50]
n_student = len(math_scores)

# Sum and sum of squares for each subject.
math_sum, english_sum = sum(math_scores), sum(english_scores)
math_square_sum = sum(value**2 for value in math_scores)
english_square_sum = sum(value**2 for value in english_scores)

# Per-subject means.
math_mean = math_sum / n_student
english_mean = english_sum / n_student

# Variance = mean of squares - square of mean.
math_variance = math_square_sum / n_student - math_mean**2
english_variance = english_square_sum / n_student - english_mean**2

# Standard deviation is the square root of the variance.
math_std = math_variance**0.5
english_std = english_variance**0.5

print(math_mean,math_std)
print(english_mean,english_std)

# 52. Standardization
# Continues from exercise 51: reuses math_scores / english_scores, n_student,
# and the mean and standard deviation computed there.

for student_idx, (math_score, english_score) in enumerate(
        zip(math_scores[:n_student], english_scores[:n_student])):
    # (value - mean) / std for each subject
    math_scores[student_idx] = (math_score - math_mean) / math_std
    english_scores[student_idx] = (english_score - english_mean) / english_std

print(math_scores)
print(english_scores)

# ------

# Reset the accumulators before measuring the standardized data.
math_sum, english_sum = 0, 0
math_square_sum, english_square_sum = 0, 0

# Sum and sum of squares of the standardized scores.
for student_idx in range(n_student):
    math_z = math_scores[student_idx]
    english_z = english_scores[student_idx]

    math_sum += math_z
    math_square_sum += math_z**2

    english_sum += english_z
    english_square_sum += english_z**2

# Mean of the standardized scores.
math_mean = math_sum / n_student
english_mean = english_sum / n_student

# Variance of the standardized scores (mean of squares - square of mean).
math_variance = math_square_sum / n_student - math_mean**2
english_variance = english_square_sum / n_student - english_mean**2

math_std = math_variance**0.5
english_std = english_variance**0.5

# -------

print(math_mean, math_std)
print(english_mean, english_std)

# 53. Hadamard product (element-wise product of two vectors)

v1 = [1, 2, 3, 4, 5]
v2 = [10, 20, 30, 40, 50]

# Method 1: build the result by pairing up the elements.
v3 = [left * right for left, right in zip(v1, v2)]
print(v3)

# Method 2: pre-allocate a zero vector, then fill it position by position.
v3 = [0] * len(v1)

for dim_idx, left in enumerate(v1):
    v3[dim_idx] = left * v2[dim_idx]
print(v3)

# 54. Vector norm (3)

v1 = [1, 2, 3]

# The Euclidean norm is the square root of the sum of squared components.
square_sum = sum(component**2 for component in v1)

norm = square_sum**0.5
print("norm of v1: ", norm)

# 55. Making unit vectors
#
# A unit vector is the original vector divided by its norm: u = v / ||v||,
# so the norm of u is 1.

v1 = [1, 2, 3]

# Compute the norm of the original vector first, so the "before" value that
# is printed belongs to this v1 and not to a variable left over from an
# earlier exercise.
square_sum = 0

for dim_val in v1:
    square_sum += dim_val**2

norm = square_sum**0.5

print("norm of v1: ", norm)

# Normalize in place: dim_idx is needed because the elements are reassigned.
for dim_idx in range(len(v1)):
    v1[dim_idx] /= norm

# Recompute the norm of the normalized vector; it should be (numerically) 1.
square_sum = 0

for dim_val in v1:
    square_sum += dim_val**2

norm = square_sum**0.5

print("norm of v1: ", norm)

# 56. Dot product (inner product)

# Sum of the element-wise products of the two vectors.
# The result is a scalar.

v1, v2 = [1, 2, 3], [3, 4, 5]

dot_prod = sum(left * right for left, right in zip(v1, v2))

print("dot product of v1 and v2: ", dot_prod)

# 57. Euclidean distance

# Euclidean distance: the square root of the sum of the squared differences
# between corresponding elements.
# It measures how far apart two vectors (two points) are.

v1, v2 = [1, 2, 3], [3, 4, 5]
diff_square_sum = 0

for dim_idx in range(len(v1)):
    # The distance is built from the DIFFERENCE of the components; adding
    # them, as the code previously did, does not measure distance.
    diff_square_sum += (v1[dim_idx] - v2[dim_idx])**2

e_distance = diff_square_sum**0.5

print("Euclidian distance between v1 and v2: ", e_distance)

# 58. Mean squared error

# Review: computing the MSE between predictions and labels.

predictions = [10, 20, 30]
labels = [10, 25, 40]

n_data = len(predictions)

# Sum of squared (prediction - label) errors, divided by the number of samples.
diff_square_sum = sum(
    (predicted - actual)**2 for predicted, actual in zip(predictions, labels)
)
mse = diff_square_sum / n_data

print("MSE: ", mse)

# 59. Counting how often each number occurs

numbers = [0, 2, 4, 2, 1, 4, 3, 1, 2, 3, 4, 1, 2, 3, 4]
# number_cnt[k] holds how many times the value k has been seen (cnt = count).
number_cnt = [0] * 5

# Each element of numbers is used directly as the index of its counter.
for value in numbers:
    number_cnt[value] += 1

print(number_cnt)

# 60. Announcing a pass

score = 60

# A score strictly above 50 passes; nothing is printed otherwise.
passed = score > 50
if passed:
    print("Pass!")

# 61. Announcing pass / fail

# The score to evaluate.
score = 40
# The cut-off that must be strictly exceeded to pass.
cutoff = 50

print("Pass!" if score > cutoff else "Try Again!")

# 62. Expressing seconds as minutes and seconds

seconds = 200


# At 60 seconds or more, split off the whole minutes and keep the remainder;
# below that there are zero minutes and the seconds are left untouched.
minutes, seconds = divmod(seconds, 60) if seconds >= 60 else (0, seconds)

print(minutes, "min",seconds, "sec")

# 63. Expressing seconds as hours, minutes and seconds


def split_hms(total_seconds):
    """Split a number of seconds into (hours, minutes, seconds).

    hours and minutes both start at 0, so every input yields a complete
    triple. Previously `hours` was only assigned when there were at least
    60 minutes, or fewer than 60 seconds, so inputs from 60 to 3599 seconds
    failed with a NameError when printing.
    """
    hours, minutes = 0, 0
    if total_seconds >= 60:
        minutes, total_seconds = divmod(total_seconds, 60)
        if minutes >= 60:
            hours, minutes = divmod(minutes, 60)
    return hours, minutes, total_seconds


seconds = 5000

hours, minutes, seconds = split_hms(seconds)
print(hours,"hour",minutes, "min",seconds, "sec")

# 64. Telling odd from even

number = 10

# A remainder of 0 when dividing by 2 means the number is even.
print("Even!" if number % 2 == 0 else "Odd!")

# 65. Comparing two numbers

num1, num2 = 10, 10

# Report which of the two numbers is larger, or that they are equal.
# NOTE(review): "sencond" in the last message is a typo for "second"; the
# string is kept unchanged here so the printed output stays the same.
if num1 == num2:
    print("equal")
elif num1 > num2:
    print("first number")
else:
    print("sencond number")

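# Naming note: a name written entirely in upper case (UPPER_SNAKE_CASE) marks a constant, i.e. a value that is not meant to change
# Naming note: class names use CapWords (PascalCase) - no separators, each word starts with a capital letter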

# 66. Assigning a grade


def letter_grade(score):
    """Return the letter grade for a score.

    Above 80 is 'A', above 60 is 'B', above 40 is 'C', anything else is 'F'.
    The fallback branch used to assign to a misspelled name (`garde`), which
    left `grade` undefined for scores of 40 or below.
    """
    if score > 80:
        return 'A'
    if score > 60:
        return 'B'
    if score > 40:
        return 'C'
    return 'F'


score = 70

grade = letter_grade(score)
print("Grade: ",grade)

# 67. Announcing pass / fail for a list of scores

scores = [20, 50, 10, 60, 70]
cutoff = 50

# One message per score: strictly above the cut-off passes.
for score in scores:
    print("Pass!" if score > cutoff else "Try again")

# 68. Converting scores to letter grades

scores = [20, 50, 10, 60, 90]
grades = []

# Grade boundaries from best to worst; a score must be strictly above the
# boundary to earn the letter. Anything at or below the last one is 'F'.
grade_floors = (('A', 80), ('B', 60), ('C', 40))

for score in scores:
    earned = next(
        (letter for letter, floor in grade_floors if score > floor), 'F'
    )
    grades.append(earned)
print(grades)

# 69. Mean score of the students who passed / did not pass

# The list must be bound to `scores`, the name the loop below iterates over.
# It was previously assigned to `score`, so the loop silently reused whatever
# `scores` list an earlier exercise had left behind.
scores = [20, 50, 10, 60, 90]
cutoff = 50

# Running sum and head count for each group (p: passed, np: not passed).
p_score_sum, n_p = 0, 0
np_score_sum, n_np = 0, 0

for score in scores:
    if score > cutoff:
        p_score_sum += score
        n_p += 1
    else:
        np_score_sum += score
        n_np += 1

p_score_mean = p_score_sum / n_p
np_score_mean = np_score_sum / n_np

print("mean of passed scores: ",p_score_mean)
print("mean of non passed scores: ",np_score_mean)

# 70. Telling odd from even across a list

# The integers 0..9 followed by one non-integer value.
numbers = list(range(10))
numbers.append(3.14)
print(numbers)

# The remainder modulo 2 is 0 for even and 1 for odd integers; any other
# remainder means the value is not an integer.
parity_messages = {0: "Even Number", 1: "Odd Number"}

for num in numbers:
    print(parity_messages.get(num % 2, "Not a Integer"))

# 71. Summing the multiples of a number

multiple_of = 23
numbers = list(range(100))

# A remainder of 0 means the value is a multiple of multiple_of; add those up.
sum_multiple_of_n = sum(value for value in numbers if value % multiple_of == 0)

print(sum_multiple_of_n)

# 72. Finding the maximum and minimum

scores = [60, 40, 70, 20, 30]

# Fixed starting values. This only works while every score lies between
# 0 and 100; negative or much larger data would give wrong results.
M, m = 0, 100

# The maximum and the minimum are tracked independently, so each score is
# checked against both (two separate updates, not an if / elif pair).
for score in scores:
    M = max(M, score)
    m = min(m, score)

print("Max value: ", M)
print("min value: ", m)

# 73. Finding the maximum and minimum (2)

scores = [-20, 60, 40, 70, 120]

# Method 1
# Seed both extremes with the first element of the data, so the comparison
# baseline always comes from the data itself. This avoids the limitation of
# fixed starting values such as 0 and 100.
M, m = scores[0], scores[0]

# Two independent checks: one for the maximum, one for the minimum.
for score in scores:
    if score > M:
        M = score
    if score < m:
        m = score
print("Max value :", M)
print("min value :", m)

# Method 2
# Start from None and adopt the first score encountered.
M, m = None, None

# `is None` is the idiomatic identity test for the None singleton.
for score in scores:
    if M is None or score > M:
        M = score
    if m is None or score < m:
        m = score

print("Max value :", M)
print("min value :", m)

# 74. Min-max normalization: one of several normalization methods
#
# normalized = (value - min) / (max - min)
# Every result lies between 0 and 1.
#
# Scaling data can be done in several ways, for example:
#   1. Normalization
#   2. Standardization

# ----
scores = [-20, 60, 40, 70, 120]

# Find the extremes, seeding both with the first element.
M, m = scores[0], scores[0]

for score in scores:
    M = score if score > M else M
    m = score if score < m else m


# Rescale every element in place to the 0..1 range.
for score_idx, raw_score in enumerate(scores):
    scores[score_idx] = (raw_score - m) / (M - m)

print("scores after normalization: \n", scores)

# ----
# Check: find the extremes of the normalized data again, re-seeding from its
# first element.
M, m = scores[0], scores[0]

for score in scores:
    M = score if score > M else M
    m = score if score < m else m

# After min-max normalization the maximum is 1 and the minimum is 0.
print("Max value: ", M)
print("Min value: ", m)

# 75. Finding the positions of the maximum and minimum

# Review exercise
scores = [60, -20, 40, 120, 70]
M, m = None, None
M_idx, m_idx = 0, 0

# enumerate yields the position together with the value; the position is
# what gets recorded whenever a new extreme is found.
for score_idx, score in enumerate(scores):
    if M is None or score > M:
        M = score
        M_idx = score_idx
    if m is None or score < m:
        m = score
        m_idx = score_idx

print('M/M_idx: ', M, M_idx)
print('m/m_idx: ', m, m_idx)

# 75. Finding the positions of the maximum and minimum
# Review exercise (second pass)
scores = [60, -20, 40, 120, 70]

M, m = None, None
M_idx = 0
m_idx = 0

# The loop body is indented consistently: the original mixed a 3-space line
# with 4-space lines, which is an IndentationError.
for score_idx in range(len(scores)):
    score = scores[score_idx]

    if M is None or score > M:
        M = score
        M_idx = score_idx
    if m is None or score < m:
        m = score
        m_idx = score_idx
print(M, M_idx)
print(m, m_idx)

# 76. Sorting
#
# Uses the "position of the maximum" technique to sort a list.
# This is a selection-style sort (one of many sorting algorithms): on every
# pass the largest remaining score is moved to the end of sorted_scores, so
# the result is in descending order.

scores = [40, 20, 30, 10, 50]
sorted_scores = []

# One pass per element of the original list.
for _ in range(len(scores)):
    # Seed the search with the first remaining score.
    M, M_idx = scores[0], 0

    # Locate the largest remaining score and remember its position.
    for score_idx, candidate in enumerate(scores):
        if candidate > M:
            M, M_idx = candidate, score_idx

    # Move the maximum to the sorted list and keep everything else, in its
    # original order, as the list for the next pass.
    sorted_scores.append(scores[M_idx])
    tmp_scores = scores[:M_idx] + scores[M_idx + 1:]
    scores = tmp_scores
    print("remaining scores: ", scores)
    print("sorted scores: ", sorted_scores, '\n')

Pygmalion Dali

이전 포스트

다음 포스트

[python] 통계 지식 기반 연습 Day 1

python

[python] 통계 지식 기반 연습 Day 2

0개의 댓글