통계에 필요한 정보들
데이터의 대표값(중심 경향성) 찾기.
# --- Central tendency: mean, median, mode, quantiles ---
# Reads `num_friends` (a sequence of numbers defined outside this section).

# Mean: arithmetic average of all observations.
mean = sum(num_friends) / len(num_friends)
print(f"mean:{mean}")

# Median: middle value of the sorted data. With an even number of
# observations there is no single middle element, so the two central
# values are averaged instead of picking the upper one.
num_friends_sorted = sorted(num_friends)
if len(num_friends_sorted) % 2 == 1:
    median = num_friends_sorted[len(num_friends_sorted) // 2]
else:
    median = (
        num_friends_sorted[len(num_friends_sorted) // 2 - 1]
        + num_friends_sorted[len(num_friends_sorted) // 2]
    ) / 2
print(f"median: {median}")

# Mode: the most frequent value. most_common(1) yields a single
# (value, count) pair, so only one mode is reported even when several
# values share the highest count.
cnt = Counter(num_friends)
mode = cnt.most_common(1)[0][0]
print(f"mode:{mode}")

# Quantiles: the element at index int(n * p) of the sorted data.
# q50 is a single element, so on even-length data it can differ from the
# averaged median computed above.
q25 = num_friends_sorted[int(len(num_friends_sorted) * 0.25)]
q50 = num_friends_sorted[int(len(num_friends_sorted) * 0.5)]
q75 = num_friends_sorted[int(len(num_friends_sorted) * 0.75)]
print(f"quantile:{q25},{q50},{q75}")
데이터가 퍼져 있는 정도(산포도)
# --- Dispersion: range endpoints, variance, standard deviation, IQR ---
# Reads `num_friends`, `mean`, `q25` and `q75` computed earlier in the script.

# Smallest and largest observations.
minval, maxval = min(num_friends), max(num_friends)
print(f"min:{minval}", f"max:{maxval}", sep="\n")

# Population variance: the average squared deviation from the mean
# (the divisor is n, not n - 1).
squared_deviations = [(value - mean) ** 2 for value in num_friends]
var = sum(squared_deviations) / len(num_friends)
print(f"var:{var}")

# Standard deviation is the square root of the variance.
print(f"stddev:{var**0.5}")

# Interquartile range: distance between the 75% and 25% quantiles.
print(f"iqr:{q75-q25}")
두 확률변수 사이의 상관관계를 나타내는 값.
# --- Association between two variables: covariance and correlation ---
# Reads `x`, `y`, `meanx`, `meany`, `stdx` and `stdy`, all defined outside
# this section (presumably the two samples with their means and standard
# deviations -- confirm against where they are computed).

# Covariance: the average product of paired deviations from each mean,
# divided by len(x).
# NOTE: zip() stops at the shorter input while the divisor is len(x), so
# x and y are assumed to have the same length.
cov = sum((xi - meanx) * (yi - meany) for xi, yi in zip(x, y)) / len(x)
print(cov)

# Correlation: covariance scaled by stdx and stdy.
# When either scale factor is zero the ratio is undefined and the division
# would raise ZeroDivisionError, so report NaN instead of crashing.
if stdx == 0 or stdy == 0:
    corr = float("nan")
else:
    corr = cov / stdx / stdy
print(corr)