`collections` 내장 모듈의 `namedtuple` 사용을 고려하라.
`dataclasses` 내장 모듈을 사용하라.
`class`는 언제 쓸까?
`namedtuple`을 사용하는 모든 부분을 제어할 수 있는 상황이 아니라면, 명시적으로 새로운 class를 정의하는 편이 낫다.
book = Gradebook()
# Example usage: fetch a student by name (get_student indexes a defaultdict,
# so the record is created on first access), then report (score, weight)
# pairs per subject and print the student's overall average.
albert = book.get_student('알버트 아인슈타인')
# NOTE(review): the name `math` would shadow the stdlib module if it were imported here.
math = albert.get_subject('수학')
math.report_grade(75, 0.05)
math.report_grade(65, 0.15)
math.report_grade(70, 0.80)
gym = albert.get_subject('체육')
gym.report_grade(100, 0.40)
gym.report_grade(85, 0.60)
# average_grade() averages the per-subject weighted averages.
print(albert.average_grade())
class Gradebook:
    """Top-level container mapping student names to ``Student`` records."""

    def __init__(self):
        # defaultdict builds an empty Student the first time a name is looked up.
        self._students = defaultdict(Student)

    def get_student(self, name):
        """Return the record stored under ``name``, creating it on first access."""
        return self._students[name]
class Student:
    """A single student's subjects, keyed by subject name."""

    def __init__(self):
        # defaultdict builds an empty Subject the first time a name is looked up.
        self._subjects = defaultdict(Subject)

    def get_subject(self, name):
        """Return the subject stored under ``name``, creating it on first access."""
        return self._subjects[name]

    def average_grade(self):
        """Return the unweighted mean of every subject's average grade.

        Raises:
            ZeroDivisionError: If the student has no subjects yet.
        """
        averages = [
            subject.average_grade() for subject in self._subjects.values()
        ]
        return sum(averages) / len(averages)
class Subject:
    """The weighted grades reported for one subject."""

    def __init__(self):
        self._grades = []

    def report_grade(self, score, weight):
        """Record one ``Grade(score, weight)`` entry for this subject."""
        self._grades.append(Grade(score, weight))

    def average_grade(self):
        """Return the weighted average: sum(score * weight) / sum(weight).

        Raises:
            ZeroDivisionError: If no grade was reported or the weights sum to zero.
        """
        total = sum(grade.score * grade.weight for grade in self._grades)
        total_weight = sum(grade.weight for grade in self._grades)
        return total / total_weight
from collections import namedtuple
# Immutable record holding one reported grade: its score and its weight.
Grade = namedtuple(typename='Grade', field_names=['score', 'weight'])
`__call__` 특별 메서드를 사용하면, class의 instance인 객체를 일반 파이썬 함수처럼 호출할 수 있다.
`__call__` 메서드가 있는 클래스를 정의할지 고려해보자.
closure 함수
파이썬 함수 = first-class citizen 객체
식(expression)
API(Application Programming Interface)
# Sort in place by length, passing the built-in `len` as the key hook.
names = ['asd', 'db', 'dsdf']
names.sort(key=len)
>>>
['db', 'asd', 'dsdf']
from collections import defaultdict
def log_missing():
    """Default-value hook: announce that a key was added and supply 0 for it."""
    print('키 추가됨')
    return 0
# Seed the defaultdict with the existing counts; log_missing is called only
# for keys that are not present yet and supplies their starting value.
current = {'초록': 12, '파랑': 3}
increments = [('빨강', 5), ('파랑', 17), ('주황', 9)]
result = defaultdict(log_missing, current)
for key, amount in increments:
    result[key] += amount
>>>
키 추가됨
키 추가됨
이후: {'초록': 12, '파랑': 20, '빨강': 5, '주황': 9}
class BetterCountMissing:
    """Callable default-value hook that counts how many times it was invoked."""

    def __init__(self):
        self.added = 0

    def __call__(self):
        """Record one more invocation and return 0 as the default value."""
        self.added += 1
        return 0
# An instance is callable like a plain function thanks to __call__.
counter = BetterCountMissing()
assert counter() == 0
assert callable(counter)

# Use a fresh instance as the hook: the call above already bumped `added`.
counter = BetterCountMissing()
result = defaultdict(counter, current)  # relies on __call__
for key, amount in increments:
    result[key] += amount
assert counter.added == 2
`__call__` 메서드는 (API hook 처럼) 함수가 argument로 쓰일 수 있는 부분에, 이 클래스의 instance를 사용할 수 있다는 사실을 나타낸다.
`__call__`이라는 사실을 쉽게 알 수 있으며, 이 클래스를 만든 목적이 상태를 저장하는 closure 역할이라는 사실을 잘 알 수 있다.
`__init__` 메서드뿐이다.
import os
import random
# Directory that receives the generated sample input files.
tmpdir = 'test_inputs'
# NOTE(review): write_test_files is defined right after this call; if these
# snippets are run as one script, the definition has to be executed first.
write_test_files(tmpdir)
def write_test_files(tmpdir):
    """Create ``tmpdir`` and fill it with 100 files of random line counts.

    Files are named ``0`` .. ``99``; each holds between 0 and 100 newline
    characters and nothing else.

    Args:
        tmpdir: Path of the directory to populate.
    """
    # exist_ok lets the example be re-run without failing on the leftover
    # directory; existing files are simply overwritten.
    os.makedirs(tmpdir, exist_ok=True)
    for i in range(100):
        with open(os.path.join(tmpdir, str(i)), 'w') as f:
            f.write('\n' * random.randint(0, 100))
# Run the pipeline over the generated directory and print the total it returns.
result = mapreduce(tmpdir)
print(f'총 {result} 줄이 있습니다.')
def mapreduce(data_dir):
    """Build inputs and workers for ``data_dir`` and return the combined result.

    Chains ``generate_inputs`` -> ``create_workers`` -> ``execute``.
    """
    inputs = generate_inputs(data_dir)
    workers = create_workers(inputs)
    return execute(workers)
def generate_inputs(data_dir):
    """Yield one ``PathInputData`` per entry that ``os.listdir(data_dir)`` returns."""
    for name in os.listdir(data_dir):
        yield PathInputData(os.path.join(data_dir, name))
class InputData:
    """Interface for a readable input source; subclasses must override ``read``."""

    def read(self):
        """Return the input's content. Must be implemented by subclasses."""
        raise NotImplementedError
class PathInputData(InputData):
    """``InputData`` whose content comes from a file path."""

    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        """Open ``self.path`` in text mode and return its entire content."""
        with open(self.path) as f:
            return f.read()
def create_workers(input_list):
    """Return a list with one ``LineCountWorker`` per item of ``input_list``."""
    return [LineCountWorker(input_data) for input_data in input_list]
class Worker:
    """Interface for a map/reduce worker bound to one input.

    ``result`` starts as ``None``; subclasses are expected to fill it in
    ``map`` and combine it in ``reduce``.
    """

    def __init__(self, input_data):
        self.input_data = input_data
        self.result = None

    def map(self):
        """Process ``self.input_data``. Must be implemented by subclasses."""
        raise NotImplementedError

    def reduce(self, other):
        """Merge ``other`` into this worker. Must be implemented by subclasses."""
        raise NotImplementedError
class LineCountWorker(Worker):
    """Worker that counts newline characters in its input."""

    def map(self):
        """Read the input and store its number of ``'\\n'`` characters in ``result``."""
        data = self.input_data.read()
        self.result = data.count('\n')

    def reduce(self, other):
        """Add ``other.result`` onto this worker's ``result``."""
        self.result += other.result
from threading import Thread
def execute(workers):
    """Run every worker's ``map`` in its own thread, then fold the results.

    Args:
        workers: Iterable of objects exposing ``map()``, ``reduce(other)``
            and a ``result`` attribute.

    Returns:
        The ``result`` of the first worker after every other worker has been
        reduced into it.

    Raises:
        ValueError: If ``workers`` is empty.
    """
    # Materialize once: the workers are walked twice (thread creation, then
    # reduction), which would silently exhaust a generator.
    workers = list(workers)
    if not workers:
        raise ValueError('execute() requires at least one worker')

    threads = [Thread(target=worker.map) for worker in workers]
    for thread in threads:
        thread.start()
    # Wait for all map steps before any result is read.
    for thread in threads:
        thread.join()

    first, *rest = workers
    for worker in rest:
        first.reduce(worker)
    return first.result
위 코드의 문제점
`__init__`밖에 없다는 점이 문제다.
해결책
### GenericInputData와 PathInputData를 사용한 방법
# The concrete worker and input classes are passed in explicitly; `config`
# carries the directory that PathInputData.generate_inputs reads from.
config = {'data_dir': tmpdir}
result = mapreduce(LineCountWorker, PathInputData, config)
print(f'총 {result} 줄이 있습니다.')
def mapreduce(worker_class, input_class, config):
    """Create workers through ``worker_class`` and return the executed result.

    Args:
        worker_class: Class providing ``create_workers(input_class, config)``.
        input_class: Input class forwarded to ``create_workers``.
        config: Configuration forwarded to ``create_workers``.
    """
    workers = worker_class.create_workers(input_class, config)
    return execute(workers)
class LineCountWorker(GenericWorker):
    """Worker that counts newline characters in its input."""

    def map(self):
        """Read the input and store its number of ``'\\n'`` characters in ``result``."""
        data = self.input_data.read()
        self.result = data.count('\n')

    def reduce(self, other):
        """Add ``other.result`` onto this worker's ``result``."""
        self.result += other.result
class GenericWorker:
    """Base map/reduce worker that also knows how to build its own instances."""

    def __init__(self, input_data):
        self.input_data = input_data
        self.result = None

    def map(self):
        """Process ``self.input_data``. Must be implemented by subclasses."""
        raise NotImplementedError

    def reduce(self, other):
        """Merge ``other`` into this worker. Must be implemented by subclasses."""
        raise NotImplementedError

    @classmethod
    def create_workers(cls, input_class, config):
        """Return one ``cls`` instance per input from ``input_class.generate_inputs(config)``.

        Instantiating through ``cls`` means a subclass gets workers of its
        own type without overriding this method.
        """
        return [
            cls(input_data)
            for input_data in input_class.generate_inputs(config)
        ]
class PathInputData(GenericInputData):
    """Input whose content comes from a file path."""

    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        """Open ``self.path`` in text mode and return its entire content."""
        with open(self.path) as f:
            return f.read()

    @classmethod
    def generate_inputs(cls, config):
        """Yield one instance per entry listed in ``config['data_dir']``."""
        data_dir = config['data_dir']
        for name in os.listdir(data_dir):
            yield cls(os.path.join(data_dir, name))
class GenericInputData:
    """Interface for input sources that can also enumerate themselves from a config."""

    def read(self):
        """Return the input's content. Must be implemented by subclasses."""
        raise NotImplementedError

    @classmethod
    def generate_inputs(cls, config):
        """Produce the inputs described by ``config``. Must be implemented by subclasses."""
        raise NotImplementedError