Airflow에서 Lambda 함수를 호출할 때 path list가 너무 길면 read timeout이 발생합니다. 이를 피하기 위해 path list를 크기 20 이하의 묶음으로 나누고, account(뒤에 묶음 번호가 붙은 `account_번호` 형태)가 key, path list가 value인 딕셔너리를 만들 때 활용한 코드입니다.
# Split path_list into per-account chunks of at most max_len paths and store
# the result as a JSON string under the "path_by_accounts" key via Variable.set.
#
# Output shape: {"<account>_<chunk index>": [path, ...], ...}, where the chunk
# index starts at 0 for every account.

# Collect the account id embedded in each path ("accountId%3D<id>") and group
# the paths by that id in a single pass. Grouping on the extracted id, rather
# than testing "accountId%3D" + account as a substring of every path, keeps an
# account such as "abc" from also collecting the paths of "abc1".
# The character class is A-Z (not A-z): the A-z range would additionally match
# "[", "\", "]", "^", "_" and "`".
account_pattern = re.compile(r"accountId%3D([a-zA-Z0-9]+)")
account_list = []
paths_by_account = {}
for path in path_list:
    account_match = account_pattern.search(path)
    if account_match is None:
        # A path without an account id cannot be assigned to any bucket;
        # report it and move on instead of failing on None.group().
        print("skipping path without accountId : {}".format(path))
        continue
    account = account_match.group(1)
    account_list.append(account)
    paths_by_account.setdefault(account, []).append(path)
unique_accounts = set(account_list)
print("number of unique accounts : {}".format(len(unique_accounts)))

# Build the per-account mapping, cutting each account's paths into slices of
# at most max_len entries. Accounts are emitted in first-seen order, so the
# resulting JSON is stable for a given path_list.
result_path_by_accounts = {}
max_len = 20
for account, path_by_account_list in paths_by_account.items():
    for offset in range(0, len(path_by_account_list), max_len):
        chunk_key = account + "_" + str(offset // max_len)
        result_path_by_accounts[chunk_key] = path_by_account_list[offset:offset + max_len]

# Serialize the mapping and hand it to Variable.set
# (presumably Airflow's Variable, going by the description above the code).
js = json.dumps(result_path_by_accounts)
Variable.set("path_by_accounts", js)