사전 지식
알고리즘
메인 알고리즘
- 15분마다 한 번씩, 모든 조합에 대해 scp 전송을 시도한다.
- 1시간마다 한 번씩, 성공적으로 전송된 조합을 초기화한다.
디테일
- 내 pc의
~/PycharmProjects/nl_navigation/nl_navigation/summaries/[target_branch]__[note_folder_name]
에 저장
- 원격 pc의
~/PycharmProjects/nl_navigation/nl_navigation/[target_branch]/[note_folder_name]/controller/summary/*
을 가져옴.
- ssh/scp 포트는 일단 3100번으로 설정해 두었음 (5번 PC만 3101번).
사용법
python scp.py 0:target_branch1 1:target_branch2
- 각 인자는 [target_pc_number]:[target_branch] 형식이며, 저장 위치는 --my_save_base_folder 옵션으로 바꿀 수 있음.
TODO
- 서버의 학습이 종료된 것을 감지해서, tensorboard를 그 타이밍에 받아 올 수 있을까?
코드
import os
import subprocess
import time
from tqdm import tqdm
from typing import List, Tuple, Optional
import argparse
def get_target_pc_ip_and_port_and_user_name_and_target_base_folder(
        target_pc_number: str) -> Tuple[str, List[int], str, str]:
    """Look up the connection settings registered for a target PC.

    Args:
        target_pc_number: PC identifier as a string, one of '0' to '5'.

    Returns:
        A 4-tuple ``(host, ports, user_name, target_base_folder)``:
        the host name or IP address, the list of ssh ports to try, the
        login user name, and the base folder on that PC.

    Raises:
        ValueError: If ``target_pc_number`` is not a known identifier.
    """
    default_base_folder = "~/PycharmProjects/nl_navigation/nl_navigation"
    shared_base_folder = (
        "~/workspace/shared_storage/hsb/nl_navigation/nl_navigation")

    # One row per PC: (host, ssh ports, user name, base folder).
    # PCs '0' and '1' share the same machine and settings.
    known_pcs = {
        '0': ('10.60.188.51', (3100,), "hsb", default_base_folder),
        '1': ('10.60.188.51', (3100,), "hsb", default_base_folder),
        '2': ('10.60.79.96', (3100,), "swdl", shared_base_folder),
        '3': ('10.60.79.31', (3100,), "user", shared_base_folder),
        '4': ('homehsb.iptime.org', (3100,), "hsb", default_base_folder),
        '5': ('homehsb.iptime.org', (3101,), "hsb", default_base_folder),
    }

    # Non-string identifiers never match, exactly as with string comparison.
    entry = (known_pcs.get(target_pc_number)
             if isinstance(target_pc_number, str) else None)
    if entry is None:
        raise ValueError("target_pc_number는 0, 1, 2, 3,4, 5 이어야 합니다.")

    host, ports, user_name, target_base_folder = entry
    # Return a fresh list so callers cannot mutate the table entries.
    return host, list(ports), user_name, target_base_folder
def get_target_path(user_name: str, target_pc_ip: str, target_branch: str, port: int,
                    target_base: str) -> Tuple[str, str]:
    """Locate the remote summary files of a branch by listing it over ssh.

    Runs ``ls`` on ``{target_base}/{target_branch}`` on the remote host and
    picks the first listed entry whose name starts with ``note``.

    Args:
        user_name: Login name used for the ssh connection.
        target_pc_ip: Host name or IP address of the remote PC.
        target_branch: Branch folder name under ``target_base``.
        port: ssh port to connect to.
        target_base: Base folder on the remote PC.

    Returns:
        ``(target_path, note_folder_name)``, where ``target_path`` is
        ``{target_base}/{target_branch}/{note_folder_name}/controller/summary/*``.

    Raises:
        ConnectionError: If the ssh command exits with a non-zero status;
            the underlying ``CalledProcessError`` is attached as the cause.
        ValueError: If no listed entry starts with ``note``.
    """
    target_path = f"{target_base}/{target_branch}"
    command = f"ssh -p {port} {user_name}@{target_pc_ip} 'ls {target_path}'"
    print(f"[Try] to connect to {target_pc_ip}:{port} {target_path} ")

    # Only the ssh call can raise CalledProcessError, so guard just that.
    try:
        listing = subprocess.check_output(command, shell=True, text=True)
    except subprocess.CalledProcessError as e:
        # Chain the original failure so its exit status stays visible.
        raise ConnectionError(f"{command}에 대한 ssh 연결이 안됩니다.") from e

    note_folders = [
        folder for folder in listing.splitlines() if folder.startswith('note')
    ]
    if not note_folders:
        raise ValueError(f"{target_path}에 'note'로 시작하는 폴더가 없습니다.")

    note_folder_name = note_folders[0]
    target_path = f"{target_path}/{note_folder_name}/controller/summary/*"
    return target_path, note_folder_name
def scp_transfer(scp_command: str) -> bool:
    """Run one transfer command through the shell and report the outcome.

    Args:
        scp_command: Complete shell command line to execute.

    Returns:
        True if the command exited with status 0, False otherwise.
        A success or failure line is printed either way.
    """
    outcome = subprocess.run(scp_command, shell=True)
    try:
        # Raises CalledProcessError for any non-zero exit status.
        outcome.check_returncode()
    except subprocess.CalledProcessError as failure:
        print(f"{scp_command} [전송 실패]: {failure}")
        return False
    print(f"{scp_command} [전송 성공]")
    return True
def scheduler(scp_commands: List[str],
              copy_and_paste_command: Optional[str] = None) -> None:
    """Retry the transfers on a fixed schedule, forever (never returns).

    Algorithm:
        1. Every 15 minutes, run each scp command that has not succeeded
           since the last reset, then the optional local copy command if
           it has not succeeded either.
        2. Every hour, forget all recorded successes so that everything
           is transferred again.
        3. Otherwise sleep for one minute before checking again.

    Args:
        scp_commands: Shell command lines, each passed to ``scp_transfer``.
        copy_and_paste_command: Optional shell command for a local copy;
            skipped when None or empty.
    """
    try_interval_sec = 15 * 60
    reset_interval_sec = 60 * 60
    idle_sleep_sec = 60

    successful_scps = set()
    copy_and_paste_success = False
    # Intervals use the monotonic clock so wall-clock adjustments cannot
    # stall or re-trigger the schedule. -inf makes the first pass immediate.
    scp_try_time = float("-inf")
    reset_time = time.monotonic()

    while True:
        if time.monotonic() - scp_try_time > try_interval_sec:
            scp_try_time = time.monotonic()
            print("----------[새로운 scp 전송 시도]----------")
            for scp_command in scp_commands:
                # Skip commands that already succeeded in this cycle.
                if scp_command in successful_scps:
                    continue
                if scp_transfer(scp_command):
                    successful_scps.add(scp_command)
            success_num = len(successful_scps)
            print(f"성공적으로 전송된 조합 수: "
                  f"{success_num}/{len(scp_commands)}")

            if copy_and_paste_command and not copy_and_paste_success:
                print("----------[새로운 copy and paste 시도]----------")
                try:
                    subprocess.run(copy_and_paste_command,
                                   shell=True,
                                   check=True)
                except subprocess.CalledProcessError as e:
                    print(f"{copy_and_paste_command} [전송 실패]: {e}")
                else:
                    print(f"{copy_and_paste_command} [전송 성공]")
                    copy_and_paste_success = True
            print("----------[scp 전송 시도 종료]----------")

        if time.monotonic() - reset_time > reset_interval_sec:
            print("1시간이 지나서 성공적으로 전송된 조합을 초기화합니다.")
            successful_scps = set()
            copy_and_paste_success = False
            reset_time = time.monotonic()
        else:
            print("1분 쉽니다.")
            time.sleep(idle_sleep_sec)
def parse_arguments() -> argparse.Namespace:
    """Parse the command line of the transfer scheduler.

    Returns:
        A namespace with ``target_pc_numbers_and_branches`` (one or more
        positional strings) and ``my_save_base_folder`` (the local base
        directory, defaulting to the project's ``summaries`` folder).
    """
    combinations_help = (
        "Combinations of target_pc_number and target_branch. Each combination is separated by a colon. E.g., 0:branch1 1:branch2"
    )
    default_save_base = "~/PycharmProjects/nl_navigation/nl_navigation/summaries"

    cli = argparse.ArgumentParser(description="SCP transfer scheduler")
    cli.add_argument("target_pc_numbers_and_branches",
                     nargs='+',
                     type=str,
                     help=combinations_help)
    cli.add_argument("--my_save_base_folder",
                     help="Local base save directory",
                     default=default_save_base)
    namespace = cli.parse_args()
    return namespace
def set_my_save_path(my_save_base_folder: str, target_branch: str,
                     note_folder_name: str) -> str:
    """Build the local save directory for a branch, creating it if needed.

    The directory is ``{my_save_base_folder}/{target_branch}__{note_folder_name}``
    with a leading ``~`` expanded to the user's home directory.

    Args:
        my_save_base_folder: Local base directory (may start with ``~``).
        target_branch: Branch name used as the first half of the folder name.
        note_folder_name: Note folder name used as the second half.

    Returns:
        The expanded path of the save directory.
    """
    folder_name = "__".join((target_branch, note_folder_name))
    resolved_path = os.path.expanduser(my_save_base_folder + "/" + folder_name)

    if os.path.exists(resolved_path):
        print(f"{resolved_path} 폴더가 이미 있습니다.")
        return resolved_path

    print(f"{resolved_path} 폴더가 없어서 생성합니다.")
    os.makedirs(resolved_path, exist_ok=True)
    return resolved_path
if __name__ == "__main__":
    # Script entry point: turn every "pc_number:branch" argument into an scp
    # command (or a local cp command), then hand them to the scheduler.
    args = parse_arguments()

    target_pc_numbers_and_branches = []
    for comb in args.target_pc_numbers_and_branches:
        # Require exactly one colon so a malformed argument fails here with
        # a readable message instead of a tuple-unpacking error later on.
        if comb.count(":") != 1:
            raise ValueError(
                f"'{comb}'는 'target_pc_number:target_branch' 형식이어야 합니다.")
        target_pc_numbers_and_branches.append(tuple(comb.split(":")))

    scp_commands = []
    copy_and_paste_command = None
    print("---[scp 전송 명령어 생성]---")
    for target_pc_number, target_branch in target_pc_numbers_and_branches:
        # Folder names use '_' wherever the branch name has '/'.
        target_branch = target_branch.replace('/', '_')
        (target_pc_ip, ports, user_name, target_base_folder
         ) = get_target_pc_ip_and_port_and_user_name_and_target_base_folder(
             target_pc_number)
        # NOTE(review): the `else` below is read as belonging to `if ports:`
        # (a PC entry without ports means "copy from this machine") - confirm.
        if ports:
            # Try each port in order and keep the first one that connects.
            for port in ports:
                print("---[연결 시도]---")
                try:
                    target_path, note_folder_name = get_target_path(
                        user_name, target_pc_ip, target_branch, port,
                        target_base_folder)
                except ConnectionError:
                    print(f"target_pc_number: {target_pc_number}, "
                          f"target_branch: {target_branch}에 대한 ssh 연결이 안됩니다.")
                    continue
                print(f"[연결성공][{target_pc_number}] {target_pc_ip}:{port} {target_path} ")
                target_address = f"{user_name}@{target_pc_ip}:{target_path}"
                my_save_path = set_my_save_path(args.my_save_base_folder,
                                                target_branch, note_folder_name)
                scp_command = f"scp -P {port} -r {target_address} {my_save_path}"
                print("[scp_command]:", scp_command, '\n')
                scp_commands.append(scp_command)
                break
        else:
            print("---[로컬 pc의 tensorboard summary 폴더 복사 명령어 생성 시도]---")
            target_base = "~/PycharmProjects/nl_navigation/nl_navigation"
            target_path = f"{target_base}/{target_branch}"
            command = f"ls {target_path}"
            output = subprocess.check_output(command, shell=True,
                                             text=True).splitlines()
            note_folders = [
                folder for folder in output if folder.startswith('note')
            ]
            if not note_folders:
                raise ValueError(f"{target_path}에 'note'로 시작하는 폴더가 없습니다.")
            note_folder_name = note_folders[0]
            target_path = f"{target_path}/{note_folder_name}/controller/summary/*"
            my_save_path = set_my_save_path(args.my_save_base_folder,
                                            target_branch, note_folder_name)
            # Only one local copy command is kept; a later local target
            # replaces an earlier one.
            copy_and_paste_command = f"cp -R {target_path} {my_save_path}"
            print("[copy_and_paste_command]:", copy_and_paste_command, '\n')
    print(f"---[scp 전송 명령어 생성 완료] {len(scp_commands)}개---")
    scheduler(scp_commands, copy_and_paste_command)