#!/bin/bash
# HDFS to S3 Migration Script
#
# Takes a file listing HDFS paths, prompts for a target S3 prefix, and sets up
# the timestamped log file name used for the migration.
#
# Usage: bash migration_to_s3.sh <hdfs_paths_file>
#   The script uses bash-only syntax such as [[ ]], so invoking it through
#   `sh` only works on systems where sh is bash.

# Check if input file is provided
if (( $# == 0 )); then
  echo "사용법: sh migration_to_s3.sh <hdfs_paths_file>" >&2
  echo "예시: sh migration_to_s3.sh hdfs_paths.txt" >&2
  exit 1
fi

HDFS_PATHS_FILE="$1"

# Check if input file exists
if [[ ! -f "$HDFS_PATHS_FILE" ]]; then
  echo "오류: 파일 '$HDFS_PATHS_FILE'을 찾을 수 없습니다." >&2
  exit 1
fi

# Get target S3 path from user.
# -r keeps backslashes in the typed value literal.
printf '%s' "타겟 경로를 입력하세요 : "
read -r TARGET_S3_PATH

# Validate S3 path format: scheme s3:// or s3a:// followed by a non-empty
# bucket name. A bare "s3://" is rejected because stripping its trailing slash
# below would turn it into the malformed prefix "s3:/".
if [[ ! "$TARGET_S3_PATH" =~ ^s3a?://[^/]+ ]]; then
  echo "오류: 올바른 S3 경로를 입력하세요 (s3:// 또는 s3a://로 시작)" >&2
  exit 1
fi

# Remove one trailing slash from target path if present
TARGET_S3_PATH="${TARGET_S3_PATH%/}"

# Generate timestamp for log file
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_FILE="/tmp/distcp_${TIMESTAMP}.log"

echo "=== HDFS to S3 Migration 시작 ==="
echo "입력 파일: $HDFS_PATHS_FILE"
echo "타겟 경로: $TARGET_S3_PATH"
echo "로그 파일: $LOG_FILE"
echo "================================"
# Create migration script
#
# The worker that runs in the background is generated into a private mktemp
# file. Its three inputs are written as shell-quoted variable assignments
# instead of being spliced in with sed, so characters such as |, &, quotes or
# $( ) inside a path can neither corrupt the generated script nor be executed.

#######################################
# Write the background worker script.
# Arguments:
#   $1 - path of the script file to (over)write
#   $2 - log file the worker appends all of its output to
#   $3 - target S3 prefix (without trailing slash)
#   $4 - file listing HDFS source paths, one per line
# Returns:
#   Non-zero if the output file cannot be written.
#######################################
_write_migration_script() {
  local out_file=$1 log_file=$2 target_s3_path=$3 hdfs_paths_file=$4
  {
    printf '#!/bin/bash\n'
    # %q emits each value in a form bash re-reads as exactly the same string.
    printf 'LOG_FILE=%q\n' "$log_file"
    printf 'TARGET_S3_PATH=%q\n' "$target_s3_path"
    printf 'HDFS_PATHS_FILE=%q\n' "$hdfs_paths_file"
    # Quoted delimiter: the body below is copied literally, nothing expands here.
    cat << 'EOF'
# Everything the worker prints, including distcp output, is appended to the log.
exec >> "$LOG_FILE" 2>&1 || exit 1

echo "=== HDFS to S3 Migration 로그 ==="
echo "시작 시간: $(date)"
echo "================================="
TOTAL_COUNT=0
SUCCESS_COUNT=0
FAIL_COUNT=0
while IFS= read -r hdfs_path || [[ -n "$hdfs_path" ]]; do
  # Trim leading/trailing whitespace (also drops a trailing CR) before
  # deciding whether the line is blank.
  hdfs_path="${hdfs_path#"${hdfs_path%%[![:space:]]*}"}"
  hdfs_path="${hdfs_path%"${hdfs_path##*[![:space:]]}"}"
  # Skip empty lines and comments
  if [[ -z "$hdfs_path" || "$hdfs_path" == \#* ]]; then
    continue
  fi
  # The last path component becomes the name under the target prefix
  base_name=$(basename -- "$hdfs_path")
  target_full_path="${TARGET_S3_PATH}/${base_name}"
  echo "----------------------------------------"
  echo "처리 중: $hdfs_path"
  echo "타겟: $target_full_path"
  echo "명령어: hadoop distcp -update -skipcrccheck \"$hdfs_path\" \"$target_full_path\""
  echo "시작 시간: $(date)"
  TOTAL_COUNT=$((TOTAL_COUNT + 1))
  # Run distcp with real arguments (no eval). stdin comes from /dev/null so
  # the command can never consume the remaining lines of the path list.
  if hadoop distcp -update -skipcrccheck "$hdfs_path" "$target_full_path" < /dev/null; then
    echo "✓ 성공: $hdfs_path"
    SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
  else
    echo "✗ 실패: $hdfs_path"
    FAIL_COUNT=$((FAIL_COUNT + 1))
  fi
  echo "완료 시간: $(date)"
done < "$HDFS_PATHS_FILE"
echo "========== Migration 완료 =========="
echo "전체: $TOTAL_COUNT"
echo "성공: $SUCCESS_COUNT"
echo "실패: $FAIL_COUNT"
echo "종료 시간: $(date)"
echo "===================================="
EOF
  } > "$out_file"
}

#######################################
# Print the distcp commands for the first entries of the path list, using the
# same trimming and skipping rules as the generated worker.
# Arguments:
#   $1 - file listing HDFS source paths, one per line
#   $2 - target S3 prefix (without trailing slash)
#   $3 - maximum number of commands to print (default 5)
# Outputs:
#   One command line per usable entry on stdout.
#######################################
_preview_distcp_commands() {
  local hdfs_paths_file=$1 target_s3_path=$2 limit=${3:-5}
  local hdfs_path base_name shown=0
  while (( shown < limit )) \
    && { IFS= read -r hdfs_path || [[ -n "$hdfs_path" ]]; }; do
    hdfs_path="${hdfs_path#"${hdfs_path%%[![:space:]]*}"}"
    hdfs_path="${hdfs_path%"${hdfs_path##*[![:space:]]}"}"
    if [[ -z "$hdfs_path" || "$hdfs_path" == \#* ]]; then
      continue
    fi
    base_name=$(basename -- "$hdfs_path")
    printf '%s\n' \
      "hadoop distcp -update -skipcrccheck \"$hdfs_path\" \"$target_s3_path/$base_name\""
    # Only real commands count toward the limit, not comments or blank lines.
    shown=$((shown + 1))
  done < "$hdfs_paths_file"
}

# mktemp creates the file atomically with an unpredictable suffix, readable
# and writable by the owner only.
MIGRATION_SCRIPT=$(mktemp "/tmp/run_distcp_${TIMESTAMP}.XXXXXX") || {
  echo "오류: 임시 스크립트 파일을 생성할 수 없습니다." >&2
  exit 1
}

if ! _write_migration_script "$MIGRATION_SCRIPT" "$LOG_FILE" \
  "$TARGET_S3_PATH" "$HDFS_PATHS_FILE"; then
  echo "오류: 마이그레이션 스크립트를 작성할 수 없습니다: $MIGRATION_SCRIPT" >&2
  rm -f -- "$MIGRATION_SCRIPT"
  exit 1
fi

# Make the migration script executable (owner only)
chmod u+x "$MIGRATION_SCRIPT"

# Show preview of commands to be executed
echo ""
echo "=== 실행될 명령어 미리보기 (처음 5개) ==="
_preview_distcp_commands "$HDFS_PATHS_FILE" "$TARGET_S3_PATH" 5
echo "================================"
echo ""
# Ask for confirmation: only a single "y" or "Y" starts the migration;
# any other answer (including an empty one) cancels it.
printf '%s' "위의 명령어들을 백그라운드에서 실행하시겠습니까? (y/N): "
read -r CONFIRM
case "$CONFIRM" in
  y | Y)
    # Launch the generated script detached from the terminal; its own output
    # is discarded here.
    printf '%s\n' "백그라운드에서 마이그레이션을 시작합니다..."
    nohup "$MIGRATION_SCRIPT" > /dev/null 2>&1 &
    BACKGROUND_PID=$!
    # Tell the user how to follow, inspect and stop the background job.
    printf '%s\n' \
      "마이그레이션이 백그라운드에서 실행 중입니다." \
      "프로세스 ID: $BACKGROUND_PID" \
      "로그 파일: $LOG_FILE" \
      "" \
      "진행상황 확인: tail -f $LOG_FILE" \
      "프로세스 확인: ps -p $BACKGROUND_PID" \
      "프로세스 종료: kill $BACKGROUND_PID"
    # Remove the temporary migration script a few seconds after launch.
    (sleep 5 && rm -f "$MIGRATION_SCRIPT") &
    ;;
  *)
    printf '%s\n' "마이그레이션이 취소되었습니다."
    rm -f "$MIGRATION_SCRIPT"
    ;;
esac