s3 migration 3

진웅·2025년 7월 22일

minIO

목록 보기
10/21
#!/bin/bash

# HDFS to S3 Migration Script
# Usage: sh migration_to_s3.sh <hdfs_paths_file>
#
# <hdfs_paths_file> is the file listing the HDFS paths to migrate. This
# section only verifies that the argument was given and names an existing
# regular file, and stores it in HDFS_PATHS_FILE for the rest of the script.

# The usage text invites `sh migration_to_s3.sh ...`, but the script relies on
# bash-only syntax ([[ ... =~ ... ]]). When started by a non-bash sh (for
# example dash), hand over to bash before any of that syntax is reached.
if [ -z "${BASH_VERSION:-}" ]; then
    exec bash "$0" "$@"
fi

# Check if input file is provided (diagnostics go to stderr)
if [ "$#" -eq 0 ]; then
    echo "사용법: sh migration_to_s3.sh <hdfs_paths_file>" >&2
    echo "예시: sh migration_to_s3.sh hdfs_paths.txt" >&2
    exit 1
fi

HDFS_PATHS_FILE="$1"

# Check if input file exists
if [ ! -f "$HDFS_PATHS_FILE" ]; then
    echo "오류: 파일 '$HDFS_PATHS_FILE'을 찾을 수 없습니다." >&2
    exit 1
fi

# Get target S3 path from user
# printf avoids the non-portable `echo -n`; `read -r` keeps backslashes
# literal. The default IFS is kept on purpose so that surrounding whitespace
# typed by the user is stripped.
printf '%s' "타겟 경로를 입력하세요 : "
read -r TARGET_S3_PATH

# Validate S3 path format
# A bucket name is required after the scheme: a bare "s3://" would otherwise
# lose a slash below and turn "<target>/<name>" into "s3://<name>", i.e. a
# bucket named after the source directory.
if [[ ! "$TARGET_S3_PATH" =~ ^s3a?://[^/]+ ]]; then
    echo "오류: 올바른 S3 경로를 입력하세요 (s3:// 또는 s3a://로 시작)" >&2
    exit 1
fi

# Remove every trailing slash from the target path. The validation above
# guarantees a non-slash bucket character, so this never reaches the scheme.
while [[ "$TARGET_S3_PATH" == */ ]]; do
    TARGET_S3_PATH=${TARGET_S3_PATH%/}
done

# Per-run timestamp (YYYYmmdd_HHMMSS) used to name this run's log file
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
LOG_FILE="/tmp/distcp_${TIMESTAMP}.log"

# Start banner: echoes the inputs and the log location back to the user
printf '%s\n' \
    "=== HDFS to S3 Migration 시작 ===" \
    "입력 파일: $HDFS_PATHS_FILE" \
    "타겟 경로: $TARGET_S3_PATH" \
    "로그 파일: $LOG_FILE" \
    "================================"

# Create migration script

#######################################
# Write the background runner that performs one `hadoop distcp` per listed
# path and appends its progress to a log file.
#
# The run-specific values are emitted as shell-quoted (printf %q) variable
# assignments at the top of the runner, followed by a fixed, unexpanded body.
# Characters such as &, |, $, quotes or backslashes in those values therefore
# stay plain data instead of being interpreted by sed or by the shell.
#
# Arguments:
#   $1 - file to write the runner script to (overwritten)
#   $2 - log file the runner appends to
#   $3 - target S3 prefix; each path is copied to <prefix>/<basename of path>
#   $4 - file listing the HDFS source paths, one per line
# Returns:
#   non-zero if the runner file cannot be written
#######################################
write_migration_script() {
    local script_path=$1
    local log_file=$2
    local target_s3_path=$3
    local hdfs_paths_file=$4

    {
        printf '%s\n' '#!/bin/bash'
        printf 'LOG_FILE=%q\n' "$log_file"
        printf 'TARGET_S3_PATH=%q\n' "$target_s3_path"
        printf 'HDFS_PATHS_FILE=%q\n' "$hdfs_paths_file"
        cat << 'EOF'

# Everything the runner prints, including distcp output, goes to the log.
exec >> "$LOG_FILE" 2>&1 || exit 1

echo "=== HDFS to S3 Migration 로그 ==="
echo "시작 시간: $(date)"
echo "================================="

TOTAL_COUNT=0
SUCCESS_COUNT=0
FAIL_COUNT=0

while IFS= read -r hdfs_path || [ -n "$hdfs_path" ]; do
    # Remove leading/trailing whitespace first, so that whitespace-only lines
    # are recognised as empty below. Pure parameter expansion: quotes and
    # backslashes inside the path are left untouched.
    hdfs_path="${hdfs_path#"${hdfs_path%%[![:space:]]*}"}"
    hdfs_path="${hdfs_path%"${hdfs_path##*[![:space:]]}"}"

    # Skip empty lines and comments
    if [[ -z "$hdfs_path" || "$hdfs_path" == \#* ]]; then
        continue
    fi

    # Extract the last directory/file name from HDFS path
    BASENAME=$(basename -- "$hdfs_path")

    # Construct target path
    TARGET_FULL_PATH="$TARGET_S3_PATH/$BASENAME"

    # The command is kept as an argument array and executed directly (no
    # eval), so nothing inside a path line is ever run as shell code.
    DISTCP_CMD=(hadoop distcp -update -skipcrccheck "$hdfs_path" "$TARGET_FULL_PATH")

    echo "----------------------------------------"
    echo "처리 중: $hdfs_path"
    echo "타겟: $TARGET_FULL_PATH"
    echo "명령어: hadoop distcp -update -skipcrccheck \"$hdfs_path\" \"$TARGET_FULL_PATH\""
    echo "시작 시간: $(date)"

    TOTAL_COUNT=$((TOTAL_COUNT + 1))

    # Execute distcp command. stdin is detached so the command cannot consume
    # the remaining lines of the paths file that feeds this loop.
    if "${DISTCP_CMD[@]}" < /dev/null; then
        echo "✓ 성공: $hdfs_path"
        SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
    else
        echo "✗ 실패: $hdfs_path"
        FAIL_COUNT=$((FAIL_COUNT + 1))
    fi

    echo "완료 시간: $(date)"

done < "$HDFS_PATHS_FILE"

echo "========== Migration 완료 =========="
echo "전체: $TOTAL_COUNT"
echo "성공: $SUCCESS_COUNT"
echo "실패: $FAIL_COUNT"
echo "종료 시간: $(date)"
echo "===================================="
EOF
    } > "$script_path"
}

# mktemp creates the file exclusively under an unpredictable name, so the
# script that gets executed later cannot be pre-planted in /tmp.
MIGRATION_SCRIPT=$(mktemp "/tmp/run_distcp_${TIMESTAMP}.XXXXXX") || {
    echo "오류: 임시 마이그레이션 스크립트를 생성할 수 없습니다." >&2
    exit 1
}

write_migration_script "$MIGRATION_SCRIPT" "$LOG_FILE" "$TARGET_S3_PATH" "$HDFS_PATHS_FILE" || {
    echo "오류: 마이그레이션 스크립트를 작성할 수 없습니다." >&2
    rm -f -- "$MIGRATION_SCRIPT"
    exit 1
}

# Make the migration script executable
chmod +x "$MIGRATION_SCRIPT"

# Show preview of commands to be executed

#######################################
# Print the distcp command line for the first usable entries of a paths file.
# Lines are trimmed first; blank, whitespace-only and comment (#) lines are
# skipped and do not count towards the limit, so the preview shows up to
# <limit> real commands rather than whatever the first <limit> raw lines hold.
# Arguments:
#   $1 - file listing HDFS source paths, one per line
#   $2 - target S3 prefix
#   $3 - maximum number of commands to print (default 5)
# Outputs:
#   one `hadoop distcp ...` line per previewed path on stdout
#######################################
preview_distcp_commands() {
    local paths_file=$1
    local target_s3_path=$2
    local limit=${3:-5}
    local hdfs_path
    local shown=0

    while (( shown < limit )) && { IFS= read -r hdfs_path || [[ -n "$hdfs_path" ]]; }; do
        # Trim with parameter expansion so quotes and backslashes in a path
        # are preserved as written.
        hdfs_path="${hdfs_path#"${hdfs_path%%[![:space:]]*}"}"
        hdfs_path="${hdfs_path%"${hdfs_path##*[![:space:]]}"}"

        if [[ -z "$hdfs_path" || "$hdfs_path" == \#* ]]; then
            continue
        fi

        printf '%s\n' "hadoop distcp -update -skipcrccheck \"$hdfs_path\" \"$target_s3_path/$(basename -- "$hdfs_path")\""
        shown=$((shown + 1))
    done < "$paths_file"
}

echo ""
echo "=== 실행될 명령어 미리보기 (처음 5개) ==="
preview_distcp_commands "$HDFS_PATHS_FILE" "$TARGET_S3_PATH" 5
echo "================================"
echo ""

# Ask for confirmation
# Only a single "y" or "Y" starts the run; any other answer (including an
# empty one) cancels and removes the generated script.
printf '%s' "위의 명령어들을 백그라운드에서 실행하시겠습니까? (y/N): "
read -r CONFIRM

case "$CONFIRM" in
    [Yy])
        # Launch the generated script in the background under nohup; its own
        # stdout/stderr are discarded.
        echo "백그라운드에서 마이그레이션을 시작합니다..."
        nohup "$MIGRATION_SCRIPT" > /dev/null 2>&1 &
        BACKGROUND_PID=$!

        # Tell the user how to follow, inspect and stop the background run
        printf '%s\n' \
            "마이그레이션이 백그라운드에서 실행 중입니다." \
            "프로세스 ID: $BACKGROUND_PID" \
            "로그 파일: $LOG_FILE" \
            "" \
            "진행상황 확인: tail -f $LOG_FILE" \
            "프로세스 확인: ps -p $BACKGROUND_PID" \
            "프로세스 종료: kill $BACKGROUND_PID"

        # Remove the temporary migration script a few seconds after launch
        (sleep 5 && rm -f "$MIGRATION_SCRIPT") &
        ;;
    *)
        echo "마이그레이션이 취소되었습니다."
        rm -f "$MIGRATION_SCRIPT"
        ;;
esac
profile
bytebliss

0개의 댓글