이 문서는 data-engineering-project-beginner의 set_up_infra를 진행하던 도중 WSL에서 chmod 등의 에러가 발생하여, AWS CLI도 공부할 겸 해당 과정을 정리한 것이다.
# Create the S3 bucket $BUCKET_NAME in $AWS_REGION.
# - The original `--acl public-read-write` is intentionally dropped: it makes
#   the bucket writable by anyone, and S3's current defaults for new buckets
#   (ACLs disabled, Block Public Access on) reject a public ACL at creation.
#   The EC2 role created further down gets AmazonS3FullAccess, so access is
#   expected to go through IAM instead.
# - Regions other than us-east-1 need an explicit LocationConstraint, while
#   us-east-1 must be created without one.
if [ "$AWS_REGION" = "us-east-1" ]; then
  aws s3api create-bucket --region "$AWS_REGION" --bucket "$BUCKET_NAME"
else
  aws s3api create-bucket --region "$AWS_REGION" --bucket "$BUCKET_NAME" \
    --create-bucket-configuration "LocationConstraint=$AWS_REGION"
fi
# Show which identity/account the current credentials resolve to.
aws sts get-caller-identity
# Keep only the account ID and store it in the AWS_ID variable.
# The former `| cat` was removed: inside $(...) stdout is not a terminal, so
# no pager is involved, and the pipe made the assignment report cat's exit
# status instead of aws's, hiding a failed lookup.
AWS_ID=$(aws sts get-caller-identity --query Account --output text)
# Create the IAM role named $EC2_IAM_ROLE using the trust policy in
# ./trust-policy.json, grant it AmazonS3FullAccess, and wrap it in an instance
# profile called "<role>-instance-profile". Command output is appended to
# setup.log. All expansions are quoted so a value containing whitespace or
# glob characters cannot be split into several arguments.
aws iam create-role \
  --role-name "$EC2_IAM_ROLE" \
  --assume-role-policy-document file://trust-policy.json \
  --description 'EC2 access to S3' \
  --output text >> setup.log
aws iam attach-role-policy \
  --role-name "$EC2_IAM_ROLE" \
  --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess
aws iam create-instance-profile \
  --instance-profile-name "${EC2_IAM_ROLE}-instance-profile" \
  --output text >> setup.log
aws iam add-role-to-instance-profile \
  --role-name "$EC2_IAM_ROLE" \
  --instance-profile-name "${EC2_IAM_ROLE}-instance-profile" \
  --output text >> setup.log
# Undo the instance profile "<role>-instance-profile".
# Order matters: IAM refuses to delete an instance profile that still has a
# role attached, so the role is detached first and the profile deleted second
# (the original listed these two commands the other way round).
# NOTE(review): these look like cleanup commands kept for reference; running
# them as part of the setup flow would undo the profile that was just
# created — confirm.
aws iam remove-role-from-instance-profile \
  --role-name "$EC2_IAM_ROLE" \
  --instance-profile-name "${EC2_IAM_ROLE}-instance-profile"
aws iam delete-instance-profile \
  --instance-profile-name "${EC2_IAM_ROLE}-instance-profile"
# Create the EC2 key pair "sde-key" and write its private key material to
# ./sde-key.pem.
# The command runs in a subshell with umask 077 so a newly created key file is
# readable by the owner only, without a separate chmod step (ssh refuses
# private keys that other users can read). The umask change does not leak into
# the calling shell. If sde-key.pem already exists, its current permissions
# are kept.
(
  umask 077
  aws ec2 create-key-pair \
    --key-name sde-key \
    --query "KeyMaterial" \
    --output text \
    --region "$AWS_REGION" > sde-key.pem
)
# Create the security group $EC2_SECURITY_GROUP and keep its ID.
# `--query GroupId` pins the captured value to the group ID alone; with plain
# `--output text` every field of the response is printed, so any extra field
# returned by the API would end up inside the variable as well.
EC2_SECURITY_GROUP_ID=$(aws ec2 create-security-group \
  --description "Security group to allow inbound SCP connection" \
  --group-name "$EC2_SECURITY_GROUP" \
  --query GroupId \
  --output text)

# Allow inbound TCP/22 (SSH/SCP) from the $MY_IP/24 range.
# NOTE(review): $MY_IP is presumably a single host address; combined with /24
# that is not a network-aligned CIDR — confirm the API accepts it as intended.
aws ec2 authorize-security-group-ingress \
  --group-id "$EC2_SECURITY_GROUP_ID" \
  --protocol tcp --port 22 \
  --cidr "$MY_IP/24"

# Allow outbound TCP/8080 to $MY_IP/32.
# NOTE(review): this is an egress rule; if the goal is to reach a service
# listening on port 8080 of the instance from $MY_IP, an ingress rule would be
# required instead — confirm the intent.
aws ec2 authorize-security-group-egress \
  --group-id "$EC2_SECURITY_GROUP_ID" \
  --protocol tcp --port 8080 \
  --cidr "$MY_IP/32"
# Launch one EC2 instance from $EC2_IMAGE_ID with type $AWS_EC2_INSTANCE,
# using the "sde-key" key pair and ./setup_ubuntu_docker.txt as user data, and
# tag it Name=$AWS_EC2_INSTANCE_NAME. The response is appended to setup.log.
# The tag specification is a single double-quoted word so the instance name is
# no longer spliced in unquoted between two single-quoted fragments.
aws ec2 run-instances \
  --image-id "$EC2_IMAGE_ID" \
  --instance-type "$AWS_EC2_INSTANCE" \
  --count 1 \
  --key-name sde-key \
  --user-data file://setup_ubuntu_docker.txt \
  --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=$AWS_EC2_INSTANCE_NAME}]" \
  --region "$AWS_REGION" >> setup.log
# Values used for this run (reconstructed from garbled note text that was
# fused onto the EC2_ID assignment — confirm):
#   EC2_IMAGE_ID      = ami-07d16c043aa8e5153
#   AWS_EC2_INSTANCE  = t2.medium

# Look up the ID of the running instance tagged Name=$AWS_EC2_INSTANCE_NAME.
EC2_ID=$(aws --region "$AWS_REGION" ec2 describe-instances \
  --filters "Name=instance-state-name,Values=running" "Name=tag:Name,Values=$AWS_EC2_INSTANCE_NAME" \
  --query 'Reservations[*].Instances[*].[InstanceId]' \
  --output text)

# Print the instance's current state name.
aws ec2 describe-instance-status \
  --instance-ids "$EC2_ID" \
  --query 'InstanceStatuses[0].InstanceState.Name' \
  --output text

# Despite the variable name, this stores the instance's public DNS name
# (PublicDnsName), not a numeric IPv4 address.
EC2_IPV4=$(aws --region "$AWS_REGION" ec2 describe-instances \
  --filters "Name=instance-state-name,Values=running" "Name=instance-id,Values=$EC2_ID" \
  --query 'Reservations[*].Instances[*].[PublicDnsName]' \
  --output text)

# Put the instance into the security group created earlier.
aws ec2 modify-instance-attribute \
  --instance-id "$EC2_ID" \
  --groups "$EC2_SECURITY_GROUP_ID" \
  --output text

# Attach the "<role>-instance-profile" instance profile to the instance;
# the response is appended to setup.log.
aws ec2 associate-iam-instance-profile \
  --instance-id "$EC2_ID" \
  --iam-instance-profile "Name=${EC2_IAM_ROLE}-instance-profile" \
  --output text >> setup.log
# Make sure the default EMR roles exist (EMR_DefaultRole is referenced below).
aws emr create-default-roles

# Create an EMR cluster named $SERVICE_NAME (release emr-6.2.0, Hadoop + Spark)
# with two instance groups of type $EMR_NODE_TYPE: one MASTER node and one
# CORE node, each with two 32 GB gp2 EBS volumes. The response is appended to
# setup.log.
# The first group (InstanceCount 1) is the master node. That remark used to be
# a `# master node` comment inside the JSON itself; JSON has no comment
# syntax, so it made the --instance-groups payload unparseable and has been
# moved out here.
aws emr create-cluster \
  --applications Name=Hadoop Name=Spark \
  --release-label emr-6.2.0 \
  --name "$SERVICE_NAME" \
  --scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
  --service-role EMR_DefaultRole \
  --instance-groups '[
  {
    "InstanceCount": 1,
    "EbsConfiguration": {
      "EbsBlockDeviceConfigs": [
        {
          "VolumeSpecification": {
            "SizeInGB": 32,
            "VolumeType": "gp2"
          },
          "VolumesPerInstance": 2
        }
      ]
    },
    "InstanceGroupType": "MASTER",
    "InstanceType": "'"$EMR_NODE_TYPE"'",
    "Name": "Master - 1"
  },
  {
    "InstanceCount": 1,
    "BidPrice": "OnDemandPrice",
    "EbsConfiguration": {
      "EbsBlockDeviceConfigs": [
        {
          "VolumeSpecification": {
            "SizeInGB": 32,
            "VolumeType": "gp2"
          },
          "VolumesPerInstance": 2
        }
      ]
    },
    "InstanceGroupType": "CORE",
    "InstanceType": "'"$EMR_NODE_TYPE"'",
    "Name": "Core - 1"
  }
]' >> setup.log
(어차피 날릴 거라서 id가 공유되어도 문제 없음)
# Create a publicly accessible single-node Redshift cluster (dc2.large) named
# $SERVICE_NAME, with the IAM role $IAM_ROLE_NAME from account $AWS_ID
# attached. The response is appended to setup.log.
# The role ARN is written as one double-quoted string; the original alternated
# quoted and unquoted pieces, leaving both variables unquoted.
# NOTE(review): the master password is passed on the command line, where it
# is visible to `ps` and shell tracing — acceptable only for throwaway setups.
aws redshift create-cluster \
  --cluster-identifier "$SERVICE_NAME" \
  --node-type dc2.large \
  --master-username "$REDSHIFT_USER" \
  --master-user-password "$REDSHIFT_PASSWORD" \
  --cluster-type single-node \
  --publicly-accessible \
  --iam-roles "arn:aws:iam::${AWS_ID}:role/${IAM_ROLE_NAME}" >> setup.log

# Print the cluster status.
aws redshift describe-clusters \
  --cluster-identifier "$SERVICE_NAME" \
  --query 'Clusters[0].ClusterStatus' \
  --output text

# Print the cluster endpoint address.
aws redshift describe-clusters \
  --cluster-identifier "$SERVICE_NAME" \
  --query 'Clusters[0].Endpoint.Address' \
  --output text
psql을 사용하려면 먼저 PostgreSQL 설치가 필요하다.
# Install PostgreSQL so that the psql client is available.
sudo apt-get install postgresql postgresql-contrib

# Run ./redshift_setup.sql against the "dev" database of the cluster.
# The connection URI is quoted so that credentials containing whitespace or
# glob characters stay a single argument.
# NOTE(review): REDSHIFT_HOST and REDSHIFT_PORT are not assigned in the visible
# part of these notes; presumably the host is the endpoint address printed by
# `aws redshift describe-clusters` — confirm. Characters that are special in
# URIs (e.g. @ : /) inside the user or password would need percent-encoding.
psql -f ./redshift_setup.sql "postgres://$REDSHIFT_USER:$REDSHIFT_PASSWORD@$REDSHIFT_HOST:$REDSHIFT_PORT/dev"
여기서 삽질을 했는데, Connection timed out 에러가 발생했었다.
이를 해결하려면 인바운드/아웃바운드 규칙 설정을 변경해야 한다.