본 포스팅은 YBIGTA DA 19기 이재영 팀장님의 발제 자료를 바탕으로 작성되었습니다.
왜 SQL을 공부해야 하는가?
데이터 분석에서 제일 중요한 것은 목적에 부합하는 데이터들을 알맞게 추출하는 것인데, 이 때 SQL이 큰 역할을 한다. 대부분의 회사, 정부기관, 은행 등의 주요 데이터들은 RDBMS의 형태로 저장 및 관리되고 있다. 앞서 말했듯, SQL은 이러한 RDBMS의 데이터 관리에 최적화된 언어이다. SQL을 통해 필요에 맞는 데이터를 쉽고 빠르게 추출할 수 있는 동시에, 데이터를 추출하는 과정에서 데이터 자체에 대한 이해도도 높일 수 있다. 이것이야말로 일석이조?
SQL을 통해 다양한 데이터에 접근이 가능 + SQL을 통해 추출한 데이터를 이용해 올바른 분석이 가능!
select 1+2;
select 열 이름 # 열 여러 개일 경우 쉼표(,) 사용
from 테이블명;
select 열 이름
from 테이블명
where 조건
group by 열 이름
having 검색 조건(about group by)
order by 열 이름(asc/desc or 1, 2);
# 집계함수: sum, avg, count, max, min 등
# where 조건: 비교(=, <>, >=, >, <=, <), 범위(between a and b),포함(in, not in), 패턴(like, not like), null(is null, is not null), 여러개 묶기(and, or, not)
select * from 테이블명; # 전체 선택
select distinct 열 이름 from 테이블명; # 중복 제거
select 열 이름 as ___ from 테이블명; # 필드 명 재설정
Join(조인)은 한 데이터베이스 내의 여러 테이블의 레코드를 조합하여 하나의 열로 표현한 것이다. 따라서 조인은 테이블로서 저장되거나, 그 자체로 이용할 수 있는 결과 셋을 만들어 낸다. (위키백과)
MySQL을 통해 실습 진행
데이터: MySQL Sample Database
요구사항으로부터 얻어낸 정보들을 개체(Entity), 애트리뷰트(Attribute), 관계성(Relation)으로 기술하는 데이터 모델
show tables from classicmodels;
select count(distinct customerNumber) from customers;
select count(customerNumber) from customers
where contactLastName = 'Brown';
select count(customerNumber) from customers where phone like "4%";
select distinct country from customers;
select salesRepEmployeeNumber, count(distinct customerNumber) as 담당고객수 from customers
where salesRepEmployeeNumber is not null group by salesRepEmployeeNumber;
select salesRepEmployeeNumber, count(distinct customerNumber) as 담당고객수 from customers
where salesRepEmployeeNumber is not null group by salesRepEmployeeNumber
having count(distinct customerNumber) >= 7;
select extract(year from orderDate) as year, extract(month from orderDate) as month,
count(distinct customerNumber) as 고객수 from orders
group by 1, 2
order by 1, 2;
# 다른 풀이
select date_format(orderDate, '%y-%m') as '연-월',
count(orderNumber) as '고객 수'
from orders group by 1;
select extract(year from orderDate) as year,
extract(month from orderDate) as month,
count(distinct officeCode) as office개수 from orders
left join customers on orders.customerNumber = customers.customerNumber
left join employees on customers.salesRepEmployeeNumber = employees.employeeNumber
group by 1, 2
order by 1, 2;
select extract(year from orderDate) as year,
extract(month from orderDate) as month,
count(distinct orderNumber) as 주문건수,
avg(shippedDate-orderDate) as 평균소요일
from orders
where extract(year from orderDate) = 2003
group by 1, 2
order by 1, 2;
select country, count(distinct orderNumber) as 주문건수,
avg(shippedDate-orderDate) as 평균소요일 from orders
left join customers on orders.customerNumber = customers.customerNumber
group by 1;
select extract(year from orderDate) as year,
extract(month from orderDate) as month,
count(distinct orderNumber) as 주문건수,
avg(shippedDate-orderDate) as 평균소요일
from orders
left join customers on orders.customerNumber = customers.customerNumber
where country = "USA"
group by 1, 2;
select extract(year from paymentDate) as year,
extract(month from paymentDate) as month,
sum(amount) as '매출 합' from payments group by 1, 2;
select extract(year from paymentDate) as year, extract(month from paymentDate) as month,
sum(amount) as '매출 합' from payments
left join customers on payments.customerNumber = customers.customerNumber
where country = 'USA'
group by 1, 2 order by 1, 2;
select country, extract(year from paymentDate) as year, extract(month from paymentDate) as month,
sum(amount) as '매출 합' from payments
left join customers on payments.customerNumber = customers.customerNumber
group by 1, 2, 3 order by 1, 2, 3;
select customerName, country, phone from customers
where customerNumber in (select customerNumber from payments group by 1 having sum(amount) >= 100000);
select salestable.year, salestable.month, 월매출, 월주문수
from (select extract(year from paymentDate) as year,
extract(month from paymentDate) as month, sum(amount) as 월매출
from payments group by 1, 2) as salestable
left join (select extract(year from orderDate) as year,
extract(month from orderDate) as month, count(distinct orderNumber) as 월주문수
from orders group by 1, 2)
as ordertable on salestable.year = ordertable.year and salestable.month = ordertable.month
order by 1, 2;