1) customers의 customerNumber를 조회하세요
#SQL 버전
SELECT customerNumber FROM customers;
#판다스 버전
customers['customerNumber']
2) payments의 amount의 총합과 checknumber개수를 구하세요
#SQL 버전 1
SELECT sum(amount), count(checknumber) FROM payments;
#SQL 버전 2
SELECT sum(amount) AS '총합', COUNT(checknumber) AS '결제 수' FROM payments;
#판다스 버전 1
pd.read_sql_query("SELECT SUM(amount), COUNT(checknumber) FROM payments",con=con)
#판다스 버전 2
print (payments['amount'].sum(), payments['checkNumber'].count())
3) products의 productName, productLine을 조회하세요
#SQL 버전
SELECT productName, productLine FROM products;
#판다스 버전
products[['productName','productLine']]
4) products의 productCode의 개수를 구하고, 칼럼 명을 n_product로 변경하세요
#SQL 버전
SELECT count(productCode) AS n_product FROM products;
#판다스 버전 1
pd.read_sql_query("SELECT SUM(amount), COUNT(checknumber) FROM payments",con=con)
#판다스 버전 2
print (payments['amount'].sum(), payments['checkNumber'].count())
5) orderdetails의 ordernumber의 중복을 제거하고 조회하세요
#SQL 버전
SELECT DISTINCT ordernumber FROM orderdetails;
#판다스 버전 1
orderdetails['orderNumber'].unique()
#판다스 버전 2
orderdetails['orderNumber'].drop_duplicates()
6) orderdetails의 priceeach가 30에서 50사이인 데이터를 조회하세요
#SQL 버전 1
SELECT * FROM orderdetails WHERE 30 <= priceeach <=50 ORDER BY 4 DESC;
#SQL 버전 2 (강사님픽★★)
SELECT * FROM orderdetails WHERE priceeach BETWEEN 30 AND 50;
#판다스 버전 1
orderdetails = pd.read_sql_query('select * from orderdetails', con =con)
#판다스 버전 2
orderdetails.query("priceEach >= 30 and priceEach <=50")
#판다스 버전 3
orderdetails[orderdetails['priceEach'].apply(lambda x : 30 <= x <= 50)]
7) orderdetails의 priceeach가 30 이상인 데이터를 조회하세요
#SQL 버전
SELECT * FROM orderdetails WHERE priceeach>= 30 ORDER BY 4;
#판다스 버전 1
orderdetails[orderdetails['priceEach'].apply(lambda x : x >= 30)]
#판다스 버전 2
orderdetails.where((orderdetails['priceEach'] >= 30)).dropna()
8) customers의 country가 USA 또는 Canada인 customernumber를 조회하세요
#SQL 버전 1
SELECT customernumber FROM customers WHERE country = 'USA' OR 'Canada';
#SQL 버전 2
SELECT customernumber FROM customers WHERE country IN ("USA","Canada");
customers = pd.read_sql_query('select country, customerNumber from customers' ,con = con)
# 판다스 버전
customers.loc[customers['country'].apply(lambda x : x == 'USA' or x == 'Canada'), 'customerNumber']
customers.where((customers['country'] == 'USA')|(customers['country']=='Canada')).dropna()
customers[customers['country'].isin(['USA', 'Canada'])]
9) customers의 country가 USA, Canada가 아닌 customernumber를 조회하세요
#SQL 버전 1
SELECT customernumber FROM customers WHERE country != 'USA' OR 'Canada';
#SQL 버전 2
SELECT customernumber FROM customers WHERE country not IN ("USA","Canada");
# 판다스 버전
customers[customers['country'].notin(['USA', 'Canada'])]
customers.loc[customers['country'].apply(lambda x : x != 'USA' and x != 'Canada'), 'customerNumber']
10) employees의 reportsTo의 값이 NULL인 employeenumber를 조회하세요
# sql 버전
SELECT employeenumber FROM employees WHERE reportsTo IS NULL;
# 판다스 버전
employees = pd.read_sql_query('select reportsTo, employeeNumber from employees', con = con)
employees.loc[employees['reportsTo'].isnull(), 'employeeNumber']
11) customers의 addressline1에 ST가 포함된 addressline1을 출력하세요
# sql 버전 1
SELECT addressline1 FROM customers WHERE addressline1 LIKE "%ST%";
# sql버전 2
SELECT addresslin1 FROM customers where addressline1 LIKE '%ST%' order by 1;
# 판다스 버전
customers = pd.read_sql_query('select addresline1 from customers', con = con)
customers[customers['addressline1'].str.find('St') > -1]
12) customers 테이블을 이용해 국가, 도시별 고객 수를 구하세요
# sql 버전 1
SELECT country, city, COUNT(*) AS cnt FROM customers GROUP BY country, city;
# sql 버전 2
SELECT country,city,COUNT(customernumber) FROM customers GROUP BY country,city;
# 판다스 버전
customers = pd.read_sql_query('select * from customers', con = con)
costomers.groupby(['country', 'city'])['customerId'].count()
13) customers, orders 테이블을 결합하고 ORDERNUMBER와 COUNTRY를 출력하세요(LEFT JOIN)
# sql 버전 1
SELECT ordernumber,country FROM customers A LEFT JOIN orders B ON A.customernumber = B.customerNumber;
# sql 버전 2
SELECT o.orderNumber, c.country FROM customers c LEFT JOIN orders o ON c.customerNumber = o.customerNumber;
# 판다스 버전
customers = pd.read_sql_query('select * from customers', con = con)
orders = pd.read_sql_query('select * from orders', con = con)
pd.merge(cutomers,order,left_on = customerNumber, right_on = customerNumber,how=left).loc[:, ['orderNumber', 'country']]
14) customers, orders 테이블을 이용해 USA 거주자의 주문 번호(OrderNumber), 국가(Country)를 출력하세요
# sql 버전 1
SELECT ordernumber,country FROM customers JOIN orders USING (customernumber) WHERE country = "USA" ; <-- 와 이렇게도 할 수 있구나 배워갑니당
# sql 버전 2
SELECT o.orderNumber, c.country FROM customers c LEFT JOIN orders o ON c.customerNumber = o.customerNumber WHERE c.country='USA';
# 판다스 버전
df = pd.merge(cutomers,order,left_on = customerNumber, right_on = customerNumber,how=left)
df.loc[df['country'].apply(lambda x : x == 'USA'), ['orderNumber', 'country']]
15) customers, orders 테이블을 이용해 USA 거주자의 주문 번호(CustomerNumber), 국가(Country)를 출력하세요(INNER JOIN 사용)
# sql 버전 1
SELECT c.CustomerNumber, c.country FROM customers c INNER JOIN orders o ON c.customerNumber = o.customerNumber WHERE c.country='USA';
# sql 버전 2
SELECT customernumber,country FROM customers inner JOIN orders USING (customernumber) WHERE country = "USA";
# 판다스 버전
df = pd.merge(cutomers,order,left_on = customerNumber, right_on = customerNumber,how='inner')
df.loc[df['country'].apply(lambda x : x == 'USA'), ['customerNumber', 'country']]