SQL 중급 튜토리얼: 실무 데이터 분석 마스터하기
SQL 중급 과정 소개
SQL 중급 과정에서는 기본적인 SELECT, INSERT, UPDATE, DELETE를 넘어서 실무에서 자주 사용되는 고급 쿼리 기법들을 학습합니다. 복잡한 비즈니스 로직을 SQL로 구현하고, 대용량 데이터를 효율적으로 처리하는 방법을 익히게 됩니다. 이 튜토리얼은 실제 비즈니스 시나리오를 기반으로 구성되어 있어 실무에 바로 적용 가능한 내용들로 구성되어 있습니다.
실습 환경 준비
이 튜토리얼의 예제는 대부분의 관계형 데이터베이스(MySQL, PostgreSQL, SQL Server, Oracle)에서 실행 가능하도록 작성되었습니다. 일부 문법은 데이터베이스별로 차이가 있을 수 있으며, 이러한 경우 주석으로 표시하였습니다.
-- 실습용 데이터베이스 및 테이블 생성
CREATE DATABASE IF NOT EXISTS sql_tutorial;
USE sql_tutorial;
-- 고객 테이블
CREATE TABLE customers (
customer_id INT PRIMARY KEY,
customer_name VARCHAR(100),
email VARCHAR(100),
registration_date DATE,
city VARCHAR(50),
customer_grade VARCHAR(20)
);
-- 제품 테이블
CREATE TABLE products (
product_id INT PRIMARY KEY,
product_name VARCHAR(100),
category VARCHAR(50),
price DECIMAL(10, 2),
stock_quantity INT
);
-- 주문 테이블
CREATE TABLE orders (
order_id INT PRIMARY KEY,
customer_id INT,
order_date DATETIME,
total_amount DECIMAL(10, 2),
order_status VARCHAR(20),
FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
);
-- 주문 상세 테이블
CREATE TABLE order_details (
order_detail_id INT PRIMARY KEY,
order_id INT,
product_id INT,
quantity INT,
unit_price DECIMAL(10, 2),
discount DECIMAL(5, 2),
FOREIGN KEY (order_id) REFERENCES orders(order_id),
FOREIGN KEY (product_id) REFERENCES products(product_id)
);
1. JOIN 심화: 다양한 테이블 결합
다중 테이블 JOIN
실무에서는 여러 테이블을 동시에 조인하여 복잡한 데이터를 추출하는 경우가 많습니다. 각 JOIN 유형의 특성을 이해하고 적절히 활용하는 것이 중요합니다.
-- INNER JOIN: 고객별 주문 내역과 제품 정보 조회
SELECT
c.customer_name,
c.city,
o.order_date,
p.product_name,
p.category,
od.quantity,
od.unit_price,
(od.quantity * od.unit_price * (1 - od.discount/100)) AS line_total
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_details od ON o.order_id = od.order_id
INNER JOIN products p ON od.product_id = p.product_id
WHERE o.order_date >= '2024-01-01'
ORDER BY o.order_date DESC;
-- LEFT JOIN: 주문하지 않은 고객 포함 조회
SELECT
c.customer_name,
c.registration_date,
COUNT(DISTINCT o.order_id) AS order_count,
COALESCE(SUM(o.total_amount), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name, c.registration_date
HAVING order_count = 0
OR total_spent < 10000;
-- SELF JOIN: 같은 도시의 다른 고객 찾기
SELECT
c1.customer_name AS customer1,
c2.customer_name AS customer2,
c1.city
FROM customers c1
INNER JOIN customers c2
ON c1.city = c2.city
AND c1.customer_id < c2.customer_id
ORDER BY c1.city, c1.customer_name;
-- FULL OUTER JOIN (MySQL에서는 UNION으로 구현)
-- 모든 고객과 모든 제품의 조합 (구매 여부 상관없이)
SELECT
c.customer_name,
p.product_name,
COALESCE(SUM(od.quantity), 0) AS total_purchased
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
LEFT JOIN order_details od ON o.order_id = od.order_id
RIGHT JOIN products p ON od.product_id = p.product_id
GROUP BY c.customer_id, p.product_id
UNION
SELECT
c.customer_name,
p.product_name,
0 AS total_purchased
FROM customers c
CROSS JOIN products p
WHERE NOT EXISTS (
SELECT 1
FROM orders o
JOIN order_details od ON o.order_id = od.order_id
WHERE o.customer_id = c.customer_id
AND od.product_id = p.product_id
);
2. 서브쿼리와 상관 서브쿼리
서브쿼리는 다른 쿼리 내부에 포함된 SELECT 문으로, 복잡한 조건을 표현하거나 단계적인 데이터 처리를 수행할 때 유용합니다.
-- 스칼라 서브쿼리: 각 고객의 최근 주문 정보
SELECT
c.customer_name,
c.customer_grade,
(
SELECT MAX(order_date)
FROM orders o
WHERE o.customer_id = c.customer_id
) AS last_order_date,
(
SELECT COUNT(*)
FROM orders o
WHERE o.customer_id = c.customer_id
) AS total_orders
FROM customers c
WHERE c.customer_grade = 'Gold';
-- 인라인 뷰 서브쿼리: 평균 이상 구매 고객
SELECT
customer_summary.*,
CASE
WHEN total_spent > avg_spent * 2 THEN 'VIP'
WHEN total_spent > avg_spent THEN '우수'
ELSE '일반'
END AS customer_tier
FROM (
SELECT
c.customer_id,
c.customer_name,
SUM(o.total_amount) AS total_spent,
(
SELECT AVG(total_amount)
FROM (
SELECT SUM(total_amount) AS total_amount
FROM orders
GROUP BY customer_id
) avg_calc
) AS avg_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name
) customer_summary;
-- 상관 서브쿼리: 각 카테고리에서 평균 가격보다 비싼 제품
SELECT
p1.product_name,
p1.category,
p1.price,
(
SELECT AVG(p2.price)
FROM products p2
WHERE p2.category = p1.category
) AS category_avg_price
FROM products p1
WHERE p1.price > (
SELECT AVG(p2.price)
FROM products p2
WHERE p2.category = p1.category
)
ORDER BY p1.category, p1.price DESC;
-- EXISTS를 사용한 서브쿼리: 특정 제품을 구매한 고객
SELECT DISTINCT
c.customer_name,
c.email
FROM customers c
WHERE EXISTS (
SELECT 1
FROM orders o
JOIN order_details od ON o.order_id = od.order_id
JOIN products p ON od.product_id = p.product_id
WHERE o.customer_id = c.customer_id
AND p.category = '전자제품'
AND o.order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
);
3. CTE(Common Table Expression)
CTE는 쿼리 내에서 임시로 사용할 수 있는 명명된 결과 집합입니다. 복잡한 쿼리를 더 읽기 쉽고 유지보수하기 쉽게 만들어줍니다.
-- 기본 CTE 사용법
WITH customer_orders AS (
SELECT
c.customer_id,
c.customer_name,
COUNT(o.order_id) AS order_count,
SUM(o.total_amount) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name
)
SELECT
customer_name,
order_count,
total_spent,
RANK() OVER (ORDER BY total_spent DESC) AS spending_rank
FROM customer_orders
WHERE order_count > 0;
-- 다중 CTE 사용
WITH
monthly_sales AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
SUM(total_amount) AS monthly_total,
COUNT(DISTINCT customer_id) AS unique_customers
FROM orders
WHERE order_status = 'Completed'
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
),
monthly_growth AS (
SELECT
month,
monthly_total,
unique_customers,
LAG(monthly_total) OVER (ORDER BY month) AS prev_month_total,
(100.0 * (monthly_total - LAG(monthly_total) OVER (ORDER BY month))
/ LAG(monthly_total) OVER (ORDER BY month)) AS growth_rate
FROM monthly_sales
)
SELECT
month,
monthly_total,
unique_customers,
ROUND(monthly_total / unique_customers, 2) AS avg_order_value,
COALESCE(ROUND(growth_rate, 2), 0) AS growth_rate_pct
FROM monthly_growth
ORDER BY month;
-- 재귀 CTE (계층 구조 데이터 처리)
-- 예: 추천인 체인 추적
WITH RECURSIVE referral_chain AS (
-- Anchor member: 최초 가입자
SELECT
customer_id,
customer_name,
referred_by,
0 AS level,
CAST(customer_id AS CHAR(200)) AS path
FROM customers
WHERE referred_by IS NULL
UNION ALL
-- Recursive member: 추천받은 고객
SELECT
c.customer_id,
c.customer_name,
c.referred_by,
rc.level + 1,
CONCAT(rc.path, ' -> ', c.customer_id)
FROM customers c
INNER JOIN referral_chain rc ON c.referred_by = rc.customer_id
WHERE rc.level < 5 -- 최대 5단계까지만
)
SELECT
level AS referral_level,
COUNT(*) AS customer_count,
GROUP_CONCAT(customer_name ORDER BY customer_name SEPARATOR ', ') AS customers
FROM referral_chain
GROUP BY level
ORDER BY level;
4. 윈도우 함수
윈도우 함수는 행 집합에 대해 계산을 수행하면서도 각 행을 개별적으로 유지할 수 있게 해주는 강력한 기능입니다.
-- ROW_NUMBER, RANK, DENSE_RANK 비교
SELECT
product_name,
category,
price,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS row_num,
RANK() OVER (PARTITION BY category ORDER BY price DESC) AS rank_num,
DENSE_RANK() OVER (PARTITION BY category ORDER BY price DESC) AS dense_rank_num
FROM products
ORDER BY category, price DESC;
-- 누적 합계와 이동 평균
SELECT
order_date,
total_amount,
SUM(total_amount) OVER (
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cumulative_sales,
AVG(total_amount) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7days,
COUNT(*) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS orders_in_window
FROM orders
WHERE order_status = 'Completed'
ORDER BY order_date;
-- LAG와 LEAD를 사용한 전후 비교
SELECT
customer_id,
order_date,
total_amount,
LAG(total_amount, 1) OVER (
PARTITION BY customer_id
ORDER BY order_date
) AS prev_order_amount,
LEAD(total_amount, 1) OVER (
PARTITION BY customer_id
ORDER BY order_date
) AS next_order_amount,
total_amount - LAG(total_amount, 1, 0) OVER (
PARTITION BY customer_id
ORDER BY order_date
) AS amount_change
FROM orders
ORDER BY customer_id, order_date;
-- NTILE을 사용한 분위수 계산
SELECT
customer_name,
total_spent,
NTILE(4) OVER (ORDER BY total_spent DESC) AS spending_quartile,
CASE NTILE(4) OVER (ORDER BY total_spent DESC)
WHEN 1 THEN '최상위 25%'
WHEN 2 THEN '상위 25-50%'
WHEN 3 THEN '하위 25-50%'
WHEN 4 THEN '최하위 25%'
END AS customer_segment
FROM (
SELECT
c.customer_name,
COALESCE(SUM(o.total_amount), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name
) customer_spending;
5. PIVOT과 UNPIVOT
PIVOT은 행 데이터를 열로 변환하고, UNPIVOT은 열 데이터를 행으로 변환합니다. 데이터의 형태를 변경하여 분석하기 쉽게 만들어줍니다.
-- PIVOT 구현 (MySQL - CASE문 사용)
SELECT
category,
SUM(CASE WHEN MONTH(order_date) = 1 THEN total_sales ELSE 0 END) AS Jan,
SUM(CASE WHEN MONTH(order_date) = 2 THEN total_sales ELSE 0 END) AS Feb,
SUM(CASE WHEN MONTH(order_date) = 3 THEN total_sales ELSE 0 END) AS Mar,
SUM(CASE WHEN MONTH(order_date) = 4 THEN total_sales ELSE 0 END) AS Apr,
SUM(total_sales) AS Total
FROM (
SELECT
p.category,
o.order_date,
SUM(od.quantity * od.unit_price * (1 - od.discount/100)) AS total_sales
FROM order_details od
JOIN orders o ON od.order_id = o.order_id
JOIN products p ON od.product_id = p.product_id
WHERE YEAR(o.order_date) = 2024
GROUP BY p.category, o.order_date
) monthly_sales
GROUP BY category;
-- 동적 PIVOT (준비된 문 사용)
SET @sql = NULL;
SELECT
GROUP_CONCAT(DISTINCT
CONCAT(
'SUM(CASE WHEN city = ''',
city,
''' THEN total_amount ELSE 0 END) AS `',
city, '`'
)
) INTO @sql
FROM customers;
SET @sql = CONCAT('SELECT
DATE_FORMAT(order_date, ''%Y-%m'') AS month, ',
@sql, ',
SUM(total_amount) AS total
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
GROUP BY DATE_FORMAT(order_date, ''%Y-%m'')');
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- UNPIVOT 구현 (UNION ALL 사용)
SELECT
product_id,
'Q1' AS quarter,
q1_sales AS sales
FROM quarterly_sales
UNION ALL
SELECT
product_id,
'Q2' AS quarter,
q2_sales AS sales
FROM quarterly_sales
UNION ALL
SELECT
product_id,
'Q3' AS quarter,
q3_sales AS sales
FROM quarterly_sales
UNION ALL
SELECT
product_id,
'Q4' AS quarter,
q4_sales AS sales
FROM quarterly_sales;
6. 고급 그룹화: ROLLUP, CUBE, GROUPING SETS
고급 그룹화 기능을 사용하면 여러 수준의 집계를 한 번의 쿼리로 수행할 수 있습니다.
-- ROLLUP: 계층적 집계
SELECT
COALESCE(category, '전체') AS category,
COALESCE(DATE_FORMAT(order_date, '%Y-%m'), '소계') AS month,
COUNT(DISTINCT o.order_id) AS order_count,
SUM(od.quantity) AS total_quantity,
SUM(od.quantity * od.unit_price * (1 - od.discount/100)) AS total_sales
FROM order_details od
JOIN orders o ON od.order_id = o.order_id
JOIN products p ON od.product_id = p.product_id
WHERE o.order_date >= '2024-01-01'
GROUP BY category, DATE_FORMAT(order_date, '%Y-%m') WITH ROLLUP;
-- CUBE: 모든 조합의 집계 (MySQL 8.0 미지원, 대체 구현)
SELECT
category,
city,
SUM(total_amount) AS sales,
'Category-City' AS grouping_level
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
JOIN order_details od ON o.order_id = od.order_id
JOIN products p ON od.product_id = p.product_id
GROUP BY category, city
UNION ALL
SELECT
category,
NULL AS city,
SUM(total_amount) AS sales,
'Category-Total' AS grouping_level
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
JOIN order_details od ON o.order_id = od.order_id
JOIN products p ON od.product_id = p.product_id
GROUP BY category
UNION ALL
SELECT
NULL AS category,
city,
SUM(total_amount) AS sales,
'City-Total' AS grouping_level
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
JOIN order_details od ON o.order_id = od.order_id
JOIN products p ON od.product_id = p.product_id
GROUP BY city
UNION ALL
SELECT
NULL AS category,
NULL AS city,
SUM(total_amount) AS sales,
'Grand-Total' AS grouping_level
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
JOIN order_details od ON o.order_id = od.order_id
JOIN products p ON od.product_id = p.product_id;
-- GROUPING SETS (PostgreSQL, SQL Server)
-- MySQL에서는 UNION ALL로 구현
SELECT
category,
customer_grade,
YEAR(order_date) AS year,
SUM(total_amount) AS total_sales
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
JOIN order_details od ON o.order_id = od.order_id
JOIN products p ON od.product_id = p.product_id
GROUP BY category, customer_grade, YEAR(order_date)
UNION ALL
SELECT
category,
NULL AS customer_grade,
YEAR(order_date) AS year,
SUM(total_amount) AS total_sales
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
JOIN order_details od ON o.order_id = od.order_id
JOIN products p ON od.product_id = p.product_id
GROUP BY category, YEAR(order_date);
7. 날짜/시간 함수 활용
날짜와 시간 데이터를 효과적으로 처리하는 것은 시계열 분석과 비즈니스 리포팅에서 매우 중요합니다.
-- 날짜 계산과 포맷팅
SELECT
order_date,
DATE_FORMAT(order_date, '%Y년 %m월 %d일') AS korean_date,
DATE_FORMAT(order_date, '%W') AS weekday_name,
DAYOFWEEK(order_date) AS weekday_num,
WEEK(order_date) AS week_of_year,
QUARTER(order_date) AS quarter,
DATEDIFF(CURDATE(), order_date) AS days_ago,
DATE_ADD(order_date, INTERVAL 7 DAY) AS expected_delivery
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY);
-- 기간별 집계
SELECT
CASE
WHEN HOUR(order_date) BETWEEN 6 AND 11 THEN '아침 (6-11시)'
WHEN HOUR(order_date) BETWEEN 12 AND 17 THEN '오후 (12-17시)'
WHEN HOUR(order_date) BETWEEN 18 AND 23 THEN '저녁 (18-23시)'
ELSE '새벽 (0-5시)'
END AS time_period,
COUNT(*) AS order_count,
AVG(total_amount) AS avg_order_value
FROM orders
GROUP BY time_period
ORDER BY
CASE time_period
WHEN '새벽 (0-5시)' THEN 1
WHEN '아침 (6-11시)' THEN 2
WHEN '오후 (12-17시)' THEN 3
WHEN '저녁 (18-23시)' THEN 4
END;
-- 코호트 분석을 위한 날짜 처리
WITH cohort_data AS (
SELECT
c.customer_id,
DATE_FORMAT(c.registration_date, '%Y-%m') AS cohort_month,
DATE_FORMAT(o.order_date, '%Y-%m') AS order_month,
PERIOD_DIFF(
DATE_FORMAT(o.order_date, '%Y%m'),
DATE_FORMAT(c.registration_date, '%Y%m')
) AS months_since_registration
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
)
SELECT
cohort_month,
months_since_registration,
COUNT(DISTINCT customer_id) AS active_customers
FROM cohort_data
WHERE months_since_registration >= 0
GROUP BY cohort_month, months_since_registration
ORDER BY cohort_month, months_since_registration;
8. 문자열 처리 고급 기법
문자열 데이터를 정제하고 변환하는 것은 데이터 품질 관리에 필수적입니다.
-- 문자열 조작 함수들
SELECT
customer_name,
UPPER(customer_name) AS upper_name,
LOWER(customer_name) AS lower_name,
LENGTH(customer_name) AS name_length,
SUBSTRING(customer_name, 1, 3) AS first_3_chars,
REPLACE(customer_name, ' ', '_') AS underscore_name,
CONCAT('고객: ', customer_name) AS formatted_name,
TRIM(customer_name) AS trimmed_name
FROM customers
LIMIT 10;
-- 정규 표현식 사용 (MySQL 8.0+)
SELECT
email,
CASE
WHEN email REGEXP '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'
THEN '유효'
ELSE '무효'
END AS email_validity,
SUBSTRING_INDEX(email, '@', 1) AS username,
SUBSTRING_INDEX(email, '@', -1) AS domain,
SUBSTRING_INDEX(SUBSTRING_INDEX(email, '@', -1), '.', 1) AS domain_name
FROM customers
WHERE email IS NOT NULL;
-- 문자열 집계
SELECT
city,
GROUP_CONCAT(customer_name ORDER BY customer_name SEPARATOR ', ') AS customers_list,
COUNT(*) AS customer_count
FROM customers
GROUP BY city
HAVING customer_count >= 5;
-- JSON 데이터 처리 (MySQL 5.7+)
SELECT
customer_id,
JSON_OBJECT(
'name', customer_name,
'email', email,
'city', city,
'grade', customer_grade
) AS customer_json,
JSON_EXTRACT(
JSON_OBJECT('city', city, 'grade', customer_grade),
'$.city'
) AS extracted_city
FROM customers
LIMIT 5;
9. CASE문과 조건부 로직
CASE문을 활용하면 SQL 내에서 복잡한 비즈니스 로직을 구현할 수 있습니다.
-- 다중 조건 CASE문
SELECT
customer_name,
total_spent,
order_count,
CASE
WHEN total_spent > 1000000 AND order_count > 20 THEN 'VIP'
WHEN total_spent > 500000 OR order_count > 15 THEN 'Gold'
WHEN total_spent > 100000 OR order_count > 5 THEN 'Silver'
ELSE 'Bronze'
END AS calculated_grade,
CASE
WHEN last_order_date > DATE_SUB(CURDATE(), INTERVAL 30 DAY) THEN '활성'
WHEN last_order_date > DATE_SUB(CURDATE(), INTERVAL 90 DAY) THEN '휴면위험'
ELSE '휴면'
END AS activity_status
FROM (
SELECT
c.customer_id,
c.customer_name,
COUNT(o.order_id) AS order_count,
COALESCE(SUM(o.total_amount), 0) AS total_spent,
MAX(o.order_date) AS last_order_date
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name
) customer_summary;
-- CASE문을 사용한 동적 정렬
SET @sort_column = 'price'; -- 'name', 'price', 'stock' 중 선택
SET @sort_order = 'DESC'; -- 'ASC' 또는 'DESC'
SELECT
product_name,
price,
stock_quantity
FROM products
ORDER BY
CASE
WHEN @sort_column = 'name' AND @sort_order = 'ASC' THEN product_name
END ASC,
CASE
WHEN @sort_column = 'name' AND @sort_order = 'DESC' THEN product_name
END DESC,
CASE
WHEN @sort_column = 'price' AND @sort_order = 'ASC' THEN price
END ASC,
CASE
WHEN @sort_column = 'price' AND @sort_order = 'DESC' THEN price
END DESC;
-- CASE문을 활용한 피벗
SELECT
customer_id,
SUM(CASE WHEN category = '전자제품' THEN amount ELSE 0 END) AS electronics,
SUM(CASE WHEN category = '의류' THEN amount ELSE 0 END) AS clothing,
SUM(CASE WHEN category = '식품' THEN amount ELSE 0 END) AS food,
SUM(amount) AS total_amount
FROM (
SELECT
o.customer_id,
p.category,
od.quantity * od.unit_price * (1 - od.discount/100) AS amount
FROM orders o
JOIN order_details od ON o.order_id = od.order_id
JOIN products p ON od.product_id = p.product_id
) purchase_details
GROUP BY customer_id;
10. 쿼리 성능 최적화
대용량 데이터를 다룰 때는 쿼리 성능 최적화가 필수적입니다. 인덱스 활용과 쿼리 구조 개선을 통해 성능을 크게 향상시킬 수 있습니다.
-- 인덱스 생성
CREATE INDEX idx_orders_customer_date
ON orders(customer_id, order_date);
CREATE INDEX idx_order_details_order_product
ON order_details(order_id, product_id);
CREATE INDEX idx_products_category_price
ON products(category, price);
-- 실행 계획 확인
EXPLAIN
SELECT
c.customer_name,
COUNT(o.order_id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_date >= '2024-01-01'
GROUP BY c.customer_id;
-- 쿼리 최적화: EXISTS vs IN
-- 비효율적인 IN 서브쿼리
SELECT * FROM customers
WHERE customer_id IN (
SELECT customer_id FROM orders
WHERE total_amount > 100000
);
-- 효율적인 EXISTS
SELECT * FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
AND o.total_amount > 100000
);
-- 파티셔닝을 활용한 성능 개선
CREATE TABLE orders_partitioned (
order_id INT,
customer_id INT,
order_date DATE,
total_amount DECIMAL(10, 2)
) PARTITION BY RANGE (YEAR(order_date)) (
PARTITION p2022 VALUES LESS THAN (2023),
PARTITION p2023 VALUES LESS THAN (2024),
PARTITION p2024 VALUES LESS THAN (2025),
PARTITION p2025 VALUES LESS THAN MAXVALUE
);
-- 쿼리 힌트 사용 (MySQL)
SELECT /*+ MAX_EXECUTION_TIME(1000) */
c.customer_name,
o.order_date
FROM customers c
STRAIGHT_JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_date >= '2024-01-01';
11. 트랜잭션과 동시성 제어
데이터의 일관성과 무결성을 보장하기 위해 트랜잭션을 올바르게 사용하는 것이 중요합니다.
-- 트랜잭션 기본 사용
START TRANSACTION;
-- 주문 생성
INSERT INTO orders (customer_id, order_date, total_amount, order_status)
VALUES (1001, NOW(), 150000, 'Pending');
SET @order_id = LAST_INSERT_ID();
-- 주문 상세 추가
INSERT INTO order_details (order_id, product_id, quantity, unit_price, discount)
VALUES
(@order_id, 101, 2, 50000, 10),
(@order_id, 102, 1, 50000, 0);
-- 재고 감소
UPDATE products
SET stock_quantity = stock_quantity - 2
WHERE product_id = 101;
UPDATE products
SET stock_quantity = stock_quantity - 1
WHERE product_id = 102;
-- 재고 확인
SELECT product_id, stock_quantity
FROM products
WHERE product_id IN (101, 102);
-- 문제가 없으면 커밋, 문제가 있으면 롤백
COMMIT;
-- ROLLBACK;
-- 격리 수준 설정
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
START TRANSACTION;
-- 낙관적 잠금 (Optimistic Locking)
SELECT
product_id,
stock_quantity,
version
FROM products
WHERE product_id = 101;
-- 애플리케이션에서 처리 후
UPDATE products
SET
stock_quantity = stock_quantity - 5,
version = version + 1
WHERE product_id = 101
AND version = @current_version;
-- 비관적 잠금 (Pessimistic Locking)
SELECT * FROM products
WHERE product_id = 101
FOR UPDATE;
-- 데드락 방지를 위한 순서 지정
SELECT * FROM products
WHERE product_id IN (101, 102, 103)
ORDER BY product_id -- 항상 같은 순서로 잠금
FOR UPDATE;
12. 실전 예제: 종합 비즈니스 분석
지금까지 학습한 내용을 종합하여 실제 비즈니스 분석 시나리오를 구현해보겠습니다.
-- 종합 비즈니스 대시보드 쿼리
-- 1. 매출 트렌드와 성장률 분석
WITH monthly_metrics AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
COUNT(DISTINCT order_id) AS order_count,
COUNT(DISTINCT customer_id) AS unique_customers,
SUM(total_amount) AS revenue,
AVG(total_amount) AS avg_order_value
FROM orders
WHERE order_status = 'Completed'
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
),
growth_analysis AS (
SELECT
month,
revenue,
order_count,
unique_customers,
avg_order_value,
LAG(revenue) OVER (ORDER BY month) AS prev_revenue,
100.0 * (revenue - LAG(revenue) OVER (ORDER BY month)) /
NULLIF(LAG(revenue) OVER (ORDER BY month), 0) AS revenue_growth
FROM monthly_metrics
)
SELECT
month,
FORMAT(revenue, 0) AS revenue,
order_count,
unique_customers,
ROUND(avg_order_value, 0) AS avg_order_value,
COALESCE(ROUND(revenue_growth, 1), 0) AS growth_rate
FROM growth_analysis
ORDER BY month DESC
LIMIT 12;
-- 2. 고객 세분화 및 RFM 분석
WITH rfm_data AS (
SELECT
c.customer_id,
c.customer_name,
c.customer_grade,
DATEDIFF(CURDATE(), MAX(o.order_date)) AS recency,
COUNT(DISTINCT o.order_id) AS frequency,
COALESCE(SUM(o.total_amount), 0) AS monetary
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name, c.customer_grade
),
rfm_scores AS (
SELECT
*,
NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
NTILE(5) OVER (ORDER BY frequency) AS f_score,
NTILE(5) OVER (ORDER BY monetary) AS m_score
FROM rfm_data
)
SELECT
CASE
WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN 'Champions'
WHEN r_score >= 3 AND f_score >= 3 AND m_score >= 4 THEN 'Loyal Customers'
WHEN r_score >= 3 AND f_score <= 2 AND m_score >= 3 THEN 'Potential Loyalists'
WHEN r_score >= 4 AND f_score <= 2 THEN 'New Customers'
WHEN r_score <= 2 AND f_score >= 3 THEN 'At Risk'
WHEN r_score <= 2 AND f_score <= 2 THEN 'Lost'
ELSE 'Others'
END AS customer_segment,
COUNT(*) AS customer_count,
ROUND(AVG(monetary), 0) AS avg_value,
ROUND(AVG(frequency), 1) AS avg_frequency
FROM rfm_scores
GROUP BY customer_segment
ORDER BY avg_value DESC;
-- 3. 제품 성과 분석
WITH product_performance AS (
SELECT
p.product_id,
p.product_name,
p.category,
p.price,
COUNT(DISTINCT od.order_id) AS order_count,
SUM(od.quantity) AS total_quantity,
SUM(od.quantity * od.unit_price * (1 - od.discount/100)) AS revenue,
AVG(od.discount) AS avg_discount
FROM products p
LEFT JOIN order_details od ON p.product_id = od.product_id
LEFT JOIN orders o ON od.order_id = o.order_id
WHERE o.order_date >= DATE_SUB(CURDATE(), INTERVAL 90 DAY)
GROUP BY p.product_id, p.product_name, p.category, p.price
)
SELECT
product_name,
category,
FORMAT(price, 0) AS price,
order_count,
total_quantity,
FORMAT(revenue, 0) AS revenue,
ROUND(avg_discount, 1) AS avg_discount,
RANK() OVER (PARTITION BY category ORDER BY revenue DESC) AS category_rank,
ROUND(100.0 * revenue / SUM(revenue) OVER (PARTITION BY category), 1) AS category_share
FROM product_performance
WHERE revenue > 0
ORDER BY category, category_rank
LIMIT 20;
-- 4. 코호트 분석
WITH cohort_items AS (
SELECT
c.customer_id,
DATE_FORMAT(c.registration_date, '%Y-%m') AS cohort_month,
o.order_id,
o.order_date,
PERIOD_DIFF(
DATE_FORMAT(o.order_date, '%Y%m'),
DATE_FORMAT(c.registration_date, '%Y%m')
) AS month_number
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
),
cohort_size AS (
SELECT
cohort_month,
COUNT(DISTINCT customer_id) AS num_users
FROM cohort_items
GROUP BY cohort_month
),
retention_table AS (
SELECT
ci.cohort_month,
ci.month_number,
COUNT(DISTINCT ci.customer_id) AS num_users
FROM cohort_items ci
WHERE ci.order_id IS NOT NULL
GROUP BY ci.cohort_month, ci.month_number
)
SELECT
r.cohort_month,
s.num_users AS cohort_size,
r.month_number,
r.num_users AS retained_users,
ROUND(100.0 * r.num_users / s.num_users, 1) AS retention_rate
FROM retention_table r
LEFT JOIN cohort_size s ON r.cohort_month = s.cohort_month
WHERE r.cohort_month >= '2024-01'
AND r.month_number <= 6
ORDER BY r.cohort_month, r.month_number;
-- 5. 최종 대시보드 요약
SELECT
'핵심 지표' AS metric_category,
'총 매출' AS metric_name,
FORMAT(SUM(total_amount), 0) AS metric_value
FROM orders
WHERE order_status = 'Completed'
AND YEAR(order_date) = YEAR(CURDATE())
UNION ALL
SELECT
'핵심 지표',
'총 주문 수',
FORMAT(COUNT(*), 0)
FROM orders
WHERE YEAR(order_date) = YEAR(CURDATE())
UNION ALL
SELECT
'핵심 지표',
'활성 고객 수',
FORMAT(COUNT(DISTINCT customer_id), 0)
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
UNION ALL
SELECT
'핵심 지표',
'평균 주문 금액',
FORMAT(AVG(total_amount), 0)
FROM orders
WHERE order_status = 'Completed'
AND YEAR(order_date) = YEAR(CURDATE());
마무리
이 튜토리얼에서는 SQL 중급 수준의 핵심 기능들을 실제 비즈니스 시나리오와 함께 살펴보았습니다. JOIN, 서브쿼리, CTE, 윈도우 함수 등의 고급 기능을 활용하면 복잡한 데이터 분석을 효율적으로 수행할 수 있습니다.
- 실제 데이터베이스에서 예제를 실행하며 실행 계획을 분석해보세요
- 각 데이터베이스 시스템(MySQL, PostgreSQL, SQL Server 등)의 고유 기능을 학습하세요
- 대용량 데이터 처리를 위한 파티셔닝과 인덱싱 전략을 깊이 있게 공부하세요
- NoSQL과 SQL을 함께 사용하는 하이브리드 아키텍처를 이해하세요
- 실무 프로젝트에서 쿼리 성능 최적화를 지속적으로 연습하세요