SQL 고급 쿼리 튜토리얼: 전문가를 위한 완벽 가이드
SQL 고급 과정 소개
SQL 고급 과정은 기본적인 SELECT, INSERT, UPDATE, DELETE를 넘어서 데이터베이스의 강력한 기능들을 활용하는 방법을 다룹니다. 복잡한 비즈니스 로직을 구현하고, 대용량 데이터를 효율적으로 처리하며, 데이터베이스 성능을 최적화하는 기술들을 학습합니다. 이 튜토리얼은 실무에서 바로 활용할 수 있는 고급 SQL 기법들을 실제 예제와 함께 제공합니다.
실습 환경 설정
이 튜토리얼의 예제들은 PostgreSQL 15+ 또는 MySQL 8.0+에서 테스트되었습니다. 일부 기능은 특정 DBMS에서만 지원될 수 있으므로, 사용하는 데이터베이스 시스템의 문서를 참고하시기 바랍니다.
-- 실습용 데이터베이스 및 테이블 생성
CREATE DATABASE advanced_sql_tutorial;
USE advanced_sql_tutorial;
-- 직원 테이블
CREATE TABLE employees (
employee_id INT PRIMARY KEY,
name VARCHAR(100),
department_id INT,
manager_id INT,
salary DECIMAL(10, 2),
hire_date DATE,
performance_score DECIMAL(3, 2)
);
-- 부서 테이블
CREATE TABLE departments (
department_id INT PRIMARY KEY,
department_name VARCHAR(100),
location VARCHAR(100),
budget DECIMAL(12, 2)
);
-- 판매 테이블
CREATE TABLE sales (
sale_id INT PRIMARY KEY,
product_id INT,
customer_id INT,
employee_id INT,
sale_date DATE,
quantity INT,
amount DECIMAL(10, 2),
region VARCHAR(50)
);
1. 윈도우 함수 (Window Functions)
순위 함수 (Ranking Functions)
윈도우 함수는 행 집합에 대해 계산을 수행하면서도 GROUP BY처럼 행을 그룹화하지 않습니다. 각 행은 별도로 유지되면서 집계 결과를 함께 볼 수 있습니다.
-- ROW_NUMBER, RANK, DENSE_RANK 비교
SELECT
employee_id,
name,
department_id,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num,
RANK() OVER (ORDER BY salary DESC) AS rank_num,
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank_num,
NTILE(4) OVER (ORDER BY salary DESC) AS quartile
FROM employees
ORDER BY salary DESC;
-- 부서별 급여 순위
SELECT
e.name,
d.department_name,
e.salary,
RANK() OVER (PARTITION BY e.department_id ORDER BY e.salary DESC) AS dept_salary_rank,
PERCENT_RANK() OVER (PARTITION BY e.department_id ORDER BY e.salary) AS percentile
FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE e.salary IS NOT NULL;
분석 함수 (Analytic Functions)
분석 함수는 현재 행을 기준으로 다른 행들의 값을 참조하거나 집계를 수행할 수 있습니다.
-- LAG과 LEAD를 사용한 전월/익월 비교
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', sale_date) AS month,
SUM(amount) AS total_sales
FROM sales
GROUP BY DATE_TRUNC('month', sale_date)
)
SELECT
month,
total_sales,
LAG(total_sales, 1) OVER (ORDER BY month) AS prev_month_sales,
LEAD(total_sales, 1) OVER (ORDER BY month) AS next_month_sales,
total_sales - LAG(total_sales, 1) OVER (ORDER BY month) AS month_over_month_change,
ROUND(
(total_sales - LAG(total_sales, 1) OVER (ORDER BY month)) /
LAG(total_sales, 1) OVER (ORDER BY month) * 100,
2
) AS growth_rate_percent
FROM monthly_sales
ORDER BY month;
-- 누적 합계와 이동 평균
SELECT
sale_date,
amount,
SUM(amount) OVER (ORDER BY sale_date) AS cumulative_sales,
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7days,
COUNT(*) OVER (
ORDER BY sale_date
RANGE BETWEEN INTERVAL '30 days' PRECEDING AND CURRENT ROW
) AS sales_last_30days
FROM sales
WHERE sale_date >= '2024-01-01'
ORDER BY sale_date;
2. CTE (Common Table Expressions)
CTE는 쿼리 내에서 임시 결과 집합을 정의하여 복잡한 쿼리를 더 읽기 쉽고 관리하기 쉽게 만듭니다.
-- 기본 CTE 사용법
WITH high_performers AS (
SELECT
employee_id,
name,
performance_score
FROM employees
WHERE performance_score >= 4.0
),
department_stats AS (
SELECT
department_id,
AVG(salary) AS avg_salary,
COUNT(*) AS emp_count
FROM employees
GROUP BY department_id
)
SELECT
hp.name,
hp.performance_score,
e.salary,
ds.avg_salary AS dept_avg_salary,
e.salary - ds.avg_salary AS salary_diff_from_avg
FROM high_performers hp
JOIN employees e ON hp.employee_id = e.employee_id
JOIN department_stats ds ON e.department_id = ds.department_id
ORDER BY hp.performance_score DESC;
-- 다중 레벨 CTE
WITH quarterly_sales AS (
SELECT
EXTRACT(YEAR FROM sale_date) AS year,
EXTRACT(QUARTER FROM sale_date) AS quarter,
region,
SUM(amount) AS total_sales
FROM sales
GROUP BY
EXTRACT(YEAR FROM sale_date),
EXTRACT(QUARTER FROM sale_date),
region
),
ranked_regions AS (
SELECT
year,
quarter,
region,
total_sales,
RANK() OVER (
PARTITION BY year, quarter
ORDER BY total_sales DESC
) AS region_rank
FROM quarterly_sales
),
top_regions AS (
SELECT *
FROM ranked_regions
WHERE region_rank <= 3
)
SELECT
year,
quarter,
STRING_AGG(
region || ': $' || ROUND(total_sales, 2),
', '
ORDER BY region_rank
) AS top_3_regions
FROM top_regions
GROUP BY year, quarter
ORDER BY year, quarter;
3. 재귀 쿼리 (Recursive Queries)
재귀 CTE를 사용하면 계층적 데이터나 그래프 구조를 탐색할 수 있습니다.
-- 조직 계층 구조 탐색
WITH RECURSIVE org_hierarchy AS (
-- Anchor member: 최상위 관리자
SELECT
employee_id,
name,
manager_id,
1 AS level,
name AS path,
CAST(employee_id AS VARCHAR(1000)) AS hierarchy_path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive member
SELECT
e.employee_id,
e.name,
e.manager_id,
oh.level + 1,
oh.path || ' > ' || e.name,
oh.hierarchy_path || '/' || CAST(e.employee_id AS VARCHAR(1000))
FROM employees e
INNER JOIN org_hierarchy oh ON e.manager_id = oh.employee_id
)
SELECT
level,
REPEAT(' ', level - 1) || name AS org_chart,
path AS reporting_line
FROM org_hierarchy
ORDER BY hierarchy_path;
-- 재귀를 사용한 날짜 시퀀스 생성
WITH RECURSIVE date_series AS (
SELECT
DATE '2024-01-01' AS date
UNION ALL
SELECT
date + INTERVAL '1 day'
FROM date_series
WHERE date < DATE '2024-12-31'
)
SELECT
ds.date,
COALESCE(COUNT(s.sale_id), 0) AS sales_count,
COALESCE(SUM(s.amount), 0) AS total_sales
FROM date_series ds
LEFT JOIN sales s ON ds.date = s.sale_date
GROUP BY ds.date
ORDER BY ds.date;
4. PIVOT과 UNPIVOT
PIVOT은 행 데이터를 열로 변환하고, UNPIVOT은 열 데이터를 행으로 변환합니다.
-- PIVOT 구현 (CASE WHEN 사용)
SELECT
region,
SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 1
THEN amount ELSE 0 END) AS Q1,
SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 2
THEN amount ELSE 0 END) AS Q2,
SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 3
THEN amount ELSE 0 END) AS Q3,
SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 4
THEN amount ELSE 0 END) AS Q4,
SUM(amount) AS total_year
FROM sales
WHERE EXTRACT(YEAR FROM sale_date) = 2024
GROUP BY region
ORDER BY total_year DESC;
-- 동적 PIVOT (PostgreSQL의 crosstab 사용)
CREATE EXTENSION IF NOT EXISTS tablefunc;
SELECT * FROM crosstab(
$$SELECT
department_name,
EXTRACT(YEAR FROM hire_date)::text AS year,
COUNT(*) AS emp_count
FROM employees e
JOIN departments d ON e.department_id = d.department_id
GROUP BY department_name, EXTRACT(YEAR FROM hire_date)
ORDER BY 1, 2$$,
$$SELECT DISTINCT EXTRACT(YEAR FROM hire_date)::text
FROM employees ORDER BY 1$$
) AS pivot_table(
department VARCHAR(100),
year_2020 INT,
year_2021 INT,
year_2022 INT,
year_2023 INT,
year_2024 INT
);
-- UNPIVOT 구현 (UNION ALL 사용)
WITH quarterly_data AS (
SELECT
region,
2024 AS year,
100000 AS Q1,
120000 AS Q2,
110000 AS Q3,
130000 AS Q4
FROM (VALUES ('North'), ('South'), ('East'), ('West')) AS t(region)
)
SELECT
region,
year,
quarter,
sales
FROM (
SELECT region, year, 'Q1' AS quarter, Q1 AS sales FROM quarterly_data
UNION ALL
SELECT region, year, 'Q2', Q2 FROM quarterly_data
UNION ALL
SELECT region, year, 'Q3', Q3 FROM quarterly_data
UNION ALL
SELECT region, year, 'Q4', Q4 FROM quarterly_data
) unpivoted
ORDER BY region, quarter;
5. JSON 데이터 처리
현대 데이터베이스는 JSON 데이터를 네이티브하게 지원하여 NoSQL의 유연성과 SQL의 강력함을 결합할 수 있습니다.
-- JSON 데이터 테이블 생성
CREATE TABLE product_catalog (
product_id INT PRIMARY KEY,
product_name VARCHAR(200),
specifications JSON,
metadata JSONB -- PostgreSQL의 바이너리 JSON
);
-- JSON 데이터 삽입
INSERT INTO product_catalog VALUES
(1, 'Laptop Pro',
'{"cpu": "Intel i7", "ram": "16GB", "storage": {"type": "SSD", "size": "512GB"}}',
'{"categories": ["Electronics", "Computers"], "tags": ["premium", "business"]}'),
(2, 'Smartphone X',
'{"screen": "6.5 inch", "camera": {"front": "12MP", "rear": "48MP"}, "battery": "4000mAh"}',
'{"categories": ["Electronics", "Mobile"], "tags": ["5G", "flagship"]}');
-- JSON 데이터 쿼리
SELECT
product_name,
specifications->'cpu' AS cpu,
specifications->'storage'->'size' AS storage_size,
jsonb_array_elements_text(metadata->'categories') AS category
FROM product_catalog
WHERE specifications->>'ram' = '16GB';
-- JSON 배열 처리
WITH product_tags AS (
SELECT
product_name,
jsonb_array_elements_text(metadata->'tags') AS tag
FROM product_catalog
)
SELECT
tag,
STRING_AGG(product_name, ', ') AS products
FROM product_tags
GROUP BY tag;
-- JSON 데이터 업데이트
UPDATE product_catalog
SET metadata = jsonb_set(
metadata,
'{price}',
'1299.99'
)
WHERE product_id = 1;
-- JSON 집계
SELECT
json_build_object(
'total_products', COUNT(*),
'products', json_agg(
json_build_object(
'id', product_id,
'name', product_name,
'specs', specifications
)
)
) AS product_summary
FROM product_catalog;
6. 테이블 파티셔닝과 분할
대용량 테이블을 효율적으로 관리하기 위한 파티셔닝 전략을 구현합니다.
-- 범위 파티셔닝 (Range Partitioning)
CREATE TABLE sales_partitioned (
sale_id BIGINT,
sale_date DATE,
product_id INT,
amount DECIMAL(10, 2),
region VARCHAR(50)
) PARTITION BY RANGE (sale_date);
-- 월별 파티션 생성
CREATE TABLE sales_2024_01 PARTITION OF sales_partitioned
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE sales_2024_02 PARTITION OF sales_partitioned
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
CREATE TABLE sales_2024_03 PARTITION OF sales_partitioned
FOR VALUES FROM ('2024-03-01') TO ('2024-04-01');
-- 리스트 파티셔닝 (List Partitioning)
CREATE TABLE sales_by_region (
sale_id BIGINT,
sale_date DATE,
amount DECIMAL(10, 2),
region VARCHAR(50)
) PARTITION BY LIST (region);
CREATE TABLE sales_north PARTITION OF sales_by_region
FOR VALUES IN ('North', 'Northeast', 'Northwest');
CREATE TABLE sales_south PARTITION OF sales_by_region
FOR VALUES IN ('South', 'Southeast', 'Southwest');
-- 해시 파티셔닝 (Hash Partitioning)
CREATE TABLE customer_data (
customer_id BIGINT,
customer_name VARCHAR(200),
registration_date DATE,
total_purchases DECIMAL(12, 2)
) PARTITION BY HASH (customer_id);
CREATE TABLE customer_data_p0 PARTITION OF customer_data
FOR VALUES WITH (modulus 4, remainder 0);
CREATE TABLE customer_data_p1 PARTITION OF customer_data
FOR VALUES WITH (modulus 4, remainder 1);
-- 파티션 관리
ALTER TABLE sales_partitioned DETACH PARTITION sales_2024_01;
ALTER TABLE sales_partitioned ATTACH PARTITION sales_2024_01
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
7. 쿼리 최적화 기법
성능을 극대화하기 위한 고급 쿼리 최적화 기법들을 다룹니다.
-- 실행 계획 분석
EXPLAIN ANALYZE
SELECT
e.name,
d.department_name,
e.salary
FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE e.salary > 50000;
-- 쿼리 힌트 사용 (Oracle/SQL Server)
SELECT /*+ INDEX(e emp_salary_idx) PARALLEL(e, 4) */
e.employee_id,
e.name,
e.salary
FROM employees e
WHERE e.salary BETWEEN 50000 AND 100000;
-- EXISTS vs IN 최적화
-- EXISTS (일반적으로 더 빠름)
SELECT e.*
FROM employees e
WHERE EXISTS (
SELECT 1
FROM sales s
WHERE s.employee_id = e.employee_id
AND s.amount > 10000
);
-- 서브쿼리 최적화 (JOIN으로 변환)
-- 비효율적인 서브쿼리
SELECT
e.name,
(SELECT COUNT(*) FROM sales s WHERE s.employee_id = e.employee_id) AS sale_count
FROM employees e;
-- 최적화된 JOIN
SELECT
e.name,
COUNT(s.sale_id) AS sale_count
FROM employees e
LEFT JOIN sales s ON e.employee_id = s.employee_id
GROUP BY e.employee_id, e.name;
-- MATERIALIZED VIEW 활용
CREATE MATERIALIZED VIEW mv_monthly_sales_summary AS
SELECT
DATE_TRUNC('month', sale_date) AS month,
region,
product_id,
COUNT(*) AS sale_count,
SUM(amount) AS total_amount,
AVG(amount) AS avg_amount
FROM sales
GROUP BY DATE_TRUNC('month', sale_date), region, product_id;
CREATE INDEX idx_mv_month_region ON mv_monthly_sales_summary(month, region);
-- 정기적으로 MATERIALIZED VIEW 갱신
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_monthly_sales_summary;
8. 고급 인덱싱 전략
효과적인 인덱스 설계와 관리를 통해 쿼리 성능을 극대화합니다.
-- 복합 인덱스 (Composite Index)
CREATE INDEX idx_emp_dept_salary
ON employees(department_id, salary DESC, hire_date);
-- 커버링 인덱스 (Covering Index)
CREATE INDEX idx_sales_covering
ON sales(employee_id, sale_date)
INCLUDE (amount, product_id);
-- 부분 인덱스 (Partial Index)
CREATE INDEX idx_active_employees
ON employees(salary)
WHERE hire_date >= '2023-01-01';
-- 함수 기반 인덱스 (Function-based Index)
CREATE INDEX idx_upper_name
ON employees(UPPER(name));
-- GIN 인덱스 (JSON/Array 데이터)
CREATE INDEX idx_product_metadata
ON product_catalog USING GIN (metadata);
-- 전문 검색 인덱스
CREATE INDEX idx_product_search
ON products USING GIN (to_tsvector('english', product_description));
-- 인덱스 사용 통계 확인
SELECT
schemaname,
tablename,
indexname,
idx_scan AS index_scans,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY idx_scan DESC;
-- 중복/미사용 인덱스 찾기
WITH index_usage AS (
SELECT
schemaname,
tablename,
indexname,
idx_scan,
pg_relation_size(indexrelid) AS index_size
FROM pg_stat_user_indexes
)
SELECT
schemaname,
tablename,
indexname,
idx_scan,
pg_size_pretty(index_size) AS size,
CASE
WHEN idx_scan = 0 THEN 'UNUSED'
WHEN idx_scan < 100 THEN 'RARELY USED'
ELSE 'ACTIVE'
END AS usage_status
FROM index_usage
ORDER BY idx_scan;
9. 트랜잭션과 격리 수준
데이터 일관성과 동시성을 관리하기 위한 트랜잭션 제어 기법을 다룹니다.
-- 트랜잭션 격리 수준 설정
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
BEGIN TRANSACTION;
-- SAVEPOINT 사용
SAVEPOINT before_update;
UPDATE employees
SET salary = salary * 1.1
WHERE performance_score >= 4.0;
-- 조건부 롤백
SELECT COUNT(*) INTO @affected_rows
FROM employees
WHERE performance_score >= 4.0;
IF @affected_rows > 100 THEN
ROLLBACK TO SAVEPOINT before_update;
ELSE
RELEASE SAVEPOINT before_update;
END IF;
COMMIT;
-- 데드락 감지 및 처리
DO $$
DECLARE
v_retries INT := 0;
v_max_retries INT := 3;
BEGIN
WHILE v_retries < v_max_retries LOOP
BEGIN
-- 트랜잭션 로직
UPDATE accounts SET balance = balance - 100 WHERE account_id = 1;
UPDATE accounts SET balance = balance + 100 WHERE account_id = 2;
EXIT; -- 성공시 루프 종료
EXCEPTION
WHEN deadlock_detected THEN
v_retries := v_retries + 1;
RAISE NOTICE 'Deadlock detected, retry % of %', v_retries, v_max_retries;
PERFORM pg_sleep(0.1 * v_retries);
END;
END LOOP;
END $$;
-- 낙관적 잠금 (Optimistic Locking)
UPDATE products
SET
stock_quantity = stock_quantity - 10,
version = version + 1,
last_modified = CURRENT_TIMESTAMP
WHERE
product_id = 123
AND version = 5 -- 버전 체크
RETURNING *;
10. 저장 프로시저와 함수
복잡한 비즈니스 로직을 데이터베이스 레벨에서 구현합니다.
-- 저장 프로시저 생성
CREATE OR REPLACE PROCEDURE process_monthly_bonuses(
p_month DATE,
p_bonus_rate DECIMAL
)
LANGUAGE plpgsql
AS $$
DECLARE
v_total_bonus DECIMAL(12, 2) := 0;
v_employee_count INT := 0;
v_employee RECORD;
BEGIN
-- 트랜잭션 시작
BEGIN
-- 각 직원의 보너스 계산
FOR v_employee IN
SELECT
e.employee_id,
e.salary,
e.performance_score,
SUM(s.amount) AS total_sales
FROM employees e
LEFT JOIN sales s ON e.employee_id = s.employee_id
AND DATE_TRUNC('month', s.sale_date) = DATE_TRUNC('month', p_month)
GROUP BY e.employee_id, e.salary, e.performance_score
LOOP
DECLARE
v_bonus DECIMAL(10, 2);
BEGIN
-- 보너스 계산 로직
v_bonus := v_employee.salary * p_bonus_rate;
IF v_employee.performance_score >= 4.5 THEN
v_bonus := v_bonus * 1.5;
ELSIF v_employee.performance_score >= 3.5 THEN
v_bonus := v_bonus * 1.2;
END IF;
IF v_employee.total_sales > 1000000 THEN
v_bonus := v_bonus + 5000;
END IF;
-- 보너스 기록
INSERT INTO bonus_payments (
employee_id,
bonus_month,
bonus_amount,
processed_date
) VALUES (
v_employee.employee_id,
p_month,
v_bonus,
CURRENT_TIMESTAMP
);
v_total_bonus := v_total_bonus + v_bonus;
v_employee_count := v_employee_count + 1;
END;
END LOOP;
-- 로그 기록
INSERT INTO process_log (
process_name,
process_date,
parameters,
result
) VALUES (
'process_monthly_bonuses',
CURRENT_TIMESTAMP,
json_build_object('month', p_month, 'rate', p_bonus_rate),
json_build_object('total_bonus', v_total_bonus, 'employee_count', v_employee_count)
);
RAISE NOTICE 'Processed % employees with total bonus: %', v_employee_count, v_total_bonus;
EXCEPTION
WHEN OTHERS THEN
RAISE EXCEPTION 'Error processing bonuses: %', SQLERRM;
ROLLBACK;
END;
END;
$$;
-- 테이블 반환 함수
CREATE OR REPLACE FUNCTION get_department_statistics(
p_start_date DATE,
p_end_date DATE
)
RETURNS TABLE (
department_name VARCHAR,
total_employees BIGINT,
avg_salary NUMERIC,
total_sales NUMERIC,
top_performer VARCHAR
)
LANGUAGE plpgsql
AS $$
BEGIN
RETURN QUERY
WITH dept_sales AS (
SELECT
e.department_id,
SUM(s.amount) AS total_sales
FROM sales s
JOIN employees e ON s.employee_id = e.employee_id
WHERE s.sale_date BETWEEN p_start_date AND p_end_date
GROUP BY e.department_id
),
top_performers AS (
SELECT DISTINCT ON (department_id)
department_id,
name AS top_performer
FROM employees
ORDER BY department_id, performance_score DESC
)
SELECT
d.department_name,
COUNT(DISTINCT e.employee_id),
ROUND(AVG(e.salary), 2),
COALESCE(ds.total_sales, 0),
tp.top_performer
FROM departments d
LEFT JOIN employees e ON d.department_id = e.department_id
LEFT JOIN dept_sales ds ON d.department_id = ds.department_id
LEFT JOIN top_performers tp ON d.department_id = tp.department_id
GROUP BY d.department_id, d.department_name, ds.total_sales, tp.top_performer;
END;
$$;
11. 트리거와 이벤트
데이터 변경을 자동으로 감지하고 처리하는 트리거를 구현합니다.
-- 감사(Audit) 트리거
CREATE TABLE audit_log (
audit_id SERIAL PRIMARY KEY,
table_name VARCHAR(100),
operation VARCHAR(10),
user_name VARCHAR(100),
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
old_data JSONB,
new_data JSONB
);
CREATE OR REPLACE FUNCTION audit_trigger_function()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
IF TG_OP = 'INSERT' THEN
INSERT INTO audit_log (table_name, operation, user_name, new_data)
VALUES (TG_TABLE_NAME, TG_OP, current_user, to_jsonb(NEW));
RETURN NEW;
ELSIF TG_OP = 'UPDATE' THEN
INSERT INTO audit_log (table_name, operation, user_name, old_data, new_data)
VALUES (TG_TABLE_NAME, TG_OP, current_user, to_jsonb(OLD), to_jsonb(NEW));
RETURN NEW;
ELSIF TG_OP = 'DELETE' THEN
INSERT INTO audit_log (table_name, operation, user_name, old_data)
VALUES (TG_TABLE_NAME, TG_OP, current_user, to_jsonb(OLD));
RETURN OLD;
END IF;
END;
$$;
CREATE TRIGGER employees_audit_trigger
AFTER INSERT OR UPDATE OR DELETE ON employees
FOR EACH ROW
EXECUTE FUNCTION audit_trigger_function();
-- 비즈니스 규칙 트리거
CREATE OR REPLACE FUNCTION validate_salary_increase()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
v_increase_percentage DECIMAL;
BEGIN
IF NEW.salary > OLD.salary THEN
v_increase_percentage := ((NEW.salary - OLD.salary) / OLD.salary) * 100;
IF v_increase_percentage > 50 THEN
RAISE EXCEPTION 'Salary increase cannot exceed 50%';
END IF;
IF NEW.performance_score < 3.0 AND v_increase_percentage > 10 THEN
RAISE EXCEPTION 'Employees with performance score < 3.0 cannot receive > 10% raise';
END IF;
END IF;
RETURN NEW;
END;
$$;
CREATE TRIGGER validate_salary_trigger
BEFORE UPDATE ON employees
FOR EACH ROW
WHEN (NEW.salary <> OLD.salary)
EXECUTE FUNCTION validate_salary_increase();
-- 이벤트 스케줄러 (MySQL)
CREATE EVENT daily_cleanup
ON SCHEDULE EVERY 1 DAY
STARTS '2024-01-01 02:00:00'
DO
BEGIN
-- 30일 이상 된 로그 삭제
DELETE FROM audit_log
WHERE timestamp < DATE_SUB(NOW(), INTERVAL 30 DAY);
-- 통계 테이블 업데이트
CALL update_daily_statistics();
END;
12. 실전 예제: 종합 데이터 분석 시스템
지금까지 배운 모든 고급 SQL 기법을 활용한 종합적인 데이터 분석 시스템을 구현합니다.
-- 종합 분석 대시보드 쿼리
WITH
-- 1. 기간별 매출 트렌드
sales_trend AS (
SELECT
DATE_TRUNC('month', sale_date) AS month,
COUNT(DISTINCT customer_id) AS unique_customers,
COUNT(*) AS transaction_count,
SUM(amount) AS total_revenue,
AVG(amount) AS avg_transaction_value,
SUM(SUM(amount)) OVER (
ORDER BY DATE_TRUNC('month', sale_date)
ROWS UNBOUNDED PRECEDING
) AS cumulative_revenue
FROM sales
WHERE sale_date >= DATE_TRUNC('year', CURRENT_DATE)
GROUP BY DATE_TRUNC('month', sale_date)
),
-- 2. 직원 성과 분석
employee_performance AS (
SELECT
e.employee_id,
e.name,
e.department_id,
COUNT(s.sale_id) AS sales_count,
SUM(s.amount) AS total_sales,
AVG(s.amount) AS avg_sale_amount,
RANK() OVER (
PARTITION BY e.department_id
ORDER BY SUM(s.amount) DESC
) AS dept_rank,
PERCENTILE_CONT(0.5) WITHIN GROUP (
ORDER BY s.amount
) AS median_sale
FROM employees e
LEFT JOIN sales s ON e.employee_id = s.employee_id
AND s.sale_date >= DATE_TRUNC('quarter', CURRENT_DATE)
GROUP BY e.employee_id, e.name, e.department_id
),
-- 3. 제품 카테고리 분석
product_analysis AS (
SELECT
p.category,
COUNT(DISTINCT s.product_id) AS products_sold,
SUM(s.quantity) AS units_sold,
SUM(s.amount) AS revenue,
SUM(s.amount) / SUM(SUM(s.amount)) OVER () * 100 AS revenue_percentage,
ARRAY_AGG(
DISTINCT p.product_name
ORDER BY p.product_name
LIMIT 5
) AS top_products
FROM sales s
JOIN products p ON s.product_id = p.product_id
GROUP BY p.category
),
-- 4. 고객 세분화 (RFM 분석)
customer_rfm AS (
SELECT
customer_id,
MAX(sale_date) AS last_purchase_date,
CURRENT_DATE - MAX(sale_date) AS recency_days,
COUNT(*) AS frequency,
SUM(amount) AS monetary_value,
NTILE(5) OVER (ORDER BY CURRENT_DATE - MAX(sale_date) DESC) AS r_score,
NTILE(5) OVER (ORDER BY COUNT(*)) AS f_score,
NTILE(5) OVER (ORDER BY SUM(amount)) AS m_score
FROM sales
GROUP BY customer_id
),
-- 5. 이상치 탐지
anomaly_detection AS (
SELECT
sale_date,
amount,
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 30 PRECEDING AND 1 PRECEDING
) AS moving_avg,
STDDEV(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 30 PRECEDING AND 1 PRECEDING
) AS moving_stddev
FROM sales
)
-- 최종 대시보드 출력
SELECT
json_build_object(
'dashboard_generated', CURRENT_TIMESTAMP,
'sales_summary', (
SELECT json_build_object(
'current_month_revenue', total_revenue,
'ytd_revenue', cumulative_revenue,
'avg_transaction', avg_transaction_value,
'unique_customers', unique_customers
)
FROM sales_trend
WHERE month = DATE_TRUNC('month', CURRENT_DATE)
),
'top_performers', (
SELECT json_agg(
json_build_object(
'name', name,
'sales', total_sales,
'rank', dept_rank
)
)
FROM employee_performance
WHERE dept_rank <= 3
),
'category_breakdown', (
SELECT json_agg(
json_build_object(
'category', category,
'revenue', revenue,
'percentage', ROUND(revenue_percentage, 2)
)
)
FROM product_analysis
),
'customer_segments', (
SELECT json_build_object(
'champions', COUNT(*) FILTER (WHERE r_score >= 4 AND f_score >= 4),
'at_risk', COUNT(*) FILTER (WHERE r_score <= 2 AND f_score >= 3),
'new_customers', COUNT(*) FILTER (WHERE r_score >= 4 AND f_score = 1)
)
FROM customer_rfm
),
'anomalies_detected', (
SELECT COUNT(*)
FROM anomaly_detection
WHERE ABS(amount - moving_avg) > 3 * moving_stddev
AND moving_stddev IS NOT NULL
)
) AS dashboard_data;
마무리
이 튜토리얼에서는 SQL의 고급 기능들을 실제 예제와 함께 살펴보았습니다. 윈도우 함수, CTE, 재귀 쿼리, JSON 처리, 파티셔닝, 최적화 기법, 저장 프로시저, 트리거 등 현대 데이터베이스 시스템의 강력한 기능들을 활용하면 복잡한 비즈니스 요구사항을 효율적으로 구현할 수 있습니다.
- 각 DBMS(PostgreSQL, MySQL, Oracle, SQL Server)의 고유 기능을 깊이 학습하세요
- 실행 계획을 읽고 분석하는 능력을 기르세요
- 대용량 데이터셋으로 성능 테스트를 수행하며 최적화 기법을 연습하세요
- NoSQL과 SQL을 결합한 하이브리드 접근법을 탐구하세요
- 데이터베이스 설계 패턴과 안티패턴을 학습하세요