SQL 고급 쿼리 튜토리얼: 전문가를 위한 완벽 가이드

SQL 고급 쿼리 튜토리얼: 전문가를 위한 완벽 가이드

SQL 고급 쿼리 튜토리얼: 전문가를 위한 완벽 가이드

SQL 고급 과정 소개

SQL 고급 과정은 기본적인 SELECT, INSERT, UPDATE, DELETE를 넘어서 데이터베이스의 강력한 기능들을 활용하는 방법을 다룹니다. 복잡한 비즈니스 로직을 구현하고, 대용량 데이터를 효율적으로 처리하며, 데이터베이스 성능을 최적화하는 기술들을 학습합니다. 이 튜토리얼은 실무에서 바로 활용할 수 있는 고급 SQL 기법들을 실제 예제와 함께 제공합니다.

실습 환경 설정

이 튜토리얼의 예제들은 PostgreSQL 15+ 또는 MySQL 8.0+에서 테스트되었습니다. 일부 기능은 특정 DBMS에서만 지원될 수 있으므로, 사용하는 데이터베이스 시스템의 문서를 참고하시기 바랍니다.

-- Create the practice database and tables used throughout the exercises
-- NOTE(review): CREATE DATABASE + USE is MySQL syntax; in PostgreSQL you
-- reconnect to the new database instead (e.g. \c in psql) -- confirm target DBMS
CREATE DATABASE advanced_sql_tutorial;
USE advanced_sql_tutorial;

-- Employee table
-- NOTE(review): this tutorial DDL omits NOT NULL and FOREIGN KEY constraints;
-- production schemas should declare them explicitly with named constraints
CREATE TABLE employees (
    employee_id INT PRIMARY KEY,
    name VARCHAR(100),
    department_id INT,               -- logical FK to departments.department_id
    manager_id INT,                  -- self-reference to employees.employee_id; NULL marks the top of the hierarchy
    salary DECIMAL(10, 2),
    hire_date DATE,
    performance_score DECIMAL(3, 2)  -- later examples treat >= 4.0 as "high performer"
);

-- Department table
CREATE TABLE departments (
    department_id INT PRIMARY KEY,
    department_name VARCHAR(100),
    location VARCHAR(100),
    budget DECIMAL(12, 2)
);

-- Sales table
CREATE TABLE sales (
    sale_id INT PRIMARY KEY,
    product_id INT,                  -- logical FK to a products table (not created here)
    customer_id INT,
    employee_id INT,                 -- logical FK to employees.employee_id
    sale_date DATE,
    quantity INT,
    amount DECIMAL(10, 2),           -- sale total in currency units
    region VARCHAR(50)
);

1. 윈도우 함수 (Window Functions)

순위 함수 (Ranking Functions)

윈도우 함수는 행 집합에 대해 계산을 수행하면서도 GROUP BY처럼 행을 그룹화하지 않습니다. 각 행은 별도로 유지되면서 집계 결과를 함께 볼 수 있습니다.

-- Compare ROW_NUMBER, RANK and DENSE_RANK on the same ordering
-- ROW_NUMBER: gapless unique sequence, arbitrary order among ties
-- RANK: ties share a rank and leave gaps; DENSE_RANK: ties share a rank, no gaps
-- NTILE(4): assigns each row to one of four roughly equal salary quartiles
SELECT 
    employee_id,
    name,
    department_id,
    salary,
    ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num,
    RANK() OVER (ORDER BY salary DESC) AS rank_num,
    DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank_num,
    NTILE(4) OVER (ORDER BY salary DESC) AS quartile
FROM employees
ORDER BY salary DESC;

-- Salary rank within each department
-- PARTITION BY restarts the ranking for every department;
-- PERCENT_RANK yields 0..1 relative position within the partition
SELECT 
    e.name,
    d.department_name,
    e.salary,
    RANK() OVER (PARTITION BY e.department_id ORDER BY e.salary DESC) AS dept_salary_rank,
    PERCENT_RANK() OVER (PARTITION BY e.department_id ORDER BY e.salary) AS percentile
FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE e.salary IS NOT NULL;

분석 함수 (Analytic Functions)

분석 함수는 현재 행을 기준으로 다른 행들의 값을 참조하거나 집계를 수행할 수 있습니다.

-- Month-over-month comparison with LAG and LEAD
-- monthly_sales: sales aggregated to calendar months
WITH monthly_sales AS (
    SELECT 
        DATE_TRUNC('month', sale_date) AS month,
        SUM(amount) AS total_sales
    FROM sales
    GROUP BY DATE_TRUNC('month', sale_date)
)
SELECT 
    month,
    total_sales,
    LAG(total_sales, 1) OVER (ORDER BY month) AS prev_month_sales,
    LEAD(total_sales, 1) OVER (ORDER BY month) AS next_month_sales,
    total_sales - LAG(total_sales, 1) OVER (ORDER BY month) AS month_over_month_change,
    -- NULLIF guards the division: a previous month with zero sales would
    -- otherwise raise a division-by-zero error; the first month is NULL
    -- anyway because LAG has no preceding row
    ROUND(
        (total_sales - LAG(total_sales, 1) OVER (ORDER BY month)) / 
        NULLIF(LAG(total_sales, 1) OVER (ORDER BY month), 0) * 100, 
        2
    ) AS growth_rate_percent
FROM monthly_sales
ORDER BY month;

-- Running total and moving averages
SELECT 
    sale_date,
    amount,
    -- Explicit ROWS frame: the default RANGE frame treats rows with the same
    -- sale_date as peers and would assign them all the peer-group total,
    -- producing a stepped (incorrect) running sum on tied dates
    SUM(amount) OVER (
        ORDER BY sale_date
        ROWS UNBOUNDED PRECEDING
    ) AS cumulative_sales,
    -- 7-row trailing window (current row + 6 before), not strictly 7 calendar days
    AVG(amount) OVER (
        ORDER BY sale_date 
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS moving_avg_7days,
    -- RANGE with an INTERVAL gives a true 30-calendar-day trailing count
    COUNT(*) OVER (
        ORDER BY sale_date 
        RANGE BETWEEN INTERVAL '30 days' PRECEDING AND CURRENT ROW
    ) AS sales_last_30days
FROM sales
WHERE sale_date >= '2024-01-01'
ORDER BY sale_date;

2. CTE (Common Table Expressions)

CTE는 쿼리 내에서 임시 결과 집합을 정의하여 복잡한 쿼리를 더 읽기 쉽고 관리하기 쉽게 만듭니다.

-- Basic CTE usage: two independent CTEs joined in the final SELECT
-- high_performers: employees with a performance score of 4.0 or higher
WITH high_performers AS (
    SELECT 
        employee_id,
        name,
        performance_score
    FROM employees
    WHERE performance_score >= 4.0
),
-- department_stats: average salary and headcount per department
department_stats AS (
    SELECT 
        department_id,
        AVG(salary) AS avg_salary,
        COUNT(*) AS emp_count
    FROM employees
    GROUP BY department_id
)
SELECT 
    hp.name,
    hp.performance_score,
    e.salary,
    ds.avg_salary AS dept_avg_salary,
    e.salary - ds.avg_salary AS salary_diff_from_avg
FROM high_performers hp
JOIN employees e ON hp.employee_id = e.employee_id
JOIN department_stats ds ON e.department_id = ds.department_id
ORDER BY hp.performance_score DESC;

-- Multi-level (chained) CTEs: each step consumes the previous one
-- quarterly_sales: total sales per year/quarter/region
WITH quarterly_sales AS (
    SELECT 
        EXTRACT(YEAR FROM sale_date) AS year,
        EXTRACT(QUARTER FROM sale_date) AS quarter,
        region,
        SUM(amount) AS total_sales
    FROM sales
    GROUP BY 
        EXTRACT(YEAR FROM sale_date),
        EXTRACT(QUARTER FROM sale_date),
        region
),
-- ranked_regions: rank regions by sales within each year/quarter
ranked_regions AS (
    SELECT 
        year,
        quarter,
        region,
        total_sales,
        RANK() OVER (
            PARTITION BY year, quarter 
            ORDER BY total_sales DESC
        ) AS region_rank
    FROM quarterly_sales
),
-- top_regions: keep only the top three regions per quarter
top_regions AS (
    SELECT *
    FROM ranked_regions
    WHERE region_rank <= 3
)
-- Collapse the top three into one comma-separated summary column per quarter
SELECT 
    year,
    quarter,
    STRING_AGG(
        region || ': $' || ROUND(total_sales, 2), 
        ', ' 
        ORDER BY region_rank
    ) AS top_3_regions
FROM top_regions
GROUP BY year, quarter
ORDER BY year, quarter;

3. 재귀 쿼리 (Recursive Queries)

재귀 CTE를 사용하면 계층적 데이터나 그래프 구조를 탐색할 수 있습니다.

-- Walk the organization hierarchy with a recursive CTE
WITH RECURSIVE org_hierarchy AS (
    -- Anchor member: top-level managers (no manager_id)
    SELECT 
        employee_id,
        name,
        manager_id,
        1 AS level,
        name AS path,
        CAST(employee_id AS VARCHAR(1000)) AS hierarchy_path
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- Recursive member: attach each employee under their manager, extending
    -- both the human-readable path and the sortable id path
    -- NOTE(review): assumes the manager graph is acyclic; a cycle would recurse
    -- until the VARCHAR(1000) path overflows or the engine's depth limit trips
    SELECT 
        e.employee_id,
        e.name,
        e.manager_id,
        oh.level + 1,
        oh.path || ' > ' || e.name,
        oh.hierarchy_path || '/' || CAST(e.employee_id AS VARCHAR(1000))
    FROM employees e
    INNER JOIN org_hierarchy oh ON e.manager_id = oh.employee_id
)
-- Indent two spaces per level to render a text org chart
SELECT 
    level,
    REPEAT('  ', level - 1) || name AS org_chart,
    path AS reporting_line
FROM org_hierarchy
ORDER BY hierarchy_path;

-- Generate a date sequence with a recursive CTE (calendar spine for the join)
WITH RECURSIVE date_series AS (
    SELECT 
        DATE '2024-01-01' AS date
    
    UNION ALL
    
    -- Cast back to DATE: in PostgreSQL date + interval yields a timestamp,
    -- which would make the recursive term's column type disagree with the anchor
    SELECT 
        (date + INTERVAL '1 day')::date
    FROM date_series
    WHERE date < DATE '2024-12-31'
)
SELECT 
    ds.date,
    COUNT(s.sale_id) AS sales_count,           -- COUNT never returns NULL; no COALESCE needed
    COALESCE(SUM(s.amount), 0) AS total_sales  -- SUM over zero matched rows is NULL, so default to 0
FROM date_series ds
LEFT JOIN sales s ON ds.date = s.sale_date
GROUP BY ds.date
ORDER BY ds.date;

4. PIVOT과 UNPIVOT

PIVOT은 행 데이터를 열로 변환하고, UNPIVOT은 열 데이터를 행으로 변환합니다.

-- PIVOT implemented portably with CASE WHEN inside SUM
-- Each quarter becomes its own output column; ELSE 0 keeps the sums non-NULL
SELECT 
    region,
    SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 1 
        THEN amount ELSE 0 END) AS Q1,
    SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 2 
        THEN amount ELSE 0 END) AS Q2,
    SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 3 
        THEN amount ELSE 0 END) AS Q3,
    SUM(CASE WHEN EXTRACT(QUARTER FROM sale_date) = 4 
        THEN amount ELSE 0 END) AS Q4,
    SUM(amount) AS total_year
FROM sales
WHERE EXTRACT(YEAR FROM sale_date) = 2024
GROUP BY region
ORDER BY total_year DESC;

-- Dynamic PIVOT using PostgreSQL's crosstab (tablefunc extension)
CREATE EXTENSION IF NOT EXISTS tablefunc;

-- First $$ query supplies (row, category, value); second supplies the category list
-- NOTE(review): the output column list below hardcodes years 2020-2024 and must be
-- kept in sync with the category query's results; also COUNT(*) yields BIGINT in
-- PostgreSQL, so the INT value columns may need a ::int cast in the source query
SELECT * FROM crosstab(
    $$SELECT 
        department_name,
        EXTRACT(YEAR FROM hire_date)::text AS year,
        COUNT(*) AS emp_count
    FROM employees e
    JOIN departments d ON e.department_id = d.department_id
    GROUP BY department_name, EXTRACT(YEAR FROM hire_date)
    ORDER BY 1, 2$$,
    $$SELECT DISTINCT EXTRACT(YEAR FROM hire_date)::text 
    FROM employees ORDER BY 1$$
) AS pivot_table(
    department VARCHAR(100),
    year_2020 INT,
    year_2021 INT,
    year_2022 INT,
    year_2023 INT,
    year_2024 INT
);

-- UNPIVOT implemented with UNION ALL (UNION ALL: branches are disjoint, no dedup needed)
-- quarterly_data: four fixed regions with hardcoded quarterly figures for the demo
WITH quarterly_data AS (
    SELECT 
        region,
        2024 AS year,
        100000 AS Q1,
        120000 AS Q2,
        110000 AS Q3,
        130000 AS Q4
    FROM (VALUES ('North'), ('South'), ('East'), ('West')) AS t(region)
)
-- One SELECT per quarter column turns the columns back into rows
SELECT 
    region,
    year,
    quarter,
    sales
FROM (
    SELECT region, year, 'Q1' AS quarter, Q1 AS sales FROM quarterly_data
    UNION ALL
    SELECT region, year, 'Q2', Q2 FROM quarterly_data
    UNION ALL
    SELECT region, year, 'Q3', Q3 FROM quarterly_data
    UNION ALL
    SELECT region, year, 'Q4', Q4 FROM quarterly_data
) unpivoted
ORDER BY region, quarter;

5. JSON 데이터 처리

현대 데이터베이스는 JSON 데이터를 네이티브하게 지원하여 NoSQL의 유연성과 SQL의 강력함을 결합할 수 있습니다.

-- Table holding semi-structured product data
CREATE TABLE product_catalog (
    product_id INT PRIMARY KEY,
    product_name VARCHAR(200),
    specifications JSON,
    metadata JSONB  -- PostgreSQL binary JSON: indexable (GIN) and faster to query than plain JSON
);

-- Insert JSON sample data
-- Explicit column list: an INSERT without one breaks silently if the table
-- definition ever gains or reorders columns
INSERT INTO product_catalog (product_id, product_name, specifications, metadata) VALUES
(1, 'Laptop Pro', 
 '{"cpu": "Intel i7", "ram": "16GB", "storage": {"type": "SSD", "size": "512GB"}}',
 '{"categories": ["Electronics", "Computers"], "tags": ["premium", "business"]}'),
(2, 'Smartphone X',
 '{"screen": "6.5 inch", "camera": {"front": "12MP", "rear": "48MP"}, "battery": "4000mAh"}',
 '{"categories": ["Electronics", "Mobile"], "tags": ["5G", "flagship"]}');

-- Query JSON data
-- -> returns JSON, ->> returns text; chained -> walks nested objects
SELECT 
    product_name,
    specifications->'cpu' AS cpu,
    specifications->'storage'->'size' AS storage_size,
    jsonb_array_elements_text(metadata->'categories') AS category  -- one output row per category element
FROM product_catalog
WHERE specifications->>'ram' = '16GB';

-- Expand a JSONB array into rows, then group products by tag
WITH product_tags AS (
    SELECT 
        product_name,
        jsonb_array_elements_text(metadata->'tags') AS tag
    FROM product_catalog
)
SELECT 
    tag,
    STRING_AGG(product_name, ', ') AS products
FROM product_tags
GROUP BY tag;

-- Update JSON data: jsonb_set adds or overwrites the top-level "price" key
UPDATE product_catalog
SET metadata = jsonb_set(
    metadata,
    '{price}',
    '1299.99'
)
WHERE product_id = 1;

-- Aggregate the whole catalog into a single JSON document
SELECT 
    json_build_object(
        'total_products', COUNT(*),
        'products', json_agg(
            json_build_object(
                'id', product_id,
                'name', product_name,
                'specs', specifications
            )
        )
    ) AS product_summary
FROM product_catalog;

6. 테이블 파티셔닝과 분할

대용량 테이블을 효율적으로 관리하기 위한 파티셔닝 전략을 구현합니다.

-- Range partitioning: rows are routed to partitions by sale_date
CREATE TABLE sales_partitioned (
    sale_id BIGINT,
    sale_date DATE,
    product_id INT,
    amount DECIMAL(10, 2),
    region VARCHAR(50)
) PARTITION BY RANGE (sale_date);

-- Monthly partitions; the FROM bound is inclusive, the TO bound exclusive
CREATE TABLE sales_2024_01 PARTITION OF sales_partitioned
    FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

CREATE TABLE sales_2024_02 PARTITION OF sales_partitioned
    FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');

CREATE TABLE sales_2024_03 PARTITION OF sales_partitioned
    FOR VALUES FROM ('2024-03-01') TO ('2024-04-01');

-- List partitioning: rows are routed by discrete region values
CREATE TABLE sales_by_region (
    sale_id BIGINT,
    sale_date DATE,
    amount DECIMAL(10, 2),
    region VARCHAR(50)
) PARTITION BY LIST (region);

CREATE TABLE sales_north PARTITION OF sales_by_region
    FOR VALUES IN ('North', 'Northeast', 'Northwest');

CREATE TABLE sales_south PARTITION OF sales_by_region
    FOR VALUES IN ('South', 'Southeast', 'Southwest');

-- Hash partitioning: spreads rows evenly when there is no natural range/list key
CREATE TABLE customer_data (
    customer_id BIGINT,
    customer_name VARCHAR(200),
    registration_date DATE,
    total_purchases DECIMAL(12, 2)
) PARTITION BY HASH (customer_id);

CREATE TABLE customer_data_p0 PARTITION OF customer_data
    FOR VALUES WITH (modulus 4, remainder 0);

CREATE TABLE customer_data_p1 PARTITION OF customer_data
    FOR VALUES WITH (modulus 4, remainder 1);

-- Partition maintenance: detach (e.g. to archive a month) and re-attach
ALTER TABLE sales_partitioned DETACH PARTITION sales_2024_01;
ALTER TABLE sales_partitioned ATTACH PARTITION sales_2024_01
    FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

7. 쿼리 최적화 기법

성능을 극대화하기 위한 고급 쿼리 최적화 기법들을 다룹니다.

-- Inspect the actual execution plan (EXPLAIN ANALYZE executes the query)
EXPLAIN ANALYZE
SELECT 
    e.name,
    d.department_name,
    e.salary
FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE e.salary > 50000;

-- Optimizer hints (Oracle/SQL Server style; PostgreSQL ignores hint comments)
SELECT /*+ INDEX(e emp_salary_idx) PARALLEL(e, 4) */
    e.employee_id,
    e.name,
    e.salary
FROM employees e
WHERE e.salary BETWEEN 50000 AND 100000;

-- EXISTS vs IN
-- EXISTS stops at the first matching row and is NULL-safe, unlike NOT IN
SELECT e.*
FROM employees e
WHERE EXISTS (
    SELECT 1
    FROM sales s
    WHERE s.employee_id = e.employee_id
    AND s.amount > 10000
);

-- Rewriting correlated subqueries as joins
-- Inefficient: the scalar subquery runs once per employee row
SELECT 
    e.name,
    (SELECT COUNT(*) FROM sales s WHERE s.employee_id = e.employee_id) AS sale_count
FROM employees e;

-- Optimized: one LEFT JOIN + GROUP BY computes every count in a single pass
SELECT 
    e.name,
    COUNT(s.sale_id) AS sale_count
FROM employees e
LEFT JOIN sales s ON e.employee_id = s.employee_id
GROUP BY e.employee_id, e.name;

-- Precompute an expensive aggregate with a MATERIALIZED VIEW
CREATE MATERIALIZED VIEW mv_monthly_sales_summary AS
SELECT 
    DATE_TRUNC('month', sale_date) AS month,
    region,
    product_id,
    COUNT(*) AS sale_count,
    SUM(amount) AS total_amount,
    AVG(amount) AS avg_amount
FROM sales
GROUP BY DATE_TRUNC('month', sale_date), region, product_id;

CREATE INDEX idx_mv_month_region ON mv_monthly_sales_summary(month, region);

-- Refresh periodically; CONCURRENTLY avoids blocking readers
-- NOTE(review): REFRESH ... CONCURRENTLY requires a UNIQUE index on the view,
-- which this example has not created -- confirm before running
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_monthly_sales_summary;

8. 고급 인덱싱 전략

효과적인 인덱스 설계와 관리를 통해 쿼리 성능을 극대화합니다.

-- Composite index: serves department_id equality plus salary range/sort
CREATE INDEX idx_emp_dept_salary 
ON employees(department_id, salary DESC, hire_date);

-- Covering index: INCLUDE stores extra payload columns for index-only scans (PostgreSQL 11+)
CREATE INDEX idx_sales_covering 
ON sales(employee_id, sale_date) 
INCLUDE (amount, product_id);

-- Partial index: only rows matching the WHERE predicate are indexed (smaller, faster)
CREATE INDEX idx_active_employees 
ON employees(salary) 
WHERE hire_date >= '2023-01-01';

-- Expression (function-based) index: makes WHERE UPPER(name) = ... sargable
CREATE INDEX idx_upper_name 
ON employees(UPPER(name));

-- GIN index for JSONB containment and array-membership queries
CREATE INDEX idx_product_metadata 
ON product_catalog USING GIN (metadata);

-- Full-text search index over a tsvector expression
CREATE INDEX idx_product_search 
ON products USING GIN (to_tsvector('english', product_description));

-- Index usage statistics from PostgreSQL's statistics collector
SELECT 
    schemaname,
    tablename,
    indexname,
    idx_scan AS index_scans,
    idx_tup_read AS tuples_read,
    idx_tup_fetch AS tuples_fetched,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY idx_scan DESC;

-- Find unused or rarely used indexes (candidates for dropping)
WITH index_usage AS (
    SELECT 
        schemaname,
        tablename,
        indexname,
        idx_scan,
        pg_relation_size(indexrelid) AS index_size
    FROM pg_stat_user_indexes
)
SELECT 
    schemaname,
    tablename,
    indexname,
    idx_scan,
    pg_size_pretty(index_size) AS size,
    CASE 
        WHEN idx_scan = 0 THEN 'UNUSED'
        WHEN idx_scan < 100 THEN 'RARELY USED'
        ELSE 'ACTIVE'
    END AS usage_status
FROM index_usage
ORDER BY idx_scan;

9. 트랜잭션과 격리 수준

데이터 일관성과 동시성을 관리하기 위한 트랜잭션 제어 기법을 다룹니다.

-- Set the transaction isolation level (SERIALIZABLE is the strictest level)
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;

BEGIN TRANSACTION;

-- Mark a point the transaction can partially roll back to
SAVEPOINT before_update;

UPDATE employees 
SET salary = salary * 1.1 
WHERE performance_score >= 4.0;

-- Conditional rollback
-- NOTE(review): this fragment is illustrative pseudocode, not runnable as-is --
-- SELECT ... INTO @var is MySQL session-variable syntax, and IF/THEN/END IF is
-- only valid inside a stored program (MySQL) or a PL/pgSQL DO block (PostgreSQL),
-- not as standalone script statements
SELECT COUNT(*) INTO @affected_rows
FROM employees 
WHERE performance_score >= 4.0;

IF @affected_rows > 100 THEN
    ROLLBACK TO SAVEPOINT before_update;
ELSE
    RELEASE SAVEPOINT before_update;
END IF;

COMMIT;

-- Deadlock detection with bounded retries (PL/pgSQL anonymous block)
DO $$
DECLARE
    v_retries INT := 0;
    v_max_retries INT := 3;
BEGIN
    WHILE v_retries < v_max_retries LOOP
        BEGIN
            -- Transaction logic: transfer 100 between two accounts
            UPDATE accounts SET balance = balance - 100 WHERE account_id = 1;
            UPDATE accounts SET balance = balance + 100 WHERE account_id = 2;
            EXIT; -- success: leave the retry loop
        EXCEPTION
            WHEN deadlock_detected THEN
                v_retries := v_retries + 1;
                RAISE NOTICE 'Deadlock detected, retry % of %', v_retries, v_max_retries;
                PERFORM pg_sleep(0.1 * v_retries);  -- linear backoff before retrying
        END;
    END LOOP;
END $$;

-- Optimistic locking: the UPDATE succeeds only if the version is unchanged;
-- zero affected rows means another writer modified the row first
UPDATE products 
SET 
    stock_quantity = stock_quantity - 10,
    version = version + 1,
    last_modified = CURRENT_TIMESTAMP
WHERE 
    product_id = 123 
    AND version = 5  -- expected current version
RETURNING *;

10. 저장 프로시저와 함수

복잡한 비즈니스 로직을 데이터베이스 레벨에서 구현합니다.

-- Stored procedure: calculate and record monthly bonuses
-- p_month:      any date inside the month to process
-- p_bonus_rate: base bonus as a fraction of salary (e.g. 0.05 = 5 percent)
CREATE OR REPLACE PROCEDURE process_monthly_bonuses(
    p_month DATE,
    p_bonus_rate DECIMAL
)
LANGUAGE plpgsql
AS $$
DECLARE
    v_total_bonus DECIMAL(12, 2) := 0;
    v_employee_count INT := 0;
    v_employee RECORD;
BEGIN
    -- Inner block so any error is caught and re-raised with context
    BEGIN
        -- Compute each employee's sales total for the requested month
        FOR v_employee IN 
            SELECT 
                e.employee_id,
                e.salary,
                e.performance_score,
                SUM(s.amount) AS total_sales
            FROM employees e
            LEFT JOIN sales s ON e.employee_id = s.employee_id
                AND DATE_TRUNC('month', s.sale_date) = DATE_TRUNC('month', p_month)
            GROUP BY e.employee_id, e.salary, e.performance_score
        LOOP
            DECLARE
                v_bonus DECIMAL(10, 2);
            BEGIN
                -- Base bonus plus performance multiplier
                v_bonus := v_employee.salary * p_bonus_rate;
                
                IF v_employee.performance_score >= 4.5 THEN
                    v_bonus := v_bonus * 1.5;
                ELSIF v_employee.performance_score >= 3.5 THEN
                    v_bonus := v_bonus * 1.2;
                END IF;
                
                -- Flat incentive for very high sellers
                IF v_employee.total_sales > 1000000 THEN
                    v_bonus := v_bonus + 5000;
                END IF;
                
                -- Record the bonus payment
                INSERT INTO bonus_payments (
                    employee_id, 
                    bonus_month, 
                    bonus_amount, 
                    processed_date
                ) VALUES (
                    v_employee.employee_id,
                    p_month,
                    v_bonus,
                    CURRENT_TIMESTAMP
                );
                
                v_total_bonus := v_total_bonus + v_bonus;
                v_employee_count := v_employee_count + 1;
            END;
        END LOOP;
        
        -- Audit-log the run and its aggregate result
        INSERT INTO process_log (
            process_name,
            process_date,
            parameters,
            result
        ) VALUES (
            'process_monthly_bonuses',
            CURRENT_TIMESTAMP,
            json_build_object('month', p_month, 'rate', p_bonus_rate),
            json_build_object('total_bonus', v_total_bonus, 'employee_count', v_employee_count)
        );
        
        RAISE NOTICE 'Processed % employees with total bonus: %', v_employee_count, v_total_bonus;
        
    EXCEPTION
        WHEN OTHERS THEN
            -- Re-raising aborts the block and PostgreSQL rolls the work back
            -- automatically; the original explicit ROLLBACK after RAISE was
            -- unreachable dead code (and ROLLBACK is not permitted inside a
            -- PL/pgSQL exception handler anyway)
            RAISE EXCEPTION 'Error processing bonuses: %', SQLERRM;
    END;
END;

$$;

-- Table-returning function: per-department statistics for a date range
-- p_start_date / p_end_date: inclusive bounds applied to sale_date
CREATE OR REPLACE FUNCTION get_department_statistics(
    p_start_date DATE,
    p_end_date DATE
)
RETURNS TABLE (
    department_name VARCHAR,
    total_employees BIGINT,
    avg_salary NUMERIC,
    total_sales NUMERIC,
    top_performer VARCHAR
)
LANGUAGE plpgsql
AS $$
BEGIN
    RETURN QUERY
    -- dept_sales: total sales per department inside the requested window
    WITH dept_sales AS (
        SELECT 
            e.department_id,
            SUM(s.amount) AS total_sales
        FROM sales s
        JOIN employees e ON s.employee_id = e.employee_id
        WHERE s.sale_date BETWEEN p_start_date AND p_end_date
        GROUP BY e.department_id
    ),
    -- top_performers: highest performance_score per department
    -- (DISTINCT ON keeps the first row per department after the ORDER BY)
    top_performers AS (
        SELECT DISTINCT ON (department_id)
            department_id,
            name AS top_performer
        FROM employees
        ORDER BY department_id, performance_score DESC
    )
    SELECT 
        d.department_name,
        COUNT(DISTINCT e.employee_id),
        ROUND(AVG(e.salary), 2),
        COALESCE(ds.total_sales, 0),
        tp.top_performer
    FROM departments d
    LEFT JOIN employees e ON d.department_id = e.department_id
    LEFT JOIN dept_sales ds ON d.department_id = ds.department_id
    LEFT JOIN top_performers tp ON d.department_id = tp.department_id
    GROUP BY d.department_id, d.department_name, ds.total_sales, tp.top_performer;
END;

$$;

11. 트리거와 이벤트

데이터 변경을 자동으로 감지하고 처리하는 트리거를 구현합니다.

-- Audit trigger support table: one row per captured data change
CREATE TABLE audit_log (
    audit_id SERIAL PRIMARY KEY,
    table_name VARCHAR(100),
    operation VARCHAR(10),   -- INSERT / UPDATE / DELETE (from TG_OP)
    user_name VARCHAR(100),
    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    old_data JSONB,          -- row image before the change (UPDATE/DELETE)
    new_data JSONB           -- row image after the change (INSERT/UPDATE)
);

-- Generic audit trigger: writes a JSONB snapshot of every row change to audit_log
CREATE OR REPLACE FUNCTION audit_trigger_function()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
        INSERT INTO audit_log (table_name, operation, user_name, new_data)
        VALUES (TG_TABLE_NAME, TG_OP, current_user, to_jsonb(NEW));
        RETURN NEW;
    ELSIF TG_OP = 'UPDATE' THEN
        INSERT INTO audit_log (table_name, operation, user_name, old_data, new_data)
        VALUES (TG_TABLE_NAME, TG_OP, current_user, to_jsonb(OLD), to_jsonb(NEW));
        RETURN NEW;
    ELSIF TG_OP = 'DELETE' THEN
        INSERT INTO audit_log (table_name, operation, user_name, old_data)
        VALUES (TG_TABLE_NAME, TG_OP, current_user, to_jsonb(OLD));
        RETURN OLD;
    END IF;
    -- Defensive fallback: without it, an unmatched TG_OP (e.g. if this function
    -- is ever attached to a TRUNCATE trigger) would reach the end of the
    -- function without a RETURN and raise a runtime error
    RETURN NULL;
END;

$$;

CREATE TRIGGER employees_audit_trigger
AFTER INSERT OR UPDATE OR DELETE ON employees
FOR EACH ROW
EXECUTE FUNCTION audit_trigger_function();

-- Business-rule trigger: reject salary increases that violate policy
CREATE OR REPLACE FUNCTION validate_salary_increase()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
    v_increase_percentage DECIMAL;
BEGIN
    IF NEW.salary > OLD.salary THEN
        v_increase_percentage := ((NEW.salary - OLD.salary) / OLD.salary) * 100;
        
        -- In a RAISE format string, % is a parameter placeholder; a literal
        -- percent sign must be escaped as %% or PostgreSQL raises
        -- "too few parameters specified for RAISE"
        IF v_increase_percentage > 50 THEN
            RAISE EXCEPTION 'Salary increase cannot exceed 50%%';
        END IF;
        
        IF NEW.performance_score < 3.0 AND v_increase_percentage > 10 THEN
            RAISE EXCEPTION 'Employees with performance score < 3.0 cannot receive > 10%% raise';
        END IF;
    END IF;
    
    RETURN NEW;
END;

$$;

-- Fire only when the salary actually changes (WHEN clause prunes no-op updates)
CREATE TRIGGER validate_salary_trigger
BEFORE UPDATE ON employees
FOR EACH ROW
WHEN (NEW.salary <> OLD.salary)
EXECUTE FUNCTION validate_salary_increase();

-- Event scheduler (MySQL): nightly maintenance at 02:00
-- NOTE(review): a multi-statement BEGIN...END body needs a custom DELIMITER in
-- the mysql client, and the server's event scheduler must be enabled
-- (event_scheduler=ON) for this to run -- confirm before deploying
CREATE EVENT daily_cleanup
ON SCHEDULE EVERY 1 DAY
STARTS '2024-01-01 02:00:00'
DO
BEGIN
    -- Purge audit entries older than 30 days
    DELETE FROM audit_log 
    WHERE timestamp < DATE_SUB(NOW(), INTERVAL 30 DAY);
    
    -- Refresh the daily statistics table
    CALL update_daily_statistics();
END;

12. 실전 예제: 종합 데이터 분석 시스템

지금까지 배운 모든 고급 SQL 기법을 활용한 종합적인 데이터 분석 시스템을 구현합니다.

-- Comprehensive analytics dashboard: five CTEs feeding one JSON document
WITH 
-- 1. Monthly revenue trend, year to date
sales_trend AS (
    SELECT 
        DATE_TRUNC('month', sale_date) AS month,
        COUNT(DISTINCT customer_id) AS unique_customers,
        COUNT(*) AS transaction_count,
        SUM(amount) AS total_revenue,
        AVG(amount) AS avg_transaction_value,
        -- Window over the grouped result: running YTD total of monthly revenue
        SUM(SUM(amount)) OVER (
            ORDER BY DATE_TRUNC('month', sale_date) 
            ROWS UNBOUNDED PRECEDING
        ) AS cumulative_revenue
    FROM sales
    WHERE sale_date >= DATE_TRUNC('year', CURRENT_DATE)
    GROUP BY DATE_TRUNC('month', sale_date)
),

-- 2. Employee performance for the current quarter
employee_performance AS (
    SELECT 
        e.employee_id,
        e.name,
        e.department_id,
        COUNT(s.sale_id) AS sales_count,
        SUM(s.amount) AS total_sales,
        AVG(s.amount) AS avg_sale_amount,
        RANK() OVER (
            PARTITION BY e.department_id 
            ORDER BY SUM(s.amount) DESC
        ) AS dept_rank,
        PERCENTILE_CONT(0.5) WITHIN GROUP (
            ORDER BY s.amount
        ) AS median_sale
    FROM employees e
    LEFT JOIN sales s ON e.employee_id = s.employee_id
        AND s.sale_date >= DATE_TRUNC('quarter', CURRENT_DATE)
    GROUP BY e.employee_id, e.name, e.department_id
),

-- 3. Revenue share per product category
product_analysis AS (
    SELECT 
        p.category,
        COUNT(DISTINCT s.product_id) AS products_sold,
        SUM(s.quantity) AS units_sold,
        SUM(s.amount) AS revenue,
        SUM(s.amount) / SUM(SUM(s.amount)) OVER () * 100 AS revenue_percentage,
        -- ARRAY_AGG does not accept a LIMIT clause; aggregate the sorted
        -- names, then take the first five via array slicing
        (ARRAY_AGG(
            DISTINCT p.product_name 
            ORDER BY p.product_name
        ))[1:5] AS top_products
    FROM sales s
    JOIN products p ON s.product_id = p.product_id
    GROUP BY p.category
),

-- 4. Customer segmentation (RFM: quintile score per dimension)
customer_rfm AS (
    SELECT 
        customer_id,
        MAX(sale_date) AS last_purchase_date,
        CURRENT_DATE - MAX(sale_date) AS recency_days,
        COUNT(*) AS frequency,
        SUM(amount) AS monetary_value,
        NTILE(5) OVER (ORDER BY CURRENT_DATE - MAX(sale_date) DESC) AS r_score,
        NTILE(5) OVER (ORDER BY COUNT(*)) AS f_score,
        NTILE(5) OVER (ORDER BY SUM(amount)) AS m_score
    FROM sales
    GROUP BY customer_id
),

-- 5. Outlier detection via trailing 30-row mean/stddev (current row excluded)
anomaly_detection AS (
    SELECT 
        sale_date,
        amount,
        AVG(amount) OVER (
            ORDER BY sale_date 
            ROWS BETWEEN 30 PRECEDING AND 1 PRECEDING
        ) AS moving_avg,
        STDDEV(amount) OVER (
            ORDER BY sale_date 
            ROWS BETWEEN 30 PRECEDING AND 1 PRECEDING
        ) AS moving_stddev
    FROM sales
)

-- Assemble the final dashboard as a single JSON object
SELECT 
    json_build_object(
        'dashboard_generated', CURRENT_TIMESTAMP,
        'sales_summary', (
            SELECT json_build_object(
                'current_month_revenue', total_revenue,
                'ytd_revenue', cumulative_revenue,
                'avg_transaction', avg_transaction_value,
                'unique_customers', unique_customers
            )
            FROM sales_trend
            WHERE month = DATE_TRUNC('month', CURRENT_DATE)
        ),
        'top_performers', (
            SELECT json_agg(
                json_build_object(
                    'name', name,
                    'sales', total_sales,
                    'rank', dept_rank
                )
            )
            FROM employee_performance
            WHERE dept_rank <= 3
        ),
        'category_breakdown', (
            SELECT json_agg(
                json_build_object(
                    'category', category,
                    'revenue', revenue,
                    'percentage', ROUND(revenue_percentage, 2)
                )
            )
            FROM product_analysis
        ),
        'customer_segments', (
            SELECT json_build_object(
                'champions', COUNT(*) FILTER (WHERE r_score >= 4 AND f_score >= 4),
                'at_risk', COUNT(*) FILTER (WHERE r_score <= 2 AND f_score >= 3),
                'new_customers', COUNT(*) FILTER (WHERE r_score >= 4 AND f_score = 1)
            )
            FROM customer_rfm
        ),
        'anomalies_detected', (
            SELECT COUNT(*)
            FROM anomaly_detection
            WHERE ABS(amount - moving_avg) > 3 * moving_stddev
                AND moving_stddev IS NOT NULL
        )
    ) AS dashboard_data;

마무리

이 튜토리얼에서는 SQL의 고급 기능들을 실제 예제와 함께 살펴보았습니다. 윈도우 함수, CTE, 재귀 쿼리, JSON 처리, 파티셔닝, 최적화 기법, 저장 프로시저, 트리거 등 현대 데이터베이스 시스템의 강력한 기능들을 활용하면 복잡한 비즈니스 요구사항을 효율적으로 구현할 수 있습니다.

고급 SQL 마스터를 위한 추가 학습 팁:
  • 각 DBMS(PostgreSQL, MySQL, Oracle, SQL Server)의 고유 기능을 깊이 학습하세요
  • 실행 계획을 읽고 분석하는 능력을 기르세요
  • 대용량 데이터셋으로 성능 테스트를 수행하며 최적화 기법을 연습하세요
  • NoSQL과 SQL을 결합한 하이브리드 접근법을 탐구하세요
  • 데이터베이스 설계 패턴과 안티패턴을 학습하세요

댓글 남기기

AI, 코딩, 일상 및 다양한 정보 공유에서 더 알아보기

지금 구독하여 계속 읽고 전체 아카이브에 액세스하세요.

계속 읽기