SQL

yueyuan
2 min readJan 15, 2021

Window:

Find the top 3 employees who have the highest salary in each department.

WITH T AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY employee_salary DESC) AS rank_in_dep
FROM employee_salary)

SELECT * FROM T
WHERE rank_in_dep <= 3

COALESCE() to recode NULL / missing data:

COALESCE() function re-codes the NULL to whatever value specified in the second argument. For our example, we can re-code the NULL_VAR to a character value ‘MISSING’:

SELECT
ID_VAR,
NULL_VAR,
COALESCE(NULL_VAR, ‘MISSING’) AS RECODE_NULL_VAR
FROM
CURRENT_TABLE
ORDER BY ID_VAR

Missing values can be encoded in various ways besides NULL. In these cases, COALESCE( ) would not work, but they can be handled with the CASE WHEN statement:

SELECT
ID_VAR,
EMPTY_STR_VAR,
COALESCE(EMPTY_STR_VAR, ‘MISSING’) AS COALESCE_EMPTY_STR_VAR,
CASE WHEN EMPTY_STR_VAR = ‘ ‘ THEN ‘EMPTY_MISSING’ END AS CASEWHEN_EMPTY_STR_VAR,
NA_STR_VAR,
CASE WHEN NA_STR_VAR = ‘NA’ THEN ‘NA_MISSING’ END AS CASEWHEN_NA_STR_VAR
FROM
CURRENT_TABLE
ORDER BY ID_VAR

Compute running total and cumulative frequency

SELECT
DAT.NUM_VAR,
SUM(NUM_VAR) OVER (PARTITION BY JOIN_ID) AS TOTAL_SUM,
ROUND(CUM_SUM / SUM(NUM_VAR) OVER (PARTITION BY JOIN_ID), 4) AS CUM_FREQ
FROM
(
SELECT
T.*,
SUM(NUM_VAR) OVER (ORDER BY NUM_VAR ROWS UNBOUNDED PRECEDING) AS CUM_SUM,
CASE WHEN ID_VAR IS NOT NULL THEN ‘1’ END AS JOIN_ID
FROM CURRENT_TABLE T
) DAT
ORDER BY CUM_FREQ

Find the record(s) with extreme values without self joining

SELECT *
FROM
(
SELECT
DAT.*,
CASE WHEN (NUM_VAR = MAX(NUM_VAR) OVER (PARTITION BY ID_VAR)) THEN ‘Y’ ELSE ’N’ END AS MAX_NUM_IND
FROM
CURRENT_TABLE DAT
) DAT2
WHERE MAX_NUM_IND = ‘Y’

Conditional WHERE clause

SELECT
DAT.ID_VAR,
DAT.SEQ_VAR,
DAT.NUM_VAR,
DATE_VAR1,
DATE_VAR2,
TRUNC(DATE_VAR2) — TRUNC(DATE_VAR1) AS LAG_IN_DATES
FROM
CURRENT_TABLE DAT
WHERE
(TRUNC(DATE_VAR2) — TRUNC(DATE_VAR1)) >= CASE WHEN SEQ_VAR IN (1,2,3) THEN 0 WHEN SEQ_VAR IN (4,5,6) THEN 1 ELSE 2 END
ORDER BY ID_VAR, SEQ_VAR

Lag() and Lead() to work with consecutive rows

SELECT
DAT.ID_VAR,
DAT.SEQ_VAR,
DAT.NUM_VAR,
NUM_VAR — PREV_NUM AS NUM_DIFF
FROM
(
SELECT
T.*,
LAG(NUM_VAR, 1, 0) OVER (PARTITION BY ID_VAR ORDER BY SEQ_VAR) AS PREV_NUM
FROM
CURRENT_TABLE T
) DAT
ORDER BY ID_VAR, SEQ_VAR

--

--