Databricks SQL Interview Questions and Answers (2+ YOE)
1. Cumulative Revenue per Day using Delta Tables
SELECT
date,
SUM(daily_revenue) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED
PRECEDING AND CURRENT ROW) AS cum_revenue
FROM daily_revenue_delta;
2. First and Last Login per User
SELECT
user_id,
MIN(event_time) AS first_login,
MAX(event_time) AS last_login
FROM event_log
WHERE event_type = 'login'
GROUP BY user_id;
3. Top 3 Customers by Spend in Each Region
SELECT *
FROM (
SELECT
region,
customer_id,
SUM(amount) AS total_spend,
ROW_NUMBER() OVER (PARTITION BY region ORDER BY SUM(amount) DESC)
AS rn
FROM sales
GROUP BY region, customer_id
)t
WHERE rn <= 3;
4. Detect and Delete Duplicate Transactions
WITH ranked AS (
SELECT *, ROW_NUMBER() OVER (
PARTITION BY key_col1, key_col2, ...
ORDER BY inserted_at
) AS rn
FROM transactions
)
DELETE FROM transactions
WHERE (key_col1, key_col2, ...) IN (
SELECT key_col1, key_col2, ... FROM ranked WHERE rn > 1
);
5. Users with Purchases in 3 Consecutive Months
WITH months AS (
SELECT
user_id,
DATE_FORMAT(purchase_date, 'yyyy-MM') AS ym
FROM purchases
GROUP BY user_id, ym
),
flagged AS (
SELECT
user_id,
ym,
LEAD(ym, 1) OVER (PARTITION BY user_id ORDER BY ym) AS next1,
LEAD(ym, 2) OVER (PARTITION BY user_id ORDER BY ym) AS next2
FROM months
)
SELECT DISTINCT user_id
FROM flagged
WHERE ADD_MONTHS(CONCAT(ym,'-01'), 1) = CONCAT(next1,'-01')
AND ADD_MONTHS(CONCAT(ym,'-01'), 2) = CONCAT(next2,'-01');
6. Identify Skewed Joins and Fix with Broadcast Hint
SELECT /*+ BROADCAST(small_table) */
b.*, l.*
FROM large_table l
JOIN small_table b
ON [Link] = [Link];
7. 7-Day Moving Average of Product Sales
SELECT
sale_date,
AVG(daily_amount) OVER (
PARTITION BY product_id ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7d
FROM (
SELECT product_id, sale_date, SUM(amount) AS daily_amount
FROM sales
GROUP BY product_id, sale_date
) sub;
8. Pivot Daily Sales into Month-Wise Columns
SELECT *
FROM (
SELECT
product_id,
DATE_FORMAT(sale_date,'yyyy-MM') AS month,
SUM(amount) AS total
FROM sales
GROUP BY product_id, month
) src
PIVOT (
SUM(total) FOR month IN ('2025-01', '2025-02', '2025-03', ...)
) AS p;
9. Customers Who Bought Products Every Month in a Year
WITH months AS (
SELECT user_id, COUNT(DISTINCT DATE_FORMAT(order_date,'MM')) AS mcount
FROM orders
WHERE YEAR(order_date) = 2024
GROUP BY user_id
)
SELECT user_id
FROM months
WHERE mcount = 12;
10. Rank Products by Sales per Year
SELECT
year,
product_id,
total_sales,
RANK() OVER (PARTITION BY year ORDER BY total_sales DESC) AS yearly_rank
FROM (
SELECT product_id, YEAR(order_date) AS year, SUM(amount) AS total_sales
FROM sales
GROUP BY year, product_id
) t;
11. Employees Earning More Than Department Average
SELECT e.*
FROM employees e
JOIN (
SELECT dept_id, AVG(salary) AS avg_sal
FROM employees
GROUP BY dept_id
)d
ON e.dept_id = d.dept_id
WHERE [Link] > d.avg_sal;
12. Find Median Transaction Amount (No Built-in)
SELECT
transaction_amount AS median
FROM (
SELECT
transaction_amount,
ROW_NUMBER() OVER (ORDER BY transaction_amount) AS rn_asc,
ROW_NUMBER() OVER (ORDER BY transaction_amount DESC) AS rn_desc
FROM transactions
)t
WHERE rn_asc = rn_desc
OR rn_asc + 1 = rn_desc;
13. Users Who Placed First Order in Last 30 Days
SELECT user_id, MIN(order_date) AS first_order
FROM orders
GROUP BY user_id
HAVING MIN(order_date) >= CURRENT_DATE() - INTERVAL 30 DAYS;
14. Compare Price Change Between Two Dates per Product
WITH prices AS (
SELECT
product_id,
price,
price_date,
ROW_NUMBER() OVER (PARTITION BY product_id ORDER BY price_date) AS
rn_asc,
ROW_NUMBER() OVER (PARTITION BY product_id ORDER BY price_date DESC)
AS rn_desc
FROM product_prices
WHERE price_date IN ('2025-05-01','2025-06-01')
)
SELECT
p1.product_id,
[Link] AS price_d1,
[Link] AS price_d2,
([Link] - [Link]) / [Link] * 100 AS pct_change
FROM (
SELECT product_id, price FROM prices WHERE price_date = '2025-05-01'
) p1
JOIN (
SELECT product_id, price FROM prices WHERE price_date = '2025-06-01'
) p2 USING (product_id);
15. Customers Whose First and Last Transaction on Same Day
SELECT user_id
FROM (
SELECT
user_id,
MIN(trans_date) AS first,
MAX(trans_date) AS last
FROM transactions
GROUP BY user_id
)t
WHERE first = last;
16. Percentage of Returning Users Each Month
WITH monthly_users AS (
SELECT
user_id,
DATE_FORMAT(event_time,'yyyy-MM') AS ym,
MIN(event_time) AS first_visit
FROM events
GROUP BY user_id, ym
)
SELECT
ym,
COUNT(DISTINCT CASE WHEN first_visit > TRUNC(first_visit, 'MM') THEN user_id
END) * 100.0 /
COUNT(DISTINCT user_id) AS returning_pct
FROM monthly_users
GROUP BY ym
ORDER BY ym;