0% found this document useful (0 votes)
58 views6 pages

Databricks SQL Interview Q&A Guide

The document contains a list of SQL interview questions and answers focused on Databricks, covering various topics such as cumulative revenue, user logins, customer spending, duplicate transactions, and more. Each question includes a SQL query that demonstrates the solution to the problem presented. The queries utilize features like window functions, common table expressions, and pivoting to address specific data analysis tasks.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
58 views6 pages

Databricks SQL Interview Q&A Guide

The document contains a list of SQL interview questions and answers focused on Databricks, covering various topics such as cumulative revenue, user logins, customer spending, duplicate transactions, and more. Each question includes a SQL query that demonstrates the solution to the problem presented. The queries utilize features like window functions, common table expressions, and pivoting to address specific data analysis tasks.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

Databricks SQL Interview Questions and Answers (2+ YOE)

1. Cumulative Revenue per Day using Delta Tables

SELECT
date,
SUM(daily_revenue) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED
PRECEDING AND CURRENT ROW) AS cum_revenue
FROM daily_revenue_delta;

2. First and Last Login per User

SELECT
user_id,
MIN(event_time) AS first_login,
MAX(event_time) AS last_login
FROM event_log
WHERE event_type = 'login'
GROUP BY user_id;

3. Top 3 Customers by Spend in Each Region

SELECT *
FROM (
SELECT
region,
customer_id,
SUM(amount) AS total_spend,
ROW_NUMBER() OVER (PARTITION BY region ORDER BY SUM(amount) DESC)
AS rn
FROM sales
GROUP BY region, customer_id
)t
WHERE rn <= 3;
4. Detect and Delete Duplicate Transactions

WITH ranked AS (
SELECT *, ROW_NUMBER() OVER (
PARTITION BY key_col1, key_col2, ...
ORDER BY inserted_at
) AS rn
FROM transactions
)
DELETE FROM transactions
WHERE (key_col1, key_col2, ...) IN (
SELECT key_col1, key_col2, ... FROM ranked WHERE rn > 1
);

5. Users with Purchases in 3 Consecutive Months

WITH months AS (
SELECT
user_id,
DATE_FORMAT(purchase_date, 'yyyy-MM') AS ym
FROM purchases
GROUP BY user_id, ym
),
flagged AS (
SELECT
user_id,
ym,
LEAD(ym, 1) OVER (PARTITION BY user_id ORDER BY ym) AS next1,
LEAD(ym, 2) OVER (PARTITION BY user_id ORDER BY ym) AS next2
FROM months
)
SELECT DISTINCT user_id
FROM flagged
WHERE ADD_MONTHS(CONCAT(ym,'-01'), 1) = CONCAT(next1,'-01')
AND ADD_MONTHS(CONCAT(ym,'-01'), 2) = CONCAT(next2,'-01');
6. Identify Skewed Joins and Fix with Broadcast Hint

SELECT /*+ BROADCAST(small_table) */


b.*, l.*
FROM large_table l
JOIN small_table b
ON [Link] = [Link];

7. 7-Day Moving Average of Product Sales

SELECT
sale_date,
AVG(daily_amount) OVER (
PARTITION BY product_id ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7d
FROM (
SELECT product_id, sale_date, SUM(amount) AS daily_amount
FROM sales
GROUP BY product_id, sale_date
) sub;

8. Pivot Daily Sales into Month-Wise Columns

SELECT *
FROM (
SELECT
product_id,
DATE_FORMAT(sale_date,'yyyy-MM') AS month,
SUM(amount) AS total
FROM sales
GROUP BY product_id, month
) src
PIVOT (
SUM(total) FOR month IN ('2025-01', '2025-02', '2025-03', ...)
) AS p;
9. Customers Who Bought Products Every Month in a Year

WITH months AS (
SELECT user_id, COUNT(DISTINCT DATE_FORMAT(order_date,'MM')) AS mcount
FROM orders
WHERE YEAR(order_date) = 2024
GROUP BY user_id
)
SELECT user_id
FROM months
WHERE mcount = 12;

10. Rank Products by Sales per Year

SELECT
year,
product_id,
total_sales,
RANK() OVER (PARTITION BY year ORDER BY total_sales DESC) AS yearly_rank
FROM (
SELECT product_id, YEAR(order_date) AS year, SUM(amount) AS total_sales
FROM sales
GROUP BY year, product_id
) t;

11. Employees Earning More Than Department Average

SELECT e.*
FROM employees e
JOIN (
SELECT dept_id, AVG(salary) AS avg_sal
FROM employees
GROUP BY dept_id
)d
ON e.dept_id = d.dept_id
WHERE [Link] > d.avg_sal;
12. Find Median Transaction Amount (No Built-in)

SELECT
transaction_amount AS median
FROM (
SELECT
transaction_amount,
ROW_NUMBER() OVER (ORDER BY transaction_amount) AS rn_asc,
ROW_NUMBER() OVER (ORDER BY transaction_amount DESC) AS rn_desc
FROM transactions
)t
WHERE rn_asc = rn_desc
OR rn_asc + 1 = rn_desc;

13. Users Who Placed First Order in Last 30 Days

SELECT user_id, MIN(order_date) AS first_order


FROM orders
GROUP BY user_id
HAVING MIN(order_date) >= CURRENT_DATE() - INTERVAL 30 DAYS;

14. Compare Price Change Between Two Dates per Product

WITH prices AS (
SELECT
product_id,
price,
price_date,
ROW_NUMBER() OVER (PARTITION BY product_id ORDER BY price_date) AS
rn_asc,
ROW_NUMBER() OVER (PARTITION BY product_id ORDER BY price_date DESC)
AS rn_desc
FROM product_prices
WHERE price_date IN ('2025-05-01','2025-06-01')
)
SELECT
p1.product_id,
[Link] AS price_d1,
[Link] AS price_d2,
([Link] - [Link]) / [Link] * 100 AS pct_change
FROM (
SELECT product_id, price FROM prices WHERE price_date = '2025-05-01'
) p1
JOIN (
SELECT product_id, price FROM prices WHERE price_date = '2025-06-01'
) p2 USING (product_id);

15. Customers Whose First and Last Transaction on Same Day

SELECT user_id
FROM (
SELECT
user_id,
MIN(trans_date) AS first,
MAX(trans_date) AS last
FROM transactions
GROUP BY user_id
)t
WHERE first = last;

16. Percentage of Returning Users Each Month

WITH monthly_users AS (
SELECT
user_id,
DATE_FORMAT(event_time,'yyyy-MM') AS ym,
MIN(event_time) AS first_visit
FROM events
GROUP BY user_id, ym
)
SELECT
ym,
COUNT(DISTINCT CASE WHEN first_visit > TRUNC(first_visit, 'MM') THEN user_id
END) * 100.0 /
COUNT(DISTINCT user_id) AS returning_pct
FROM monthly_users
GROUP BY ym
ORDER BY ym;

You might also like