From 38076c193bdd706b13363f0f2a98406e6c6311c5 Mon Sep 17 00:00:00 2001 From: wendy Date: Tue, 25 Jun 2024 18:25:26 +0800 Subject: [PATCH 1/5] clarify questions, instructions according to #181 --- data/instruct_advanced_bigquery.csv | 8 ++++---- data/instruct_advanced_mysql.csv | 8 ++++---- data/instruct_advanced_postgres.csv | 5 +++-- data/instruct_advanced_sqlite.csv | 8 ++++---- data/instruct_advanced_tsql.csv | 8 ++++---- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/data/instruct_advanced_bigquery.csv b/data/instruct_advanced_bigquery.csv index 702db86..cf194f0 100644 --- a/data/instruct_advanced_bigquery.csv +++ b/data/instruct_advanced_bigquery.csv @@ -41,7 +41,7 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." car_dealership,bigquery,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date, MAX(p.payment_date) AS latest_payment_date FROM car_dealership.sales AS s JOIN car_dealership.payments_received AS p ON s.id = p.sale_id GROUP BY s.id, s.sale_date) SELECT ROUND(AVG(DATE_DIFF(latest_payment_date, sale_date, DAY)), 2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. 
ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. TSC = Count of sales within a specified period" -car_dealership,bigquery,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC NULLS FIRST, crtd_ts DESC NULLS FIRST) AS rn FROM car_dealership.inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM car_dealership.cars AS c JOIN car_dealership.sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY highest_sale_price DESC NULLS FIRST;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period +car_dealership,bigquery,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC NULLS FIRST, crtd_ts DESC NULLS FIRST) AS rn FROM car_dealership.inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM car_dealership.cars AS c JOIN car_dealership.sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY highest_sale_price DESC NULLS FIRST;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. ASP = Mean sale price for a designated start period When getting a car's inventory status, always take the latest status from the inventory_snapshots table" @@ -58,11 +58,11 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,bigquery,instructions_cte_window,"WITH salesperson_sales AS (SELECT salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM car_dealership.sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY ss.total_sales DESC NULLS FIRST) AS sales_rank FROM salesperson_sales AS ss JOIN car_dealership.salespersons AS s ON ss.salesperson_id = s.id;","Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." 
-car_dealership,bigquery,instructions_cte_window,"WITH monthly_totals AS (SELECT TIMESTAMP_TRUNC(payment_date, MONTH) AS dt, SUM(payment_amount) AS total_payments FROM car_dealership.payments_received GROUP BY dt), monthly_range AS (SELECT GENERATE_DATE_ARRAY(DATE(TIMESTAMP_TRUNC(MIN(payment_date), MONTH)), DATE(TIMESTAMP_TRUNC(MAX(payment_date), MONTH)), INTERVAL 1 MONTH) AS date_range FROM car_dealership.payments_received), monthly_totals_with_zero AS (SELECT date_range AS dt, COALESCE(mt.total_payments, 0) AS total_payments FROM UNNEST((SELECT date_range FROM monthly_range)) AS date_range LEFT JOIN monthly_totals AS mt ON date_range = mt.dt) SELECT CAST(m.dt AS DATE) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;WITH monthly_totals AS (SELECT TIMESTAMP_TRUNC(payment_date, MONTH) AS dt, SUM(payment_amount) AS total_payments FROM car_dealership.payments_received GROUP BY dt), monthly_range AS (SELECT GENERATE_DATE_ARRAY(DATE(TIMESTAMP_TRUNC(MIN(payment_date), MONTH)), DATE(TIMESTAMP_TRUNC(MAX(payment_date), MONTH)), INTERVAL 1 MONTH) AS date_range FROM car_dealership.payments_received), monthly_totals_with_zero AS (SELECT date_range AS dt, COALESCE(mt.total_payments, 0) AS total_payments FROM UNNEST((SELECT date_range FROM monthly_range)) AS date_range LEFT JOIN monthly_totals AS mt ON date_range = mt.dt) SELECT CAST(m.dt AS DATETIME) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;",What is the total payments received per month? Also calculate the MoM change for each month.,MoM change = (current month value - prev month value). Return months with no payments as 0. 
MoM will always be zero for the first month that appears in your answer.,"To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. +car_dealership,bigquery,instructions_cte_window,"WITH monthly_totals AS (SELECT TIMESTAMP_TRUNC(payment_date, MONTH) AS dt, SUM(payment_amount) AS total_payments FROM car_dealership.payments_received GROUP BY dt), monthly_range AS (SELECT GENERATE_DATE_ARRAY(DATE(TIMESTAMP_TRUNC(MIN(payment_date), MONTH)), DATE(TIMESTAMP_TRUNC(MAX(payment_date), MONTH)), INTERVAL 1 MONTH) AS date_range FROM car_dealership.payments_received), monthly_totals_with_zero AS (SELECT date_range AS dt, COALESCE(mt.total_payments, 0) AS total_payments FROM UNNEST((SELECT date_range FROM monthly_range)) AS date_range LEFT JOIN monthly_totals AS mt ON date_range = mt.dt) SELECT CAST(m.dt AS DATE) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;WITH monthly_totals AS (SELECT TIMESTAMP_TRUNC(payment_date, MONTH) AS dt, SUM(payment_amount) AS total_payments FROM car_dealership.payments_received GROUP BY dt), monthly_range AS (SELECT GENERATE_DATE_ARRAY(DATE(TIMESTAMP_TRUNC(MIN(payment_date), MONTH)), DATE(TIMESTAMP_TRUNC(MAX(payment_date), MONTH)), INTERVAL 1 MONTH) AS date_range FROM car_dealership.payments_received), monthly_totals_with_zero AS (SELECT date_range AS dt, COALESCE(mt.total_payments, 0) AS total_payments FROM UNNEST((SELECT date_range FROM monthly_range)) AS date_range LEFT JOIN monthly_totals AS mt ON date_range = mt.dt) SELECT CAST(m.dt AS DATETIME) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;",What is the total payments received per month? 
Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments. MoM will always be zero for the first month that appears in your answer.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales -MoM change = (current month value - prev month value). Return months with no payments as 0." -car_dealership,bigquery,instructions_date_join,"WITH date_range AS (SELECT ARRAY(SELECT AS STRUCT DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL x MONTH) AS month_start FROM UNNEST(GENERATE_ARRAY(0, 5)) AS x) AS months), flattened_date_range AS (SELECT month.month_start FROM date_range, UNNEST(months) MONTH), sales_metrics AS (SELECT DATE_TRUNC(s.sale_date, MONTH) AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM car_dealership.sales AS s JOIN car_dealership.salespersons AS sp ON s.salesperson_id = sp.id WHERE EXTRACT(YEAR FROM sp.hire_date) BETWEEN 2022 AND 2023 AND s.sale_date >= DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL -6 MONTH) AND s.sale_date < DATE_TRUNC(CURRENT_DATE(), MONTH) GROUP BY sale_month) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM flattened_date_range AS dr LEFT JOIN sales_metrics AS sm ON dr.month_start = sm.sale_month ORDER BY dr.month_start ASC;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Include months where metrics are 0. 
Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. +MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." +car_dealership,bigquery,instructions_date_join,"WITH date_range AS (SELECT ARRAY(SELECT AS STRUCT DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL x MONTH) AS month_start FROM UNNEST(GENERATE_ARRAY(0, 5)) AS x) AS months), flattened_date_range AS (SELECT month.month_start FROM date_range, UNNEST(months) MONTH), sales_metrics AS (SELECT DATE_TRUNC(s.sale_date, MONTH) AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM car_dealership.sales AS s JOIN car_dealership.salespersons AS sp ON s.salesperson_id = sp.id WHERE EXTRACT(YEAR FROM sp.hire_date) BETWEEN 2022 AND 2023 AND s.sale_date >= DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL -6 MONTH) AND s.sale_date < DATE_TRUNC(CURRENT_DATE(), MONTH) GROUP BY sale_month) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM flattened_date_range AS dr LEFT JOIN sales_metrics AS sm ON dr.month_start = sm.sale_month ORDER BY dr.month_start ASC;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Return all months in your answer, including those where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. 
ASP = Average Sale Price during a specific timeframe To calculate the average days between a sale date and when the payment was received, join the relevant tables. TSC = Total Sales Count for a given period" diff --git a/data/instruct_advanced_mysql.csv b/data/instruct_advanced_mysql.csv index ad6999b..669c631 100644 --- a/data/instruct_advanced_mysql.csv +++ b/data/instruct_advanced_mysql.csv @@ -41,7 +41,7 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." car_dealership,mysql,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id,s.sale_date,MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY s.id,s.sale_date) SELECT ROUND(AVG(DATEDIFF(latest_payment_date,sale_date)),2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. 
TSC = Count of sales within a specified period" -car_dealership,mysql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period +car_dealership,mysql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. ASP = Mean sale price for a designated start period When getting a car's inventory status, always take the latest status from the inventory_snapshots table" @@ -58,11 +58,11 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,mysql,instructions_cte_window,"WITH salesperson_sales AS (SELECT salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY CASE WHEN ss.total_sales IS NULL THEN 1 ELSE 0 END DESC, ss.total_sales DESC) AS sales_rank FROM salesperson_sales AS ss JOIN salespersons AS s ON ss.salesperson_id = s.id;","Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." 
-car_dealership,mysql,instructions_cte_window,"WITH RECURSIVE monthly_range AS (SELECT MIN(DATE_FORMAT(payment_date, '%Y-%m-01')) AS dt FROM payments_received UNION ALL SELECT DATE_ADD(dt, INTERVAL 1 MONTH) FROM monthly_range WHERE dt < (SELECT MAX(DATE_FORMAT(payment_date, '%Y-%m-01')) FROM payments_received)), monthly_totals AS (SELECT DATE_FORMAT(payment_date, '%Y-%m-01') AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_totals_with_zero AS (SELECT mr.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_totals AS mt ON mr.dt = mt.dt) SELECT m.dt AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;;WITH RECURSIVE monthly_range AS (SELECT DATE_FORMAT(MIN(payment_date), '%Y-%m-01') AS month FROM payments_received UNION ALL SELECT DATE_ADD(month, INTERVAL 1 MONTH) FROM monthly_range WHERE month < (SELECT DATE_FORMAT(MAX(payment_date), '%Y-%m-01') FROM payments_received)), monthly_payments AS (SELECT DATE_FORMAT(payment_date, '%Y-%m-01') AS month, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY month), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_payments AS mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, COALESCE(mp.total_payments - LAG(mp.total_payments, 1) OVER (ORDER BY mp.month), 0) AS mom_change FROM monthly_payments_with_zeros AS mp ORDER BY mp.month;;",What is the total payments received per month? Also calculate the MoM change for each month.,MoM change = (current month value - prev month value). Return months with no payments as 0. 
MoM will always be zero for the first month that appears in your answer.,"To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. +car_dealership,mysql,instructions_cte_window,"WITH RECURSIVE monthly_range AS (SELECT MIN(DATE_FORMAT(payment_date, '%Y-%m-01')) AS dt FROM payments_received UNION ALL SELECT DATE_ADD(dt, INTERVAL 1 MONTH) FROM monthly_range WHERE dt < (SELECT MAX(DATE_FORMAT(payment_date, '%Y-%m-01')) FROM payments_received)), monthly_totals AS (SELECT DATE_FORMAT(payment_date, '%Y-%m-01') AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_totals_with_zero AS (SELECT mr.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_totals AS mt ON mr.dt = mt.dt) SELECT m.dt AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;;WITH RECURSIVE monthly_range AS (SELECT DATE_FORMAT(MIN(payment_date), '%Y-%m-01') AS month FROM payments_received UNION ALL SELECT DATE_ADD(month, INTERVAL 1 MONTH) FROM monthly_range WHERE month < (SELECT DATE_FORMAT(MAX(payment_date), '%Y-%m-01') FROM payments_received)), monthly_payments AS (SELECT DATE_FORMAT(payment_date, '%Y-%m-01') AS month, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY month), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_payments AS mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, COALESCE(mp.total_payments - LAG(mp.total_payments, 1) OVER (ORDER BY mp.month), 0) AS mom_change FROM monthly_payments_with_zeros AS mp ORDER BY mp.month;;",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). 
Return all months in your answer, including those where there were no payments. MoM will always be zero for the first month that appears in your answer.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales -MoM change = (current month value - prev month value). Return months with no payments as 0." -car_dealership,mysql,instructions_date_join,"WITH RECURSIVE date_range AS (SELECT DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 6 MONTH), '%Y-%m-01') AS month_start UNION ALL SELECT DATE_FORMAT(DATE_ADD(month_start, INTERVAL 1 MONTH), '%Y-%m-01') FROM date_range WHERE month_start < DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 1 MONTH), '%Y-%m-01')), sales_metrics AS (SELECT DATE_FORMAT(s.sale_date, '%Y-%m-01') AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM car_dealership.sales AS s JOIN car_dealership.salespersons AS sp ON s.salesperson_id = sp.id WHERE YEAR(sp.hire_date) BETWEEN 2022 AND 2023 AND s.sale_date >= DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 6 MONTH), '%Y-%m-01') AND s.sale_date < DATE_FORMAT(CURDATE(), '%Y-%m-01') GROUP BY sale_month) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM date_range AS dr LEFT JOIN sales_metrics AS sm ON dr.month_start = sm.sale_month ORDER BY dr.month_start ASC;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Include months where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. 
Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. +MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." +car_dealership,mysql,instructions_date_join,"WITH RECURSIVE date_range AS (SELECT DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 6 MONTH), '%Y-%m-01') AS month_start UNION ALL SELECT DATE_FORMAT(DATE_ADD(month_start, INTERVAL 1 MONTH), '%Y-%m-01') FROM date_range WHERE month_start < DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 1 MONTH), '%Y-%m-01')), sales_metrics AS (SELECT DATE_FORMAT(s.sale_date, '%Y-%m-01') AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM car_dealership.sales AS s JOIN car_dealership.salespersons AS sp ON s.salesperson_id = sp.id WHERE YEAR(sp.hire_date) BETWEEN 2022 AND 2023 AND s.sale_date >= DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 6 MONTH), '%Y-%m-01') AND s.sale_date < DATE_FORMAT(CURDATE(), '%Y-%m-01') GROUP BY sale_month) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM date_range AS dr LEFT JOIN sales_metrics AS sm ON dr.month_start = sm.sale_month ORDER BY dr.month_start ASC;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Return all months in your answer, including those where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. ASP = Average Sale Price during a specific timeframe To calculate the average days between a sale date and when the payment was received, join the relevant tables. 
TSC = Total Sales Count for a given period" diff --git a/data/instruct_advanced_postgres.csv b/data/instruct_advanced_postgres.csv index 02f2c0e..b302a40 100644 --- a/data/instruct_advanced_postgres.csv +++ b/data/instruct_advanced_postgres.csv @@ -41,10 +41,11 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." car_dealership,instructions_cte_join,"What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date, MAX(p.payment_date) AS latest_payment_date FROM sales s JOIN payments_received p ON s.id = p.sale_id GROUP BY 1,2) SELECT ROUND(AVG(latest_payment_date - sale_date), 2) AS avg_days_to_payment FROM sale_payments","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. TSC = Count of sales within a specified period" -car_dealership,instructions_cte_join,"Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER(PARTITION BY car_id ORDER BY snapshot_date DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars c JOIN sales s ON c.id = s.car_id JOIN latest_inventory_status lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY highest_sale_price DESC","Recall that a car can have multiple entries in the inventory_snapshot table. +car_dealership,instructions_cte_join,"Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER(PARTITION BY car_id ORDER BY snapshot_date DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars c JOIN sales s ON c.id = s.car_id JOIN latest_inventory_status lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY highest_sale_price DESC","Recall that a car can have multiple entries in the inventory_snapshot table. TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. 
-ASP = Mean sale price for a designated start period" +ASP = Mean sale price for a designated start period +When getting a car's inventory status, always take the latest status from the inventory_snapshots table" car_dealership,instructions_cte_join,"Who are the top 5 salespersons by total sales amount? Return their ID, first name, last name and total sales amount.","To get the total sales amount per salesperson, join the salespersons and sales tables, group by salesperson, and sum the sale_price. Always order results with NULLS last.","WITH salesperson_sales AS (SELECT s.id, s.first_name, s.last_name, SUM(sa.sale_price) AS total_sales FROM salespersons s LEFT JOIN sales sa ON s.id = sa.salesperson_id GROUP BY s.id) SELECT id, first_name, last_name, total_sales FROM salesperson_sales ORDER BY total_sales DESC NULLS LAST LIMIT 5","PMSR = per month sales revenue Always join sales with cars before using the sales table Weekend days are Saturday and Sunday diff --git a/data/instruct_advanced_sqlite.csv b/data/instruct_advanced_sqlite.csv index b6bd00b..b1373ff 100644 --- a/data/instruct_advanced_sqlite.csv +++ b/data/instruct_advanced_sqlite.csv @@ -41,7 +41,7 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." 
car_dealership,sqlite,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date, MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY s.id, s.sale_date) SELECT ROUND(AVG(julianday(latest_payment_date) - julianday(sale_date)), 2) AS avg_days_to_paymen FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. 
TSC = Count of sales within a specified period" -car_dealership,sqlite,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period +car_dealership,sqlite,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. ASP = Mean sale price for a designated start period When getting a car's inventory status, always take the latest status from the inventory_snapshots table" @@ -58,11 +58,11 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,sqlite,instructions_cte_window,"WITH salesperson_sales AS (SELECT salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY CASE WHEN ss.total_sales IS NULL THEN 1 ELSE 0 END DESC, ss.total_sales DESC) AS sales_rank FROM salesperson_sales AS ss JOIN salespersons AS s ON ss.salesperson_id = s.id;","Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." 
-car_dealership,sqlite,instructions_cte_window,"WITH monthly_totals AS (SELECT strftime('%Y-%m', payment_date) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_totals_with_zero AS (SELECT dt, total_payments FROM monthly_totals UNION ALL SELECT strftime('%Y-%m', date(payment_date, 'start of month', '+' || (n || ' month'))) AS dt, 0 AS total_payments FROM payments_received, (SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10 UNION ALL SELECT 11) WHERE strftime('%Y-%m', date(payment_date, 'start of month', '+' || (n || ' month'))) <= strftime('%Y-%m', 'now') GROUP BY dt) SELECT dt AS MONTH, SUM(total_payments) AS total_payments, SUM(total_payments) - LAG(SUM(total_payments), 1, 0) OVER (ORDER BY dt) AS mom_change FROM monthly_totals_with_zero GROUP BY dt ORDER BY dt;;WITH monthly_payments AS (SELECT strftime('%Y-%m', pr.payment_date) AS month, SUM(pr.payment_amount) AS total_payments FROM payments_received AS pr GROUP BY month ORDER BY month), monthly_range AS (WITH RECURSIVE date_range AS (SELECT MIN(strftime('%Y-%m', payment_date)) AS month FROM payments_received UNION ALL SELECT strftime('%Y-%m', date(julianday(month) || '+1 month')) FROM date_range WHERE month < (SELECT MAX(strftime('%Y-%m', payment_date)) FROM payments_received)) SELECT month FROM date_range), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_payments AS mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, COALESCE(mp.total_payments - LAG(mp.total_payments, 1) OVER (ORDER BY mp.month), 0) AS mom_change FROM monthly_payments_with_zeros AS mp ORDER BY mp.month;",What is the total payments received per month? Also calculate the MoM change for each month.,MoM change = (current month value - prev month value). 
Return months with no payments as 0. MoM will always be zero for the first month that appears in your answer.,"To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. +car_dealership,sqlite,instructions_cte_window,"WITH monthly_totals AS (SELECT strftime('%Y-%m', payment_date) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_totals_with_zero AS (SELECT dt, total_payments FROM monthly_totals UNION ALL SELECT strftime('%Y-%m', date(payment_date, 'start of month', '+' || (n || ' month'))) AS dt, 0 AS total_payments FROM payments_received, (SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10 UNION ALL SELECT 11) WHERE strftime('%Y-%m', date(payment_date, 'start of month', '+' || (n || ' month'))) <= strftime('%Y-%m', 'now') GROUP BY dt) SELECT dt AS MONTH, SUM(total_payments) AS total_payments, SUM(total_payments) - LAG(SUM(total_payments), 1, 0) OVER (ORDER BY dt) AS mom_change FROM monthly_totals_with_zero GROUP BY dt ORDER BY dt;;WITH monthly_payments AS (SELECT strftime('%Y-%m', pr.payment_date) AS month, SUM(pr.payment_amount) AS total_payments FROM payments_received AS pr GROUP BY month ORDER BY month), monthly_range AS (WITH RECURSIVE date_range AS (SELECT MIN(strftime('%Y-%m', payment_date)) AS month FROM payments_received UNION ALL SELECT strftime('%Y-%m', date(julianday(month) || '+1 month')) FROM date_range WHERE month < (SELECT MAX(strftime('%Y-%m', payment_date)) FROM payments_received)) SELECT month FROM date_range), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_payments AS mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, 
COALESCE(mp.total_payments - LAG(mp.total_payments, 1) OVER (ORDER BY mp.month), 0) AS mom_change FROM monthly_payments_with_zeros AS mp ORDER BY mp.month;",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments. MoM will always be zero for the first month that appears in your answer.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales -MoM change = (current month value - prev month value). Return months with no payments as 0." 
-car_dealership,sqlite,instructions_date_join,"WITH RECURSIVE date_range AS (SELECT DATE('now', '-7 months', 'start of month') AS month_start UNION ALL SELECT DATE(month_start, '+1 month') FROM date_range WHERE month_start < DATE('now', '-1 month', 'start of month')), sales_metrics AS (SELECT strftime('%Y-%m', s.sale_date) AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM sales AS s JOIN salespersons AS sp ON s.salesperson_id = sp.id WHERE strftime('%Y', sp.hire_date) BETWEEN '2022' AND '2023' AND s.sale_date >= DATE('now', '-7 months', 'start of month') AND s.sale_date < DATE('now', 'start of month') GROUP BY sale_month) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM date_range AS dr LEFT JOIN sales_metrics AS sm ON strftime('%Y-%m', dr.month_start) = sm.sale_month ORDER BY dr.month_start ASC;;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Include months where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. +MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." 
+car_dealership,sqlite,instructions_date_join,"WITH RECURSIVE date_range AS (SELECT DATE('now', '-7 months', 'start of month') AS month_start UNION ALL SELECT DATE(month_start, '+1 month') FROM date_range WHERE month_start < DATE('now', '-1 month', 'start of month')), sales_metrics AS (SELECT strftime('%Y-%m', s.sale_date) AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM sales AS s JOIN salespersons AS sp ON s.salesperson_id = sp.id WHERE strftime('%Y', sp.hire_date) BETWEEN '2022' AND '2023' AND s.sale_date >= DATE('now', '-7 months', 'start of month') AND s.sale_date < DATE('now', 'start of month') GROUP BY sale_month) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM date_range AS dr LEFT JOIN sales_metrics AS sm ON strftime('%Y-%m', dr.month_start) = sm.sale_month ORDER BY dr.month_start ASC;;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Return all months in your answer, including those where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. ASP = Average Sale Price during a specific timeframe To calculate the average days between a sale date and when the payment was received, join the relevant tables. 
TSC = Total Sales Count for a given period" diff --git a/data/instruct_advanced_tsql.csv b/data/instruct_advanced_tsql.csv index 26984a7..67e33b1 100644 --- a/data/instruct_advanced_tsql.csv +++ b/data/instruct_advanced_tsql.csv @@ -41,7 +41,7 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." car_dealership,tsql,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date AS sale_date, MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY s.id, s.sale_date) SELECT ROUND(AVG(DATEDIFF(day, sale_date, latest_payment_date)), 2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. 
TSC = Count of sales within a specified period" -car_dealership,tsql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id AS car_id, is_in_inventory AS is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = 0 AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY MAX(s.sale_price) DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period +car_dealership,tsql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id AS car_id, is_in_inventory AS is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = 0 AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY MAX(s.sale_price) DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. ASP = Mean sale price for a designated start period When getting a car's inventory status, always take the latest status from the inventory_snapshots table" @@ -58,11 +58,11 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,tsql,instructions_cte_window,"WITH salesperson_sales AS (SELECT salesperson_id AS salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY CASE WHEN ss.total_sales IS NULL THEN 1 ELSE 0 END DESC, ss.total_sales DESC) AS sales_rank FROM salesperson_sales AS ss JOIN salespersons AS s ON ss.salesperson_id = s.id;","Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." 
-car_dealership,tsql,instructions_cte_window,"WITH RecursiveDates AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, MIN(payment_date)), 0) AS dt, DATEADD(MONTH, DATEDIFF(MONTH, 0, MAX(payment_date)), 0) AS max_date FROM payments_received UNION ALL SELECT DATEADD(MONTH, 1, dt), max_date FROM RecursiveDates WHERE dt < max_date ), monthly_totals AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, payment_date), 0) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, payment_date), 0)), monthly_totals_with_zero AS (SELECT rd.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM RecursiveDates rd LEFT JOIN monthly_totals mt ON rd.dt = mt.dt) SELECT CAST(m.dt AS DATE) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero m ORDER BY m.dt OPTION (MAXRECURSION 0);",What is the total payments received per month? Also calculate the MoM change for each month.,MoM change = (current month value - prev month value). Return months with no payments as 0. MoM will always be zero for the first month that appears in your answer.,"To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. 
+car_dealership,tsql,instructions_cte_window,"WITH RecursiveDates AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, MIN(payment_date)), 0) AS dt, DATEADD(MONTH, DATEDIFF(MONTH, 0, MAX(payment_date)), 0) AS max_date FROM payments_received UNION ALL SELECT DATEADD(MONTH, 1, dt), max_date FROM RecursiveDates WHERE dt < max_date ), monthly_totals AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, payment_date), 0) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, payment_date), 0)), monthly_totals_with_zero AS (SELECT rd.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM RecursiveDates rd LEFT JOIN monthly_totals mt ON rd.dt = mt.dt) SELECT CAST(m.dt AS DATE) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero m ORDER BY m.dt OPTION (MAXRECURSION 0);",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments. MoM will always be zero for the first month that appears in your answer.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales -MoM change = (current month value - prev month value). Return months with no payments as 0." 
-car_dealership,tsql,instructions_date_join,"WITH date_range AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()) - number - 1, 0) AS month_start FROM master..spt_values WHERE type = 'P' AND number BETWEEN 0 AND 5), sales_metrics AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, s.sale_date), 0) AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM sales AS s JOIN salespersons AS sp ON s.salesperson_id = sp.id WHERE YEAR(sp.hire_date) BETWEEN 2022 AND 2023 AND s.sale_date >= DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()) - 6, 0) AND s.sale_date < DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()), 0) GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, s.sale_date), 0)) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM date_range AS dr LEFT JOIN sales_metrics AS sm ON dr.month_start = sm.sale_month ORDER BY dr.month_start ASC;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Include months where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. +MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." 
+car_dealership,tsql,instructions_date_join,"WITH date_range AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()) - number - 1, 0) AS month_start FROM master..spt_values WHERE type = 'P' AND number BETWEEN 0 AND 5), sales_metrics AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, s.sale_date), 0) AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM sales AS s JOIN salespersons AS sp ON s.salesperson_id = sp.id WHERE YEAR(sp.hire_date) BETWEEN 2022 AND 2023 AND s.sale_date >= DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()) - 6, 0) AND s.sale_date < DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()), 0) GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, s.sale_date), 0)) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM date_range AS dr LEFT JOIN sales_metrics AS sm ON dr.month_start = sm.sale_month ORDER BY dr.month_start ASC;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Return all months in your answer, including those where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. ASP = Average Sale Price during a specific timeframe To calculate the average days between a sale date and when the payment was received, join the relevant tables. TSC = Total Sales Count for a given period" From 082417b9412bbcc3f92a16648032cb82ab976ce3 Mon Sep 17 00:00:00 2001 From: wendy Date: Wed, 26 Jun 2024 10:50:41 +0800 Subject: [PATCH 2/5] - Standardize changes of #181 across dialects - Remove 'MoM will always be zero for the first month that appears in your answer.' 
--- data/instruct_advanced_bigquery.csv | 5 +++-- data/instruct_advanced_mysql.csv | 5 +++-- data/instruct_advanced_postgres.csv | 2 +- data/instruct_advanced_sqlite.csv | 9 +++++---- data/instruct_advanced_tsql.csv | 5 +++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/data/instruct_advanced_bigquery.csv b/data/instruct_advanced_bigquery.csv index cf194f0..b927d91 100644 --- a/data/instruct_advanced_bigquery.csv +++ b/data/instruct_advanced_bigquery.csv @@ -41,7 +41,8 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." car_dealership,bigquery,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date, MAX(p.payment_date) AS latest_payment_date FROM car_dealership.sales AS s JOIN car_dealership.payments_received AS p ON s.id = p.sale_id GROUP BY s.id, s.sale_date) SELECT ROUND(AVG(DATE_DIFF(latest_payment_date, sale_date, DAY)), 2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. 
TSC = Count of sales within a specified period" -car_dealership,bigquery,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC NULLS FIRST, crtd_ts DESC NULLS FIRST) AS rn FROM car_dealership.inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM car_dealership.cars AS c JOIN car_dealership.sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY highest_sale_price DESC NULLS FIRST;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period +car_dealership,bigquery,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC NULLS FIRST) AS rn FROM car_dealership.inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM car_dealership.cars AS c JOIN car_dealership.sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY highest_sale_price DESC NULLS FIRST;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","Recall that a car can have multiple entries in the inventory_snapshots table. +TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. ASP = Mean sale price for a designated start period When getting a car's inventory status, always take the latest status from the inventory_snapshots table" @@ -58,7 +59,7 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,bigquery,instructions_cte_window,"WITH salesperson_sales AS (SELECT salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM car_dealership.sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY ss.total_sales DESC NULLS FIRST) AS sales_rank FROM salesperson_sales AS ss JOIN car_dealership.salespersons AS s ON ss.salesperson_id = s.id;","Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." 
-car_dealership,bigquery,instructions_cte_window,"WITH monthly_totals AS (SELECT TIMESTAMP_TRUNC(payment_date, MONTH) AS dt, SUM(payment_amount) AS total_payments FROM car_dealership.payments_received GROUP BY dt), monthly_range AS (SELECT GENERATE_DATE_ARRAY(DATE(TIMESTAMP_TRUNC(MIN(payment_date), MONTH)), DATE(TIMESTAMP_TRUNC(MAX(payment_date), MONTH)), INTERVAL 1 MONTH) AS date_range FROM car_dealership.payments_received), monthly_totals_with_zero AS (SELECT date_range AS dt, COALESCE(mt.total_payments, 0) AS total_payments FROM UNNEST((SELECT date_range FROM monthly_range)) AS date_range LEFT JOIN monthly_totals AS mt ON date_range = mt.dt) SELECT CAST(m.dt AS DATE) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;WITH monthly_totals AS (SELECT TIMESTAMP_TRUNC(payment_date, MONTH) AS dt, SUM(payment_amount) AS total_payments FROM car_dealership.payments_received GROUP BY dt), monthly_range AS (SELECT GENERATE_DATE_ARRAY(DATE(TIMESTAMP_TRUNC(MIN(payment_date), MONTH)), DATE(TIMESTAMP_TRUNC(MAX(payment_date), MONTH)), INTERVAL 1 MONTH) AS date_range FROM car_dealership.payments_received), monthly_totals_with_zero AS (SELECT date_range AS dt, COALESCE(mt.total_payments, 0) AS total_payments FROM UNNEST((SELECT date_range FROM monthly_range)) AS date_range LEFT JOIN monthly_totals AS mt ON date_range = mt.dt) SELECT CAST(m.dt AS DATETIME) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments. 
MoM will always be zero for the first month that appears in your answer.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. +car_dealership,bigquery,instructions_cte_window,"WITH monthly_totals AS (SELECT TIMESTAMP_TRUNC(payment_date, MONTH) AS dt, SUM(payment_amount) AS total_payments FROM car_dealership.payments_received GROUP BY dt), monthly_range AS (SELECT GENERATE_DATE_ARRAY(DATE(TIMESTAMP_TRUNC(MIN(payment_date), MONTH)), DATE(TIMESTAMP_TRUNC(MAX(payment_date), MONTH)), INTERVAL 1 MONTH) AS date_range FROM car_dealership.payments_received), monthly_totals_with_zero AS (SELECT date_range AS dt, COALESCE(mt.total_payments, 0) AS total_payments FROM UNNEST((SELECT date_range FROM monthly_range)) AS date_range LEFT JOIN monthly_totals AS mt ON date_range = mt.dt) SELECT CAST(m.dt AS DATE) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;WITH monthly_totals AS (SELECT TIMESTAMP_TRUNC(payment_date, MONTH) AS dt, SUM(payment_amount) AS total_payments FROM car_dealership.payments_received GROUP BY dt), monthly_range AS (SELECT GENERATE_DATE_ARRAY(DATE(TIMESTAMP_TRUNC(MIN(payment_date), MONTH)), DATE(TIMESTAMP_TRUNC(MAX(payment_date), MONTH)), INTERVAL 1 MONTH) AS date_range FROM car_dealership.payments_received), monthly_totals_with_zero AS (SELECT date_range AS dt, COALESCE(mt.total_payments, 0) AS total_payments FROM UNNEST((SELECT date_range FROM monthly_range)) AS date_range LEFT JOIN monthly_totals AS mt ON date_range = mt.dt) SELECT CAST(m.dt AS DATETIME) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;",What is the total payments received per month? 
Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." diff --git a/data/instruct_advanced_mysql.csv b/data/instruct_advanced_mysql.csv index 669c631..a9d237b 100644 --- a/data/instruct_advanced_mysql.csv +++ b/data/instruct_advanced_mysql.csv @@ -41,7 +41,8 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." 
car_dealership,mysql,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id,s.sale_date,MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY s.id,s.sale_date) SELECT ROUND(AVG(DATEDIFF(latest_payment_date,sale_date)),2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. TSC = Count of sales within a specified period" -car_dealership,mysql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period +car_dealership,mysql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","Recall that a car can have multiple entries in the inventory_snapshots table. +TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month.
ASP = Mean sale price for a designated start period When getting a car's inventory status, always take the latest status from the inventory_snapshots table" @@ -58,7 +59,7 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,mysql,instructions_cte_window,"WITH salesperson_sales AS (SELECT salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY CASE WHEN ss.total_sales IS NULL THEN 1 ELSE 0 END DESC, ss.total_sales DESC) AS sales_rank FROM salesperson_sales AS ss JOIN salespersons AS s ON ss.salesperson_id = s.id;","Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." 
-car_dealership,mysql,instructions_cte_window,"WITH RECURSIVE monthly_range AS (SELECT MIN(DATE_FORMAT(payment_date, '%Y-%m-01')) AS dt FROM payments_received UNION ALL SELECT DATE_ADD(dt, INTERVAL 1 MONTH) FROM monthly_range WHERE dt < (SELECT MAX(DATE_FORMAT(payment_date, '%Y-%m-01')) FROM payments_received)), monthly_totals AS (SELECT DATE_FORMAT(payment_date, '%Y-%m-01') AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_totals_with_zero AS (SELECT mr.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_totals AS mt ON mr.dt = mt.dt) SELECT m.dt AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;;WITH RECURSIVE monthly_range AS (SELECT DATE_FORMAT(MIN(payment_date), '%Y-%m-01') AS month FROM payments_received UNION ALL SELECT DATE_ADD(month, INTERVAL 1 MONTH) FROM monthly_range WHERE month < (SELECT DATE_FORMAT(MAX(payment_date), '%Y-%m-01') FROM payments_received)), monthly_payments AS (SELECT DATE_FORMAT(payment_date, '%Y-%m-01') AS month, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY month), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_payments AS mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, COALESCE(mp.total_payments - LAG(mp.total_payments, 1) OVER (ORDER BY mp.month), 0) AS mom_change FROM monthly_payments_with_zeros AS mp ORDER BY mp.month;;",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments. 
MoM will always be zero for the first month that appears in your answer.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. +car_dealership,mysql,instructions_cte_window,"WITH RECURSIVE monthly_range AS (SELECT MIN(DATE_FORMAT(payment_date, '%Y-%m-01')) AS dt FROM payments_received UNION ALL SELECT DATE_ADD(dt, INTERVAL 1 MONTH) FROM monthly_range WHERE dt < (SELECT MAX(DATE_FORMAT(payment_date, '%Y-%m-01')) FROM payments_received)), monthly_totals AS (SELECT DATE_FORMAT(payment_date, '%Y-%m-01') AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_totals_with_zero AS (SELECT mr.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_totals AS mt ON mr.dt = mt.dt) SELECT m.dt AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero AS m ORDER BY m.dt;WITH RECURSIVE monthly_range AS (SELECT DATE_FORMAT(MIN(payment_date), '%Y-%m-01') AS month FROM payments_received UNION ALL SELECT DATE_ADD(month, INTERVAL 1 MONTH) FROM monthly_range WHERE month < (SELECT DATE_FORMAT(MAX(payment_date), '%Y-%m-01') FROM payments_received)), monthly_payments AS (SELECT DATE_FORMAT(payment_date, '%Y-%m-01') AS month, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY month), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_payments AS mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, mp.total_payments - LAG(mp.total_payments, 1) OVER (ORDER BY mp.month) AS mom_change FROM monthly_payments_with_zeros AS mp ORDER BY mp.month;",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). 
Return all months in your answer, including those where there were no payments.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." diff --git a/data/instruct_advanced_postgres.csv b/data/instruct_advanced_postgres.csv index b302a40..fdce4d6 100644 --- a/data/instruct_advanced_postgres.csv +++ b/data/instruct_advanced_postgres.csv @@ -59,7 +59,7 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,instructions_cte_window,"Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"WITH salesperson_sales AS (SELECT salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY ss.total_sales DESC) AS sales_rank FROM salesperson_sales ss JOIN salespersons s ON ss.salesperson_id = s.id","SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the 
salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." -car_dealership,instructions_cte_window,What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments. MoM will always be zero for the first month that appears in your answer.","WITH monthly_totals AS (SELECT DATE_TRUNC('month', payment_date) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_range AS (SELECT generate_series(DATE_TRUNC('month', MIN(payment_date)), DATE_TRUNC('month', MAX(payment_date)), '1 month'::interval) AS dt FROM payments_received), monthly_totals_with_zero AS (SELECT mr.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM monthly_range mr LEFT JOIN monthly_totals mt ON mr.dt = mt.dt) SELECT m.dt::DATE AS MONTH, m.total_payments, m.total_payments - lag(m.total_payments, 1) OVER (ORDER BY dt) AS mom_change FROM monthly_totals_with_zero m ORDER BY m.dt;WITH monthly_payments AS (SELECT DATE_TRUNC('month', pr.payment_date) AS MONTH, SUM(pr.payment_amount) AS total_payments FROM payments_received pr GROUP BY MONTH ORDER BY MONTH), monthly_range AS (SELECT generate_series(DATE_TRUNC('month', MIN(pr.payment_date)), DATE_TRUNC('month', MAX(pr.payment_date)), '1 month'::interval) AS MONTH FROM payments_received pr), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range mr LEFT JOIN monthly_payments mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, mp.total_payments - lag(mp.total_payments, 1) OVER (ORDER BY mp.month) AS mom_change FROM monthly_payments_with_zeros mp ORDER BY mp.month;","To ascertain the volume of sales conducted by each salesperson over a recent period, 
merge the salespersons and sales tables, applying a filter for recent sales transactions. +car_dealership,instructions_cte_window,What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments.","WITH monthly_totals AS (SELECT DATE_TRUNC('month', payment_date) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_range AS (SELECT generate_series(DATE_TRUNC('month', MIN(payment_date)), DATE_TRUNC('month', MAX(payment_date)), '1 month'::interval) AS dt FROM payments_received), monthly_totals_with_zero AS (SELECT mr.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM monthly_range mr LEFT JOIN monthly_totals mt ON mr.dt = mt.dt) SELECT m.dt::DATE AS MONTH, m.total_payments, m.total_payments - lag(m.total_payments, 1) OVER (ORDER BY dt) AS mom_change FROM monthly_totals_with_zero m ORDER BY m.dt;WITH monthly_payments AS (SELECT DATE_TRUNC('month', pr.payment_date) AS MONTH, SUM(pr.payment_amount) AS total_payments FROM payments_received pr GROUP BY MONTH ORDER BY MONTH), monthly_range AS (SELECT generate_series(DATE_TRUNC('month', MIN(pr.payment_date)), DATE_TRUNC('month', MAX(pr.payment_date)), '1 month'::interval) AS MONTH FROM payments_received pr), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range mr LEFT JOIN monthly_payments mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, mp.total_payments - lag(mp.total_payments, 1) OVER (ORDER BY mp.month) AS mom_change FROM monthly_payments_with_zeros mp ORDER BY mp.month;","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. 
To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." diff --git a/data/instruct_advanced_sqlite.csv b/data/instruct_advanced_sqlite.csv index b1373ff..83cca02 100644 --- a/data/instruct_advanced_sqlite.csv +++ b/data/instruct_advanced_sqlite.csv @@ -41,7 +41,8 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." car_dealership,sqlite,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date, MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY s.id, s.sale_date) SELECT ROUND(AVG(julianday(latest_payment_date) - julianday(sale_date)), 2) AS avg_days_to_paymen FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. 
ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. TSC = Count of sales within a specified period" -car_dealership,sqlite,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period +car_dealership,sqlite,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","Recall that a car can have multiple entries in the inventory_snapshots table. +TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month.
ASP = Mean sale price for a designated start period When getting a car's inventory status, always take the latest status from the inventory_snapshots table" @@ -58,11 +59,11 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,sqlite,instructions_cte_window,"WITH salesperson_sales AS (SELECT salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY CASE WHEN ss.total_sales IS NULL THEN 1 ELSE 0 END DESC, ss.total_sales DESC) AS sales_rank FROM salesperson_sales AS ss JOIN salespersons AS s ON ss.salesperson_id = s.id;","Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." 
-car_dealership,sqlite,instructions_cte_window,"WITH monthly_totals AS (SELECT strftime('%Y-%m', payment_date) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_totals_with_zero AS (SELECT dt, total_payments FROM monthly_totals UNION ALL SELECT strftime('%Y-%m', date(payment_date, 'start of month', '+' || (n || ' month'))) AS dt, 0 AS total_payments FROM payments_received, (SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10 UNION ALL SELECT 11) WHERE strftime('%Y-%m', date(payment_date, 'start of month', '+' || (n || ' month'))) <= strftime('%Y-%m', 'now') GROUP BY dt) SELECT dt AS MONTH, SUM(total_payments) AS total_payments, SUM(total_payments) - LAG(SUM(total_payments), 1, 0) OVER (ORDER BY dt) AS mom_change FROM monthly_totals_with_zero GROUP BY dt ORDER BY dt;;WITH monthly_payments AS (SELECT strftime('%Y-%m', pr.payment_date) AS month, SUM(pr.payment_amount) AS total_payments FROM payments_received AS pr GROUP BY month ORDER BY month), monthly_range AS (WITH RECURSIVE date_range AS (SELECT MIN(strftime('%Y-%m', payment_date)) AS month FROM payments_received UNION ALL SELECT strftime('%Y-%m', date(julianday(month) || '+1 month')) FROM date_range WHERE month < (SELECT MAX(strftime('%Y-%m', payment_date)) FROM payments_received)) SELECT month FROM date_range), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_payments AS mp ON mr.month = mp.month) SELECT mp.month, mp.total_payments, COALESCE(mp.total_payments - LAG(mp.total_payments, 1) OVER (ORDER BY mp.month), 0) AS mom_change FROM monthly_payments_with_zeros AS mp ORDER BY mp.month;",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). 
Return all months in your answer, including those where there were no payments. MoM will always be zero for the first month that appears in your answer.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. +car_dealership,sqlite,instructions_cte_window,"WITH monthly_totals AS (SELECT strftime('%Y-%m', payment_date) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY dt), monthly_totals_with_zero AS (SELECT dt, total_payments FROM monthly_totals UNION ALL SELECT strftime('%Y-%m', date(payment_date, 'start of month', '+' || (n || ' month'))) AS dt, 0 AS total_payments FROM payments_received, (SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10 UNION ALL SELECT 11) WHERE strftime('%Y-%m', date(payment_date, 'start of month', '+' || (n || ' month'))) <= strftime('%Y-%m', 'now') GROUP BY dt) SELECT dt AS MONTH, SUM(total_payments) AS total_payments, SUM(total_payments) - LAG(SUM(total_payments), 1) OVER (ORDER BY dt) AS mom_change FROM monthly_totals_with_zero GROUP BY dt ORDER BY dt;WITH RECURSIVE date_range AS (SELECT MIN(strftime('%Y-%m', payment_date)) AS month FROM payments_received UNION ALL SELECT strftime('%Y-%m', date(month || '-01', '+1 month')) FROM date_range WHERE month < (SELECT MAX(strftime('%Y-%m', payment_date)) FROM payments_received)), monthly_payments AS (SELECT strftime('%Y-%m', pr.payment_date) AS month, SUM(pr.payment_amount) AS total_payments FROM payments_received AS pr GROUP BY month ORDER BY month), monthly_range AS (SELECT month FROM date_range), monthly_payments_with_zeros AS (SELECT mr.month, COALESCE(mp.total_payments, 0) AS total_payments FROM monthly_range AS mr LEFT JOIN monthly_payments AS mp ON mr.month = mp.month) SELECT mp.month, 
mp.total_payments, mp.total_payments - LAG(mp.total_payments, 1) OVER (ORDER BY mp.month) AS mom_change FROM monthly_payments_with_zeros AS mp ORDER BY mp.month;",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." -car_dealership,sqlite,instructions_date_join,"WITH RECURSIVE date_range AS (SELECT DATE('now', '-7 months', 'start of month') AS month_start UNION ALL SELECT DATE(month_start, '+1 month') FROM date_range WHERE month_start < DATE('now', '-1 month', 'start of month')), sales_metrics AS (SELECT strftime('%Y-%m', s.sale_date) AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM sales AS s JOIN salespersons AS sp ON s.salesperson_id = sp.id WHERE strftime('%Y', sp.hire_date) BETWEEN '2022' AND '2023' AND s.sale_date >= DATE('now', '-7 months', 'start of month') AND s.sale_date < DATE('now', 'start of month') GROUP BY sale_month) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM date_range AS dr LEFT JOIN sales_metrics AS sm ON strftime('%Y-%m', dr.month_start) = sm.sale_month ORDER BY dr.month_start ASC;;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? 
Return all months in your answer, including those where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. +car_dealership,sqlite,instructions_date_join,"WITH RECURSIVE date_range AS (SELECT DATE('now', '-7 months', 'start of month') AS month_start UNION ALL SELECT DATE(month_start, '+1 month') FROM date_range WHERE month_start < DATE('now', '-1 month', 'start of month')), sales_metrics AS (SELECT strftime('%Y-%m', s.sale_date) AS sale_month, COUNT(s.id) AS PMSPS, SUM(s.sale_price) AS PMSR FROM sales AS s JOIN salespersons AS sp ON s.salesperson_id = sp.id WHERE strftime('%Y', sp.hire_date) BETWEEN '2022' AND '2023' AND s.sale_date >= DATE('now', '-7 months', 'start of month') AND s.sale_date < DATE('now', 'start of month') GROUP BY sale_month) SELECT dr.month_start, COALESCE(sm.PMSPS, 0) AS PMSPS, COALESCE(sm.PMSR, 0) AS PMSR FROM date_range AS dr LEFT JOIN sales_metrics AS sm ON strftime('%Y-%m', dr.month_start) = sm.sale_month ORDER BY dr.month_start ASC;","What are the PMSPS and PMSR in the last 6 months excluding the current month, for salespersons hired between 2022 and 2023 (both inclusive)? Return all months in your answer, including those where metrics are 0. Order by month ascending.",PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation.,"PMSPS = per month salesperson sales count. PMSR = per month sales revenue in dollars. Truncate date to month for aggregation. ASP = Average Sale Price during a specific timeframe To calculate the average days between a sale date and when the payment was received, join the relevant tables. 
TSC = Total Sales Count for a given period" @@ -70,7 +71,7 @@ car_dealership,sqlite,instructions_date_join,"SELECT COUNT(s.id) AS num_sales, S To get the list of cars that were sold and their sale price, join the cars and sales tables Last 30 days = CURRENT_DATE - INTERVAL '30 days' to CURRENT_DATE. Always join sales with cars before using the sales table. When using car makes, model names, engine_type, and vin_number, match case-insensitively and allow partial matches using LIKE with wildcards." -car_dealership,sqlite,instructions_date_join,"SELECT strftime('%Y-%W', p.payment_date) AS week, COUNT(p.id) AS total_payments, COUNT(CASE WHEN strftime('%w', p.payment_date) IN ('0', '6') THEN 1 END) AS weekend_payments FROM payments_received AS p JOIN sales AS s ON p.sale_id = s.id WHERE s.sale_price > 30000 AND p.payment_date >= date('now', '-8 weeks', 'weekday 0') AND p.payment_date < date('now', 'weekday 0') GROUP BY week ORDER BY week ASC;;","For sales with sale price over $30,000, how many payments were received in total and on weekends in each of the last 8 calendar weeks (excluding the current week)? Return the week (as a date), total payments received, and weekend payments received in ascending order.",Weekend days are Saturday (6) and Sunday (0). Truncate date to week for aggregation.,"To calculate the average days between sale date and payment received date, join the sales and payments received tables Weekend days are Saturday (6) and Sunday (0). Truncate date to week for aggregation. When using car makes, model names, engine_type and vin_number, match case-insensitively and allow partial matches using LIKE with wildcards. 
To get the total sales amount per salesperson, join the salespersons and sales tables, group by salesperson, and sum the sale_price" +car_dealership,sqlite,instructions_date_join,"SELECT strftime('%Y-%W', p.payment_date) AS week, COUNT(p.id) AS total_payments, COUNT(CASE WHEN strftime('%w', p.payment_date) IN ('0', '6') THEN 1 END) AS weekend_payments FROM payments_received AS p JOIN sales AS s ON p.sale_id = s.id WHERE s.sale_price > 30000 AND p.payment_date >= date('now', '-8 weeks', 'weekday 0') AND p.payment_date < date('now', 'weekday 0') GROUP BY week ORDER BY week ASC;","For sales with sale price over $30,000, how many payments were received in total and on weekends in each of the last 8 calendar weeks (excluding the current week)? Return the week (as a date), total payments received, and weekend payments received in ascending order.",Weekend days are Saturday (6) and Sunday (0). Truncate date to week for aggregation.,"To calculate the average days between sale date and payment received date, join the sales and payments received tables Weekend days are Saturday (6) and Sunday (0). Truncate date to week for aggregation. When using car makes, model names, engine_type and vin_number, match case-insensitively and allow partial matches using LIKE with wildcards. 
To get the total sales amount per salesperson, join the salespersons and sales tables, group by salesperson, and sum the sale_price" car_dealership,sqlite,instructions_date_join,"SELECT c.make, c.model, s.sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN inventory_snapshots AS i ON c.id = i.car_id AND DATE(s.sale_date) = DATE(i.snapshot_date) WHERE i.is_in_inventory = 0 ORDER BY s.sale_price DESC LIMIT 1;","What is the make, model and sale price of the car with the highest sale price that was sold on the same day it went out of inventory?","If inventory snapshots and sales from the same day are to be joined, join on the truncated date fields eg FROM inventory_snapshots i JOIN sales s ON DATE_TRUNC('day', i.snapshot_date) = DATE_TRUNC('day', s.sale_date).","ASP (average selling price) = total sales amount / number of sales To calculate the gross profit margin, join the appropriate tables and calculate the margin If inventory snapshots and sales from the same day are to be joined, join on the truncated date fields eg FROM inventory_snapshots i JOIN sales s ON DATE_TRUNC('day', i.snapshot_date) = DATE_TRUNC('day', s.sale_date). diff --git a/data/instruct_advanced_tsql.csv b/data/instruct_advanced_tsql.csv index 67e33b1..eb6e40f 100644 --- a/data/instruct_advanced_tsql.csv +++ b/data/instruct_advanced_tsql.csv @@ -41,7 +41,8 @@ SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / T TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." 
car_dealership,tsql,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date AS sale_date, MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY s.id, s.sale_date) SELECT ROUND(AVG(DATEDIFF(day, sale_date, latest_payment_date)), 2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. TSC = Count of sales within a specified period" -car_dealership,tsql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id AS car_id, is_in_inventory AS is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = 0 AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY MAX(s.sale_price) DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. 
Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period +car_dealership,tsql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id AS car_id, is_in_inventory AS is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = 0 AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY MAX(s.sale_price) DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest. Use the most recent date in the inventory_snapshots table to determine that car's inventory status.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","Recall that a car can have multiple entries in the inventory_snapshots table. +TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. 
ASP = Mean sale price for a designated start period When getting a car's inventory status, always take the latest status from the inventory_snapshots table" @@ -58,7 +59,7 @@ To get the number of sales made by each salesperson in the past 30 days, join th ASP = Calculate the average sale price without specifying the period GPM = Define gross profit margin as a ratio without specifying how to calculate total revenue or total cost" car_dealership,tsql,instructions_cte_window,"WITH salesperson_sales AS (SELECT salesperson_id AS salesperson_id, SUM(sale_price) AS total_sales, COUNT(*) AS num_sales FROM sales GROUP BY salesperson_id) SELECT s.first_name, s.last_name, ss.total_sales, ss.num_sales, RANK() OVER (ORDER BY CASE WHEN ss.total_sales IS NULL THEN 1 ELSE 0 END DESC, ss.total_sales DESC) AS sales_rank FROM salesperson_sales AS ss JOIN salespersons AS s ON ss.salesperson_id = s.id;","Return the first name, last name, total sales amount, number of sales, and SR for each salesperson",SR = sales rank of each salesperson ordered by their total sales amount descending,"SR = sales rank of each salesperson ordered by their total sales amount descending To determine the sales performance per territory, sum the sales amount and count the sales, grouping by territory To calculate the average sale price, join the sales table with itself on the salesperson_id and find the ratio of total sales amount to number of sales To assess inventory turnover, compare inventory snapshots with sales on matching days, focusing on the quantity of items sold." 
-car_dealership,tsql,instructions_cte_window,"WITH RecursiveDates AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, MIN(payment_date)), 0) AS dt, DATEADD(MONTH, DATEDIFF(MONTH, 0, MAX(payment_date)), 0) AS max_date FROM payments_received UNION ALL SELECT DATEADD(MONTH, 1, dt), max_date FROM RecursiveDates WHERE dt < max_date ), monthly_totals AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, payment_date), 0) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, payment_date), 0)), monthly_totals_with_zero AS (SELECT rd.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM RecursiveDates rd LEFT JOIN monthly_totals mt ON rd.dt = mt.dt) SELECT CAST(m.dt AS DATE) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero m ORDER BY m.dt OPTION (MAXRECURSION 0);",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments. MoM will always be zero for the first month that appears in your answer.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. 
+car_dealership,tsql,instructions_cte_window,"WITH RecursiveDates AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, MIN(payment_date)), 0) AS dt, DATEADD(MONTH, DATEDIFF(MONTH, 0, MAX(payment_date)), 0) AS max_date FROM payments_received UNION ALL SELECT DATEADD(MONTH, 1, dt), max_date FROM RecursiveDates WHERE dt < max_date ), monthly_totals AS (SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, payment_date), 0) AS dt, SUM(payment_amount) AS total_payments FROM payments_received GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, payment_date), 0)), monthly_totals_with_zero AS (SELECT rd.dt, COALESCE(mt.total_payments, 0) AS total_payments FROM RecursiveDates rd LEFT JOIN monthly_totals mt ON rd.dt = mt.dt) SELECT CAST(m.dt AS DATE) AS MONTH, m.total_payments, m.total_payments - LAG(m.total_payments, 1) OVER (ORDER BY m.dt) AS mom_change FROM monthly_totals_with_zero m ORDER BY m.dt OPTION (MAXRECURSION 0);",What is the total payments received per month? Also calculate the MoM change for each month.,"MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments.","To ascertain the volume of sales conducted by each salesperson over a recent period, merge the salespersons and sales tables, applying a filter for recent sales transactions. To determine the average duration from sale date to payment date, perform a join between the sales and payments tables To calculate the average selling price, join the sales and products tables, group by product name, and compute the ratio of total sales amount to the number of sales MoM change = (current month value - prev month value). Return all months in your answer, including those where there were no payments." 
From 0b0ed691aecf6542b52cc257c7d793a16abccfb6 Mon Sep 17 00:00:00 2001 From: wendy Date: Wed, 26 Jun 2024 10:51:29 +0800 Subject: [PATCH 3/5] correct typo "Only included" --- data/instruct_basic_bigquery.csv | 2 +- data/instruct_basic_mysql.csv | 2 +- data/instruct_basic_postgres.csv | 2 +- data/instruct_basic_sqlite.csv | 2 +- data/instruct_basic_tsql.csv | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/data/instruct_basic_bigquery.csv b/data/instruct_basic_bigquery.csv index 392c457..c4d530f 100644 --- a/data/instruct_basic_bigquery.csv +++ b/data/instruct_basic_bigquery.csv @@ -30,7 +30,7 @@ derm_treatment,bigquery,basic_group_order_limit,"SELECT specialty, COUNT(*) AS n derm_treatment,bigquery,basic_left_join,"SELECT p.patient_id, p.first_name, p.last_name FROM derm_treatment.patients AS p LEFT JOIN derm_treatment.treatments AS t ON p.patient_id = t.patient_id WHERE t.patient_id IS NULL;","Return the patient IDs, first names and last names of patients who have not received any treatments." derm_treatment,bigquery,basic_left_join,"SELECT d.drug_id, d.drug_name FROM derm_treatment.drugs AS d LEFT JOIN derm_treatment.treatments AS t ON d.drug_id = t.drug_id WHERE t.drug_id IS NULL;",Return the drug IDs and names of drugs that have not been used in any treatments. ewallet,bigquery,basic_join_date_group_order_limit,"SELECT m.name AS merchant_name, COUNT(t.txid) AS total_transactions, SUM(t.amount) AS total_amount FROM ewallet.merchants AS m JOIN ewallet.wallet_transactions_daily AS t ON m.mid = t.receiver_id WHERE t.receiver_type = 1 AND t.created_at >= CURRENT_DATE - INTERVAL '30' DAY GROUP BY merchant_name ORDER BY total_amount DESC NULLS FIRST LIMIT 5;","Who are the top 5 merchants (receiver type 1) by total transaction amount in the past 30 days (inclusive of 30 days ago)? Return the merchant name, total number of transactions, and total transaction amount." 
-ewallet,bigquery,basic_join_date_group_order_limit,"SELECT TIMESTAMP_TRUNC(t.created_at, MONTH) AS MONTH, COUNT(DISTINCT t.sender_id) AS active_users FROM ewallet.wallet_transactions_daily AS t JOIN ewallet.users AS u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY MONTH ORDER BY MONTH NULLS LAST;","How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only included successful transactions." +ewallet,bigquery,basic_join_date_group_order_limit,"SELECT TIMESTAMP_TRUNC(t.created_at, MONTH) AS MONTH, COUNT(DISTINCT t.sender_id) AS active_users FROM ewallet.wallet_transactions_daily AS t JOIN ewallet.users AS u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY MONTH ORDER BY MONTH NULLS LAST;","How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only include successful transactions." ewallet,bigquery,basic_join_group_order_limit,"SELECT c.code AS coupon_code, COUNT(t.txid) AS redemption_count, SUM(t.amount) AS total_discount FROM ewallet.coupons AS c JOIN ewallet.wallet_transactions_daily AS t ON c.cid = t.coupon_id GROUP BY coupon_code ORDER BY redemption_count DESC NULLS FIRST LIMIT 3;","What are the top 3 most frequently used coupon codes? Return the coupon code, total number of redemptions, and total amount redeemed." 
ewallet,bigquery,basic_join_group_order_limit,"SELECT u.country, COUNT(DISTINCT t.sender_id) AS user_count, SUM(t.amount) AS total_amount FROM ewallet.users AS u JOIN ewallet.wallet_transactions_daily AS t ON u.uid = t.sender_id WHERE t.sender_type = 0 GROUP BY u.country ORDER BY total_amount DESC NULLS FIRST LIMIT 5;","Which are the top 5 countries by total transaction amount sent by users, sender_type = 0? Return the country, number of distinct users who sent, and total transaction amount." ewallet,bigquery,basic_join_distinct,SELECT DISTINCT m.mid AS merchant_id FROM ewallet.merchants AS m JOIN ewallet.wallet_transactions_daily AS t ON m.mid = t.receiver_id WHERE t.receiver_type = 1;,"Return the distinct list of merchant IDs that have received money from a transaction. Consider all transaction types in the results you return, but only include the merchant ids in your final answer." diff --git a/data/instruct_basic_mysql.csv b/data/instruct_basic_mysql.csv index 43eee65..b3c457f 100644 --- a/data/instruct_basic_mysql.csv +++ b/data/instruct_basic_mysql.csv @@ -30,7 +30,7 @@ derm_treatment,mysql,basic_group_order_limit,"SELECT specialty, COUNT(*) AS num_ derm_treatment,mysql,basic_left_join,"SELECT p.patient_id, p.first_name, p.last_name FROM patients AS p LEFT JOIN treatments AS t ON p.patient_id = t.patient_id WHERE t.patient_id IS NULL;","Return the patient IDs, first names and last names of patients who have not received any treatments." derm_treatment,mysql,basic_left_join,"SELECT d.drug_id, d.drug_name FROM drugs AS d LEFT JOIN treatments AS t ON d.drug_id = t.drug_id WHERE t.drug_id IS NULL;",Return the drug IDs and names of drugs that have not been used in any treatments. 
ewallet,mysql,basic_join_date_group_order_limit,"SELECT m.name AS merchant_name, COUNT(t.txid) AS total_transactions, SUM(t.amount) AS total_amount FROM ewallet.merchants AS m JOIN ewallet.wallet_transactions_daily AS t ON m.mid = t.receiver_id WHERE t.receiver_type = 1 AND t.created_at >= CURRENT_DATE - INTERVAL 30 DAY GROUP BY m.name ORDER BY total_amount DESC LIMIT 5;","Who are the top 5 merchants (receiver type 1) by total transaction amount in the past 30 days (inclusive of 30 days ago)? Return the merchant name, total number of transactions, and total transaction amount." -ewallet,mysql,basic_join_date_group_order_limit,"SELECT DATE_FORMAT(t.created_at, '%Y-%m-01') AS month, COUNT(DISTINCT t.sender_id) AS active_users FROM ewallet.wallet_transactions_daily AS t JOIN ewallet.users AS u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY month ORDER BY month;","How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only included successful transactions." +ewallet,mysql,basic_join_date_group_order_limit,"SELECT DATE_FORMAT(t.created_at, '%Y-%m-01') AS month, COUNT(DISTINCT t.sender_id) AS active_users FROM ewallet.wallet_transactions_daily AS t JOIN ewallet.users AS u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY month ORDER BY month;","How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only include successful transactions." 
ewallet,mysql,basic_join_group_order_limit,"SELECT c.code AS coupon_code, COUNT(t.txid) AS redemption_count, SUM(t.amount) AS total_discount FROM coupons AS c JOIN wallet_transactions_daily AS t ON c.cid = t.coupon_id GROUP BY c.code ORDER BY CASE WHEN redemption_count IS NULL THEN 1 ELSE 0 END DESC, redemption_count DESC LIMIT 3;","What are the top 3 most frequently used coupon codes? Return the coupon code, total number of redemptions, and total amount redeemed." ewallet,mysql,basic_join_group_order_limit,"SELECT u.country, COUNT(DISTINCT t.sender_id) AS user_count, SUM(t.amount) AS total_amount FROM users AS u JOIN wallet_transactions_daily AS t ON u.uid = t.sender_id WHERE t.sender_type = 0 GROUP BY u.country ORDER BY CASE WHEN total_amount IS NULL THEN 1 ELSE 0 END DESC, total_amount DESC LIMIT 5;","Which are the top 5 countries by total transaction amount sent by users, sender_type = 0? Return the country, number of distinct users who sent, and total transaction amount." ewallet,mysql,basic_join_distinct,SELECT DISTINCT m.mid AS merchant_id FROM merchants AS m JOIN wallet_transactions_daily AS t ON m.mid = t.receiver_id WHERE t.receiver_type = 1;,"Return the distinct list of merchant IDs that have received money from a transaction. Consider all transaction types in the results you return, but only include the merchant ids in your final answer." 
diff --git a/data/instruct_basic_postgres.csv b/data/instruct_basic_postgres.csv index b3b80cc..1fd0ddb 100644 --- a/data/instruct_basic_postgres.csv +++ b/data/instruct_basic_postgres.csv @@ -30,7 +30,7 @@ derm_treatment,basic_group_order_limit,What are the top 2 specialties by number derm_treatment,basic_left_join,"Return the patient IDs, first names and last names of patients who have not received any treatments.","SELECT p.patient_id, p.first_name, p.last_name FROM patients p LEFT JOIN treatments t ON p.patient_id = t.patient_id WHERE t.patient_id IS NULL" derm_treatment,basic_left_join,Return the drug IDs and names of drugs that have not been used in any treatments.,"SELECT d.drug_id, d.drug_name FROM drugs d LEFT JOIN treatments t ON d.drug_id = t.drug_id WHERE t.drug_id IS NULL" ewallet,basic_join_date_group_order_limit,"Who are the top 5 merchants (receiver type 1) by total transaction amount in the past 30 days (inclusive of 30 days ago)? Return the merchant name, total number of transactions, and total transaction amount.","SELECT m.name AS merchant_name, COUNT(t.txid) AS total_transactions, SUM(t.amount) AS total_amount FROM consumer_div.merchants m JOIN consumer_div.wallet_transactions_daily t ON m.mid = t.receiver_id WHERE t.receiver_type = 1 AND t.created_at >= CURRENT_DATE - INTERVAL '30 days' GROUP BY m.name ORDER BY total_amount DESC LIMIT 5" -ewallet,basic_join_date_group_order_limit,"How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. 
Only included successful transactions.","SELECT DATE_TRUNC('month', t.created_at) AS MONTH, COUNT(DISTINCT t.sender_id) AS active_users FROM consumer_div.wallet_transactions_daily t JOIN consumer_div.users u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY MONTH ORDER BY MONTH" +ewallet,basic_join_date_group_order_limit,"How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only include successful transactions.","SELECT DATE_TRUNC('month', t.created_at) AS MONTH, COUNT(DISTINCT t.sender_id) AS active_users FROM consumer_div.wallet_transactions_daily t JOIN consumer_div.users u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY MONTH ORDER BY MONTH" ewallet,basic_join_group_order_limit,"What are the top 3 most frequently used coupon codes? Return the coupon code, total number of redemptions, and total amount redeemed.","SELECT c.code AS coupon_code, COUNT(t.txid) AS redemption_count, SUM(t.amount) AS total_discount FROM consumer_div.coupons c JOIN consumer_div.wallet_transactions_daily t ON c.cid = t.coupon_id GROUP BY c.code ORDER BY redemption_count DESC LIMIT 3" ewallet,basic_join_group_order_limit,"Which are the top 5 countries by total transaction amount sent by users, sender_type = 0? 
Return the country, number of distinct users who sent, and total transaction amount.","SELECT u.country, COUNT(DISTINCT t.sender_id) AS user_count, SUM(t.amount) AS total_amount FROM consumer_div.users u JOIN consumer_div.wallet_transactions_daily t ON u.uid = t.sender_id WHERE t.sender_type = 0 GROUP BY u.country ORDER BY total_amount DESC LIMIT 5" ewallet,basic_join_distinct,"Return the distinct list of merchant IDs that have received money from a transaction. Consider all transaction types in the results you return, but only include the merchant ids in your final answer.",SELECT DISTINCT m.mid AS merchant_id FROM consumer_div.merchants m JOIN consumer_div.wallet_transactions_daily t ON m.mid = t.receiver_id WHERE t.receiver_type = 1 diff --git a/data/instruct_basic_sqlite.csv b/data/instruct_basic_sqlite.csv index 083c963..994b571 100644 --- a/data/instruct_basic_sqlite.csv +++ b/data/instruct_basic_sqlite.csv @@ -30,7 +30,7 @@ derm_treatment,sqlite,basic_group_order_limit,"SELECT specialty, COUNT(*) AS num derm_treatment,sqlite,basic_left_join,"SELECT p.patient_id, p.first_name, p.last_name FROM patients AS p LEFT JOIN treatments AS t ON p.patient_id = t.patient_id WHERE t.patient_id IS NULL;","Return the patient IDs, first names and last names of patients who have not received any treatments." derm_treatment,sqlite,basic_left_join,"SELECT d.drug_id, d.drug_name FROM drugs AS d LEFT JOIN treatments AS t ON d.drug_id = t.drug_id WHERE t.drug_id IS NULL;",Return the drug IDs and names of drugs that have not been used in any treatments. 
ewallet,sqlite,basic_join_date_group_order_limit,"SELECT m.name AS merchant_name, COUNT(t.txid) AS total_transactions, SUM(t.amount) AS total_amount FROM merchants AS m JOIN wallet_transactions_daily AS t ON m.mid = t.receiver_id WHERE t.receiver_type = 1 AND t.created_at >= DATE('now', '-30 days') GROUP BY m.name ORDER BY total_amount DESC LIMIT 5;","Who are the top 5 merchants (receiver type 1) by total transaction amount in the past 30 days (inclusive of 30 days ago)? Return the merchant name, total number of transactions, and total transaction amount." -ewallet,sqlite,basic_join_date_group_order_limit,"SELECT strftime('%Y-%m', t.created_at) AS month, COUNT(DISTINCT t.sender_id) AS active_users FROM wallet_transactions_daily AS t JOIN users AS u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY month ORDER BY month;","How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only included successful transactions." +ewallet,sqlite,basic_join_date_group_order_limit,"SELECT strftime('%Y-%m', t.created_at) AS month, COUNT(DISTINCT t.sender_id) AS active_users FROM wallet_transactions_daily AS t JOIN users AS u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY month ORDER BY month;","How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only include successful transactions." 
ewallet,sqlite,basic_join_group_order_limit,"SELECT c.code AS coupon_code, COUNT(t.txid) AS redemption_count, SUM(t.amount) AS total_discount FROM coupons AS c JOIN wallet_transactions_daily AS t ON c.cid = t.coupon_id GROUP BY c.code ORDER BY CASE WHEN redemption_count IS NULL THEN 1 ELSE 0 END DESC, redemption_count DESC LIMIT 3;","What are the top 3 most frequently used coupon codes? Return the coupon code, total number of redemptions, and total amount redeemed." ewallet,sqlite,basic_join_group_order_limit,"SELECT u.country, COUNT(DISTINCT t.sender_id) AS user_count, SUM(t.amount) AS total_amount FROM users AS u JOIN wallet_transactions_daily AS t ON u.uid = t.sender_id WHERE t.sender_type = 0 GROUP BY u.country ORDER BY CASE WHEN total_amount IS NULL THEN 1 ELSE 0 END DESC, total_amount DESC LIMIT 5;","Which are the top 5 countries by total transaction amount sent by users, sender_type = 0? Return the country, number of distinct users who sent, and total transaction amount." ewallet,sqlite,basic_join_distinct,SELECT DISTINCT m.mid AS merchant_id FROM merchants AS m JOIN wallet_transactions_daily AS t ON m.mid = t.receiver_id WHERE t.receiver_type = 1;,"Return the distinct list of merchant IDs that have received money from a transaction. Consider all transaction types in the results you return, but only include the merchant ids in your final answer." diff --git a/data/instruct_basic_tsql.csv b/data/instruct_basic_tsql.csv index c9a8229..65896d4 100644 --- a/data/instruct_basic_tsql.csv +++ b/data/instruct_basic_tsql.csv @@ -30,7 +30,7 @@ derm_treatment,tsql,basic_group_order_limit,"SELECT TOP 2 specialty, COUNT(*) AS derm_treatment,tsql,basic_left_join,"SELECT p.patient_id, p.first_name, p.last_name FROM patients AS p LEFT JOIN treatments AS t ON p.patient_id = t.patient_id WHERE t.patient_id IS NULL;","Return the patient IDs, first names and last names of patients who have not received any treatments." 
derm_treatment,tsql,basic_left_join,"SELECT d.drug_id, d.drug_name FROM drugs AS d LEFT JOIN treatments AS t ON d.drug_id = t.drug_id WHERE t.drug_id IS NULL;",Return the drug IDs and names of drugs that have not been used in any treatments. ewallet,tsql,basic_join_date_group_order_limit,"SELECT TOP 5 m.name AS merchant_name, COUNT(t.txid) AS total_transactions, SUM(t.amount) AS total_amount FROM consumer_div.merchants AS m JOIN consumer_div.wallet_transactions_daily AS t ON m.mid = t.receiver_id WHERE t.receiver_type = 1 AND t.created_at >= DATEADD(DAY, -30, GETDATE()) GROUP BY m.name ORDER BY SUM(t.amount) DESC;","Who are the top 5 merchants (receiver type 1) by total transaction amount in the past 30 days (inclusive of 30 days ago)? Return the merchant name, total number of transactions, and total transaction amount." -ewallet,tsql,basic_join_date_group_order_limit,"SELECT DATEFROMPARTS(YEAR(t.created_at), MONTH(t.created_at), 1) AS month, COUNT(DISTINCT t.sender_id) AS active_users FROM consumer_div.wallet_transactions_daily AS t JOIN consumer_div.users AS u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY DATEFROMPARTS(YEAR(t.created_at), MONTH(t.created_at), 1) ORDER BY month;","How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only included successful transactions." 
+ewallet,tsql,basic_join_date_group_order_limit,"SELECT DATEFROMPARTS(YEAR(t.created_at), MONTH(t.created_at), 1) AS month, COUNT(DISTINCT t.sender_id) AS active_users FROM consumer_div.wallet_transactions_daily AS t JOIN consumer_div.users AS u ON t.sender_id = u.uid WHERE t.sender_type = 0 AND t.status = 'success' AND u.status = 'active' AND t.created_at >= '2023-01-01' AND t.created_at < '2024-01-01' GROUP BY DATEFROMPARTS(YEAR(t.created_at), MONTH(t.created_at), 1) ORDER BY month;","How many distinct active users sent money per month in 2023? Return the number of active users per month (as a date), starting from the earliest date. Do not include merchants in the query. Only include successful transactions." ewallet,tsql,basic_join_group_order_limit,"SELECT TOP 3 c.code AS coupon_code, COUNT(t.txid) AS redemption_count, SUM(t.amount) AS total_discount FROM consumer_div.coupons AS c JOIN consumer_div.wallet_transactions_daily AS t ON c.cid = t.coupon_id GROUP BY c.code ORDER BY COUNT(t.txid) DESC;","What are the top 3 most frequently used coupon codes? Return the coupon code, total number of redemptions, and total amount redeemed." ewallet,tsql,basic_join_group_order_limit,"SELECT TOP 5 u.country, COUNT(DISTINCT t.sender_id) AS user_count, SUM(t.amount) AS total_amount FROM consumer_div.users AS u JOIN consumer_div.wallet_transactions_daily AS t ON u.uid = t.sender_id WHERE t.sender_type = 0 GROUP BY u.country ORDER BY SUM(t.amount) DESC;","Which are the top 5 countries by total transaction amount sent by users, sender_type = 0? Return the country, number of distinct users who sent, and total transaction amount." ewallet,tsql,basic_join_distinct,SELECT DISTINCT m.mid AS merchant_id FROM consumer_div.merchants AS m JOIN consumer_div.wallet_transactions_daily AS t ON m.mid = t.receiver_id WHERE t.receiver_type = 1;,"Return the distinct list of merchant IDs that have received money from a transaction. 
Consider all transaction types in the results you return, but only include the merchant ids in your final answer." From ad711455ebbd0836a6ddfe573c20a1e2c3fd2745 Mon Sep 17 00:00:00 2001 From: wendy Date: Wed, 26 Jun 2024 10:52:43 +0800 Subject: [PATCH 4/5] remove schema name in instruction --- data/instruct_advanced_bigquery.csv | 4 ++-- data/instruct_advanced_mysql.csv | 4 ++-- data/instruct_advanced_sqlite.csv | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/data/instruct_advanced_bigquery.csv b/data/instruct_advanced_bigquery.csv index b927d91..34f57c8 100644 --- a/data/instruct_advanced_bigquery.csv +++ b/data/instruct_advanced_bigquery.csv @@ -159,9 +159,9 @@ ewallet,bigquery,instructions_cte_window,"WITH user_balances AS (SELECT user_id, LUB = Latest User Balance, which is the most recent balance for each user To determine user notification preferences, use a join between the users and user_setting_snapshot tables in a CTE, focusing on selecting the most recent snapshot for each user. 
For analyzing coupon usage, start with a join between the coupons and wallet_transactions_daily tables in a CTE, apply filtering as needed, and then perform aggregation for the total discount amount" -ewallet,bigquery,instructions_cte_window,"WITH merchant_revenue AS (SELECT m.mid, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM ewallet.merchants AS m INNER JOIN ewallet.wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.category) SELECT *, RANK() OVER (ORDER BY total_revenue DESC NULLS FIRST) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM ewallet.merchants AS m INNER JOIN ewallet.wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.name, m.category) SELECT *, RANK() OVER (ORDER BY total_revenue DESC NULLS FIRST) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.mid, m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM ewallet.merchants AS m INNER JOIN ewallet.wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.name, m.category) SELECT *, RANK() OVER (ORDER BY total_revenue DESC NULLS FIRST) AS mrr FROM merchant_revenue;","What is the MRR for each merchant? Return the merchant name, category, revenue amount, and revenue rank.","MRR = Merchant Revenue Rank, which ranks merchants based on amounts from successfully received transactions only. Filter receiver_type=1 in consumer_div.wallet_transactions_daily for merchants. 
Merchant with rank 1 has the highest revenue.","To get user notification preferences, join the users and user_setting_snapshot tables in a CTE, then select the latest snapshot for each user +ewallet,bigquery,instructions_cte_window,"WITH merchant_revenue AS (SELECT m.mid, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM ewallet.merchants AS m INNER JOIN ewallet.wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.category) SELECT *, RANK() OVER (ORDER BY total_revenue DESC NULLS FIRST) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM ewallet.merchants AS m INNER JOIN ewallet.wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.name, m.category) SELECT *, RANK() OVER (ORDER BY total_revenue DESC NULLS FIRST) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.mid, m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM ewallet.merchants AS m INNER JOIN ewallet.wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.name, m.category) SELECT *, RANK() OVER (ORDER BY total_revenue DESC NULLS FIRST) AS mrr FROM merchant_revenue;","What is the MRR for each merchant? Return the merchant name, category, revenue amount, and revenue rank.","MRR = Merchant Revenue Rank, which ranks merchants based on amounts from successfully received transactions only. Filter receiver_type=1 in wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue.","To get user notification preferences, join the users and user_setting_snapshot tables in a CTE, then select the latest snapshot for each user Merchant category should be matched case-insensitively with wildcards, e.g., using LOWER(merchants.category) LIKE '%...%'. 
-MRR = Merchant Revenue Rank, which ranks merchants based on their total successful received transaction amounts. Filter receiver_type=1 in consumer_div.wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue. +MRR = Merchant Revenue Rank, which ranks merchants based on their total successful received transaction amounts. Filter receiver_type=1 in wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue. To analyze user engagement, join the users and user_sessions tables in a CTE, then aggregate to calculate total session duration per user" ewallet,bigquery,instructions_date_join,"SELECT TIMESTAMP_TRUNC(n.created_at, WEEK) AS WEEK, COUNT(*) AS total_notifications, COUNT(CASE WHEN EXTRACT(DAYOFWEEK FROM n.created_at) IN (1, 7) THEN 1 END) AS weekend_notifications FROM ewallet.notifications AS n JOIN ewallet.users AS u ON n.user_id = u.uid WHERE u.country IN ('US', 'CA') AND n.created_at >= TIMESTAMP_TRUNC(CURRENT_DATE, WEEK) - INTERVAL 3 WEEK AND n.created_at < TIMESTAMP_TRUNC(CURRENT_DATE, WEEK) GROUP BY WEEK;","For users in the US and Canada, how many total notifications were sent in each of the last 3 weeks excluding the current week? How many of those were sent on weekends?",Weekends are Saturdays and Sundays. 
Truncate created_at to week for aggregation.,"To gauge user engagement, link users to their sessions without defining a specific date range for total session duration analysis To analyze coupon usage, join the coupons and transactions tables to aggregate total discount amounts without specifying a merchant diff --git a/data/instruct_advanced_mysql.csv b/data/instruct_advanced_mysql.csv index a9d237b..f3d0e08 100644 --- a/data/instruct_advanced_mysql.csv +++ b/data/instruct_advanced_mysql.csv @@ -159,9 +159,9 @@ ewallet,mysql,instructions_cte_window,"WITH user_balances AS (SELECT user_id, ba LUB = Latest User Balance, which is the most recent balance for each user To determine user notification preferences, use a join between the users and user_setting_snapshot tables in a CTE, focusing on selecting the most recent snapshot for each user. For analyzing coupon usage, start with a join between the coupons and wallet_transactions_daily tables in a CTE, apply filtering as needed, and then perform aggregation for the total discount amount" -ewallet,mysql,instructions_cte_window,"WITH merchant_revenue AS (SELECT m.mid, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.name, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.mid, m.name, m.category AS merchant_category, 
SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.name, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;","What is the MRR for each merchant? Return the merchant name, category, revenue amount, and revenue rank.","MRR = Merchant Revenue Rank, which ranks merchants based on amounts from successfully received transactions only. Filter receiver_type=1 in consumer_div.wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue.","To get user notification preferences, join the users and user_setting_snapshot tables in a CTE, then select the latest snapshot for each user +ewallet,mysql,instructions_cte_window,"WITH merchant_revenue AS (SELECT m.mid, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.name, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.mid, m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.name, m.category) SELECT *, RANK() OVER 
(ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;","What is the MRR for each merchant? Return the merchant name, category, revenue amount, and revenue rank.","MRR = Merchant Revenue Rank, which ranks merchants based on amounts from successfully received transactions only. Filter receiver_type=1 in wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue.","To get user notification preferences, join the users and user_setting_snapshot tables in a CTE, then select the latest snapshot for each user Merchant category should be matched case-insensitively with wildcards, e.g., using LOWER(merchants.category) LIKE '%...%'. -MRR = Merchant Revenue Rank, which ranks merchants based on their total successful received transaction amounts. Filter receiver_type=1 in consumer_div.wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue. +MRR = Merchant Revenue Rank, which ranks merchants based on their total successful received transaction amounts. Filter receiver_type=1 in wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue. To analyze user engagement, join the users and user_sessions tables in a CTE, then aggregate to calculate total session duration per user" ewallet,mysql,instructions_date_join,"SELECT DATE_FORMAT(n.created_at, '%Y-%u') AS WEEK, COUNT(*) AS total_notifications, COUNT(CASE WHEN WEEKDAY(n.created_at) IN (5, 6) THEN 1 END) AS weekend_notifications FROM ewallet.notifications AS n JOIN ewallet.users AS u ON n.user_id = u.uid WHERE u.country IN ('US', 'CA') AND n.created_at >= DATE_SUB(DATE_FORMAT(CURDATE(), '%Y-%m-%d'), INTERVAL 3 WEEK) AND n.created_at < DATE_FORMAT(CURDATE(), '%Y-%m-%d') GROUP BY WEEK;","For users in the US and Canada, how many total notifications were sent in each of the last 3 weeks excluding the current week? How many of those were sent on weekends?",Weekends are Saturdays and Sundays. 
Truncate created_at to week for aggregation.,"To gauge user engagement, link users to their sessions without defining a specific date range for total session duration analysis To analyze coupon usage, join the coupons and transactions tables to aggregate total discount amounts without specifying a merchant diff --git a/data/instruct_advanced_sqlite.csv b/data/instruct_advanced_sqlite.csv index 83cca02..20caffb 100644 --- a/data/instruct_advanced_sqlite.csv +++ b/data/instruct_advanced_sqlite.csv @@ -159,9 +159,9 @@ ewallet,sqlite,instructions_cte_window,"WITH user_balances AS (SELECT user_id, b LUB = Latest User Balance, which is the most recent balance for each user To determine user notification preferences, use a join between the users and user_setting_snapshot tables in a CTE, focusing on selecting the most recent snapshot for each user. For analyzing coupon usage, start with a join between the coupons and wallet_transactions_daily tables in a CTE, apply filtering as needed, and then perform aggregation for the total discount amount" -ewallet,sqlite,instructions_cte_window,"WITH merchant_revenue AS (SELECT m.mid, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.name, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.mid, m.name, m.category AS merchant_category, 
SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.name, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;","What is the MRR for each merchant? Return the merchant name, category, revenue amount, and revenue rank.","MRR = Merchant Revenue Rank, which ranks merchants based on amounts from successfully received transactions only. Filter receiver_type=1 in consumer_div.wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue.","To get user notification preferences, join the users and user_setting_snapshot tables in a CTE, then select the latest snapshot for each user +ewallet,sqlite,instructions_cte_window,"WITH merchant_revenue AS (SELECT m.mid, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.name, m.category) SELECT *, RANK() OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;WITH merchant_revenue AS (SELECT m.mid, m.name, m.category AS merchant_category, SUM(w.amount) AS total_revenue FROM merchants AS m INNER JOIN wallet_transactions_daily AS w ON m.mid = w.receiver_id AND w.receiver_type = 1 WHERE w.status = 'success' GROUP BY m.mid, m.name, m.category) SELECT *, RANK() 
OVER (ORDER BY CASE WHEN total_revenue IS NULL THEN 1 ELSE 0 END DESC, total_revenue DESC) AS mrr FROM merchant_revenue;","What is the MRR for each merchant? Return the merchant name, category, revenue amount, and revenue rank.","MRR = Merchant Revenue Rank, which ranks merchants based on amounts from successfully received transactions only. Filter receiver_type=1 in wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue.","To get user notification preferences, join the users and user_setting_snapshot tables in a CTE, then select the latest snapshot for each user Merchant category should be matched case-insensitively with wildcards, e.g., using LOWER(merchants.category) LIKE '%...%'. -MRR = Merchant Revenue Rank, which ranks merchants based on their total successful received transaction amounts. Filter receiver_type=1 in consumer_div.wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue. +MRR = Merchant Revenue Rank, which ranks merchants based on their total successful received transaction amounts. Filter receiver_type=1 in wallet_transactions_daily for merchants. Merchant with rank 1 has the highest revenue. To analyze user engagement, join the users and user_sessions tables in a CTE, then aggregate to calculate total session duration per user" ewallet,sqlite,instructions_date_join,"SELECT strftime('%Y-%W', n.created_at) AS WEEK, COUNT(*) AS total_notifications, COUNT(CASE WHEN strftime('%w', n.created_at) IN ('0', '6') THEN 1 END) AS weekend_notifications FROM notifications AS n JOIN users AS u ON n.user_id = u.uid WHERE u.country IN ('US', 'CA') AND n.created_at >= date('now', '-21 days', 'weekday 0', '-7 days') AND n.created_at < date('now', 'weekday 0') GROUP BY WEEK;","For users in the US and Canada, how many total notifications were sent in each of the last 3 weeks excluding the current week? How many of those were sent on weekends?",Weekends are Saturdays and Sundays. 
Truncate created_at to week for aggregation.,"To gauge user engagement, link users to their sessions without defining a specific date range for total session duration analysis To analyze coupon usage, join the coupons and transactions tables to aggregate total discount amounts without specifying a merchant From 6183a530eb9807049c04d39ed81cf00d0b89c01d Mon Sep 17 00:00:00 2001 From: wendy Date: Wed, 26 Jun 2024 10:54:12 +0800 Subject: [PATCH 5/5] clarify DDD question --- data/instruct_advanced_bigquery.csv | 2 +- data/instruct_advanced_mysql.csv | 2 +- data/instruct_advanced_postgres.csv | 2 +- data/instruct_advanced_sqlite.csv | 2 +- data/instruct_advanced_tsql.csv | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/data/instruct_advanced_bigquery.csv b/data/instruct_advanced_bigquery.csv index 34f57c8..b3cc28e 100644 --- a/data/instruct_advanced_bigquery.csv +++ b/data/instruct_advanced_bigquery.csv @@ -137,7 +137,7 @@ derm_treatment,bigquery,keywords_aggregate,SELECT AVG(weight_kg) AS caw FROM der CAW = cohort average weight in kilograms To calculate the D7D100PIR, subtract the average PASI score at the beginning of the period from the average at the end, divide by the initial average, and multiply by 100 The Defined Daily Dose (DDD) is calculated as the total consumed medication divided by the treatment duration" -derm_treatment,bigquery,keywords_ratio,"SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF(DATE_DIFF(t.end_dt, t.start_dt, DAY), 0)) AS ddd FROM derm_treatment.treatments AS t JOIN derm_treatment.drugs AS d ON t.drug_id = d.drug_id WHERE t.end_dt IS NOT NULL GROUP BY d.drug_name;",Calculate the DDD for each drug. Return the drug name and DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). 
To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." +derm_treatment,bigquery,keywords_ratio,"SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF(DATE_DIFF(t.end_dt, t.start_dt, DAY), 0)) AS ddd FROM derm_treatment.treatments AS t JOIN derm_treatment.drugs AS d ON t.drug_id = d.drug_id WHERE t.end_dt IS NOT NULL GROUP BY d.drug_name;",Calculate the average DDD for each drug. Return the drug name and average DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." derm_treatment,bigquery,keywords_ratio,SELECT (AVG(day100_pasi_score) - AVG(day7_pasi_score)) / AVG(day7_pasi_score) * 100 AS d7d100pir FROM derm_treatment.outcomes WHERE NOT day7_pasi_score IS NULL AND NOT day100_pasi_score IS NULL;,What is the overall D7D100PIR across all treatments? 
Return the percentage value.,D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. This should only include patients who have non-null PASI scores for both timepoints.,"To discover the average weight of patients who have been prescribed a specific medication, begin by associating patients with treatments on patient_id, and then apply a filter by the drug name. D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. This should only include patients who have non-null PASI scores for both timepoints. To identify doctors who have prescribed a certain type of drug and their state of practice, initially join doctors with treatments on doc_id, followed by filtering based on the drug type diff --git a/data/instruct_advanced_mysql.csv b/data/instruct_advanced_mysql.csv index f3d0e08..6068bd5 100644 --- a/data/instruct_advanced_mysql.csv +++ b/data/instruct_advanced_mysql.csv @@ -137,7 +137,7 @@ derm_treatment,mysql,keywords_aggregate,SELECT AVG(weight_kg) AS caw FROM patien CAW = cohort average weight in kilograms To calculate the D7D100PIR, subtract the average PASI score at the beginning of the period from the average at the end, divide by the initial average, and multiply by 100 The Defined Daily Dose (DDD) is calculated as the total consumed medication divided by the treatment duration" -derm_treatment,mysql,keywords_ratio,"SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF((t.end_dt - t.start_dt), 0)) AS ddd FROM treatments AS t JOIN drugs AS d ON t.drug_id = d.drug_id WHERE NOT t.end_dt IS NULL GROUP BY d.drug_name;",Calculate the DDD for each drug. 
Return the drug name and DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." +derm_treatment,mysql,keywords_ratio,"SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF((t.end_dt - t.start_dt), 0)) AS ddd FROM treatments AS t JOIN drugs AS d ON t.drug_id = d.drug_id WHERE NOT t.end_dt IS NULL GROUP BY d.drug_name;",Calculate the average DDD for each drug. Return the drug name and average DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." 
derm_treatment,mysql,keywords_ratio,SELECT (AVG(day100_pasi_score) - AVG(day7_pasi_score)) / AVG(day7_pasi_score) * 100 AS d7d100pir FROM outcomes WHERE NOT day7_pasi_score IS NULL AND NOT day100_pasi_score IS NULL;,What is the overall D7D100PIR across all treatments? Return the percentage value.,D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. This should only include patients who have non-null PASI scores for both timepoints.,"To discover the average weight of patients who have been prescribed a specific medication, begin by associating patients with treatments on patient_id, and then apply a filter by the drug name. D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. This should only include patients who have non-null PASI scores for both timepoints. To identify doctors who have prescribed a certain type of drug and their state of practice, initially join doctors with treatments on doc_id, followed by filtering based on the drug type diff --git a/data/instruct_advanced_postgres.csv b/data/instruct_advanced_postgres.csv index fdce4d6..8d708d3 100644 --- a/data/instruct_advanced_postgres.csv +++ b/data/instruct_advanced_postgres.csv @@ -137,7 +137,7 @@ derm_treatment,keywords_aggregate,What is the CAW for male patients,CAW = cohort CAW = cohort average weight in kilograms To calculate the D7D100PIR, subtract the average PASI score at the beginning of the period from the average at the end, divide by the initial average, and multiply by 100 The Defined Daily Dose (DDD) is calculated as the total consumed medication divided by the treatment duration" -derm_treatment,keywords_ratio,Calculate the DDD for each drug. 
Return the drug name and DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF((t.end_dt - t.start_dt), 0)) AS ddd FROM treatments t JOIN drugs d ON t.drug_id = d.drug_id WHERE t.end_dt IS NOT NULL GROUP BY d.drug_name","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." +derm_treatment,keywords_ratio,Calculate the average DDD for each drug. Return the drug name and average DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF((t.end_dt - t.start_dt), 0)) AS ddd FROM treatments t JOIN drugs d ON t.drug_id = d.drug_id WHERE t.end_dt IS NOT NULL GROUP BY d.drug_name","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. 
To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." derm_treatment,keywords_ratio,What is the overall D7D100PIR across all treatments? Return the percentage value.,D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. This should only include patients who have non-null PASI scores for both timepoints.,SELECT (AVG(day100_pasi_score) - AVG(day7_pasi_score)) / AVG(day7_pasi_score) * 100 AS d7d100pir FROM outcomes WHERE day7_pasi_score IS NOT NULL AND day100_pasi_score IS NOT NULL,"To discover the average weight of patients who have been prescribed a specific medication, begin by associating patients with treatments on patient_id, and then apply a filter by the drug name. D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. This should only include patients who have non-null PASI scores for both timepoints. 
To identify doctors who have prescribed a certain type of drug and their state of practice, initially join doctors with treatments on doc_id, followed by filtering based on the drug type diff --git a/data/instruct_advanced_sqlite.csv b/data/instruct_advanced_sqlite.csv index 20caffb..827f431 100644 --- a/data/instruct_advanced_sqlite.csv +++ b/data/instruct_advanced_sqlite.csv @@ -137,7 +137,7 @@ derm_treatment,sqlite,keywords_aggregate,SELECT AVG(weight_kg) AS caw FROM patie CAW = cohort average weight in kilograms To calculate the D7D100PIR, subtract the average PASI score at the beginning of the period from the average at the end, divide by the initial average, and multiply by 100 The Defined Daily Dose (DDD) is calculated as the total consumed medication divided by the treatment duration" -derm_treatment,sqlite,keywords_ratio,"SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF((t.end_dt - t.start_dt), 0)) AS ddd FROM treatments AS t JOIN drugs AS d ON t.drug_id = d.drug_id WHERE NOT t.end_dt IS NULL GROUP BY d.drug_name;",Calculate the DDD for each drug. Return the drug name and DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." 
+derm_treatment,sqlite,keywords_ratio,"SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF((t.end_dt - t.start_dt), 0)) AS ddd FROM treatments AS t JOIN drugs AS d ON t.drug_id = d.drug_id WHERE NOT t.end_dt IS NULL GROUP BY d.drug_name;",Calculate the average DDD for each drug. Return the drug name and average DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." derm_treatment,sqlite,keywords_ratio,SELECT (AVG(day100_pasi_score) - AVG(day7_pasi_score)) / AVG(day7_pasi_score) * 100 AS d7d100pir FROM outcomes WHERE NOT day7_pasi_score IS NULL AND NOT day100_pasi_score IS NULL;,What is the overall D7D100PIR across all treatments? Return the percentage value.,D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. This should only include patients who have non-null PASI scores for both timepoints.,"To discover the average weight of patients who have been prescribed a specific medication, begin by associating patients with treatments on patient_id, and then apply a filter by the drug name. D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. 
This should only include patients who have non-null PASI scores for both timepoints. To identify doctors who have prescribed a certain type of drug and their state of practice, initially join doctors with treatments on doc_id, followed by filtering based on the drug type diff --git a/data/instruct_advanced_tsql.csv b/data/instruct_advanced_tsql.csv index eb6e40f..c395d80 100644 --- a/data/instruct_advanced_tsql.csv +++ b/data/instruct_advanced_tsql.csv @@ -137,7 +137,7 @@ derm_treatment,tsql,keywords_aggregate,SELECT AVG(weight_kg) AS caw FROM patient CAW = cohort average weight in kilograms To calculate the D7D100PIR, subtract the average PASI score at the beginning of the period from the average at the end, divide by the initial average, and multiply by 100 The Defined Daily Dose (DDD) is calculated as the total consumed medication divided by the treatment duration" -derm_treatment,tsql,keywords_ratio,"SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF(DATEDIFF(day, t.start_dt, t.end_dt), 0)) AS ddd FROM treatments AS t JOIN drugs AS d ON t.drug_id = d.drug_id WHERE t.end_dt IS NOT NULL GROUP BY d.drug_name;",Calculate the DDD for each drug. Return the drug name and DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days). To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." 
+derm_treatment,tsql,keywords_ratio,"SELECT d.drug_name, AVG(t.tot_drug_amt / NULLIF(DATEDIFF(day, t.start_dt, t.end_dt), 0)) AS ddd FROM treatments AS t JOIN drugs AS d ON t.drug_id = d.drug_id WHERE t.end_dt IS NOT NULL GROUP BY d.drug_name;",Calculate the average DDD for each drug. Return the drug name and average DDD value.,"DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null","DDD (defined daily dose) = total drug amount consumed / total days of treatment (end - start date in days), where end date is not null. To find the average weight of patients treated with a specific drug, first join patients with treatments on patient_id, then filter by the drug name. To identify doctors who have prescribed a certain drug type and their respective locations, first join doctors with treatments on doc_id, then filter by the drug type. To calculate the total number of adverse events reported for treatments involving certain drug types, first join treatments with adverse_events on treatment_id, then filter by the drug type." derm_treatment,tsql,keywords_ratio,SELECT (AVG(day100_pasi_score) - AVG(day7_pasi_score)) / AVG(day7_pasi_score) * 100 AS d7d100pir FROM outcomes WHERE NOT day7_pasi_score IS NULL AND NOT day100_pasi_score IS NULL;,What is the overall D7D100PIR across all treatments? Return the percentage value.,D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. This should only include patients who have non-null PASI scores for both timepoints.,"To discover the average weight of patients who have been prescribed a specific medication, begin by associating patients with treatments on patient_id, and then apply a filter by the drug name. D7D100PIR (day 7 to day 100 PASI improvement rate) = (avg PASI score on day 100 - avg PASI score on day 7) / avg PASI score on day 7 * 100. 
This should only include patients who have non-null PASI scores for both timepoints. To identify doctors who have prescribed a certain type of drug and their state of practice, initially join doctors with treatments on doc_id, followed by filtering based on the drug type