aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Cleberg <hello@cleberg.net>2023-06-16 16:43:28 -0500
committerChristian Cleberg <hello@cleberg.net>2023-06-16 16:43:28 -0500
commit6cdd053ccbafe5c479ae379448a85569bccdf19d (patch)
tree65d5e1f9f2da99afd94960a8c9e14c69241a8248
parent836841dba83ae4b79009799642a7acdef13ffd48 (diff)
downloaddata-science-6cdd053ccbafe5c479ae379448a85569bccdf19d.tar.gz
data-science-6cdd053ccbafe5c479ae379448a85569bccdf19d.tar.bz2
data-science-6cdd053ccbafe5c479ae379448a85569bccdf19d.zip
add basic sqlite3 analysis script
-rw-r--r--projects/sqlite3-analysis/README.md26
-rw-r--r--projects/sqlite3-analysis/chinook.dbbin0 -> 884736 bytes
-rw-r--r--projects/sqlite3-analysis/employee_sales.csv4
-rw-r--r--projects/sqlite3-analysis/employee_sales.sql49
4 files changed, 79 insertions, 0 deletions
diff --git a/projects/sqlite3-analysis/README.md b/projects/sqlite3-analysis/README.md
new file mode 100644
index 0000000..18a1a43
--- /dev/null
+++ b/projects/sqlite3-analysis/README.md
@@ -0,0 +1,26 @@
+## Instructions
+
+```bash
+# Open the database:
+sqlite3 ./chinook.db
+```
+
+```sql
+# Enable some features and export the CSV results of the script
+.headers on
+.mode csv
+.output /home/cmc/git/data-science/projects/sql-dashboard/employee_sales.csv
+.read employee_sales.sql
+```
+
+```bash
+# Inspect the results
+cat ./employee_sales.csv
+```
+
+```csv
+EmployeeId,LastName,FirstName,Title,CustomerId,SupportRepId,State,Country,InvoiceId,CustomerId,InvoiceDate,InvoiceId,UnitPrice,Quantity,TotalPrice
+3,Peacock,Jane,"Sales Support Agent",1,3,SP,Brazil,98,1,"2010-03-11 00:00:00",98,1.99,1,1.99
+4,Park,Margaret,"Sales Support Agent",4,4,,Norway,2,4,"2009-01-02 00:00:00",2,0.99,1,0.99
+5,Johnson,Steve,"Sales Support Agent",2,5,,Germany,1,2,"2009-01-01 00:00:00",1,0.99,1,0.99
+```
diff --git a/projects/sqlite3-analysis/chinook.db b/projects/sqlite3-analysis/chinook.db
new file mode 100644
index 0000000..2ede95c
--- /dev/null
+++ b/projects/sqlite3-analysis/chinook.db
Binary files differ
diff --git a/projects/sqlite3-analysis/employee_sales.csv b/projects/sqlite3-analysis/employee_sales.csv
new file mode 100644
index 0000000..aef9b99
--- /dev/null
+++ b/projects/sqlite3-analysis/employee_sales.csv
@@ -0,0 +1,4 @@
+EmployeeId,LastName,FirstName,Title,CustomerId,SupportRepId,State,Country,InvoiceId,CustomerId,InvoiceDate,InvoiceId,UnitPrice,Quantity,TotalPrice
+3,Peacock,Jane,"Sales Support Agent",1,3,SP,Brazil,98,1,"2010-03-11 00:00:00",98,1.99,1,1.99
+4,Park,Margaret,"Sales Support Agent",4,4,,Norway,2,4,"2009-01-02 00:00:00",2,0.99,1,0.99
+5,Johnson,Steve,"Sales Support Agent",2,5,,Germany,1,2,"2009-01-01 00:00:00",1,0.99,1,0.99
diff --git a/projects/sqlite3-analysis/employee_sales.sql b/projects/sqlite3-analysis/employee_sales.sql
new file mode 100644
index 0000000..3336f8c
--- /dev/null
+++ b/projects/sqlite3-analysis/employee_sales.sql
@@ -0,0 +1,49 @@
+-- Data ETL business script in sqlite3
+
+-- Create new field(s) for business-requested calculations
+ALTER TABLE invoice_items
+ADD COLUMN TotalPrice
+GENERATED ALWAYS AS (UnitPrice * Quantity);
+
+-- Extract and summarize data to show sales totals per employee
+WITH cst AS (
+ SELECT
+ CustomerId
+ ,SupportRepId
+ ,State
+ ,Country
+ FROM customers
+),
+emp AS (
+ SELECT
+ EmployeeId
+ ,LastName
+ ,FirstName
+ ,Title
+ FROM employees
+),
+inv AS (
+ SELECT
+ InvoiceId
+ ,CustomerId
+ ,InvoiceDate
+ FROM invoices
+),
+tot AS (
+ SELECT
+ InvoiceId
+ ,UnitPrice
+ ,Quantity
+ ,TotalPrice
+ FROM invoice_items
+)
+
+SELECT DISTINCT * FROM emp
+LEFT JOIN cst ON emp.EmployeeId = cst.SupportRepId
+LEFT JOIN inv ON cst.CustomerId = inv.CustomerId
+LEFT JOIN tot ON inv.InvoiceId = tot.InvoiceId
+WHERE inv.InvoiceDate <= date()
+GROUP BY EmployeeId;
+
+-- Drop col used for calculations
+ALTER TABLE invoice_items DROP TotalPrice;