17 files changed, 480 insertions, 0 deletions
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..33e4788
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.csv filter=lfs diff=lfs merge=lfs -text
+*.db filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e43b0f9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..72d36f9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+# Omaha Incidents
+
+Data from the Omaha police department, used to analyze and visualize statistics.
+
+## TODO
+
+- [x] Import script
+- [ ] Remove duplicate instances of headers being inserted into the database as
+  records
+- [ ] Analysis script
+- [~] Visualization script
+- [ ] Build API to connect to database?
diff --git a/notebooks/db_exploration.ipynb b/notebooks/db_exploration.ipynb
new file mode 100644
index 0000000..68ac55e
--- /dev/null
+++ b/notebooks/db_exploration.ipynb
@@ -0,0 +1,187 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Omaha Incidents"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Exploration\n",
+    "\n",
+    "Let\"s explore the data a little bit to see what kind of analysis and visualizations we want to implement."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip3 install ipykernel\n",
+    "!pip3 install --upgrade pandas plotly dash \"nbformat>=4.2.0\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import sqlite3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "connection = sqlite3.connect(\"../raw_data/ingress.db\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cursor = connection.cursor()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test query to see if the data loaded\n",
+    "select_all = \"SELECT * FROM incidents;\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_sql_query(select_all, connection)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# test plotting by sorting & plotting top 5 crime categories\n",
+    "s = df.value_counts(subset=[\"description\"])\n",
+    "t = s.nlargest(5)\n",
+    "t.head()\n",
+    "t.plot(kind=\"bar\", title=\"Top 5 Incident Categories\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import plotly.express as px\n",
+    "import plotly.graph_objects as go"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filtered_df = df[(df['date'] > '01/01/2023') & (df['date'] < '12/31/2023')]\n",
+    "filtered_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig = px.scatter_mapbox(\n",
+    "    filtered_df,\n",
+    "    lat=\"lat\",\n",
+    "    lon=\"lon\",\n",
+    "    color=\"description\",\n",
+    "    hover_name=\"description\",\n",
+    "    hover_data=[\"date\", \"time\"],\n",
+    "    title=\"Incident Count by Coordinates\",\n",
+    "    center={\"lat\": 41.257160, \"lon\": -95.995102},\n",
+    "    zoom=10\n",
+    ")\n",
+    "\n",
+    "# fig.update_layout(showlegend=False)\n",
+    "fig.update_layout(mapbox_style=\"open-street-map\")\n",
+    "fig.update_layout(margin={\"r\": 0, \"t\": 0, \"l\": 0, \"b\": 0})\n",
+    "fig.update_layout(mapbox_bounds={\"west\": -180, \"east\": -50, \"south\": 20, \"north\": 90})\n",
+    "# fig.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import plotly.io as pio\n",
+    "pio.write_html(fig, file=\"test.html\", auto_open=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# clean up and close it out\n",
+    "connection.commit()\n",
+    "connection.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/raw_data_exploration.ipynb b/notebooks/raw_data_exploration.ipynb
new file mode 100644
index 0000000..46c3a43
--- /dev/null
+++ b/notebooks/raw_data_exploration.ipynb
@@ -0,0 +1,99 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Omaha Incidents"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Prerequisites\n",
+    "\n",
+    "You must download the data from the URL below first.\n",
+    "\n",
+    "https://police.cityofomaha.org/crime-information/incident-data-download"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Exploration\n",
+    "\n",
+    "Let's explore the data a little bit to see what kind of analysis and visualizations we want to implement."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import data\n",
+    "df = pd.read_csv(\"../raw_data/Incidents_2015.csv\")\n",
+    "\n",
+    "# test to see what the dataframe looks like\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install \"matplotlib\"\n",
+    "import numpy\n",
+    "import matplotlib\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# test plotting by sorting & plotting top 5 crime categories\n",
+    "s = df.value_counts(subset=[\"Statute/Ordinance Description\"])\n",
+    "t = s.nlargest(5)\n",
+    "t.head()\n",
+    "t.plot(kind=\"bar\", title=\"Top 5 Incident Categories\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/raw_data/Incidents_2015.csv b/raw_data/Incidents_2015.csv
new file mode 100644
index 0000000..b11b4d3
--- /dev/null
+++ b/raw_data/Incidents_2015.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46504a13c49c3133e4ebfb7329cb6ac04b4571c513782700ca00da2679ba11af
+size 3252033
diff --git a/raw_data/Incidents_2016.csv b/raw_data/Incidents_2016.csv
new file mode 100644
index 0000000..9b1fba0
--- /dev/null
+++ b/raw_data/Incidents_2016.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8faab205c00118f2157b239f71fa184f0d79adba781259931ba81407140fcf92
+size 6418868
diff --git a/raw_data/Incidents_2017.csv b/raw_data/Incidents_2017.csv
new file mode 100644
index 0000000..9c05a93
--- /dev/null
+++ b/raw_data/Incidents_2017.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60b0cff58ec5a0ab7b5ff8bdcd6177f6a8548b4b05715e380c032b1cde5a98b8
+size 6750996
diff --git a/raw_data/Incidents_2018.csv b/raw_data/Incidents_2018.csv
new file mode 100644
index 0000000..2fc1b22
--- /dev/null
+++ b/raw_data/Incidents_2018.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a6f3bbdd3832db4d23856620b006a9fc7788df88ce3c107d25729ffafd15af7
+size 6077309
diff --git a/raw_data/Incidents_2019.csv b/raw_data/Incidents_2019.csv
new file mode 100644
index 0000000..ea6de20
--- /dev/null
+++ b/raw_data/Incidents_2019.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:841b4d15ae718ce4dda8382d25140df4452055b603c879c38b57c10c560da11d
+size 5885305
diff --git a/raw_data/Incidents_2020.csv b/raw_data/Incidents_2020.csv
new file mode 100644
index 0000000..9c65927
--- /dev/null
+++ b/raw_data/Incidents_2020.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d49127d47b098f925f168ff9191205b6f1779e8d93914f7c1595c980429276b
+size 5345985
diff --git a/raw_data/Incidents_2021.csv b/raw_data/Incidents_2021.csv
new file mode 100644
index 0000000..d15abff
--- /dev/null
+++ b/raw_data/Incidents_2021.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d4742625748b87d0da6e747512343c0476d6dfb8d8be01f6391036ece57cb60
+size 7013108
diff --git a/raw_data/Incidents_2022.csv b/raw_data/Incidents_2022.csv
new file mode 100644
index 0000000..8ccc37c
--- /dev/null
+++ b/raw_data/Incidents_2022.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d511a12df510be0a7aaf0d757ce4240209f539559ffa71c55c9db8da7b7fc0d4
+size 7347606
diff --git a/raw_data/Incidents_2023.csv b/raw_data/Incidents_2023.csv
new file mode 100644
index 0000000..47e44ca
--- /dev/null
+++ b/raw_data/Incidents_2023.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d3d38c8415a5fb53b3e5b0cdf5c5d5af8b67f2e2a5da0d0d86df5d7c68da193
+size 7140489
diff --git a/raw_data/ingress.db b/raw_data/ingress.db
new file mode 100644
index 0000000..9ce6d2a
--- /dev/null
+++ b/raw_data/ingress.db
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff5293a40fb466e45ee6b69c85756f3aec3e8704094324616048321decdaae87
+size 43061248
diff --git a/scripts/dashboard.py b/scripts/dashboard.py
new file mode 100644
index 0000000..23e72c4
--- /dev/null
+++ b/scripts/dashboard.py
@@ -0,0 +1,76 @@
+from dash import Dash, html, dcc, callback, Output, Input
+import plotly.express as px
+import pandas as pd
+import sqlite3
+
+# Connect to database and query all incidents
+connection = sqlite3.connect("../raw_data/ingress.db")
+cursor = connection.cursor()
+query = "SELECT * FROM incidents;"
+df = pd.read_sql_query(query, connection).sort_values(by="description")
+
+# Create custom YEAR column to use in dropdown
+df['year'] = df['date'].str[-4:]
+
+# Configure HTML layout
+app = Dash(__name__)
+app.layout = html.Div(children = [
+    html.Div([
+        html.H1(children="Omaha Police Invidents", style={"textAlign":"center"}),
+        dcc.Dropdown(df.sort_values("description").description.unique(), "INJURY", id="bar-dropdown"),
+        dcc.Dropdown(df.sort_values("year").year.unique(), "2023", id="bar-year-dropdown"),
+        dcc.Graph(id="bar-graph")
+    ]),
+    html.Div([
+        html.H2(children="Map Coordinates", style={"textAlign":"center"}),
+        dcc.Dropdown(df.sort_values("description").description.unique(), "INJURY", id="map-dropdown"),
+        dcc.Dropdown(df.sort_values("year").year.unique(), "2023", id="map-year-dropdown"),
+        dcc.Graph(id="map-graph")
+    ])
+])
+
+# Create bar graph
+@callback(
+    Output("bar-graph", "figure"),
+    Input("bar-dropdown", "value"),
+    Input("bar-year-dropdown", "value")
+)
+def update_bar_graph(description, year):
+    dff = df[df.year == year]
+    dff = dff.value_counts(subset=["description"])
+    dff = dff.reset_index()
+    dff = dff[dff.description == description]
+    return px.bar(dff, x="description", y="count")
+
+# Create map
+@callback(
+    Output("map-graph", "figure"),
+    Input("map-dropdown", "value"),
+    Input("map-year-dropdown", "value")
+)
+def update_map(description, year):
+    dff = df[df.year == year]
+    dff = dff.reset_index()
+    dff = dff[dff.description == description]
+
+    fig = px.scatter_mapbox(
+        dff,
+        lat="lat",
+        lon="lon",
+        color="description",
+        hover_name="description",
+        hover_data=["date", "time"],
+        title="Incident Count by Coordinates",
+        center={"lat": 41.257160, "lon": -95.995102},
+        zoom=10
+    )
+    
+    fig.update_layout(showlegend=False)
+    fig.update_layout(mapbox_style="open-street-map")
+    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
+    fig.update_layout(mapbox_bounds={"west": -180, "east": -50, "south": 20, "north": 90})
+
+    return fig
+
+if __name__ == "__main__":
+    app.run(debug=True)
diff --git a/scripts/load.py b/scripts/load.py
new file mode 100644
index 0000000..ea362cb
--- /dev/null
+++ b/scripts/load.py
@@ -0,0 +1,73 @@
+# Import required modules
+import csv
+import sqlite3
+import os
+
+# Create the database file
+connection = sqlite3.connect('../raw_data/ingress.db')
+
+# Creating a cursor object to execute SQL queries
+cursor = connection.cursor()
+
+# Table Definition
+# rb          = RB Number
+# date        = Reported Date
+# time        = Reported Time
+# description = Statute/Ordinance Description
+# location    = Occurred Location
+# district    = Occurred District
+# lat         = Occurred Block LAT
+# lon         = Occurred Block LON
+create_table = '''CREATE TABLE incidents(
+				id INTEGER PRIMARY KEY AUTOINCREMENT,
+                rb TEXT NOT NULL,
+				date TEXT NOT NULL,
+				time TEXT NOT NULL,
+                description TEXT NOT NULL,
+                location TEXT NOT NULL,
+                district TEXT NOT NULL,
+                lat REAL NOT NULL,
+                lon REAL NOT NULL);
+				'''
+
+# Create the table
+cursor.execute(create_table)
+
+# Point to the data directory
+directory = os.fsencode("../raw_data/")
+
+# Loop through all raw data files
+for file in os.listdir(directory):
+    filename = os.fsdecode(file)
+    if filename.endswith(".csv"): 
+        # Opening the file
+        file = open("../raw_data/" + filename)
+
+        # Reading the contents of the file
+        contents = csv.reader(file)
+
+        # SQL query to insert data into the
+        # table
+        insert_records = "INSERT INTO incidents (rb, date, time, description, location, district, lat, lon) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"
+
+        # Importing the contents of the file 
+        # into our table
+        cursor.executemany(insert_records, contents)
+        print("Inserted data from: ", filename)
+        continue
+    else:
+        continue
+
+# Test query to see if the data loaded
+select_all = "SELECT * FROM incidents"
+rows = cursor.execute(select_all).fetchall()
+
+# Output to the console screen
+for r in rows:
+	print(r)
+
+# Commit the changes
+connection.commit()
+
+# Close the database connection
+connection.close()