aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitattributes2
-rw-r--r--.gitignore1
-rw-r--r--README.md12
-rw-r--r--notebooks/db_exploration.ipynb187
-rw-r--r--notebooks/raw_data_exploration.ipynb99
-rw-r--r--raw_data/Incidents_2015.csv3
-rw-r--r--raw_data/Incidents_2016.csv3
-rw-r--r--raw_data/Incidents_2017.csv3
-rw-r--r--raw_data/Incidents_2018.csv3
-rw-r--r--raw_data/Incidents_2019.csv3
-rw-r--r--raw_data/Incidents_2020.csv3
-rw-r--r--raw_data/Incidents_2021.csv3
-rw-r--r--raw_data/Incidents_2022.csv3
-rw-r--r--raw_data/Incidents_2023.csv3
-rw-r--r--raw_data/ingress.db3
-rw-r--r--scripts/dashboard.py76
-rw-r--r--scripts/load.py73
17 files changed, 480 insertions, 0 deletions
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..33e4788
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.csv filter=lfs diff=lfs merge=lfs -text
+*.db filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e43b0f9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..72d36f9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+# Omaha Incidents
+
+Data from the Omaha police department, used to analyze and visualize statistics.
+
+## TODO
+
+- [x] Import script
+- [ ] Remove duplicate instances of headers being inserted into the database as
+ records
+- [ ] Analysis script
+- [~] Visualization script
+- [ ] Build API to connect to database?
diff --git a/notebooks/db_exploration.ipynb b/notebooks/db_exploration.ipynb
new file mode 100644
index 0000000..68ac55e
--- /dev/null
+++ b/notebooks/db_exploration.ipynb
@@ -0,0 +1,187 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Omaha Incidents"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Exploration\n",
+ "\n",
+ "Let\"s explore the data a little bit to see what kind of analysis and visualizations we want to implement."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip3 install ipykernel\n",
+ "!pip3 install --upgrade pandas plotly dash \"nbformat>=4.2.0\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import sqlite3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "connection = sqlite3.connect(\"../raw_data/ingress.db\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cursor = connection.cursor()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Test query to see if the data loaded\n",
+ "select_all = \"SELECT * FROM incidents;\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_sql_query(select_all, connection)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test plotting by sorting & plotting top 5 crime categories\n",
+ "s = df.value_counts(subset=[\"description\"])\n",
+ "t = s.nlargest(5)\n",
+ "t.head()\n",
+ "t.plot(kind=\"bar\", title=\"Top 5 Incident Categories\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import plotly.express as px\n",
+ "import plotly.graph_objects as go"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "s.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filtered_df = df[(df['date'] > '01/01/2023') & (df['date'] < '12/31/2023')]\n",
+ "filtered_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = px.scatter_mapbox(\n",
+ " filtered_df,\n",
+ " lat=\"lat\",\n",
+ " lon=\"lon\",\n",
+ " color=\"description\",\n",
+ " hover_name=\"description\",\n",
+ " hover_data=[\"date\", \"time\"],\n",
+ " title=\"Incident Count by Coordinates\",\n",
+ " center={\"lat\": 41.257160, \"lon\": -95.995102},\n",
+ " zoom=10\n",
+ ")\n",
+ "\n",
+ "# fig.update_layout(showlegend=False)\n",
+ "fig.update_layout(mapbox_style=\"open-street-map\")\n",
+ "fig.update_layout(margin={\"r\": 0, \"t\": 0, \"l\": 0, \"b\": 0})\n",
+ "fig.update_layout(mapbox_bounds={\"west\": -180, \"east\": -50, \"south\": 20, \"north\": 90})\n",
+ "# fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import plotly.io as pio\n",
+ "pio.write_html(fig, file=\"test.html\", auto_open=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# clean up and close it out\n",
+ "connection.commit()\n",
+ "connection.close()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/raw_data_exploration.ipynb b/notebooks/raw_data_exploration.ipynb
new file mode 100644
index 0000000..46c3a43
--- /dev/null
+++ b/notebooks/raw_data_exploration.ipynb
@@ -0,0 +1,99 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Omaha Incidents"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "You must download the data from the URL below first.\n",
+ "\n",
+ "https://police.cityofomaha.org/crime-information/incident-data-download"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Exploration\n",
+ "\n",
+ "Let's explore the data a little bit to see what kind of analysis and visualizations we want to implement."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import data\n",
+ "df = pd.read_csv(\"../raw_data/Incidents_2015.csv\")\n",
+ "\n",
+ "# test to see what the dataframe looks like\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !pip install \"matplotlib\"\n",
+ "import numpy\n",
+ "import matplotlib\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test plotting by sorting & plotting top 5 crime categories\n",
+ "s = df.value_counts(subset=[\"Statute/Ordinance Description\"])\n",
+ "t = s.nlargest(5)\n",
+ "t.head()\n",
+ "t.plot(kind=\"bar\", title=\"Top 5 Incident Categories\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/raw_data/Incidents_2015.csv b/raw_data/Incidents_2015.csv
new file mode 100644
index 0000000..b11b4d3
--- /dev/null
+++ b/raw_data/Incidents_2015.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46504a13c49c3133e4ebfb7329cb6ac04b4571c513782700ca00da2679ba11af
+size 3252033
diff --git a/raw_data/Incidents_2016.csv b/raw_data/Incidents_2016.csv
new file mode 100644
index 0000000..9b1fba0
--- /dev/null
+++ b/raw_data/Incidents_2016.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8faab205c00118f2157b239f71fa184f0d79adba781259931ba81407140fcf92
+size 6418868
diff --git a/raw_data/Incidents_2017.csv b/raw_data/Incidents_2017.csv
new file mode 100644
index 0000000..9c05a93
--- /dev/null
+++ b/raw_data/Incidents_2017.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60b0cff58ec5a0ab7b5ff8bdcd6177f6a8548b4b05715e380c032b1cde5a98b8
+size 6750996
diff --git a/raw_data/Incidents_2018.csv b/raw_data/Incidents_2018.csv
new file mode 100644
index 0000000..2fc1b22
--- /dev/null
+++ b/raw_data/Incidents_2018.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a6f3bbdd3832db4d23856620b006a9fc7788df88ce3c107d25729ffafd15af7
+size 6077309
diff --git a/raw_data/Incidents_2019.csv b/raw_data/Incidents_2019.csv
new file mode 100644
index 0000000..ea6de20
--- /dev/null
+++ b/raw_data/Incidents_2019.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:841b4d15ae718ce4dda8382d25140df4452055b603c879c38b57c10c560da11d
+size 5885305
diff --git a/raw_data/Incidents_2020.csv b/raw_data/Incidents_2020.csv
new file mode 100644
index 0000000..9c65927
--- /dev/null
+++ b/raw_data/Incidents_2020.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d49127d47b098f925f168ff9191205b6f1779e8d93914f7c1595c980429276b
+size 5345985
diff --git a/raw_data/Incidents_2021.csv b/raw_data/Incidents_2021.csv
new file mode 100644
index 0000000..d15abff
--- /dev/null
+++ b/raw_data/Incidents_2021.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d4742625748b87d0da6e747512343c0476d6dfb8d8be01f6391036ece57cb60
+size 7013108
diff --git a/raw_data/Incidents_2022.csv b/raw_data/Incidents_2022.csv
new file mode 100644
index 0000000..8ccc37c
--- /dev/null
+++ b/raw_data/Incidents_2022.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d511a12df510be0a7aaf0d757ce4240209f539559ffa71c55c9db8da7b7fc0d4
+size 7347606
diff --git a/raw_data/Incidents_2023.csv b/raw_data/Incidents_2023.csv
new file mode 100644
index 0000000..47e44ca
--- /dev/null
+++ b/raw_data/Incidents_2023.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d3d38c8415a5fb53b3e5b0cdf5c5d5af8b67f2e2a5da0d0d86df5d7c68da193
+size 7140489
diff --git a/raw_data/ingress.db b/raw_data/ingress.db
new file mode 100644
index 0000000..9ce6d2a
--- /dev/null
+++ b/raw_data/ingress.db
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff5293a40fb466e45ee6b69c85756f3aec3e8704094324616048321decdaae87
+size 43061248
diff --git a/scripts/dashboard.py b/scripts/dashboard.py
new file mode 100644
index 0000000..23e72c4
--- /dev/null
+++ b/scripts/dashboard.py
@@ -0,0 +1,76 @@
+from dash import Dash, html, dcc, callback, Output, Input
+import plotly.express as px
+import pandas as pd
+import sqlite3
+
+# Connect to database and query all incidents
+connection = sqlite3.connect("../raw_data/ingress.db")
+cursor = connection.cursor()
+query = "SELECT * FROM incidents;"
+df = pd.read_sql_query(query, connection).sort_values(by="description")
+
+# Create custom YEAR column to use in dropdown
+df['year'] = df['date'].str[-4:]
+
+# Configure HTML layout
+app = Dash(__name__)
+app.layout = html.Div(children = [
+ html.Div([
+ html.H1(children="Omaha Police Invidents", style={"textAlign":"center"}),
+ dcc.Dropdown(df.sort_values("description").description.unique(), "INJURY", id="bar-dropdown"),
+ dcc.Dropdown(df.sort_values("year").year.unique(), "2023", id="bar-year-dropdown"),
+ dcc.Graph(id="bar-graph")
+ ]),
+ html.Div([
+ html.H2(children="Map Coordinates", style={"textAlign":"center"}),
+ dcc.Dropdown(df.sort_values("description").description.unique(), "INJURY", id="map-dropdown"),
+ dcc.Dropdown(df.sort_values("year").year.unique(), "2023", id="map-year-dropdown"),
+ dcc.Graph(id="map-graph")
+ ])
+])
+
+# Create bar graph
+@callback(
+ Output("bar-graph", "figure"),
+ Input("bar-dropdown", "value"),
+ Input("bar-year-dropdown", "value")
+)
+def update_bar_graph(description, year):
+ dff = df[df.year == year]
+ dff = dff.value_counts(subset=["description"])
+ dff = dff.reset_index()
+ dff = dff[dff.description == description]
+ return px.bar(dff, x="description", y="count")
+
+# Create map
+@callback(
+ Output("map-graph", "figure"),
+ Input("map-dropdown", "value"),
+ Input("map-year-dropdown", "value")
+)
+def update_map(description, year):
+ dff = df[df.year == year]
+ dff = dff.reset_index()
+ dff = dff[dff.description == description]
+
+ fig = px.scatter_mapbox(
+ dff,
+ lat="lat",
+ lon="lon",
+ color="description",
+ hover_name="description",
+ hover_data=["date", "time"],
+ title="Incident Count by Coordinates",
+ center={"lat": 41.257160, "lon": -95.995102},
+ zoom=10
+ )
+
+ fig.update_layout(showlegend=False)
+ fig.update_layout(mapbox_style="open-street-map")
+ fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
+ fig.update_layout(mapbox_bounds={"west": -180, "east": -50, "south": 20, "north": 90})
+
+ return fig
+
+if __name__ == "__main__":
+ app.run(debug=True)
diff --git a/scripts/load.py b/scripts/load.py
new file mode 100644
index 0000000..ea362cb
--- /dev/null
+++ b/scripts/load.py
@@ -0,0 +1,73 @@
+# Import required modules
+import csv
+import sqlite3
+import os
+
+# Create the database file
+connection = sqlite3.connect('../raw_data/ingress.db')
+
+# Creating a cursor object to execute SQL queries
+cursor = connection.cursor()
+
+# Table Definition
+# rb = RB Number
+# date = Reported Date
+# time = Reported Time
+# description = Statute/Ordinance Description
+# location = Occurred Location
+# district = Occurred District
+# lat = Occurred Block LAT
+# lon = Occurred Block LON
+create_table = '''CREATE TABLE incidents(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ rb TEXT NOT NULL,
+ date TEXT NOT NULL,
+ time TEXT NOT NULL,
+ description TEXT NOT NULL,
+ location TEXT NOT NULL,
+ district TEXT NOT NULL,
+ lat REAL NOT NULL,
+ lon REAL NOT NULL);
+ '''
+
+# Create the table
+cursor.execute(create_table)
+
+# Point to the data directory
+directory = os.fsencode("../raw_data/")
+
+# Loop through all raw data files
+for file in os.listdir(directory):
+ filename = os.fsdecode(file)
+ if filename.endswith(".csv"):
+ # Opening the file
+ file = open("../raw_data/" + filename)
+
+ # Reading the contents of the file
+ contents = csv.reader(file)
+
+ # SQL query to insert data into the
+ # table
+ insert_records = "INSERT INTO incidents (rb, date, time, description, location, district, lat, lon) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"
+
+ # Importing the contents of the file
+ # into our table
+ cursor.executemany(insert_records, contents)
+ print("Inserted data from: ", filename)
+ continue
+ else:
+ continue
+
+# Test query to see if the data loaded
+select_all = "SELECT * FROM incidents"
+rows = cursor.execute(select_all).fetchall()
+
+# Output to the console screen
+for r in rows:
+ print(r)
+
+# Commit the changes
+connection.commit()
+
+# Close the database connection
+connection.close()