aboutsummaryrefslogtreecommitdiff
path: root/notebooks
diff options
context:
space:
mode:
Diffstat (limited to 'notebooks')
-rw-r--r--notebooks/db_exploration.ipynb187
-rw-r--r--notebooks/raw_data_exploration.ipynb99
2 files changed, 286 insertions, 0 deletions
diff --git a/notebooks/db_exploration.ipynb b/notebooks/db_exploration.ipynb
new file mode 100644
index 0000000..68ac55e
--- /dev/null
+++ b/notebooks/db_exploration.ipynb
@@ -0,0 +1,187 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Omaha Incidents"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Exploration\n",
+ "\n",
+ "Let\"s explore the data a little bit to see what kind of analysis and visualizations we want to implement."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip3 install ipykernel\n",
+ "!pip3 install --upgrade pandas plotly dash \"nbformat>=4.2.0\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import sqlite3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "connection = sqlite3.connect(\"../raw_data/ingress.db\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cursor = connection.cursor()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Test query to see if the data loaded\n",
+ "select_all = \"SELECT * FROM incidents;\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_sql_query(select_all, connection)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test plotting by sorting & plotting top 5 crime categories\n",
+ "s = df.value_counts(subset=[\"description\"])\n",
+ "t = s.nlargest(5)\n",
+ "t.head()\n",
+ "t.plot(kind=\"bar\", title=\"Top 5 Incident Categories\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import plotly.express as px\n",
+ "import plotly.graph_objects as go"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "s.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "filtered_df = df[(df['date'] > '01/01/2023') & (df['date'] < '12/31/2023')]\n",
+ "filtered_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = px.scatter_mapbox(\n",
+ " filtered_df,\n",
+ " lat=\"lat\",\n",
+ " lon=\"lon\",\n",
+ " color=\"description\",\n",
+ " hover_name=\"description\",\n",
+ " hover_data=[\"date\", \"time\"],\n",
+ " title=\"Incident Count by Coordinates\",\n",
+ " center={\"lat\": 41.257160, \"lon\": -95.995102},\n",
+ " zoom=10\n",
+ ")\n",
+ "\n",
+ "# fig.update_layout(showlegend=False)\n",
+ "fig.update_layout(mapbox_style=\"open-street-map\")\n",
+ "fig.update_layout(margin={\"r\": 0, \"t\": 0, \"l\": 0, \"b\": 0})\n",
+ "fig.update_layout(mapbox_bounds={\"west\": -180, \"east\": -50, \"south\": 20, \"north\": 90})\n",
+ "# fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import plotly.io as pio\n",
+ "pio.write_html(fig, file=\"test.html\", auto_open=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# clean up and close it out\n",
+ "connection.commit()\n",
+ "connection.close()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/raw_data_exploration.ipynb b/notebooks/raw_data_exploration.ipynb
new file mode 100644
index 0000000..46c3a43
--- /dev/null
+++ b/notebooks/raw_data_exploration.ipynb
@@ -0,0 +1,99 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Omaha Incidents"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "You must download the data from the URL below first.\n",
+ "\n",
+ "https://police.cityofomaha.org/crime-information/incident-data-download"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Exploration\n",
+ "\n",
+ "Let's explore the data a little bit to see what kind of analysis and visualizations we want to implement."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import data\n",
+ "df = pd.read_csv(\"../raw_data/Incidents_2015.csv\")\n",
+ "\n",
+ "# test to see what the dataframe looks like\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !pip install \"matplotlib\"\n",
+ "import numpy\n",
+ "import matplotlib\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test plotting by sorting & plotting top 5 crime categories\n",
+ "s = df.value_counts(subset=[\"Statute/Ordinance Description\"])\n",
+ "t = s.nlargest(5)\n",
+ "t.head()\n",
+ "t.plot(kind=\"bar\", title=\"Top 5 Incident Categories\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}