diff options
Diffstat (limited to 'notebooks/db_exploration.ipynb')
-rw-r--r-- | notebooks/db_exploration.ipynb | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/notebooks/db_exploration.ipynb b/notebooks/db_exploration.ipynb new file mode 100644 index 0000000..68ac55e --- /dev/null +++ b/notebooks/db_exploration.ipynb @@ -0,0 +1,187 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Omaha Incidents" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Exploration\n", + "\n", + "Let\"s explore the data a little bit to see what kind of analysis and visualizations we want to implement." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install ipykernel\n", + "!pip3 install --upgrade pandas plotly dash \"nbformat>=4.2.0\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "connection = sqlite3.connect(\"../raw_data/ingress.db\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cursor = connection.cursor()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test query to see if the data loaded\n", + "select_all = \"SELECT * FROM incidents;\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_sql_query(select_all, connection)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test plotting by sorting & plotting top 5 crime categories\n", + "s = df.value_counts(subset=[\"description\"])\n", + "t = s.nlargest(5)\n", + "t.head()\n", + "t.plot(kind=\"bar\", title=\"Top 5 Incident Categories\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import plotly.express as px\n", + "import plotly.graph_objects as go" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filtered_df = df[(df['date'] > '01/01/2023') & (df['date'] < '12/31/2023')]\n", + "filtered_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = px.scatter_mapbox(\n", + " filtered_df,\n", + " lat=\"lat\",\n", + " lon=\"lon\",\n", + " color=\"description\",\n", + " hover_name=\"description\",\n", + " hover_data=[\"date\", \"time\"],\n", + " title=\"Incident Count by Coordinates\",\n", + " center={\"lat\": 41.257160, \"lon\": -95.995102},\n", + " zoom=10\n", + ")\n", + "\n", + "# fig.update_layout(showlegend=False)\n", + "fig.update_layout(mapbox_style=\"open-street-map\")\n", + "fig.update_layout(margin={\"r\": 0, \"t\": 0, \"l\": 0, \"b\": 0})\n", + "fig.update_layout(mapbox_bounds={\"west\": -180, \"east\": -50, \"south\": 20, \"north\": 90})\n", + "# fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import plotly.io as pio\n", + "pio.write_html(fig, file=\"test.html\", auto_open=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# clean up and close it out\n", + "connection.commit()\n", + "connection.close()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} |