From 836841dba83ae4b79009799642a7acdef13ffd48 Mon Sep 17 00:00:00 2001 From: Christian Cleberg Date: Mon, 22 May 2023 15:39:44 -0500 Subject: initial commit --- notebooks/Lincoln_Business_Clusters.ipynb | 1610 +++++++++++++++++++++++++++++ 1 file changed, 1610 insertions(+) create mode 100644 notebooks/Lincoln_Business_Clusters.ipynb (limited to 'notebooks/Lincoln_Business_Clusters.ipynb') diff --git a/notebooks/Lincoln_Business_Clusters.ipynb b/notebooks/Lincoln_Business_Clusters.ipynb new file mode 100644 index 0000000..c01526e --- /dev/null +++ b/notebooks/Lincoln_Business_Clusters.ipynb @@ -0,0 +1,1610 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting package metadata (current_repodata.json): ...working... done\n", + "Solving environment: ...working... done\n", + "\n", + "# All requested packages already installed.\n", + "\n", + "Requirement already satisfied: geopy in c:\\users\\my_user\\anaconda3\\lib\\site-packages (2.0.0)\n", + "Requirement already satisfied: geographiclib<2,>=1.49 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geopy) (1.50)\n", + "Requirement already satisfied: folium in c:\\users\\my_user\\anaconda3\\lib\\site-packages (0.11.0)\n", + "Requirement already satisfied: numpy in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from folium) (1.18.1)\n", + "Requirement already satisfied: jinja2>=2.9 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from folium) (2.11.1)\n", + "Requirement already satisfied: branca>=0.3.0 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from folium) (0.4.1)\n", + "Requirement already satisfied: requests in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from folium) (2.22.0)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from jinja2>=2.9->folium) (1.1.1)\n", + "Requirement already satisfied: 
idna<2.9,>=2.5 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->folium) (2.8)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->folium) (1.25.8)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->folium) (3.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->folium) (2019.11.28)\n" + ] + } + ], + "source": [ + "# Optionally, install needed libraries\n", + "!conda install -c conda-forge geocoder --yes\n", + "!pip install geopy\n", + "!pip install folium" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the necessary libraries\n", + "import pandas as pd\n", + "import requests\n", + "import folium\n", + "import math\n", + "import json\n", + "from pandas.io.json import json_normalize\n", + "from sklearn.cluster import KMeans" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Define the latitude and longitude of Lincoln, then map the results\n", + "latitude = 40.806862\n", + "longitude = -96.681679\n", + "map_LNK = folium.Map(location=[latitude, longitude], zoom_start=12)\n", + " \n", + "map_LNK" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Important: The Foursquare API only returns 100 venues at a time using this endpoint.\n", + "Pull data back in 3 groups to get all 232 results" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Foursquare API credentials\n", + "CLIENT_ID = 'MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN'\n", + "CLIENT_SECRET = 'YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ'\n", + "VERSION = '20180604'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://api.foursquare.com/v2/venues/explore?&client_id=MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN&client_secret=YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ&v=20180604&ll=40.806862,-96.681679&radius=10000&limit=100'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set up the URL to fetch the first 100 results\n", + "LIMIT = 100\n", + "radius = 10000\n", + "url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(\n", + " CLIENT_ID, \n", + " CLIENT_SECRET, \n", + " VERSION, \n", + " latitude, \n", + " longitude, \n", + " radius, \n", + " LIMIT)\n", + "url" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Fetch the first 100 results\n", + "results = requests.get(url).json()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "232" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Determine the total number of results needed to fetch\n", + "totalResults = results['response']['totalResults']\n", + "totalResults" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://api.foursquare.com/v2/venues/explore?&client_id=MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN&client_secret=YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ&v=20180604&ll=40.806862,-96.681679&radius=10000&limit=100&offset=100'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set up the URL to fetch the second 100 results (101-200)\n", + "LIMIT = 100\n", + "offset = 100\n", + "radius = 10000\n", + "url2 = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&offset={}'.format(\n", + " CLIENT_ID, \n", + " CLIENT_SECRET, \n", + " VERSION, \n", + " latitude, \n", + " longitude, \n", + " radius, \n", + " LIMIT,\n", + " offset)\n", + "url2" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Fetch the second 100 results (101-200)\n", + "results2 = requests.get(url2).json()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://api.foursquare.com/v2/venues/explore?&client_id=MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN&client_secret=YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ&v=20180604&ll=40.806862,-96.681679&radius=10000&limit=100&offset=200'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set up the URL to fetch the final results (201 - 232)\n", + "LIMIT = 100\n", + "offset = 200\n", + 
"radius = 10000\n", + "url3 = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&offset={}'.format(\n", + " CLIENT_ID, \n", + " CLIENT_SECRET, \n", + " VERSION, \n", + " latitude, \n", + " longitude, \n", + " radius, \n", + " LIMIT,\n", + " offset)\n", + "url3" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Fetch the final results (201 - 232)\n", + "results3 = requests.get(url3).json()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# This function will extract the category of the venue from the API dictionary\n", + "def get_category_type(row):\n", + " try:\n", + " categories_list = row['categories']\n", + " except:\n", + " categories_list = row['venue.categories']\n", + " \n", + " if len(categories_list) == 0:\n", + " return None\n", + " else:\n", + " return categories_list[0]['name']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\my_user\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecategorieslatlng
0Sunken GardensGarden40.802319-96.683149
1Lincoln Children's ZooZoo40.800463-96.680036
2Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
3Yia Yia'sPizza Place40.813599-96.700540
4Antelope ParkPark40.796821-96.674826
...............
95Jack's Bar & GrillBar40.813895-96.709815
96Trade A Tape Comic Book CenterComic Shop40.812996-96.708490
97MazatlanMexican Restaurant40.815196-96.625641
98Rosie'sBar40.765226-96.700645
99Buzzard Billy'sCajun / Creole Restaurant40.815546-96.710123
\n", + "

100 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " name categories lat \\\n", + "0 Sunken Gardens Garden 40.802319 \n", + "1 Lincoln Children's Zoo Zoo 40.800463 \n", + "2 Honest Abe's Burgers & Freedom Burger Joint 40.814234 \n", + "3 Yia Yia's Pizza Place 40.813599 \n", + "4 Antelope Park Park 40.796821 \n", + ".. ... ... ... \n", + "95 Jack's Bar & Grill Bar 40.813895 \n", + "96 Trade A Tape Comic Book Center Comic Shop 40.812996 \n", + "97 Mazatlan Mexican Restaurant 40.815196 \n", + "98 Rosie's Bar 40.765226 \n", + "99 Buzzard Billy's Cajun / Creole Restaurant 40.815546 \n", + "\n", + " lng \n", + "0 -96.683149 \n", + "1 -96.680036 \n", + "2 -96.701008 \n", + "3 -96.700540 \n", + "4 -96.674826 \n", + ".. ... \n", + "95 -96.709815 \n", + "96 -96.708490 \n", + "97 -96.625641 \n", + "98 -96.700645 \n", + "99 -96.710123 \n", + "\n", + "[100 rows x 4 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the first 100 venues\n", + "venues = results['response']['groups'][0]['items']\n", + "nearby_venues = json_normalize(venues)\n", + "\n", + "# filter columns\n", + "filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']\n", + "nearby_venues = nearby_venues.loc[:, filtered_columns]\n", + "\n", + "# filter the category for each row\n", + "nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)\n", + "\n", + "# clean columns\n", + "nearby_venues.columns = [col.split(\".\")[-1] for col in nearby_venues.columns]\n", + "nearby_venues" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\my_user\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n" + ] + 
}, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecategorieslatlng
0Sunken GardensGarden40.802319-96.683149
1Lincoln Children's ZooZoo40.800463-96.680036
2Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
3Yia Yia'sPizza Place40.813599-96.700540
4Antelope ParkPark40.796821-96.674826
...............
95Engine House CafeAmerican Restaurant40.857195-96.637721
96Marcus Edgewood CinemaMovie Theater40.760190-96.642499
97Victoria's SecretLingerie Store40.742087-96.679395
98Pancho Villa Mexican GrillMexican Restaurant40.860718-96.640711
99Popeyes Louisiana KitchenFried Chicken Joint40.768581-96.624462
\n", + "

200 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " name categories lat lng\n", + "0 Sunken Gardens Garden 40.802319 -96.683149\n", + "1 Lincoln Children's Zoo Zoo 40.800463 -96.680036\n", + "2 Honest Abe's Burgers & Freedom Burger Joint 40.814234 -96.701008\n", + "3 Yia Yia's Pizza Place 40.813599 -96.700540\n", + "4 Antelope Park Park 40.796821 -96.674826\n", + ".. ... ... ... ...\n", + "95 Engine House Cafe American Restaurant 40.857195 -96.637721\n", + "96 Marcus Edgewood Cinema Movie Theater 40.760190 -96.642499\n", + "97 Victoria's Secret Lingerie Store 40.742087 -96.679395\n", + "98 Pancho Villa Mexican Grill Mexican Restaurant 40.860718 -96.640711\n", + "99 Popeyes Louisiana Kitchen Fried Chicken Joint 40.768581 -96.624462\n", + "\n", + "[200 rows x 4 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the second 100 venues\n", + "venues2 = results2['response']['groups'][0]['items']\n", + "nearby_venues2 = json_normalize(venues2) # flatten JSON\n", + "\n", + "# filter columns\n", + "filtered_columns2 = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']\n", + "nearby_venues2 = nearby_venues2.loc[:, filtered_columns]\n", + "\n", + "# filter the category for each row\n", + "nearby_venues2['venue.categories'] = nearby_venues2.apply(get_category_type, axis=1)\n", + "\n", + "# clean columns\n", + "nearby_venues2.columns = [col.split(\".\")[-1] for col in nearby_venues.columns]\n", + "\n", + "nearby_venues = nearby_venues.append(nearby_venues2)\n", + "nearby_venues" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\my_user\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n" + ] + 
}, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecategorieslatlng
0Sunken GardensGarden40.802319-96.683149
1Lincoln Children's ZooZoo40.800463-96.680036
2Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
3Yia Yia'sPizza Place40.813599-96.700540
4Antelope ParkPark40.796821-96.674826
...............
227PepperJax GrillAmerican Restaurant40.738810-96.680150
228ALDIGrocery Store40.723974-96.681705
229Crete Carrier/Shaffer TruckingBuilding40.816108-96.795131
230Hampton Inn & SuitesHotel40.887348-96.678617
231Super SaverSupermarket40.878144-96.735121
\n", + "

232 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " name categories lat lng\n", + "0 Sunken Gardens Garden 40.802319 -96.683149\n", + "1 Lincoln Children's Zoo Zoo 40.800463 -96.680036\n", + "2 Honest Abe's Burgers & Freedom Burger Joint 40.814234 -96.701008\n", + "3 Yia Yia's Pizza Place 40.813599 -96.700540\n", + "4 Antelope Park Park 40.796821 -96.674826\n", + ".. ... ... ... ...\n", + "227 PepperJax Grill American Restaurant 40.738810 -96.680150\n", + "228 ALDI Grocery Store 40.723974 -96.681705\n", + "229 Crete Carrier/Shaffer Trucking Building 40.816108 -96.795131\n", + "230 Hampton Inn & Suites Hotel 40.887348 -96.678617\n", + "231 Super Saver Supermarket 40.878144 -96.735121\n", + "\n", + "[232 rows x 4 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the rest of the venues\n", + "venues3 = results3['response']['groups'][0]['items']\n", + "nearby_venues3 = json_normalize(venues3) # flatten JSON\n", + "\n", + "# filter columns\n", + "filtered_columns3 = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']\n", + "nearby_venues3 = nearby_venues3.loc[:, filtered_columns]\n", + "\n", + "# filter the category for each row\n", + "nearby_venues3['venue.categories'] = nearby_venues3.apply(get_category_type, axis=1)\n", + "\n", + "# clean columns\n", + "nearby_venues3.columns = [col.split(\".\")[-1] for col in nearby_venues3.columns]\n", + "\n", + "nearby_venues = nearby_venues.append(nearby_venues3)\n", + "nearby_venues = nearby_venues.reset_index(drop=True)\n", + "nearby_venues" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# add markers to map\n", + "for lat, lng, name, categories in zip(nearby_venues['lat'], nearby_venues['lng'], nearby_venues['name'], nearby_venues['categories']):\n", + " label = '{} ({})'.format(name, categories)\n", + " label = folium.Popup(label, parse_html=True)\n", + " folium.CircleMarker(\n", + " [lat, lng],\n", + " radius=5,\n", + " popup=label,\n", + " color='blue',\n", + " fill=True,\n", + " fill_color='#3186cc',\n", + " fill_opacity=0.7,\n", + " ).add_to(map_LNK)\n", + "\n", + "map_LNK" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# This function will return the sum of squares found in the data\n", + "def calculate_wcss(data):\n", + " wcss = []\n", + " for n in range(2, 21):\n", + " kmeans = KMeans(n_clusters=n)\n", + " kmeans.fit(X=data)\n", + " wcss.append(kmeans.inertia_)\n", + "\n", + " return wcss" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
latlng
040.802319-96.683149
140.800463-96.680036
240.814234-96.701008
340.813599-96.700540
440.796821-96.674826
.........
22740.738810-96.680150
22840.723974-96.681705
22940.816108-96.795131
23040.887348-96.678617
23140.878144-96.735121
\n", + "

232 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " lat lng\n", + "0 40.802319 -96.683149\n", + "1 40.800463 -96.680036\n", + "2 40.814234 -96.701008\n", + "3 40.813599 -96.700540\n", + "4 40.796821 -96.674826\n", + ".. ... ...\n", + "227 40.738810 -96.680150\n", + "228 40.723974 -96.681705\n", + "229 40.816108 -96.795131\n", + "230 40.887348 -96.678617\n", + "231 40.878144 -96.735121\n", + "\n", + "[232 rows x 2 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Drop 'str' cols so we can use k-means clustering\n", + "cluster_df = nearby_venues.drop(columns=['name', 'categories'])\n", + "cluster_df" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.34804817245236686,\n", + " 0.2123621655263781,\n", + " 0.15941308819059363,\n", + " 0.1124422749740121,\n", + " 0.08793974648902303,\n", + " 0.07787546934076488,\n", + " 0.06760201626926661,\n", + " 0.058603531149823695,\n", + " 0.05307694673631565,\n", + " 0.046405418408475035,\n", + " 0.04248508032750019,\n", + " 0.03737141937875458,\n", + " 0.03418960068046657,\n", + " 0.030142252086911396,\n", + " 0.027788439492930205,\n", + " 0.025640780223738995,\n", + " 0.023750332291644125,\n", + " 0.022590054827965667,\n", + " 0.019589286338306075]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# calculating the within clusters sum-of-squares for 19 cluster amounts\n", + "sum_of_squares = calculate_wcss(cluster_df)\n", + "sum_of_squares" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# This function will return the optimal number of clusters\n", + "def optimal_number_of_clusters(wcss):\n", + " x1, y1 = 2, wcss[0]\n", + " x2, y2 = 20, wcss[len(wcss)-1]\n", + "\n", + " distances = []\n", + " for i in range(len(wcss)):\n", + " x0 = i+2\n", + " y0 = wcss[i]\n", + " 
numerator = abs((y2-y1)*x0 - (x2-x1)*y0 + x2*y1 - y2*x1)\n", + " denominator = math.sqrt((y2 - y1)**2 + (x2 - x1)**2)\n", + " distances.append(numerator/denominator)\n", + " \n", + " return distances.index(max(distances)) + 2" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# calculating the optimal number of clusters\n", + "n = optimal_number_of_clusters(sum_of_squares)\n", + "n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Labels: \n", + " [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 0 2 2 2 2 2 2 0 2 2 2 2 2 2\n", + " 0 2 2 2 2 2 2 2 0 0 2 3 0 0 2 0 2 2 2 3 2 2 2 0 2 2 2 4 0 0 4 2 2 0 3 0 0\n", + " 2 3 5 2 3 3 0 3 5 0 2 0 2 0 2 0 2 0 0 2 0 2 2 0 1 2 2 5 2 1 0 0 3 3 5 3 3\n", + " 0 0 0 5 4 1 0 0 3 3 3 3 1 1 0 4 5 1 3 0 0 3 3 4 4 3 1 3 3 5 3 4 4 0 3 3 3\n", + " 1 0 2 1 1 3 1 1 3 5 1 1 3 4 4 0 3 0 0 1 5 3 3 3 3 0 1 3 3 1 1 3 4 3 3 1 1\n", + " 4 1 5 3 1 3 1 4 3 1 0 3 1 0 3 1 3 1 1 3 4 1 3 1 1 1 4 1 1 1 1 1 1 1 3 1 4\n", + " 4 4 3 4 5 1 1 5 4 4] \n", + "\n", + "Cluster centers: \n", + " [[ 40.820588 -96.63913461]\n", + " [ 40.74178794 -96.68440311]\n", + " [ 40.81084834 -96.70181345]\n", + " [ 40.76406174 -96.64015094]\n", + " [ 40.86489106 -96.69252935]\n", + " [ 40.80880071 -96.75189142]]\n" + ] + } + ], + "source": [ + "# set number of clusters equal to the optimal number\n", + "kclusters = n\n", + "\n", + "# run k-means clustering\n", + "kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cluster_df)\n", + "\n", + "# check cluster labels generated for each row in the dataframe\n", + "print(\"Labels:\", \"\\n\", kmeans.labels_, \"\\n\")\n", + "print(\"Cluster centers:\", \"\\n\", kmeans.cluster_centers_)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Cluster Labelsnamecategorieslatlng
02Sunken GardensGarden40.802319-96.683149
12Lincoln Children's ZooZoo40.800463-96.680036
22Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
32Yia Yia'sPizza Place40.813599-96.700540
42Antelope ParkPark40.796821-96.674826
..................
2271PepperJax GrillAmerican Restaurant40.738810-96.680150
2281ALDIGrocery Store40.723974-96.681705
2295Crete Carrier/Shaffer TruckingBuilding40.816108-96.795131
2304Hampton Inn & SuitesHotel40.887348-96.678617
2314Super SaverSupermarket40.878144-96.735121
\n", + "

232 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Cluster Labels name categories \\\n", + "0 2 Sunken Gardens Garden \n", + "1 2 Lincoln Children's Zoo Zoo \n", + "2 2 Honest Abe's Burgers & Freedom Burger Joint \n", + "3 2 Yia Yia's Pizza Place \n", + "4 2 Antelope Park Park \n", + ".. ... ... ... \n", + "227 1 PepperJax Grill American Restaurant \n", + "228 1 ALDI Grocery Store \n", + "229 5 Crete Carrier/Shaffer Trucking Building \n", + "230 4 Hampton Inn & Suites Hotel \n", + "231 4 Super Saver Supermarket \n", + "\n", + " lat lng \n", + "0 40.802319 -96.683149 \n", + "1 40.800463 -96.680036 \n", + "2 40.814234 -96.701008 \n", + "3 40.813599 -96.700540 \n", + "4 40.796821 -96.674826 \n", + ".. ... ... \n", + "227 40.738810 -96.680150 \n", + "228 40.723974 -96.681705 \n", + "229 40.816108 -96.795131 \n", + "230 40.887348 -96.678617 \n", + "231 40.878144 -96.735121 \n", + "\n", + "[232 rows x 5 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# add clustering labels to dataframe\n", + "nearby_venues.insert(0, 'Cluster Labels', kmeans.labels_)\n", + "nearby_venues" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create map with clusters\n", + "map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)\n", + "colors = ['#0F9D58', '#DB4437', '#4285F4', '#800080', '#ce12c0', '#171717']\n", + "\n", + "# add markers to the map\n", + "for lat, lng, name, categories, cluster in zip(nearby_venues['lat'], nearby_venues['lng'], nearby_venues['name'], nearby_venues['categories'], nearby_venues['Cluster Labels']):\n", + " label = '[{}] {} ({})'.format(cluster, name, categories)\n", + " label = folium.Popup(label, parse_html=True)\n", + " folium.CircleMarker(\n", + " [lat, lng],\n", + " radius=5,\n", + " popup=label,\n", + " color=colors[int(cluster)],\n", + " fill=True,\n", + " fill_color=colors[int(cluster)],\n", + " fill_opacity=0.7).add_to(map_clusters)\n", + "\n", + "map_clusters" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Color of Cluster 0 : Dark Green\n", + "Venues found in Cluster 0 : 42\n", + "---\n", + "Color of Cluster 1 : Red\n", + "Venues found in Cluster 1 : 42\n", + "---\n", + "Color of Cluster 2 : Blue\n", + "Venues found in Cluster 2 : 67\n", + "---\n", + "Color of Cluster 3 : Purple\n", + "Venues found in Cluster 3 : 48\n", + "---\n", + "Color of Cluster 4 : Pink\n", + "Venues found in Cluster 4 : 21\n", + "---\n", + "Color of Cluster 5 : Black\n", + "Venues found in Cluster 5 : 12\n", + "---\n" + ] + } + ], + "source": [ + "# Show how many venues are in each cluster\n", + "color_names = ['Dark Green', 'Red', 'Blue', 'Purple', 'Pink', 'Black']\n", + "for x in range(0,6):\n", + " print(\"Color of Cluster\", x, \":\", color_names[x])\n", + " print(\"Venues found in Cluster\", x, \":\", nearby_venues.loc[nearby_venues['Cluster Labels'] == x, nearby_venues.columns[:]].shape[0])\n", + " print(\"---\")" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Cluster Labelsnamecategorieslatlng
02Sunken GardensGarden40.802319-96.683149
12Lincoln Children's ZooZoo40.800463-96.680036
22Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
32Yia Yia'sPizza Place40.813599-96.700540
42Antelope ParkPark40.796821-96.674826
..................
2271PepperJax GrillAmerican Restaurant40.738810-96.680150
2281ALDIGrocery Store40.723974-96.681705
2295Crete Carrier/Shaffer TruckingBuilding40.816108-96.795131
2304Hampton Inn & SuitesHotel40.887348-96.678617
2314Super SaverSupermarket40.878144-96.735121
\n", + "

232 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Cluster Labels name categories \\\n", + "0 2 Sunken Gardens Garden \n", + "1 2 Lincoln Children's Zoo Zoo \n", + "2 2 Honest Abe's Burgers & Freedom Burger Joint \n", + "3 2 Yia Yia's Pizza Place \n", + "4 2 Antelope Park Park \n", + ".. ... ... ... \n", + "227 1 PepperJax Grill American Restaurant \n", + "228 1 ALDI Grocery Store \n", + "229 5 Crete Carrier/Shaffer Trucking Building \n", + "230 4 Hampton Inn & Suites Hotel \n", + "231 4 Super Saver Supermarket \n", + "\n", + " lat lng \n", + "0 40.802319 -96.683149 \n", + "1 40.800463 -96.680036 \n", + "2 40.814234 -96.701008 \n", + "3 40.813599 -96.700540 \n", + "4 40.796821 -96.674826 \n", + ".. ... ... \n", + "227 40.738810 -96.680150 \n", + "228 40.723974 -96.681705 \n", + "229 40.816108 -96.795131 \n", + "230 40.887348 -96.678617 \n", + "231 40.878144 -96.735121 \n", + "\n", + "[232 rows x 5 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reminder of what the dataframe looks like\n", + "nearby_venues" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " Cluster 0: \n", + " Cluster Labels\n", + "categories \n", + "Mexican Restaurant 6\n", + "Chinese Restaurant 4\n", + "Grocery Store 4\n", + "American Restaurant 2\n", + "Bakery 2\n", + "Taco Place 2\n", + "\n", + "\n", + " Cluster 1: \n", + " Cluster Labels\n", + "categories \n", + "Grocery Store 3\n", + "Sandwich Place 3\n", + "Burger Joint 2\n", + "Pharmacy 2\n", + "Mexican Restaurant 2\n", + "Fast Food Restaurant 2\n", + "Coffee Shop 2\n", + "Chinese Restaurant 2\n", + "American Restaurant 2\n", + "\n", + "\n", + " Cluster 2: \n", + " Cluster Labels\n", + "categories \n", + "Coffee Shop 5\n", + "Brewery 4\n", + "Bar 3\n", + "Park 3\n", + "Italian Restaurant 2\n", + "Café 2\n", + "Pizza Place 2\n", + "Sushi Restaurant 2\n", + "Hotel 
2\n", + "Mexican Restaurant 2\n", + "Beer Garden 2\n", + "\n", + "\n", + " Cluster 3: \n", + " Cluster Labels\n", + "categories \n", + "Pizza Place 4\n", + "American Restaurant 3\n", + "Donut Shop 3\n", + "Coffee Shop 3\n", + "Grocery Store 2\n", + "Fried Chicken Joint 2\n", + "New American Restaurant 2\n", + "Burger Joint 2\n", + "Sandwich Place 2\n", + "\n", + "\n", + " Cluster 4: \n", + " Cluster Labels\n", + "categories \n", + "Hotel 2\n", + "Gym / Fitness Center 2\n", + "\n", + "\n", + " Cluster 5: \n", + " Cluster Labels\n", + "categories \n", + "Brewery 2\n" + ] + } + ], + "source": [ + "# Calculate how many venues there are in each category\n", + "# Sort from largest to smallest\n", + "temp_df = nearby_venues.drop(columns=['name', 'lat', 'lng'])\n", + "\n", + "cluster0_grouped = temp_df.loc[temp_df['Cluster Labels'] == 0].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", + "cluster1_grouped = temp_df.loc[temp_df['Cluster Labels'] == 1].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", + "cluster2_grouped = temp_df.loc[temp_df['Cluster Labels'] == 2].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", + "cluster3_grouped = temp_df.loc[temp_df['Cluster Labels'] == 3].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", + "cluster4_grouped = temp_df.loc[temp_df['Cluster Labels'] == 4].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", + "cluster5_grouped = temp_df.loc[temp_df['Cluster Labels'] == 5].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", + "\n", + "# For each cluster, show only the categories that have more than one venue\n", + "with pd.option_context('display.max_rows', None, 'display.max_columns', None):\n", + " print(\"\\n\\n\", \"Cluster 0:\", \"\\n\", cluster0_grouped.loc[cluster0_grouped['Cluster Labels'] > 1])\n", + " print(\"\\n\\n\", \"Cluster 1:\",
\"\\n\", cluster1_grouped.loc[cluster1_grouped['Cluster Labels'] > 1])\n", + " print(\"\\n\\n\", \"Cluster 2:\", \"\\n\", cluster2_grouped.loc[cluster2_grouped['Cluster Labels'] > 1])\n", + " print(\"\\n\\n\", \"Cluster 3:\", \"\\n\", cluster3_grouped.loc[cluster3_grouped['Cluster Labels'] > 1])\n", + " print(\"\\n\\n\", \"Cluster 4:\", \"\\n\", cluster4_grouped.loc[cluster4_grouped['Cluster Labels'] > 1])\n", + " print(\"\\n\\n\", \"Cluster 5:\", \"\\n\", cluster5_grouped.loc[cluster5_grouped['Cluster Labels'] > 1])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- cgit v1.2.3-70-g09d2