{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting package metadata (current_repodata.json): ...working... done\n", "Solving environment: ...working... done\n", "\n", "# All requested packages already installed.\n", "\n", "Requirement already satisfied: geopy in c:\\users\\my_user\\anaconda3\\lib\\site-packages (2.0.0)\n", "Requirement already satisfied: geographiclib<2,>=1.49 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geopy) (1.50)\n", "Requirement already satisfied: folium in c:\\users\\my_user\\anaconda3\\lib\\site-packages (0.11.0)\n", "Requirement already satisfied: numpy in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from folium) (1.18.1)\n", "Requirement already satisfied: jinja2>=2.9 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from folium) (2.11.1)\n", "Requirement already satisfied: branca>=0.3.0 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from folium) (0.4.1)\n", "Requirement already satisfied: requests in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from folium) (2.22.0)\n", "Requirement already satisfied: MarkupSafe>=0.23 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from jinja2>=2.9->folium) (1.1.1)\n", "Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->folium) (2.8)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->folium) (1.25.8)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->folium) (3.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->folium) (2019.11.28)\n" ] } ], "source": [ "# Optionally, install needed libraries\n", "!conda install -c conda-forge geocoder --yes\n", "!pip install geopy\n", "!pip install folium" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Import the necessary libraries\n", "import pandas as pd\n", "import requests\n", "import folium\n", "import math\n", "import json\n", "from pandas.io.json import json_normalize\n", "from sklearn.cluster import KMeans" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Make this Notebook Trusted to load map: File -> Trust Notebook
" ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Define the latitude and longitude of Lincoln, then map the results\n", "latitude = 40.806862\n", "longitude = -96.681679\n", "map_LNK = folium.Map(location=[latitude, longitude], zoom_start=12)\n", " \n", "map_LNK" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Important: The Foursquare API only returns 100 venues at a time using this endpoint.\n", "Pull data back in 3 groups to get all 232 results" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Foursquare API credentials\n", "CLIENT_ID = 'MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN'\n", "CLIENT_SECRET = 'YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ'\n", "VERSION = '20180604'" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://api.foursquare.com/v2/venues/explore?&client_id=MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN&client_secret=YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ&v=20180604&ll=40.806862,-96.681679&radius=10000&limit=100'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Set up the URL to fetch the first 100 results\n", "LIMIT = 100\n", "radius = 10000\n", "url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(\n", " CLIENT_ID, \n", " CLIENT_SECRET, \n", " VERSION, \n", " latitude, \n", " longitude, \n", " radius, \n", " LIMIT)\n", "url" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Fetch the first 100 results\n", "results = requests.get(url).json()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "232" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Determine the total number of results needed to fetch\n", "totalResults = results['response']['totalResults']\n", "totalResults" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://api.foursquare.com/v2/venues/explore?&client_id=MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN&client_secret=YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ&v=20180604&ll=40.806862,-96.681679&radius=10000&limit=100&offset=100'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Set up the URL to fetch the second 100 results (101-200)\n", "LIMIT = 100\n", "offset = 100\n", "radius = 10000\n", "url2 = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&offset={}'.format(\n", " CLIENT_ID, \n", " CLIENT_SECRET, \n", " VERSION, \n", " latitude, \n", " longitude, \n", " radius, \n", " LIMIT,\n", " offset)\n", "url2" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Fetch the second 100 results (101-200)\n", "results2 = requests.get(url2).json()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://api.foursquare.com/v2/venues/explore?&client_id=MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN&client_secret=YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ&v=20180604&ll=40.806862,-96.681679&radius=10000&limit=100&offset=200'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Set up the URL to fetch the final results (201 - 232)\n", "LIMIT = 100\n", "offset = 200\n", "radius = 10000\n", "url3 = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&offset={}'.format(\n", " CLIENT_ID, \n", " CLIENT_SECRET, \n", " VERSION, \n", " latitude, \n", " longitude, \n", " radius, \n", " LIMIT,\n", " offset)\n", "url3" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Fetch the final results (201 - 232)\n", "results3 = requests.get(url3).json()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# This function will extract the category of the venue from the API dictionary\n", "def get_category_type(row):\n", " try:\n", " categories_list = row['categories']\n", " except:\n", " categories_list = row['venue.categories']\n", " \n", " if len(categories_list) == 0:\n", " return None\n", " else:\n", " return categories_list[0]['name']" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\my_user\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead\n", " This is separate from the ipykernel package so we can avoid doing imports until\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namecategorieslatlng
0Sunken GardensGarden40.802319-96.683149
1Lincoln Children's ZooZoo40.800463-96.680036
2Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
3Yia Yia'sPizza Place40.813599-96.700540
4Antelope ParkPark40.796821-96.674826
...............
95Jack's Bar & GrillBar40.813895-96.709815
96Trade A Tape Comic Book CenterComic Shop40.812996-96.708490
97MazatlanMexican Restaurant40.815196-96.625641
98Rosie'sBar40.765226-96.700645
99Buzzard Billy'sCajun / Creole Restaurant40.815546-96.710123
\n", "

100 rows × 4 columns

\n", "
" ], "text/plain": [ " name categories lat \\\n", "0 Sunken Gardens Garden 40.802319 \n", "1 Lincoln Children's Zoo Zoo 40.800463 \n", "2 Honest Abe's Burgers & Freedom Burger Joint 40.814234 \n", "3 Yia Yia's Pizza Place 40.813599 \n", "4 Antelope Park Park 40.796821 \n", ".. ... ... ... \n", "95 Jack's Bar & Grill Bar 40.813895 \n", "96 Trade A Tape Comic Book Center Comic Shop 40.812996 \n", "97 Mazatlan Mexican Restaurant 40.815196 \n", "98 Rosie's Bar 40.765226 \n", "99 Buzzard Billy's Cajun / Creole Restaurant 40.815546 \n", "\n", " lng \n", "0 -96.683149 \n", "1 -96.680036 \n", "2 -96.701008 \n", "3 -96.700540 \n", "4 -96.674826 \n", ".. ... \n", "95 -96.709815 \n", "96 -96.708490 \n", "97 -96.625641 \n", "98 -96.700645 \n", "99 -96.710123 \n", "\n", "[100 rows x 4 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get the first 100 venues\n", "venues = results['response']['groups'][0]['items']\n", "nearby_venues = json_normalize(venues)\n", "\n", "# filter columns\n", "filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']\n", "nearby_venues = nearby_venues.loc[:, filtered_columns]\n", "\n", "# filter the category for each row\n", "nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)\n", "\n", "# clean columns\n", "nearby_venues.columns = [col.split(\".\")[-1] for col in nearby_venues.columns]\n", "nearby_venues" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\my_user\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead\n", " This is separate from the ipykernel package so we can avoid doing imports until\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namecategorieslatlng
0Sunken GardensGarden40.802319-96.683149
1Lincoln Children's ZooZoo40.800463-96.680036
2Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
3Yia Yia'sPizza Place40.813599-96.700540
4Antelope ParkPark40.796821-96.674826
...............
95Engine House CafeAmerican Restaurant40.857195-96.637721
96Marcus Edgewood CinemaMovie Theater40.760190-96.642499
97Victoria's SecretLingerie Store40.742087-96.679395
98Pancho Villa Mexican GrillMexican Restaurant40.860718-96.640711
99Popeyes Louisiana KitchenFried Chicken Joint40.768581-96.624462
\n", "

200 rows × 4 columns

\n", "
" ], "text/plain": [ " name categories lat lng\n", "0 Sunken Gardens Garden 40.802319 -96.683149\n", "1 Lincoln Children's Zoo Zoo 40.800463 -96.680036\n", "2 Honest Abe's Burgers & Freedom Burger Joint 40.814234 -96.701008\n", "3 Yia Yia's Pizza Place 40.813599 -96.700540\n", "4 Antelope Park Park 40.796821 -96.674826\n", ".. ... ... ... ...\n", "95 Engine House Cafe American Restaurant 40.857195 -96.637721\n", "96 Marcus Edgewood Cinema Movie Theater 40.760190 -96.642499\n", "97 Victoria's Secret Lingerie Store 40.742087 -96.679395\n", "98 Pancho Villa Mexican Grill Mexican Restaurant 40.860718 -96.640711\n", "99 Popeyes Louisiana Kitchen Fried Chicken Joint 40.768581 -96.624462\n", "\n", "[200 rows x 4 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get the second 100 venues\n", "venues2 = results2['response']['groups'][0]['items']\n", "nearby_venues2 = json_normalize(venues2) # flatten JSON\n", "\n", "# filter columns\n", "filtered_columns2 = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']\n", "nearby_venues2 = nearby_venues2.loc[:, filtered_columns]\n", "\n", "# filter the category for each row\n", "nearby_venues2['venue.categories'] = nearby_venues2.apply(get_category_type, axis=1)\n", "\n", "# clean columns\n", "nearby_venues2.columns = [col.split(\".\")[-1] for col in nearby_venues.columns]\n", "\n", "nearby_venues = nearby_venues.append(nearby_venues2)\n", "nearby_venues" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\my_user\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead\n", " This is separate from the ipykernel package so we can avoid doing imports until\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namecategorieslatlng
0Sunken GardensGarden40.802319-96.683149
1Lincoln Children's ZooZoo40.800463-96.680036
2Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
3Yia Yia'sPizza Place40.813599-96.700540
4Antelope ParkPark40.796821-96.674826
...............
227PepperJax GrillAmerican Restaurant40.738810-96.680150
228ALDIGrocery Store40.723974-96.681705
229Crete Carrier/Shaffer TruckingBuilding40.816108-96.795131
230Hampton Inn & SuitesHotel40.887348-96.678617
231Super SaverSupermarket40.878144-96.735121
\n", "

232 rows × 4 columns

\n", "
" ], "text/plain": [ " name categories lat lng\n", "0 Sunken Gardens Garden 40.802319 -96.683149\n", "1 Lincoln Children's Zoo Zoo 40.800463 -96.680036\n", "2 Honest Abe's Burgers & Freedom Burger Joint 40.814234 -96.701008\n", "3 Yia Yia's Pizza Place 40.813599 -96.700540\n", "4 Antelope Park Park 40.796821 -96.674826\n", ".. ... ... ... ...\n", "227 PepperJax Grill American Restaurant 40.738810 -96.680150\n", "228 ALDI Grocery Store 40.723974 -96.681705\n", "229 Crete Carrier/Shaffer Trucking Building 40.816108 -96.795131\n", "230 Hampton Inn & Suites Hotel 40.887348 -96.678617\n", "231 Super Saver Supermarket 40.878144 -96.735121\n", "\n", "[232 rows x 4 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get the rest of the venues\n", "venues3 = results3['response']['groups'][0]['items']\n", "nearby_venues3 = json_normalize(venues3) # flatten JSON\n", "\n", "# filter columns\n", "filtered_columns3 = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']\n", "nearby_venues3 = nearby_venues3.loc[:, filtered_columns]\n", "\n", "# filter the category for each row\n", "nearby_venues3['venue.categories'] = nearby_venues3.apply(get_category_type, axis=1)\n", "\n", "# clean columns\n", "nearby_venues3.columns = [col.split(\".\")[-1] for col in nearby_venues3.columns]\n", "\n", "nearby_venues = nearby_venues.append(nearby_venues3)\n", "nearby_venues = nearby_venues.reset_index(drop=True)\n", "nearby_venues" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Make this Notebook Trusted to load map: File -> Trust Notebook
" ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# add markers to map\n", "for lat, lng, name, categories in zip(nearby_venues['lat'], nearby_venues['lng'], nearby_venues['name'], nearby_venues['categories']):\n", " label = '{} ({})'.format(name, categories)\n", " label = folium.Popup(label, parse_html=True)\n", " folium.CircleMarker(\n", " [lat, lng],\n", " radius=5,\n", " popup=label,\n", " color='blue',\n", " fill=True,\n", " fill_color='#3186cc',\n", " fill_opacity=0.7,\n", " ).add_to(map_LNK)\n", "\n", "map_LNK" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# This function will return the sum of squares found in the data\n", "def calculate_wcss(data):\n", " wcss = []\n", " for n in range(2, 21):\n", " kmeans = KMeans(n_clusters=n)\n", " kmeans.fit(X=data)\n", " wcss.append(kmeans.inertia_)\n", "\n", " return wcss" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
latlng
040.802319-96.683149
140.800463-96.680036
240.814234-96.701008
340.813599-96.700540
440.796821-96.674826
.........
22740.738810-96.680150
22840.723974-96.681705
22940.816108-96.795131
23040.887348-96.678617
23140.878144-96.735121
\n", "

232 rows × 2 columns

\n", "
" ], "text/plain": [ " lat lng\n", "0 40.802319 -96.683149\n", "1 40.800463 -96.680036\n", "2 40.814234 -96.701008\n", "3 40.813599 -96.700540\n", "4 40.796821 -96.674826\n", ".. ... ...\n", "227 40.738810 -96.680150\n", "228 40.723974 -96.681705\n", "229 40.816108 -96.795131\n", "230 40.887348 -96.678617\n", "231 40.878144 -96.735121\n", "\n", "[232 rows x 2 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop 'str' cols so we can use k-means clustering\n", "cluster_df = nearby_venues.drop(columns=['name', 'categories'])\n", "cluster_df" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.34804817245236686,\n", " 0.2123621655263781,\n", " 0.15941308819059363,\n", " 0.1124422749740121,\n", " 0.08793974648902303,\n", " 0.07787546934076488,\n", " 0.06760201626926661,\n", " 0.058603531149823695,\n", " 0.05307694673631565,\n", " 0.046405418408475035,\n", " 0.04248508032750019,\n", " 0.03737141937875458,\n", " 0.03418960068046657,\n", " 0.030142252086911396,\n", " 0.027788439492930205,\n", " 0.025640780223738995,\n", " 0.023750332291644125,\n", " 0.022590054827965667,\n", " 0.019589286338306075]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# calculating the within clusters sum-of-squares for 19 cluster amounts\n", "sum_of_squares = calculate_wcss(cluster_df)\n", "sum_of_squares" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# This function will return the optimal number of clusters\n", "def optimal_number_of_clusters(wcss):\n", " x1, y1 = 2, wcss[0]\n", " x2, y2 = 20, wcss[len(wcss)-1]\n", "\n", " distances = []\n", " for i in range(len(wcss)):\n", " x0 = i+2\n", " y0 = wcss[i]\n", " numerator = abs((y2-y1)*x0 - (x2-x1)*y0 + x2*y1 - y2*x1)\n", " denominator = math.sqrt((y2 - y1)**2 + (x2 - x1)**2)\n", " distances.append(numerator/denominator)\n", " \n", " return distances.index(max(distances)) + 2" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# calculating the optimal number of clusters\n", "n = optimal_number_of_clusters(sum_of_squares)\n", "n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Labels: \n", " [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 0 2 2 2 2 2 2 0 2 2 2 2 2 2\n", " 0 2 2 2 2 2 2 2 0 0 2 3 0 0 2 0 2 2 2 3 2 2 2 0 2 2 2 4 0 0 4 2 2 0 3 0 0\n", " 2 3 5 2 3 3 0 3 5 0 2 0 2 0 2 0 2 0 0 2 0 2 2 0 1 2 2 5 2 1 0 0 3 3 5 3 3\n", " 0 0 0 5 4 1 0 0 3 3 3 3 1 1 0 4 5 1 3 0 0 3 3 4 4 3 1 3 3 5 3 4 4 0 3 3 3\n", " 1 0 2 1 1 3 1 1 3 5 1 1 3 4 4 0 3 0 0 1 5 3 3 3 3 0 1 3 3 1 1 3 4 3 3 1 1\n", " 4 1 5 3 1 3 1 4 3 1 0 3 1 0 3 1 3 1 1 3 4 1 3 1 1 1 4 1 1 1 1 1 1 1 3 1 4\n", " 4 4 3 4 5 1 1 5 4 4] \n", "\n", "Cluster centers: \n", " [[ 40.820588 -96.63913461]\n", " [ 40.74178794 -96.68440311]\n", " [ 40.81084834 -96.70181345]\n", " [ 40.76406174 -96.64015094]\n", " [ 40.86489106 -96.69252935]\n", " [ 40.80880071 -96.75189142]]\n" ] } ], "source": [ "# set number of clusters equal to the optimal number\n", "kclusters = n\n", "\n", "# run k-means clustering\n", "kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cluster_df)\n", "\n", "# check cluster labels generated for each row in the dataframe\n", "print(\"Labels:\", \"\\n\", kmeans.labels_, \"\\n\")\n", "print(\"Cluster centers:\", \"\\n\", kmeans.cluster_centers_)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Cluster Labelsnamecategorieslatlng
02Sunken GardensGarden40.802319-96.683149
12Lincoln Children's ZooZoo40.800463-96.680036
22Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
32Yia Yia'sPizza Place40.813599-96.700540
42Antelope ParkPark40.796821-96.674826
..................
2271PepperJax GrillAmerican Restaurant40.738810-96.680150
2281ALDIGrocery Store40.723974-96.681705
2295Crete Carrier/Shaffer TruckingBuilding40.816108-96.795131
2304Hampton Inn & SuitesHotel40.887348-96.678617
2314Super SaverSupermarket40.878144-96.735121
\n", "

232 rows × 5 columns

\n", "
" ], "text/plain": [ " Cluster Labels name categories \\\n", "0 2 Sunken Gardens Garden \n", "1 2 Lincoln Children's Zoo Zoo \n", "2 2 Honest Abe's Burgers & Freedom Burger Joint \n", "3 2 Yia Yia's Pizza Place \n", "4 2 Antelope Park Park \n", ".. ... ... ... \n", "227 1 PepperJax Grill American Restaurant \n", "228 1 ALDI Grocery Store \n", "229 5 Crete Carrier/Shaffer Trucking Building \n", "230 4 Hampton Inn & Suites Hotel \n", "231 4 Super Saver Supermarket \n", "\n", " lat lng \n", "0 40.802319 -96.683149 \n", "1 40.800463 -96.680036 \n", "2 40.814234 -96.701008 \n", "3 40.813599 -96.700540 \n", "4 40.796821 -96.674826 \n", ".. ... ... \n", "227 40.738810 -96.680150 \n", "228 40.723974 -96.681705 \n", "229 40.816108 -96.795131 \n", "230 40.887348 -96.678617 \n", "231 40.878144 -96.735121 \n", "\n", "[232 rows x 5 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# add clustering labels to dataframe\n", "nearby_venues.insert(0, 'Cluster Labels', kmeans.labels_)\n", "nearby_venues" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Make this Notebook Trusted to load map: File -> Trust Notebook
" ], "text/plain": [ "" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# create map with clusters\n", "map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)\n", "colors = ['#0F9D58', '#DB4437', '#4285F4', '#800080', '#ce12c0', '#171717']\n", "\n", "# add markers to the map\n", "for lat, lng, name, categories, cluster in zip(nearby_venues['lat'], nearby_venues['lng'], nearby_venues['name'], nearby_venues['categories'], nearby_venues['Cluster Labels']):\n", " label = '[{}] {} ({})'.format(cluster, name, categories)\n", " label = folium.Popup(label, parse_html=True)\n", " folium.CircleMarker(\n", " [lat, lng],\n", " radius=5,\n", " popup=label,\n", " color=colors[int(cluster)],\n", " fill=True,\n", " fill_color=colors[int(cluster)],\n", " fill_opacity=0.7).add_to(map_clusters)\n", "\n", "map_clusters" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Color of Cluster 0 : Dark Green\n", "Venues found in Cluster 0 : 42\n", "---\n", "Color of Cluster 1 : Red\n", "Venues found in Cluster 1 : 42\n", "---\n", "Color of Cluster 2 : Blue\n", "Venues found in Cluster 2 : 67\n", "---\n", "Color of Cluster 3 : Purple\n", "Venues found in Cluster 3 : 48\n", "---\n", "Color of Cluster 4 : Pink\n", "Venues found in Cluster 4 : 21\n", "---\n", "Color of Cluster 5 : Black\n", "Venues found in Cluster 5 : 12\n", "---\n" ] } ], "source": [ "# Show how many venues are in each cluster\n", "color_names = ['Dark Green', 'Red', 'Blue', 'Purple', 'Pink', 'Black']\n", "for x in range(0,6):\n", " print(\"Color of Cluster\", x, \":\", color_names[x])\n", " print(\"Venues found in Cluster\", x, \":\", nearby_venues.loc[nearby_venues['Cluster Labels'] == x, nearby_venues.columns[:]].shape[0])\n", " print(\"---\")" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Cluster Labelsnamecategorieslatlng
02Sunken GardensGarden40.802319-96.683149
12Lincoln Children's ZooZoo40.800463-96.680036
22Honest Abe's Burgers & FreedomBurger Joint40.814234-96.701008
32Yia Yia'sPizza Place40.813599-96.700540
42Antelope ParkPark40.796821-96.674826
..................
2271PepperJax GrillAmerican Restaurant40.738810-96.680150
2281ALDIGrocery Store40.723974-96.681705
2295Crete Carrier/Shaffer TruckingBuilding40.816108-96.795131
2304Hampton Inn & SuitesHotel40.887348-96.678617
2314Super SaverSupermarket40.878144-96.735121
\n", "

232 rows × 5 columns

\n", "
" ], "text/plain": [ " Cluster Labels name categories \\\n", "0 2 Sunken Gardens Garden \n", "1 2 Lincoln Children's Zoo Zoo \n", "2 2 Honest Abe's Burgers & Freedom Burger Joint \n", "3 2 Yia Yia's Pizza Place \n", "4 2 Antelope Park Park \n", ".. ... ... ... \n", "227 1 PepperJax Grill American Restaurant \n", "228 1 ALDI Grocery Store \n", "229 5 Crete Carrier/Shaffer Trucking Building \n", "230 4 Hampton Inn & Suites Hotel \n", "231 4 Super Saver Supermarket \n", "\n", " lat lng \n", "0 40.802319 -96.683149 \n", "1 40.800463 -96.680036 \n", "2 40.814234 -96.701008 \n", "3 40.813599 -96.700540 \n", "4 40.796821 -96.674826 \n", ".. ... ... \n", "227 40.738810 -96.680150 \n", "228 40.723974 -96.681705 \n", "229 40.816108 -96.795131 \n", "230 40.887348 -96.678617 \n", "231 40.878144 -96.735121 \n", "\n", "[232 rows x 5 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reminder of what the dataframe looks like\n", "nearby_venues" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", " Cluster 0: \n", " Cluster Labels\n", "categories \n", "Mexican Restaurant 6\n", "Chinese Restaurant 4\n", "Grocery Store 4\n", "American Restaurant 2\n", "Bakery 2\n", "Taco Place 2\n", "\n", "\n", " Cluster 1: \n", " Cluster Labels\n", "categories \n", "Grocery Store 3\n", "Sandwich Place 3\n", "Burger Joint 2\n", "Pharmacy 2\n", "Mexican Restaurant 2\n", "Fast Food Restaurant 2\n", "Coffee Shop 2\n", "Chinese Restaurant 2\n", "American Restaurant 2\n", "\n", "\n", " Cluster 2: \n", " Cluster Labels\n", "categories \n", "Coffee Shop 5\n", "Brewery 4\n", "Bar 3\n", "Park 3\n", "Italian Restaurant 2\n", "Café 2\n", "Pizza Place 2\n", "Sushi Restaurant 2\n", "Hotel 2\n", "Mexican Restaurant 2\n", "Beer Garden 2\n", "\n", "\n", " Cluster 3: \n", " Cluster Labels\n", "categories \n", "Pizza Place 4\n", "American Restaurant 3\n", "Donut Shop 3\n", "Coffee Shop 3\n", "Grocery Store 2\n", "Fried Chicken Joint 2\n", "New American Restaurant 2\n", "Burger Joint 2\n", "Sandwich Place 2\n", "\n", "\n", " Cluster 4: \n", " Cluster Labels\n", "categories \n", "Hotel 2\n", "Gym / Fitness Center 2\n", "\n", "\n", " Cluster 5: \n", " Cluster Labels\n", "categories \n", "Brewery 2\n" ] } ], "source": [ "# Calculate how many venues there are in each category\n", "# Sort from largest to smallest\n", "temp_df = nearby_venues.drop(columns=['name', 'lat', 'lng'])\n", "\n", "cluster0_grouped = temp_df.loc[temp_df['Cluster Labels'] == 0].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", "cluster1_grouped = temp_df.loc[temp_df['Cluster Labels'] == 1].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", "cluster2_grouped = temp_df.loc[temp_df['Cluster Labels'] == 2].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", "cluster3_grouped = temp_df.loc[temp_df['Cluster Labels'] == 3].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", "cluster4_grouped = temp_df.loc[temp_df['Cluster Labels'] == 4].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", "cluster5_grouped = temp_df.loc[temp_df['Cluster Labels'] == 5].groupby(['categories']).count().sort_values(by='Cluster Labels', ascending=False)\n", "\n", "# show how many venues there are in each cluster (> 1)\n", "with pd.option_context('display.max_rows', None, 'display.max_columns', None):\n", " print(\"\\n\\n\", \"Cluster 0:\", \"\\n\", cluster0_grouped.loc[cluster0_grouped['Cluster Labels'] > 1])\n", " print(\"\\n\\n\", \"Cluster 1:\", \"\\n\", cluster1_grouped.loc[cluster1_grouped['Cluster Labels'] > 1])\n", " print(\"\\n\\n\", \"Cluster 2:\", \"\\n\", cluster2_grouped.loc[cluster2_grouped['Cluster Labels'] > 1])\n", " print(\"\\n\\n\", \"Cluster 3:\", \"\\n\", cluster3_grouped.loc[cluster3_grouped['Cluster Labels'] > 1])\n", " print(\"\\n\\n\", \"Cluster 4:\", \"\\n\", cluster4_grouped.loc[cluster4_grouped['Cluster Labels'] > 1])\n", " print(\"\\n\\n\", \"Cluster 5:\", \"\\n\", cluster5_grouped.loc[cluster5_grouped['Cluster Labels'] > 1])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }