From 836841dba83ae4b79009799642a7acdef13ffd48 Mon Sep 17 00:00:00 2001 From: Christian Cleberg Date: Mon, 22 May 2023 15:39:44 -0500 Subject: initial commit --- ..._and_Clustering_Neighbourhoods_in_Toronto.ipynb | 3650 ++++++++++++++++++++ 1 file changed, 3650 insertions(+) create mode 100644 notebooks/Segmenting_and_Clustering_Neighbourhoods_in_Toronto.ipynb (limited to 'notebooks/Segmenting_and_Clustering_Neighbourhoods_in_Toronto.ipynb') diff --git a/notebooks/Segmenting_and_Clustering_Neighbourhoods_in_Toronto.ipynb b/notebooks/Segmenting_and_Clustering_Neighbourhoods_in_Toronto.ipynb new file mode 100644 index 0000000..b5ec4cc --- /dev/null +++ b/notebooks/Segmenting_and_Clustering_Neighbourhoods_in_Toronto.ipynb @@ -0,0 +1,3650 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Segmenting and Clustering Neighbourhoods in Toronto\n", + "---\n", + "# Part 1\n", + "Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import requests" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fetch the Wikipedia page and use pandas read_html to convert its HTML tables into a list of DataFrame objects.\n", + "\n", + "Remove rows whose neighbourhood is \"Not assigned\"." + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Postal CodeBoroughNeighbourhood
0M3ANorth YorkParkwoods
1M4ANorth YorkVictoria Village
2M5ADowntown TorontoRegent Park, Harbourfront
3M6ANorth YorkLawrence Manor, Lawrence Heights
4M7ADowntown TorontoQueen's Park, Ontario Provincial Government
\n", + "
" + ], + "text/plain": [ + " Postal Code Borough Neighbourhood\n", + "0 M3A North York Parkwoods\n", + "1 M4A North York Victoria Village\n", + "2 M5A Downtown Toronto Regent Park, Harbourfront\n", + "3 M6A North York Lawrence Manor, Lawrence Heights\n", + "4 M7A Downtown Toronto Queen's Park, Ontario Provincial Government" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'\n", + "wiki_page = requests.get(wiki)\n", + "\n", + "wiki_raw = pd.read_html(wiki_page.content, header = 0)[0]\n", + "df = wiki_raw[wiki_raw.Neighbourhood != 'Not assigned']\n", + "df.reset_index(inplace=True, drop=True)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BoroughNeighbourhood
Postal Code
M1BScarboroughMalvern, Rouge
M1CScarboroughRouge Hill, Port Union, Highland Creek
M1EScarboroughGuildwood, Morningside, West Hill
M1GScarboroughWoburn
M1HScarboroughCedarbrae
.........
M9NYorkWeston
M9PEtobicokeWestmount
M9REtobicokeKingsview Village, St. Phillips, Martin Grove ...
M9VEtobicokeSouth Steeles, Silverstone, Humbergate, Jamest...
M9WEtobicokeNorthwest, West Humber - Clairville
\n", + "

103 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Borough Neighbourhood\n", + "Postal Code \n", + "M1B Scarborough Malvern, Rouge\n", + "M1C Scarborough Rouge Hill, Port Union, Highland Creek\n", + "M1E Scarborough Guildwood, Morningside, West Hill\n", + "M1G Scarborough Woburn\n", + "M1H Scarborough Cedarbrae\n", + "... ... ...\n", + "M9N York Weston\n", + "M9P Etobicoke Westmount\n", + "M9R Etobicoke Kingsview Village, St. Phillips, Martin Grove ...\n", + "M9V Etobicoke South Steeles, Silverstone, Humbergate, Jamest...\n", + "M9W Etobicoke Northwest, West Humber - Clairville\n", + "\n", + "[103 rows x 2 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(['Postal Code']).first()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "103" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df['Postal Code'].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Postal CodeBoroughNeighbourhood
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Postal Code, Borough, Neighbourhood]\n", + "Index: []" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['Borough'] == 'Not assigned']" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(103, 3)" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "# Part 2" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: geocoder in c:\\users\\my_user\\anaconda3\\lib\\site-packages (1.38.1)\n", + "Requirement already satisfied: future in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (0.18.2)\n", + "Requirement already satisfied: ratelim in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (0.1.6)\n", + "Requirement already satisfied: requests in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (2.22.0)\n", + "Requirement already satisfied: click in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (7.0)\n", + "Requirement already satisfied: six in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (1.14.0)\n", + "Requirement already satisfied: decorator in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from ratelim->geocoder) (4.4.1)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->geocoder) (1.25.8)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->geocoder) (2.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->geocoder) (2019.11.28)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->geocoder) (3.0.4)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install geocoder" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "import geocoder" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Postal CodeLatitudeLongitude
0M1B43.806686-79.194353
1M1C43.784535-79.160497
2M1E43.763573-79.188711
3M1G43.770992-79.216917
4M1H43.773136-79.239476
\n", + "
" + ], + "text/plain": [ + " Postal Code Latitude Longitude\n", + "0 M1B 43.806686 -79.194353\n", + "1 M1C 43.784535 -79.160497\n", + "2 M1E 43.763573 -79.188711\n", + "3 M1G 43.770992 -79.216917\n", + "4 M1H 43.773136 -79.239476" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url = 'http://cocl.us/Geospatial_data'\n", + "df_geo = pd.read_csv(url)\n", + "df_geo.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Postal Code object\n", + "Latitude float64\n", + "Longitude float64\n", + "dtype: object" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_geo.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Postal Code object\n", + "Borough object\n", + "Neighbourhood object\n", + "dtype: object" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(103, 3)" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(103, 3)" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_geo.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Postal CodeBoroughNeighbourhoodLatitudeLongitude
0M3ANorth YorkParkwoods43.753259-79.329656
1M4ANorth YorkVictoria Village43.725882-79.315572
2M5ADowntown TorontoRegent Park, Harbourfront43.654260-79.360636
3M6ANorth YorkLawrence Manor, Lawrence Heights43.718518-79.464763
4M7ADowntown TorontoQueen's Park, Ontario Provincial Government43.662301-79.389494
5M9AEtobicokeIslington Avenue, Humber Valley Village43.667856-79.532242
6M1BScarboroughMalvern, Rouge43.806686-79.194353
7M3BNorth YorkDon Mills43.745906-79.352188
8M4BEast YorkParkview Hill, Woodbine Gardens43.706397-79.309937
9M5BDowntown TorontoGarden District, Ryerson43.657162-79.378937
10M6BNorth YorkGlencairn43.709577-79.445073
11M9BEtobicokeWest Deane Park, Princess Gardens, Martin Grov...43.650943-79.554724
\n", + "
" + ], + "text/plain": [ + " Postal Code Borough \\\n", + "0 M3A North York \n", + "1 M4A North York \n", + "2 M5A Downtown Toronto \n", + "3 M6A North York \n", + "4 M7A Downtown Toronto \n", + "5 M9A Etobicoke \n", + "6 M1B Scarborough \n", + "7 M3B North York \n", + "8 M4B East York \n", + "9 M5B Downtown Toronto \n", + "10 M6B North York \n", + "11 M9B Etobicoke \n", + "\n", + " Neighbourhood Latitude Longitude \n", + "0 Parkwoods 43.753259 -79.329656 \n", + "1 Victoria Village 43.725882 -79.315572 \n", + "2 Regent Park, Harbourfront 43.654260 -79.360636 \n", + "3 Lawrence Manor, Lawrence Heights 43.718518 -79.464763 \n", + "4 Queen's Park, Ontario Provincial Government 43.662301 -79.389494 \n", + "5 Islington Avenue, Humber Valley Village 43.667856 -79.532242 \n", + "6 Malvern, Rouge 43.806686 -79.194353 \n", + "7 Don Mills 43.745906 -79.352188 \n", + "8 Parkview Hill, Woodbine Gardens 43.706397 -79.309937 \n", + "9 Garden District, Ryerson 43.657162 -79.378937 \n", + "10 Glencairn 43.709577 -79.445073 \n", + "11 West Deane Park, Princess Gardens, Martin Grov... 43.650943 -79.554724 " + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.join(df_geo.set_index('Postal Code'), on='Postal Code')\n", + "df.head(12)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(103, 5)" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "# Part 3\n", + "Using the Foursquare API to segment and cluster the neighbourhoods of Toronto" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting package metadata (current_repodata.json): ...working... 
done\n", + "Solving environment: ...working... done\n", + "\n", + "# All requested packages already installed.\n", + "\n", + "Collecting geopy\n", + " Downloading geopy-2.0.0-py3-none-any.whl (111 kB)\n", + "Collecting geographiclib<2,>=1.49\n", + " Downloading geographiclib-1.50-py3-none-any.whl (38 kB)\n", + "Installing collected packages: geographiclib, geopy\n", + "Successfully installed geographiclib-1.50 geopy-2.0.0\n", + "The geograpical coordinate of Toronto are 43.6534817, -79.3839347.\n" + ] + } + ], + "source": [ + "!conda install -c conda-forge geocoder --yes\n", + "import geocoder\n", + "!pip install geopy\n", + "from geopy.geocoders import Nominatim \n", + "\n", + "address = 'Toronto, Ontario'\n", + "\n", + "geolocator = Nominatim(user_agent=\"toronto_explorer\")\n", + "location = geolocator.geocode(address)\n", + "latitude = location.latitude\n", + "longitude = location.longitude\n", + "print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Printing the map" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import folium\n", + "\n", + "# create map of Toronto using latitude and longitude values\n", + "map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)\n", + "\n", + "# add markers to map\n", + "for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):\n", + " label = '{}, {}'.format(neighborhood, borough)\n", + " label = folium.Popup(label, parse_html=True)\n", + " folium.CircleMarker(\n", + " [lat, lng],\n", + " radius=5,\n", + " popup=label,\n", + " color='blue',\n", + " fill=True,\n", + " fill_color='#3186cc',\n", + " fill_opacity=0.7,\n", + " ).add_to(map_Toronto) \n", + " \n", + "map_Toronto" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define Foursquare parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "CLIENT_ID = 'MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN'\n", + "CLIENT_SECRET = 'YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ'" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your credentails:\n", + "CLIENT_ID: MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN\n", + "CLIENT_SECRET:YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ\n" + ] + } + ], + "source": [ + "VERSION = '20180604' # Foursquare API version\n", + "\n", + "print('Your credentails:')\n", + "print('CLIENT_ID: ' + CLIENT_ID)\n", + "print('CLIENT_SECRET:' + CLIENT_SECRET)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Explore the data, and get the venues in 500 meters range from our first entry" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.\n" + ] + } + ], + "source": [ + "neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value\n", + "neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value\n", + "\n", + "neighborhood_name = df.loc[0, 'Neighbourhood'] # neighborhood name\n", + "\n", + "print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, \n", + " neighborhood_latitude, \n", + " neighborhood_longitude))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the GET request URL" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://api.foursquare.com/v2/venues/explore?&client_id=MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN&client_secret=YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ&v=20180604&ll=43.7532586,-79.3296565&radius=500&limit=100'" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LIMIT = 100\n", + "radius = 500\n", + "url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(\n", + " CLIENT_ID, \n", + " CLIENT_SECRET, \n", + " VERSION, \n", + " neighborhood_latitude, \n", + " neighborhood_longitude, \n", + " radius, \n", + " LIMIT)\n", + "url" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'meta': {'code': 200, 'requestId': '5f1dab2fb0d9d01cac2a7fbc'},\n", + " 'response': {'warning': {'text': \"There aren't a lot of results near you. 
Try something more general, reset your filters, or expand the search area.\"},\n", + " 'headerLocation': 'Parkwoods - Donalda',\n", + " 'headerFullLocation': 'Parkwoods - Donalda, Toronto',\n", + " 'headerLocationGranularity': 'neighborhood',\n", + " 'totalResults': 2,\n", + " 'suggestedBounds': {'ne': {'lat': 43.757758604500005,\n", + " 'lng': -79.32343823984928},\n", + " 'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},\n", + " 'groups': [{'type': 'Recommended Places',\n", + " 'name': 'recommended',\n", + " 'items': [{'reasons': {'count': 0,\n", + " 'items': [{'summary': 'This spot is popular',\n", + " 'type': 'general',\n", + " 'reasonName': 'globalInteractionReason'}]},\n", + " 'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',\n", + " 'name': 'Brookbanks Park',\n", + " 'location': {'address': 'Toronto',\n", + " 'lat': 43.751976046055574,\n", + " 'lng': -79.33214044722958,\n", + " 'labeledLatLngs': [{'label': 'display',\n", + " 'lat': 43.751976046055574,\n", + " 'lng': -79.33214044722958}],\n", + " 'distance': 245,\n", + " 'cc': 'CA',\n", + " 'city': 'Toronto',\n", + " 'state': 'ON',\n", + " 'country': 'Canada',\n", + " 'formattedAddress': ['Toronto', 'Toronto ON', 'Canada']},\n", + " 'categories': [{'id': '4bf58dd8d48988d163941735',\n", + " 'name': 'Park',\n", + " 'pluralName': 'Parks',\n", + " 'shortName': 'Park',\n", + " 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',\n", + " 'suffix': '.png'},\n", + " 'primary': True}],\n", + " 'photos': {'count': 0, 'groups': []}},\n", + " 'referralId': 'e-0-4e8d9dcdd5fbbbb6b3003c7b-0'},\n", + " {'reasons': {'count': 0,\n", + " 'items': [{'summary': 'This spot is popular',\n", + " 'type': 'general',\n", + " 'reasonName': 'globalInteractionReason'}]},\n", + " 'venue': {'id': '4cb11e2075ebb60cd1c4caad',\n", + " 'name': 'Variety Store',\n", + " 'location': {'address': '29 Valley Woods Road',\n", + " 'lat': 43.75197441585782,\n", + " 'lng': -79.33311418516017,\n", + " 'labeledLatLngs': 
[{'label': 'display',\n", + " 'lat': 43.75197441585782,\n", + " 'lng': -79.33311418516017}],\n", + " 'distance': 312,\n", + " 'cc': 'CA',\n", + " 'city': 'Toronto',\n", + " 'state': 'ON',\n", + " 'country': 'Canada',\n", + " 'formattedAddress': ['29 Valley Woods Road', 'Toronto ON', 'Canada']},\n", + " 'categories': [{'id': '4bf58dd8d48988d1f9941735',\n", + " 'name': 'Food & Drink Shop',\n", + " 'pluralName': 'Food & Drink Shops',\n", + " 'shortName': 'Food & Drink',\n", + " 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/foodanddrink_',\n", + " 'suffix': '.png'},\n", + " 'primary': True}],\n", + " 'photos': {'count': 0, 'groups': []}},\n", + " 'referralId': 'e-0-4cb11e2075ebb60cd1c4caad-1'}]}]}}" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = requests.get(url).json()\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "# function that extracts the category of the venue\n", + "def get_category_type(row):\n", + " try:\n", + " categories_list = row['categories']\n", + " except:\n", + " categories_list = row['venue.categories']\n", + " \n", + " if len(categories_list) == 0:\n", + " return None\n", + " else:\n", + " return categories_list[0]['name']" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\my_user\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:6: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead\n", + " \n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecategorieslatlng
0Brookbanks ParkPark43.751976-79.332140
1Variety StoreFood & Drink Shop43.751974-79.333114
\n", + "
" + ], + "text/plain": [ + " name categories lat lng\n", + "0 Brookbanks Park Park 43.751976 -79.332140\n", + "1 Variety Store Food & Drink Shop 43.751974 -79.333114" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "from pandas.io.json import json_normalize\n", + "\n", + "venues = results['response']['groups'][0]['items']\n", + " \n", + "nearby_venues = json_normalize(venues) # flatten JSON\n", + "\n", + "# filter columns\n", + "filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']\n", + "nearby_venues =nearby_venues.loc[:, filtered_columns]\n", + "\n", + "# filter the category for each row\n", + "nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)\n", + "\n", + "# clean columns\n", + "nearby_venues.columns = [col.split(\".\")[-1] for col in nearby_venues.columns]\n", + "\n", + "nearby_venues.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generalize to obtain the venues from all neighbourhoods in Toronto" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "def getNearbyVenues(names, latitudes, longitudes, radius=500):\n", + " \n", + " venues_list=[]\n", + " for name, lat, lng in zip(names, latitudes, longitudes):\n", + " print(name)\n", + " \n", + " # create the API request URL\n", + " url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(\n", + " CLIENT_ID, \n", + " CLIENT_SECRET, \n", + " VERSION, \n", + " lat, \n", + " lng, \n", + " radius, \n", + " LIMIT)\n", + " \n", + " # make the GET request\n", + " results = requests.get(url).json()[\"response\"]['groups'][0]['items']\n", + " \n", + " # return only relevant information for each nearby venue\n", + " venues_list.append([(\n", + " name, \n", + " lat, \n", + " lng, \n", + " v['venue']['name'], 
\n", + " v['venue']['location']['lat'], \n", + " v['venue']['location']['lng'], \n", + " v['venue']['categories'][0]['name']) for v in results])\n", + "\n", + " nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])\n", + " nearby_venues.columns = ['Neighbourhood', \n", + " 'Neighborhood Latitude', \n", + " 'Neighborhood Longitude', \n", + " 'Venue', \n", + " 'Venue Latitude', \n", + " 'Venue Longitude', \n", + " 'Venue Category']\n", + " \n", + " return(nearby_venues)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parkwoods\n", + "Victoria Village\n", + "Regent Park, Harbourfront\n", + "Lawrence Manor, Lawrence Heights\n", + "Queen's Park, Ontario Provincial Government\n", + "Islington Avenue, Humber Valley Village\n", + "Malvern, Rouge\n", + "Don Mills\n", + "Parkview Hill, Woodbine Gardens\n", + "Garden District, Ryerson\n", + "Glencairn\n", + "West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale\n", + "Rouge Hill, Port Union, Highland Creek\n", + "Don Mills\n", + "Woodbine Heights\n", + "St. 
James Town\n", + "Humewood-Cedarvale\n", + "Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood\n", + "Guildwood, Morningside, West Hill\n", + "The Beaches\n", + "Berczy Park\n", + "Caledonia-Fairbanks\n", + "Woburn\n", + "Leaside\n", + "Central Bay Street\n", + "my_usertie\n", + "Cedarbrae\n", + "Hillcrest Village\n", + "Bathurst Manor, Wilson Heights, Downsview North\n", + "Thorncliffe Park\n", + "Richmond, Adelaide, King\n", + "Dufferin, Dovercourt Village\n", + "Scarborough Village\n", + "Fairview, Henry Farm, Oriole\n", + "Northwood Park, York University\n", + "East Toronto, Broadview North (Old East York)\n", + "Harbourfront East, Union Station, Toronto Islands\n", + "Little Portugal, Trinity\n", + "Kennedy Park, Ionview, East Birchmount Park\n", + "Bayview Village\n", + "Downsview\n", + "The Danforth West, Riverdale\n", + "Toronto Dominion Centre, Design Exchange\n", + "Brockton, Parkdale Village, Exhibition Place\n", + "Golden Mile, Clairlea, Oakridge\n", + "York Mills, Silver Hills\n", + "Downsview\n", + "India Bazaar, The Beaches West\n", + "Commerce Court, Victoria Hotel\n", + "North Park, Maple Leaf Park, Upwood Park\n", + "Humber Summit\n", + "Cliffside, Cliffcrest, Scarborough Village West\n", + "Willowdale, Newtonbrook\n", + "Downsview\n", + "Studio District\n", + "Bedford Park, Lawrence Manor East\n", + "Del Ray, Mount Dennis, Keelsdale and Silverthorn\n", + "Humberlea, Emery\n", + "Birch Cliff, Cliffside West\n", + "Willowdale, Willowdale East\n", + "Downsview\n", + "Lawrence Park\n", + "Roselawn\n", + "Runnymede, The Junction North\n", + "Weston\n", + "Dorset Park, Wexford Heights, Scarborough Town Centre\n", + "York Mills West\n", + "Davisville North\n", + "Forest Hill North & West, Forest Hill Road Park\n", + "High Park, The Junction South\n", + "Westmount\n", + "Wexford, Maryvale\n", + "Willowdale, Willowdale West\n", + "North Toronto West, Lawrence Park\n", + "The Annex, North Midtown, Yorkville\n", + "Parkdale, Roncesvalles\n", + 
"Canada Post Gateway Processing Centre\n", + "Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens\n", + "Agincourt\n", + "Davisville\n", + "University of Toronto, Harbord\n", + "Runnymede, Swansea\n", + "Clarks Corners, Tam O'Shanter, Sullivan\n", + "Moore Park, Summerhill East\n", + "Kensington Market, Chinatown, Grange Park\n", + "Milliken, Agincourt North, Steeles East, L'Amoreaux East\n", + "Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park\n", + "CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport\n", + "New Toronto, Mimico South, Humber Bay Shores\n", + "South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens\n", + "Steeles West, L'Amoreaux West\n", + "Rosedale\n", + "Stn A PO Boxes\n", + "Alderwood, Long Branch\n", + "Northwest, West Humber - Clairville\n", + "Upper Rouge\n", + "St. James Town, Cabbagetown\n", + "First Canadian Place, Underground city\n", + "The Kingsway, Montgomery Road, Old Mill North\n", + "Church and Wellesley\n", + "Business reply mail Processing Centre, South Central Letter Processing Plant Toronto\n", + "Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East\n", + "Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West\n" + ] + } + ], + "source": [ + "toronto_venues = getNearbyVenues(names=df['Neighbourhood'],\n", + " latitudes=df['Latitude'],\n", + " longitudes=df['Longitude']\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Checking the size of df" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(2153, 7)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NeighbourhoodNeighborhood LatitudeNeighborhood LongitudeVenueVenue LatitudeVenue LongitudeVenue Category
0Parkwoods43.753259-79.329656Brookbanks Park43.751976-79.332140Park
1Parkwoods43.753259-79.329656Variety Store43.751974-79.333114Food & Drink Shop
2Victoria Village43.725882-79.315572Victoria Village Arena43.723481-79.315635Hockey Arena
3Victoria Village43.725882-79.315572Portugril43.725819-79.312785Portuguese Restaurant
4Victoria Village43.725882-79.315572Tim Hortons43.725517-79.313103Coffee Shop
\n", + "
" + ], + "text/plain": [ + " Neighbourhood Neighborhood Latitude Neighborhood Longitude \\\n", + "0 Parkwoods 43.753259 -79.329656 \n", + "1 Parkwoods 43.753259 -79.329656 \n", + "2 Victoria Village 43.725882 -79.315572 \n", + "3 Victoria Village 43.725882 -79.315572 \n", + "4 Victoria Village 43.725882 -79.315572 \n", + "\n", + " Venue Venue Latitude Venue Longitude \\\n", + "0 Brookbanks Park 43.751976 -79.332140 \n", + "1 Variety Store 43.751974 -79.333114 \n", + "2 Victoria Village Arena 43.723481 -79.315635 \n", + "3 Portugril 43.725819 -79.312785 \n", + "4 Tim Hortons 43.725517 -79.313103 \n", + "\n", + " Venue Category \n", + "0 Park \n", + "1 Food & Drink Shop \n", + "2 Hockey Arena \n", + "3 Portuguese Restaurant \n", + "4 Coffee Shop " + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(toronto_venues.shape)\n", + "toronto_venues.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Checking how many venues there are for each neighbourhood" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Neighborhood LatitudeNeighborhood LongitudeVenueVenue LatitudeVenue LongitudeVenue Category
Neighbourhood
Agincourt444444
Alderwood, Long Branch888888
Bathurst Manor, Wilson Heights, Downsview North212121212121
Bayview Village444444
Bedford Park, Lawrence Manor East252525252525
.....................
Willowdale, Willowdale East343434343434
Willowdale, Willowdale West666666
Woburn444444
Woodbine Heights999999
York Mills West333333
\n", + "

96 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Neighborhood Latitude \\\n", + "Neighbourhood \n", + "Agincourt 4 \n", + "Alderwood, Long Branch 8 \n", + "Bathurst Manor, Wilson Heights, Downsview North 21 \n", + "Bayview Village 4 \n", + "Bedford Park, Lawrence Manor East 25 \n", + "... ... \n", + "Willowdale, Willowdale East 34 \n", + "Willowdale, Willowdale West 6 \n", + "Woburn 4 \n", + "Woodbine Heights 9 \n", + "York Mills West 3 \n", + "\n", + " Neighborhood Longitude \\\n", + "Neighbourhood \n", + "Agincourt 4 \n", + "Alderwood, Long Branch 8 \n", + "Bathurst Manor, Wilson Heights, Downsview North 21 \n", + "Bayview Village 4 \n", + "Bedford Park, Lawrence Manor East 25 \n", + "... ... \n", + "Willowdale, Willowdale East 34 \n", + "Willowdale, Willowdale West 6 \n", + "Woburn 4 \n", + "Woodbine Heights 9 \n", + "York Mills West 3 \n", + "\n", + " Venue Venue Latitude \\\n", + "Neighbourhood \n", + "Agincourt 4 4 \n", + "Alderwood, Long Branch 8 8 \n", + "Bathurst Manor, Wilson Heights, Downsview North 21 21 \n", + "Bayview Village 4 4 \n", + "Bedford Park, Lawrence Manor East 25 25 \n", + "... ... ... \n", + "Willowdale, Willowdale East 34 34 \n", + "Willowdale, Willowdale West 6 6 \n", + "Woburn 4 4 \n", + "Woodbine Heights 9 9 \n", + "York Mills West 3 3 \n", + "\n", + " Venue Longitude \\\n", + "Neighbourhood \n", + "Agincourt 4 \n", + "Alderwood, Long Branch 8 \n", + "Bathurst Manor, Wilson Heights, Downsview North 21 \n", + "Bayview Village 4 \n", + "Bedford Park, Lawrence Manor East 25 \n", + "... ... \n", + "Willowdale, Willowdale East 34 \n", + "Willowdale, Willowdale West 6 \n", + "Woburn 4 \n", + "Woodbine Heights 9 \n", + "York Mills West 3 \n", + "\n", + " Venue Category \n", + "Neighbourhood \n", + "Agincourt 4 \n", + "Alderwood, Long Branch 8 \n", + "Bathurst Manor, Wilson Heights, Downsview North 21 \n", + "Bayview Village 4 \n", + "Bedford Park, Lawrence Manor East 25 \n", + "... ... 
\n", + "Willowdale, Willowdale East 34 \n", + "Willowdale, Willowdale West 6 \n", + "Woburn 4 \n", + "Woodbine Heights 9 \n", + "York Mills West 3 \n", + "\n", + "[96 rows x 6 columns]" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_venues.groupby('Neighbourhood').count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many categories can we find?" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NeighbourhoodAccessories StoreAfghan RestaurantAirportAirport Food CourtAirport LoungeAirport ServiceAirport TerminalAmerican RestaurantAntique Shop...Vegetarian / Vegan RestaurantVideo Game StoreVideo StoreVietnamese RestaurantWarehouse StoreWine BarWine ShopWings JointWomen's StoreYoga Studio
0Parkwoods000000000...0000000000
1Parkwoods000000000...0000000000
2Victoria Village000000000...0000000000
3Victoria Village000000000...0000000000
4Victoria Village000000000...0000000000
\n", + "

5 rows × 270 columns

\n", + "
" + ], + "text/plain": [ + " Neighbourhood Accessories Store Afghan Restaurant Airport \\\n", + "0 Parkwoods 0 0 0 \n", + "1 Parkwoods 0 0 0 \n", + "2 Victoria Village 0 0 0 \n", + "3 Victoria Village 0 0 0 \n", + "4 Victoria Village 0 0 0 \n", + "\n", + " Airport Food Court Airport Lounge Airport Service Airport Terminal \\\n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + " American Restaurant Antique Shop ... Vegetarian / Vegan Restaurant \\\n", + "0 0 0 ... 0 \n", + "1 0 0 ... 0 \n", + "2 0 0 ... 0 \n", + "3 0 0 ... 0 \n", + "4 0 0 ... 0 \n", + "\n", + " Video Game Store Video Store Vietnamese Restaurant Warehouse Store \\\n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + " Wine Bar Wine Shop Wings Joint Women's Store Yoga Studio \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "\n", + "[5 rows x 270 columns]" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# one hot encoding\n", + "toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix=\"\", prefix_sep=\"\")\n", + "\n", + "# add neighborhood column back to dataframe\n", + "toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] \n", + "\n", + "# move neighborhood column to the first column\n", + "fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])\n", + "toronto_onehot = toronto_onehot[fixed_columns]\n", + "\n", + "toronto_onehot.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2153, 270)" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_onehot.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NeighbourhoodAccessories StoreAfghan RestaurantAirportAirport Food CourtAirport LoungeAirport ServiceAirport TerminalAmerican RestaurantAntique Shop...Vegetarian / Vegan RestaurantVideo Game StoreVideo StoreVietnamese RestaurantWarehouse StoreWine BarWine ShopWings JointWomen's StoreYoga Studio
0Agincourt0.00.00.00.00.00.00.00.000.0...0.00.00.00.00.00.00.00.00.000.0
1Alderwood, Long Branch0.00.00.00.00.00.00.00.000.0...0.00.00.00.00.00.00.00.00.000.0
2Bathurst Manor, Wilson Heights, Downsview North0.00.00.00.00.00.00.00.000.0...0.00.00.00.00.00.00.00.00.000.0
3Bayview Village0.00.00.00.00.00.00.00.000.0...0.00.00.00.00.00.00.00.00.000.0
4Bedford Park, Lawrence Manor East0.00.00.00.00.00.00.00.040.0...0.00.00.00.00.00.00.00.00.040.0
\n", + "

5 rows × 270 columns

\n", + "
" + ], + "text/plain": [ + " Neighbourhood Accessories Store \\\n", + "0 Agincourt 0.0 \n", + "1 Alderwood, Long Branch 0.0 \n", + "2 Bathurst Manor, Wilson Heights, Downsview North 0.0 \n", + "3 Bayview Village 0.0 \n", + "4 Bedford Park, Lawrence Manor East 0.0 \n", + "\n", + " Afghan Restaurant Airport Airport Food Court Airport Lounge \\\n", + "0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 \n", + "\n", + " Airport Service Airport Terminal American Restaurant Antique Shop ... \\\n", + "0 0.0 0.0 0.00 0.0 ... \n", + "1 0.0 0.0 0.00 0.0 ... \n", + "2 0.0 0.0 0.00 0.0 ... \n", + "3 0.0 0.0 0.00 0.0 ... \n", + "4 0.0 0.0 0.04 0.0 ... \n", + "\n", + " Vegetarian / Vegan Restaurant Video Game Store Video Store \\\n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "\n", + " Vietnamese Restaurant Warehouse Store Wine Bar Wine Shop Wings Joint \\\n", + "0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " Women's Store Yoga Studio \n", + "0 0.00 0.0 \n", + "1 0.00 0.0 \n", + "2 0.00 0.0 \n", + "3 0.00 0.0 \n", + "4 0.04 0.0 \n", + "\n", + "[5 rows x 270 columns]" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()\n", + "toronto_grouped.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(96, 270)" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_grouped.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Getting the top 10 for each neighbourhood" + ] + }, + { + "cell_type": "code", + "execution_count": 
107, + "metadata": {}, + "outputs": [], + "source": [ + "def return_most_common_venues(row, num_top_venues):\n", + " row_categories = row.iloc[1:]\n", + " row_categories_sorted = row_categories.sort_values(ascending=False)\n", + " \n", + " return row_categories_sorted.index.values[0:num_top_venues]" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Neighbourhood1st Most Common Venue2nd Most Common Venue3rd Most Common Venue4th Most Common Venue5th Most Common Venue6th Most Common Venue7th Most Common Venue8th Most Common Venue9th Most Common Venue10th Most Common Venue
0AgincourtLoungeLatin American RestaurantSkating RinkBreakfast SpotDonut ShopDinerDiscount StoreDistribution CenterDog RunDoner Restaurant
1Alderwood, Long BranchPizza PlacePharmacySandwich PlaceDance StudioCoffee ShopPubGymAirport TerminalFalafel RestaurantEvent Space
2Bathurst Manor, Wilson Heights, Downsview NorthCoffee ShopBankFrozen Yogurt ShopBridal ShopSandwich PlaceDinerRestaurantDeli / BodegaMiddle Eastern RestaurantSupermarket
3Bayview VillageCaféBankChinese RestaurantJapanese RestaurantYoga StudioDinerDiscount StoreDistribution CenterDog RunDoner Restaurant
4Bedford Park, Lawrence Manor EastSandwich PlaceRestaurantItalian RestaurantCoffee ShopLiquor StoreThai RestaurantCaféPubButcherSushi Restaurant
\n", + "
" + ], + "text/plain": [ + " Neighbourhood 1st Most Common Venue \\\n", + "0 Agincourt Lounge \n", + "1 Alderwood, Long Branch Pizza Place \n", + "2 Bathurst Manor, Wilson Heights, Downsview North Coffee Shop \n", + "3 Bayview Village Café \n", + "4 Bedford Park, Lawrence Manor East Sandwich Place \n", + "\n", + " 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n", + "0 Latin American Restaurant Skating Rink Breakfast Spot \n", + "1 Pharmacy Sandwich Place Dance Studio \n", + "2 Bank Frozen Yogurt Shop Bridal Shop \n", + "3 Bank Chinese Restaurant Japanese Restaurant \n", + "4 Restaurant Italian Restaurant Coffee Shop \n", + "\n", + " 5th Most Common Venue 6th Most Common Venue 7th Most Common Venue \\\n", + "0 Donut Shop Diner Discount Store \n", + "1 Coffee Shop Pub Gym \n", + "2 Sandwich Place Diner Restaurant \n", + "3 Yoga Studio Diner Discount Store \n", + "4 Liquor Store Thai Restaurant Café \n", + "\n", + " 8th Most Common Venue 9th Most Common Venue 10th Most Common Venue \n", + "0 Distribution Center Dog Run Doner Restaurant \n", + "1 Airport Terminal Falafel Restaurant Event Space \n", + "2 Deli / Bodega Middle Eastern Restaurant Supermarket \n", + "3 Distribution Center Dog Run Doner Restaurant \n", + "4 Pub Butcher Sushi Restaurant " + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "num_top_venues = 10\n", + "\n", + "indicators = ['st', 'nd', 'rd']\n", + "\n", + "# create columns according to number of top venues\n", + "columns = ['Neighbourhood']\n", + "for ind in np.arange(num_top_venues):\n", + " try:\n", + " columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))\n", + " except:\n", + " columns.append('{}th Most Common Venue'.format(ind+1))\n", + "\n", + "# create a new dataframe\n", + "neighborhoods_venues_sorted = pd.DataFrame(columns=columns)\n", + "neighborhoods_venues_sorted['Neighbourhood'] = 
toronto_grouped['Neighbourhood']\n", + "\n", + "for ind in np.arange(toronto_grouped.shape[0]):\n", + " neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)\n", + "\n", + "neighborhoods_venues_sorted.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Clustering Neighborhoods" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# import k-means from clustering stage\n", + "from sklearn.cluster import KMeans\n", + "\n", + "# set number of clusters\n", + "kclusters = 5\n", + "\n", + "toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)\n", + "\n", + "# run k-means clustering\n", + "kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)\n", + "\n", + "# check cluster labels generated for each row in the dataframe\n", + "kmeans.labels_[0:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Merge the dataframe with the top 10 and the cluster for each neighbourhood" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Postal CodeBoroughNeighbourhoodLatitudeLongitude
0M3ANorth YorkParkwoods43.753259-79.329656
1M4ANorth YorkVictoria Village43.725882-79.315572
2M5ADowntown TorontoRegent Park, Harbourfront43.654260-79.360636
3M6ANorth YorkLawrence Manor, Lawrence Heights43.718518-79.464763
4M7ADowntown TorontoQueen's Park, Ontario Provincial Government43.662301-79.389494
..................
98M8XEtobicokeThe Kingsway, Montgomery Road, Old Mill North43.653654-79.506944
99M4YDowntown TorontoChurch and Wellesley43.665860-79.383160
100M7YEast TorontoBusiness reply mail Processing Centre, South C...43.662744-79.321558
101M8YEtobicokeOld Mill South, King's Mill Park, Sunnylea, Hu...43.636258-79.498509
102M8ZEtobicokeMimico NW, The Queensway West, South of Bloor,...43.628841-79.520999
\n", + "

103 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Postal Code Borough \\\n", + "0 M3A North York \n", + "1 M4A North York \n", + "2 M5A Downtown Toronto \n", + "3 M6A North York \n", + "4 M7A Downtown Toronto \n", + ".. ... ... \n", + "98 M8X Etobicoke \n", + "99 M4Y Downtown Toronto \n", + "100 M7Y East Toronto \n", + "101 M8Y Etobicoke \n", + "102 M8Z Etobicoke \n", + "\n", + " Neighbourhood Latitude Longitude \n", + "0 Parkwoods 43.753259 -79.329656 \n", + "1 Victoria Village 43.725882 -79.315572 \n", + "2 Regent Park, Harbourfront 43.654260 -79.360636 \n", + "3 Lawrence Manor, Lawrence Heights 43.718518 -79.464763 \n", + "4 Queen's Park, Ontario Provincial Government 43.662301 -79.389494 \n", + ".. ... ... ... \n", + "98 The Kingsway, Montgomery Road, Old Mill North 43.653654 -79.506944 \n", + "99 Church and Wellesley 43.665860 -79.383160 \n", + "100 Business reply mail Processing Centre, South C... 43.662744 -79.321558 \n", + "101 Old Mill South, King's Mill Park, Sunnylea, Hu... 43.636258 -79.498509 \n", + "102 Mimico NW, The Queensway West, South of Bloor,... 43.628841 -79.520999 \n", + "\n", + "[103 rows x 5 columns]" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Postal CodeBoroughNeighbourhoodLatitudeLongitudeCluster Labels1st Most Common Venue2nd Most Common Venue3rd Most Common Venue4th Most Common Venue5th Most Common Venue6th Most Common Venue7th Most Common Venue8th Most Common Venue9th Most Common Venue10th Most Common Venue
5M9AEtobicokeIslington Avenue, Humber Valley Village43.667856-79.532242NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
45M2LNorth YorkYork Mills, Silver Hills43.757490-79.374714NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
95M1XScarboroughUpper Rouge43.836125-79.205636NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Postal Code Borough Neighbourhood \\\n", + "5 M9A Etobicoke Islington Avenue, Humber Valley Village \n", + "45 M2L North York York Mills, Silver Hills \n", + "95 M1X Scarborough Upper Rouge \n", + "\n", + " Latitude Longitude Cluster Labels 1st Most Common Venue \\\n", + "5 43.667856 -79.532242 NaN NaN \n", + "45 43.757490 -79.374714 NaN NaN \n", + "95 43.836125 -79.205636 NaN NaN \n", + "\n", + " 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n", + "5 NaN NaN NaN \n", + "45 NaN NaN NaN \n", + "95 NaN NaN NaN \n", + "\n", + " 5th Most Common Venue 6th Most Common Venue 7th Most Common Venue \\\n", + "5 NaN NaN NaN \n", + "45 NaN NaN NaN \n", + "95 NaN NaN NaN \n", + "\n", + " 8th Most Common Venue 9th Most Common Venue 10th Most Common Venue \n", + "5 NaN NaN NaN \n", + "45 NaN NaN NaN \n", + "95 NaN NaN NaN " + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_merged[toronto_merged['Cluster Labels'].isnull()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the clusters on the map" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import matplotlib.cm as cm\n", + "import matplotlib.colors as colors\n", + "\n", + "# create map\n", + "map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)\n", + "\n", + "# set color scheme for the clusters\n", + "x = np.arange(kclusters)\n", + "ys = [i + x + (i*x)**2 for i in range(kclusters)]\n", + "colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))\n", + "rainbow = [colors.rgb2hex(i) for i in colors_array]\n", + "\n", + "toronto_merged_nonan = toronto_merged.dropna(subset=['Cluster Labels'])\n", + "\n", + "# add markers to the map\n", + "markers_colors = []\n", + "for lat, lon, poi, cluster in zip(toronto_merged_nonan['Latitude'], toronto_merged_nonan['Longitude'], toronto_merged_nonan['Neighbourhood'], toronto_merged_nonan['Cluster Labels']):\n", + " label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)\n", + " folium.CircleMarker(\n", + " [lat, lon],\n", + " radius=5,\n", + " popup=label,\n", + " color=rainbow[int(cluster-1)],\n", + " fill=True,\n", + " fill_color=rainbow[int(cluster-1)],\n", + " fill_opacity=0.7).add_to(map_clusters)\n", + " \n", + "map_clusters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cluster 1" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BoroughCluster Labels1st Most Common Venue2nd Most Common Venue3rd Most Common Venue4th Most Common Venue5th Most Common Venue6th Most Common Venue7th Most Common Venue8th Most Common Venue9th Most Common Venue10th Most Common Venue
0North York0.0ParkFood & Drink ShopYoga StudioDonut ShopDinerDiscount StoreDistribution CenterDog RunDoner RestaurantDrugstore
21York0.0ParkWomen's StorePoolYoga StudioDoner RestaurantDim Sum RestaurantDinerDiscount StoreDistribution CenterDog Run
35East York0.0ParkConvenience StoreYoga StudioDonut ShopDinerDiscount StoreDistribution CenterDog RunDoner RestaurantDumpling Restaurant
52North York0.0ParkYoga StudioDonut ShopDim Sum RestaurantDinerDiscount StoreDistribution CenterDog RunDoner RestaurantDrugstore
64York0.0ParkConvenience StoreYoga StudioDonut ShopDinerDiscount StoreDistribution CenterDog RunDoner RestaurantDumpling Restaurant
66North York0.0ParkConstruction & LandscapingConvenience StoreYoga StudioDonut ShopDinerDiscount StoreDistribution CenterDog RunDoner Restaurant
85Scarborough0.0ParkPlaygroundCoffee ShopYoga StudioDoner RestaurantDim Sum RestaurantDinerDiscount StoreDistribution CenterDog Run
91Downtown Toronto0.0ParkTrailPlaygroundYoga StudioDessert ShopDim Sum RestaurantDinerDiscount StoreDistribution CenterDog Run
\n", + "
" + ], + "text/plain": [ + " Borough Cluster Labels 1st Most Common Venue \\\n", + "0 North York 0.0 Park \n", + "21 York 0.0 Park \n", + "35 East York 0.0 Park \n", + "52 North York 0.0 Park \n", + "64 York 0.0 Park \n", + "66 North York 0.0 Park \n", + "85 Scarborough 0.0 Park \n", + "91 Downtown Toronto 0.0 Park \n", + "\n", + " 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n", + "0 Food & Drink Shop Yoga Studio Donut Shop \n", + "21 Women's Store Pool Yoga Studio \n", + "35 Convenience Store Yoga Studio Donut Shop \n", + "52 Yoga Studio Donut Shop Dim Sum Restaurant \n", + "64 Convenience Store Yoga Studio Donut Shop \n", + "66 Construction & Landscaping Convenience Store Yoga Studio \n", + "85 Playground Coffee Shop Yoga Studio \n", + "91 Trail Playground Yoga Studio \n", + "\n", + " 5th Most Common Venue 6th Most Common Venue 7th Most Common Venue \\\n", + "0 Diner Discount Store Distribution Center \n", + "21 Doner Restaurant Dim Sum Restaurant Diner \n", + "35 Diner Discount Store Distribution Center \n", + "52 Diner Discount Store Distribution Center \n", + "64 Diner Discount Store Distribution Center \n", + "66 Donut Shop Diner Discount Store \n", + "85 Doner Restaurant Dim Sum Restaurant Diner \n", + "91 Dessert Shop Dim Sum Restaurant Diner \n", + "\n", + " 8th Most Common Venue 9th Most Common Venue 10th Most Common Venue \n", + "0 Dog Run Doner Restaurant Drugstore \n", + "21 Discount Store Distribution Center Dog Run \n", + "35 Dog Run Doner Restaurant Dumpling Restaurant \n", + "52 Dog Run Doner Restaurant Drugstore \n", + "64 Dog Run Doner Restaurant Dumpling Restaurant \n", + "66 Distribution Center Dog Run Doner Restaurant \n", + "85 Discount Store Distribution Center Dog Run \n", + "91 Discount Store Distribution Center Dog Run " + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 0, 
toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cluster 2" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BoroughCluster Labels1st Most Common Venue2nd Most Common Venue3rd Most Common Venue4th Most Common Venue5th Most Common Venue6th Most Common Venue7th Most Common Venue8th Most Common Venue9th Most Common Venue10th Most Common Venue
1North York1.0Hockey ArenaPizza PlaceCoffee ShopPortuguese RestaurantYoga StudioDim Sum RestaurantDinerDiscount StoreDistribution CenterDog Run
2Downtown Toronto1.0Coffee ShopBakeryPubParkBreakfast SpotCaféTheaterYoga StudioMexican RestaurantShoe Store
3North York1.0Clothing StoreAccessories StoreCoffee ShopBoutiqueMiscellaneous ShopEvent SpaceFurniture / Home StoreWomen's StoreVietnamese RestaurantConvenience Store
4Downtown Toronto1.0Coffee ShopDinerYoga StudioBarBeer BarSmoothie ShopSandwich PlaceBurrito PlaceCaféPark
6Scarborough1.0Print ShopFast Food RestaurantYoga StudioDim Sum RestaurantDinerDiscount StoreDistribution CenterDog RunDoner RestaurantDonut Shop
.......................................
97Downtown Toronto1.0Coffee ShopCaféHotelRestaurantGymJapanese RestaurantAmerican RestaurantSteakhouseAsian RestaurantSeafood Restaurant
98Etobicoke1.0RiverDoner RestaurantDessert ShopDim Sum RestaurantDinerDiscount StoreDistribution CenterDog RunYoga StudioDepartment Store
99Downtown Toronto1.0Coffee ShopSushi RestaurantJapanese RestaurantGay BarRestaurantYoga StudioBubble Tea ShopDance StudioMediterranean RestaurantMen's Store
100East Toronto1.0Light Rail StationYoga StudioAuto WorkshopSmoke ShopBrewerySpaFarmers MarketFast Food RestaurantBurrito PlaceRestaurant
102Etobicoke1.0Grocery StoreTanning SalonConvenience StoreDiscount StoreBurrito PlaceBurger JointSandwich PlaceKids StoreSupplement ShopBakery
\n", + "

87 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Borough Cluster Labels 1st Most Common Venue \\\n", + "1 North York 1.0 Hockey Arena \n", + "2 Downtown Toronto 1.0 Coffee Shop \n", + "3 North York 1.0 Clothing Store \n", + "4 Downtown Toronto 1.0 Coffee Shop \n", + "6 Scarborough 1.0 Print Shop \n", + ".. ... ... ... \n", + "97 Downtown Toronto 1.0 Coffee Shop \n", + "98 Etobicoke 1.0 River \n", + "99 Downtown Toronto 1.0 Coffee Shop \n", + "100 East Toronto 1.0 Light Rail Station \n", + "102 Etobicoke 1.0 Grocery Store \n", + "\n", + " 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n", + "1 Pizza Place Coffee Shop Portuguese Restaurant \n", + "2 Bakery Pub Park \n", + "3 Accessories Store Coffee Shop Boutique \n", + "4 Diner Yoga Studio Bar \n", + "6 Fast Food Restaurant Yoga Studio Dim Sum Restaurant \n", + ".. ... ... ... \n", + "97 Café Hotel Restaurant \n", + "98 Doner Restaurant Dessert Shop Dim Sum Restaurant \n", + "99 Sushi Restaurant Japanese Restaurant Gay Bar \n", + "100 Yoga Studio Auto Workshop Smoke Shop \n", + "102 Tanning Salon Convenience Store Discount Store \n", + "\n", + " 5th Most Common Venue 6th Most Common Venue 7th Most Common Venue \\\n", + "1 Yoga Studio Dim Sum Restaurant Diner \n", + "2 Breakfast Spot Café Theater \n", + "3 Miscellaneous Shop Event Space Furniture / Home Store \n", + "4 Beer Bar Smoothie Shop Sandwich Place \n", + "6 Diner Discount Store Distribution Center \n", + ".. ... ... ... 
\n", + "97 Gym Japanese Restaurant American Restaurant \n", + "98 Diner Discount Store Distribution Center \n", + "99 Restaurant Yoga Studio Bubble Tea Shop \n", + "100 Brewery Spa Farmers Market \n", + "102 Burrito Place Burger Joint Sandwich Place \n", + "\n", + " 8th Most Common Venue 9th Most Common Venue 10th Most Common Venue \n", + "1 Discount Store Distribution Center Dog Run \n", + "2 Yoga Studio Mexican Restaurant Shoe Store \n", + "3 Women's Store Vietnamese Restaurant Convenience Store \n", + "4 Burrito Place Café Park \n", + "6 Dog Run Doner Restaurant Donut Shop \n", + ".. ... ... ... \n", + "97 Steakhouse Asian Restaurant Seafood Restaurant \n", + "98 Dog Run Yoga Studio Department Store \n", + "99 Dance Studio Mediterranean Restaurant Men's Store \n", + "100 Fast Food Restaurant Burrito Place Restaurant \n", + "102 Kids Store Supplement Shop Bakery \n", + "\n", + "[87 rows x 12 columns]" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 1, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cluster 3" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BoroughCluster Labels1st Most Common Venue2nd Most Common Venue3rd Most Common Venue4th Most Common Venue5th Most Common Venue6th Most Common Venue7th Most Common Venue8th Most Common Venue9th Most Common Venue10th Most Common Venue
32Scarborough2.0Pizza PlacePlaygroundDoner RestaurantDessert ShopDim Sum RestaurantDinerDiscount StoreDistribution CenterDog RunDonut Shop
50North York2.0Pizza PlaceDepartment StoreEvent SpaceEthiopian RestaurantElectronics StoreEastern European RestaurantDumpling RestaurantDrugstoreDonut ShopDoner Restaurant
\n", + "
" + ], + "text/plain": [ + " Borough Cluster Labels 1st Most Common Venue 2nd Most Common Venue \\\n", + "32 Scarborough 2.0 Pizza Place Playground \n", + "50 North York 2.0 Pizza Place Department Store \n", + "\n", + " 3rd Most Common Venue 4th Most Common Venue 5th Most Common Venue \\\n", + "32 Doner Restaurant Dessert Shop Dim Sum Restaurant \n", + "50 Event Space Ethiopian Restaurant Electronics Store \n", + "\n", + " 6th Most Common Venue 7th Most Common Venue 8th Most Common Venue \\\n", + "32 Diner Discount Store Distribution Center \n", + "50 Eastern European Restaurant Dumpling Restaurant Drugstore \n", + "\n", + " 9th Most Common Venue 10th Most Common Venue \n", + "32 Dog Run Donut Shop \n", + "50 Donut Shop Doner Restaurant " + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 2, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cluster 4" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BoroughCluster Labels1st Most Common Venue2nd Most Common Venue3rd Most Common Venue4th Most Common Venue5th Most Common Venue6th Most Common Venue7th Most Common Venue8th Most Common Venue9th Most Common Venue10th Most Common Venue
57North York3.0Fabric ShopBaseball FieldYoga StudioDonut ShopDinerDiscount StoreDistribution CenterDog RunDoner RestaurantDrugstore
101Etobicoke3.0Baseball FieldYoga StudioDonut ShopDinerDiscount StoreDistribution CenterDog RunDoner RestaurantDrugstoreFarmers Market
\n", + "
" + ], + "text/plain": [ + " Borough Cluster Labels 1st Most Common Venue 2nd Most Common Venue \\\n", + "57 North York 3.0 Fabric Shop Baseball Field \n", + "101 Etobicoke 3.0 Baseball Field Yoga Studio \n", + "\n", + " 3rd Most Common Venue 4th Most Common Venue 5th Most Common Venue \\\n", + "57 Yoga Studio Donut Shop Diner \n", + "101 Donut Shop Diner Discount Store \n", + "\n", + " 6th Most Common Venue 7th Most Common Venue 8th Most Common Venue \\\n", + "57 Discount Store Distribution Center Dog Run \n", + "101 Distribution Center Dog Run Doner Restaurant \n", + "\n", + " 9th Most Common Venue 10th Most Common Venue \n", + "57 Doner Restaurant Drugstore \n", + "101 Drugstore Farmers Market " + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 3, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cluster 5" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BoroughCluster Labels1st Most Common Venue2nd Most Common Venue3rd Most Common Venue4th Most Common Venue5th Most Common Venue6th Most Common Venue7th Most Common Venue8th Most Common Venue9th Most Common Venue10th Most Common Venue
11Etobicoke4.0Jewelry StoreYoga StudioDinerDiscount StoreDistribution CenterDog RunDoner RestaurantDonut ShopDrugstoreFarmers Market
\n", + "
" + ], + "text/plain": [ + " Borough Cluster Labels 1st Most Common Venue 2nd Most Common Venue \\\n", + "11 Etobicoke 4.0 Jewelry Store Yoga Studio \n", + "\n", + " 3rd Most Common Venue 4th Most Common Venue 5th Most Common Venue \\\n", + "11 Diner Discount Store Distribution Center \n", + "\n", + " 6th Most Common Venue 7th Most Common Venue 8th Most Common Venue \\\n", + "11 Dog Run Doner Restaurant Donut Shop \n", + "\n", + " 9th Most Common Venue 10th Most Common Venue \n", + "11 Drugstore Farmers Market " + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 4, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- cgit v1.2.3-70-g09d2