{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Segmenting and Clustering Neighbourhoods in Toronto\n",
"---\n",
"# Part 1\n",
"Import libraries"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import requests"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get the HTML page of Wikipedia and use read_html we convert the html data into list of Data frame objects.\n",
"\n",
"Remove cells which have neighbourhood as \"Not assigned.\""
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Postal Code | \n",
" Borough | \n",
" Neighbourhood | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" M3A | \n",
" North York | \n",
" Parkwoods | \n",
"
\n",
" \n",
" 1 | \n",
" M4A | \n",
" North York | \n",
" Victoria Village | \n",
"
\n",
" \n",
" 2 | \n",
" M5A | \n",
" Downtown Toronto | \n",
" Regent Park, Harbourfront | \n",
"
\n",
" \n",
" 3 | \n",
" M6A | \n",
" North York | \n",
" Lawrence Manor, Lawrence Heights | \n",
"
\n",
" \n",
" 4 | \n",
" M7A | \n",
" Downtown Toronto | \n",
" Queen's Park, Ontario Provincial Government | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Postal Code Borough Neighbourhood\n",
"0 M3A North York Parkwoods\n",
"1 M4A North York Victoria Village\n",
"2 M5A Downtown Toronto Regent Park, Harbourfront\n",
"3 M6A North York Lawrence Manor, Lawrence Heights\n",
"4 M7A Downtown Toronto Queen's Park, Ontario Provincial Government"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'\n",
"wiki_page = requests.get(wiki)\n",
"\n",
"wiki_raw = pd.read_html(wiki_page.content, header = 0)[0]\n",
"df = wiki_raw[wiki_raw.Neighbourhood != 'Not assigned']\n",
"df.reset_index(inplace=True, drop=True)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Borough | \n",
" Neighbourhood | \n",
"
\n",
" \n",
" Postal Code | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" M1B | \n",
" Scarborough | \n",
" Malvern, Rouge | \n",
"
\n",
" \n",
" M1C | \n",
" Scarborough | \n",
" Rouge Hill, Port Union, Highland Creek | \n",
"
\n",
" \n",
" M1E | \n",
" Scarborough | \n",
" Guildwood, Morningside, West Hill | \n",
"
\n",
" \n",
" M1G | \n",
" Scarborough | \n",
" Woburn | \n",
"
\n",
" \n",
" M1H | \n",
" Scarborough | \n",
" Cedarbrae | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" M9N | \n",
" York | \n",
" Weston | \n",
"
\n",
" \n",
" M9P | \n",
" Etobicoke | \n",
" Westmount | \n",
"
\n",
" \n",
" M9R | \n",
" Etobicoke | \n",
" Kingsview Village, St. Phillips, Martin Grove ... | \n",
"
\n",
" \n",
" M9V | \n",
" Etobicoke | \n",
" South Steeles, Silverstone, Humbergate, Jamest... | \n",
"
\n",
" \n",
" M9W | \n",
" Etobicoke | \n",
" Northwest, West Humber - Clairville | \n",
"
\n",
" \n",
"
\n",
"
103 rows × 2 columns
\n",
"
"
],
"text/plain": [
" Borough Neighbourhood\n",
"Postal Code \n",
"M1B Scarborough Malvern, Rouge\n",
"M1C Scarborough Rouge Hill, Port Union, Highland Creek\n",
"M1E Scarborough Guildwood, Morningside, West Hill\n",
"M1G Scarborough Woburn\n",
"M1H Scarborough Cedarbrae\n",
"... ... ...\n",
"M9N York Weston\n",
"M9P Etobicoke Westmount\n",
"M9R Etobicoke Kingsview Village, St. Phillips, Martin Grove ...\n",
"M9V Etobicoke South Steeles, Silverstone, Humbergate, Jamest...\n",
"M9W Etobicoke Northwest, West Humber - Clairville\n",
"\n",
"[103 rows x 2 columns]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby(['Postal Code']).first()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"103"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df['Postal Code'].unique())"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Postal Code | \n",
" Borough | \n",
" Neighbourhood | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [Postal Code, Borough, Neighbourhood]\n",
"Index: []"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['Borough'] == 'Not assigned']"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(103, 3)"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"# Part 2"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: geocoder in c:\\users\\my_user\\anaconda3\\lib\\site-packages (1.38.1)\n",
"Requirement already satisfied: future in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (0.18.2)\n",
"Requirement already satisfied: ratelim in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (0.1.6)\n",
"Requirement already satisfied: requests in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (2.22.0)\n",
"Requirement already satisfied: click in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (7.0)\n",
"Requirement already satisfied: six in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from geocoder) (1.14.0)\n",
"Requirement already satisfied: decorator in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from ratelim->geocoder) (4.4.1)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->geocoder) (1.25.8)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->geocoder) (2.8)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->geocoder) (2019.11.28)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\my_user\\anaconda3\\lib\\site-packages (from requests->geocoder) (3.0.4)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install geocoder"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"import geocoder"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Postal Code | \n",
" Latitude | \n",
" Longitude | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" M1B | \n",
" 43.806686 | \n",
" -79.194353 | \n",
"
\n",
" \n",
" 1 | \n",
" M1C | \n",
" 43.784535 | \n",
" -79.160497 | \n",
"
\n",
" \n",
" 2 | \n",
" M1E | \n",
" 43.763573 | \n",
" -79.188711 | \n",
"
\n",
" \n",
" 3 | \n",
" M1G | \n",
" 43.770992 | \n",
" -79.216917 | \n",
"
\n",
" \n",
" 4 | \n",
" M1H | \n",
" 43.773136 | \n",
" -79.239476 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Postal Code Latitude Longitude\n",
"0 M1B 43.806686 -79.194353\n",
"1 M1C 43.784535 -79.160497\n",
"2 M1E 43.763573 -79.188711\n",
"3 M1G 43.770992 -79.216917\n",
"4 M1H 43.773136 -79.239476"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url = 'http://cocl.us/Geospatial_data'\n",
"df_geo = pd.read_csv(url)\n",
"df_geo.head()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Postal Code object\n",
"Latitude float64\n",
"Longitude float64\n",
"dtype: object"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_geo.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Postal Code object\n",
"Borough object\n",
"Neighbourhood object\n",
"dtype: object"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(103, 3)"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(103, 3)"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_geo.shape"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Postal Code | \n",
" Borough | \n",
" Neighbourhood | \n",
" Latitude | \n",
" Longitude | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" M3A | \n",
" North York | \n",
" Parkwoods | \n",
" 43.753259 | \n",
" -79.329656 | \n",
"
\n",
" \n",
" 1 | \n",
" M4A | \n",
" North York | \n",
" Victoria Village | \n",
" 43.725882 | \n",
" -79.315572 | \n",
"
\n",
" \n",
" 2 | \n",
" M5A | \n",
" Downtown Toronto | \n",
" Regent Park, Harbourfront | \n",
" 43.654260 | \n",
" -79.360636 | \n",
"
\n",
" \n",
" 3 | \n",
" M6A | \n",
" North York | \n",
" Lawrence Manor, Lawrence Heights | \n",
" 43.718518 | \n",
" -79.464763 | \n",
"
\n",
" \n",
" 4 | \n",
" M7A | \n",
" Downtown Toronto | \n",
" Queen's Park, Ontario Provincial Government | \n",
" 43.662301 | \n",
" -79.389494 | \n",
"
\n",
" \n",
" 5 | \n",
" M9A | \n",
" Etobicoke | \n",
" Islington Avenue, Humber Valley Village | \n",
" 43.667856 | \n",
" -79.532242 | \n",
"
\n",
" \n",
" 6 | \n",
" M1B | \n",
" Scarborough | \n",
" Malvern, Rouge | \n",
" 43.806686 | \n",
" -79.194353 | \n",
"
\n",
" \n",
" 7 | \n",
" M3B | \n",
" North York | \n",
" Don Mills | \n",
" 43.745906 | \n",
" -79.352188 | \n",
"
\n",
" \n",
" 8 | \n",
" M4B | \n",
" East York | \n",
" Parkview Hill, Woodbine Gardens | \n",
" 43.706397 | \n",
" -79.309937 | \n",
"
\n",
" \n",
" 9 | \n",
" M5B | \n",
" Downtown Toronto | \n",
" Garden District, Ryerson | \n",
" 43.657162 | \n",
" -79.378937 | \n",
"
\n",
" \n",
" 10 | \n",
" M6B | \n",
" North York | \n",
" Glencairn | \n",
" 43.709577 | \n",
" -79.445073 | \n",
"
\n",
" \n",
" 11 | \n",
" M9B | \n",
" Etobicoke | \n",
" West Deane Park, Princess Gardens, Martin Grov... | \n",
" 43.650943 | \n",
" -79.554724 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Postal Code Borough \\\n",
"0 M3A North York \n",
"1 M4A North York \n",
"2 M5A Downtown Toronto \n",
"3 M6A North York \n",
"4 M7A Downtown Toronto \n",
"5 M9A Etobicoke \n",
"6 M1B Scarborough \n",
"7 M3B North York \n",
"8 M4B East York \n",
"9 M5B Downtown Toronto \n",
"10 M6B North York \n",
"11 M9B Etobicoke \n",
"\n",
" Neighbourhood Latitude Longitude \n",
"0 Parkwoods 43.753259 -79.329656 \n",
"1 Victoria Village 43.725882 -79.315572 \n",
"2 Regent Park, Harbourfront 43.654260 -79.360636 \n",
"3 Lawrence Manor, Lawrence Heights 43.718518 -79.464763 \n",
"4 Queen's Park, Ontario Provincial Government 43.662301 -79.389494 \n",
"5 Islington Avenue, Humber Valley Village 43.667856 -79.532242 \n",
"6 Malvern, Rouge 43.806686 -79.194353 \n",
"7 Don Mills 43.745906 -79.352188 \n",
"8 Parkview Hill, Woodbine Gardens 43.706397 -79.309937 \n",
"9 Garden District, Ryerson 43.657162 -79.378937 \n",
"10 Glencairn 43.709577 -79.445073 \n",
"11 West Deane Park, Princess Gardens, Martin Grov... 43.650943 -79.554724 "
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df.join(df_geo.set_index('Postal Code'), on='Postal Code')\n",
"df.head(12)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(103, 5)"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"# Part 3\n",
"Using the foursquere API to segment and cluster the neighborhoods of Toronto"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting package metadata (current_repodata.json): ...working... done\n",
"Solving environment: ...working... done\n",
"\n",
"# All requested packages already installed.\n",
"\n",
"Collecting geopy\n",
" Downloading geopy-2.0.0-py3-none-any.whl (111 kB)\n",
"Collecting geographiclib<2,>=1.49\n",
" Downloading geographiclib-1.50-py3-none-any.whl (38 kB)\n",
"Installing collected packages: geographiclib, geopy\n",
"Successfully installed geographiclib-1.50 geopy-2.0.0\n",
"The geograpical coordinate of Toronto are 43.6534817, -79.3839347.\n"
]
}
],
"source": [
"!conda install -c conda-forge geocoder --yes\n",
"import geocoder\n",
"!pip install geopy\n",
"from geopy.geocoders import Nominatim \n",
"\n",
"address = 'Toronto, Ontario'\n",
"\n",
"geolocator = Nominatim(user_agent=\"toronto_explorer\")\n",
"location = geolocator.geocode(address)\n",
"latitude = location.latitude\n",
"longitude = location.longitude\n",
"print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Printing the map"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Make this Notebook Trusted to load map: File -> Trust Notebook
"
],
"text/plain": [
""
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import folium\n",
"\n",
"# create map of Toronto using latitude and longitude values\n",
"map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)\n",
"\n",
"# add markers to map\n",
"for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):\n",
" label = '{}, {}'.format(neighborhood, borough)\n",
" label = folium.Popup(label, parse_html=True)\n",
" folium.CircleMarker(\n",
" [lat, lng],\n",
" radius=5,\n",
" popup=label,\n",
" color='blue',\n",
" fill=True,\n",
" fill_color='#3186cc',\n",
" fill_opacity=0.7,\n",
" ).add_to(map_Toronto) \n",
" \n",
"map_Toronto"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define Foursquare parameters"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"CLIENT_ID = 'MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN'\n",
"CLIENT_SECRET = 'YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ'"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Your credentails:\n",
"CLIENT_ID: MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN\n",
"CLIENT_SECRET:YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ\n"
]
}
],
"source": [
"VERSION = '20180604' # Foursquare API version\n",
"\n",
"print('Your credentails:')\n",
"print('CLIENT_ID: ' + CLIENT_ID)\n",
"print('CLIENT_SECRET:' + CLIENT_SECRET)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Explore the data, and get the venues in 500 meters range from our first entry"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.\n"
]
}
],
"source": [
"neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value\n",
"neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value\n",
"\n",
"neighborhood_name = df.loc[0, 'Neighbourhood'] # neighborhood name\n",
"\n",
"print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, \n",
" neighborhood_latitude, \n",
" neighborhood_longitude))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the GET request URL"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'https://api.foursquare.com/v2/venues/explore?&client_id=MPMD3J0GGDV0HKDJFEDRK0USSGW0MQUD0DMN3C4ZAMWT2XTN&client_secret=YVCSGVSCX02EFWITMI3RDOTUSJEHV4APRMLBFCN5CKWXCTPJ&v=20180604&ll=43.7532586,-79.3296565&radius=500&limit=100'"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"LIMIT = 100\n",
"radius = 500\n",
"url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(\n",
" CLIENT_ID, \n",
" CLIENT_SECRET, \n",
" VERSION, \n",
" neighborhood_latitude, \n",
" neighborhood_longitude, \n",
" radius, \n",
" LIMIT)\n",
"url"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'meta': {'code': 200, 'requestId': '5f1dab2fb0d9d01cac2a7fbc'},\n",
" 'response': {'warning': {'text': \"There aren't a lot of results near you. Try something more general, reset your filters, or expand the search area.\"},\n",
" 'headerLocation': 'Parkwoods - Donalda',\n",
" 'headerFullLocation': 'Parkwoods - Donalda, Toronto',\n",
" 'headerLocationGranularity': 'neighborhood',\n",
" 'totalResults': 2,\n",
" 'suggestedBounds': {'ne': {'lat': 43.757758604500005,\n",
" 'lng': -79.32343823984928},\n",
" 'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},\n",
" 'groups': [{'type': 'Recommended Places',\n",
" 'name': 'recommended',\n",
" 'items': [{'reasons': {'count': 0,\n",
" 'items': [{'summary': 'This spot is popular',\n",
" 'type': 'general',\n",
" 'reasonName': 'globalInteractionReason'}]},\n",
" 'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',\n",
" 'name': 'Brookbanks Park',\n",
" 'location': {'address': 'Toronto',\n",
" 'lat': 43.751976046055574,\n",
" 'lng': -79.33214044722958,\n",
" 'labeledLatLngs': [{'label': 'display',\n",
" 'lat': 43.751976046055574,\n",
" 'lng': -79.33214044722958}],\n",
" 'distance': 245,\n",
" 'cc': 'CA',\n",
" 'city': 'Toronto',\n",
" 'state': 'ON',\n",
" 'country': 'Canada',\n",
" 'formattedAddress': ['Toronto', 'Toronto ON', 'Canada']},\n",
" 'categories': [{'id': '4bf58dd8d48988d163941735',\n",
" 'name': 'Park',\n",
" 'pluralName': 'Parks',\n",
" 'shortName': 'Park',\n",
" 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',\n",
" 'suffix': '.png'},\n",
" 'primary': True}],\n",
" 'photos': {'count': 0, 'groups': []}},\n",
" 'referralId': 'e-0-4e8d9dcdd5fbbbb6b3003c7b-0'},\n",
" {'reasons': {'count': 0,\n",
" 'items': [{'summary': 'This spot is popular',\n",
" 'type': 'general',\n",
" 'reasonName': 'globalInteractionReason'}]},\n",
" 'venue': {'id': '4cb11e2075ebb60cd1c4caad',\n",
" 'name': 'Variety Store',\n",
" 'location': {'address': '29 Valley Woods Road',\n",
" 'lat': 43.75197441585782,\n",
" 'lng': -79.33311418516017,\n",
" 'labeledLatLngs': [{'label': 'display',\n",
" 'lat': 43.75197441585782,\n",
" 'lng': -79.33311418516017}],\n",
" 'distance': 312,\n",
" 'cc': 'CA',\n",
" 'city': 'Toronto',\n",
" 'state': 'ON',\n",
" 'country': 'Canada',\n",
" 'formattedAddress': ['29 Valley Woods Road', 'Toronto ON', 'Canada']},\n",
" 'categories': [{'id': '4bf58dd8d48988d1f9941735',\n",
" 'name': 'Food & Drink Shop',\n",
" 'pluralName': 'Food & Drink Shops',\n",
" 'shortName': 'Food & Drink',\n",
" 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/foodanddrink_',\n",
" 'suffix': '.png'},\n",
" 'primary': True}],\n",
" 'photos': {'count': 0, 'groups': []}},\n",
" 'referralId': 'e-0-4cb11e2075ebb60cd1c4caad-1'}]}]}}"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results = requests.get(url).json()\n",
"results"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# function that extracts the category of the venue\n",
"def get_category_type(row):\n",
" try:\n",
" categories_list = row['categories']\n",
" except:\n",
" categories_list = row['venue.categories']\n",
" \n",
" if len(categories_list) == 0:\n",
" return None\n",
" else:\n",
" return categories_list[0]['name']"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\my_user\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:6: FutureWarning: pandas.io.json.json_normalize is deprecated, use pandas.json_normalize instead\n",
" \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" categories | \n",
" lat | \n",
" lng | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Brookbanks Park | \n",
" Park | \n",
" 43.751976 | \n",
" -79.332140 | \n",
"
\n",
" \n",
" 1 | \n",
" Variety Store | \n",
" Food & Drink Shop | \n",
" 43.751974 | \n",
" -79.333114 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name categories lat lng\n",
"0 Brookbanks Park Park 43.751976 -79.332140\n",
"1 Variety Store Food & Drink Shop 43.751974 -79.333114"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import json\n",
"from pandas.io.json import json_normalize\n",
"\n",
"venues = results['response']['groups'][0]['items']\n",
" \n",
"nearby_venues = json_normalize(venues) # flatten JSON\n",
"\n",
"# filter columns\n",
"filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']\n",
"nearby_venues =nearby_venues.loc[:, filtered_columns]\n",
"\n",
"# filter the category for each row\n",
"nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)\n",
"\n",
"# clean columns\n",
"nearby_venues.columns = [col.split(\".\")[-1] for col in nearby_venues.columns]\n",
"\n",
"nearby_venues.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Generalize to obtain the venues from all neighbourhoods in Toronto"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [],
"source": [
"def getNearbyVenues(names, latitudes, longitudes, radius=500):\n",
" \n",
" venues_list=[]\n",
" for name, lat, lng in zip(names, latitudes, longitudes):\n",
" print(name)\n",
" \n",
" # create the API request URL\n",
" url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(\n",
" CLIENT_ID, \n",
" CLIENT_SECRET, \n",
" VERSION, \n",
" lat, \n",
" lng, \n",
" radius, \n",
" LIMIT)\n",
" \n",
" # make the GET request\n",
" results = requests.get(url).json()[\"response\"]['groups'][0]['items']\n",
" \n",
" # return only relevant information for each nearby venue\n",
" venues_list.append([(\n",
" name, \n",
" lat, \n",
" lng, \n",
" v['venue']['name'], \n",
" v['venue']['location']['lat'], \n",
" v['venue']['location']['lng'], \n",
" v['venue']['categories'][0]['name']) for v in results])\n",
"\n",
" nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])\n",
" nearby_venues.columns = ['Neighbourhood', \n",
" 'Neighborhood Latitude', \n",
" 'Neighborhood Longitude', \n",
" 'Venue', \n",
" 'Venue Latitude', \n",
" 'Venue Longitude', \n",
" 'Venue Category']\n",
" \n",
" return(nearby_venues)"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parkwoods\n",
"Victoria Village\n",
"Regent Park, Harbourfront\n",
"Lawrence Manor, Lawrence Heights\n",
"Queen's Park, Ontario Provincial Government\n",
"Islington Avenue, Humber Valley Village\n",
"Malvern, Rouge\n",
"Don Mills\n",
"Parkview Hill, Woodbine Gardens\n",
"Garden District, Ryerson\n",
"Glencairn\n",
"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale\n",
"Rouge Hill, Port Union, Highland Creek\n",
"Don Mills\n",
"Woodbine Heights\n",
"St. James Town\n",
"Humewood-Cedarvale\n",
"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood\n",
"Guildwood, Morningside, West Hill\n",
"The Beaches\n",
"Berczy Park\n",
"Caledonia-Fairbanks\n",
"Woburn\n",
"Leaside\n",
"Central Bay Street\n",
"my_usertie\n",
"Cedarbrae\n",
"Hillcrest Village\n",
"Bathurst Manor, Wilson Heights, Downsview North\n",
"Thorncliffe Park\n",
"Richmond, Adelaide, King\n",
"Dufferin, Dovercourt Village\n",
"Scarborough Village\n",
"Fairview, Henry Farm, Oriole\n",
"Northwood Park, York University\n",
"East Toronto, Broadview North (Old East York)\n",
"Harbourfront East, Union Station, Toronto Islands\n",
"Little Portugal, Trinity\n",
"Kennedy Park, Ionview, East Birchmount Park\n",
"Bayview Village\n",
"Downsview\n",
"The Danforth West, Riverdale\n",
"Toronto Dominion Centre, Design Exchange\n",
"Brockton, Parkdale Village, Exhibition Place\n",
"Golden Mile, Clairlea, Oakridge\n",
"York Mills, Silver Hills\n",
"Downsview\n",
"India Bazaar, The Beaches West\n",
"Commerce Court, Victoria Hotel\n",
"North Park, Maple Leaf Park, Upwood Park\n",
"Humber Summit\n",
"Cliffside, Cliffcrest, Scarborough Village West\n",
"Willowdale, Newtonbrook\n",
"Downsview\n",
"Studio District\n",
"Bedford Park, Lawrence Manor East\n",
"Del Ray, Mount Dennis, Keelsdale and Silverthorn\n",
"Humberlea, Emery\n",
"Birch Cliff, Cliffside West\n",
"Willowdale, Willowdale East\n",
"Downsview\n",
"Lawrence Park\n",
"Roselawn\n",
"Runnymede, The Junction North\n",
"Weston\n",
"Dorset Park, Wexford Heights, Scarborough Town Centre\n",
"York Mills West\n",
"Davisville North\n",
"Forest Hill North & West, Forest Hill Road Park\n",
"High Park, The Junction South\n",
"Westmount\n",
"Wexford, Maryvale\n",
"Willowdale, Willowdale West\n",
"North Toronto West, Lawrence Park\n",
"The Annex, North Midtown, Yorkville\n",
"Parkdale, Roncesvalles\n",
"Canada Post Gateway Processing Centre\n",
"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens\n",
"Agincourt\n",
"Davisville\n",
"University of Toronto, Harbord\n",
"Runnymede, Swansea\n",
"Clarks Corners, Tam O'Shanter, Sullivan\n",
"Moore Park, Summerhill East\n",
"Kensington Market, Chinatown, Grange Park\n",
"Milliken, Agincourt North, Steeles East, L'Amoreaux East\n",
"Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park\n",
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport\n",
"New Toronto, Mimico South, Humber Bay Shores\n",
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens\n",
"Steeles West, L'Amoreaux West\n",
"Rosedale\n",
"Stn A PO Boxes\n",
"Alderwood, Long Branch\n",
"Northwest, West Humber - Clairville\n",
"Upper Rouge\n",
"St. James Town, Cabbagetown\n",
"First Canadian Place, Underground city\n",
"The Kingsway, Montgomery Road, Old Mill North\n",
"Church and Wellesley\n",
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto\n",
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East\n",
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West\n"
]
}
],
"source": [
"toronto_venues = getNearbyVenues(names=df['Neighbourhood'],\n",
" latitudes=df['Latitude'],\n",
" longitudes=df['Longitude']\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Checking the size of df"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(2153, 7)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Neighbourhood | \n",
" Neighborhood Latitude | \n",
" Neighborhood Longitude | \n",
" Venue | \n",
" Venue Latitude | \n",
" Venue Longitude | \n",
" Venue Category | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Parkwoods | \n",
" 43.753259 | \n",
" -79.329656 | \n",
" Brookbanks Park | \n",
" 43.751976 | \n",
" -79.332140 | \n",
" Park | \n",
"
\n",
" \n",
" 1 | \n",
" Parkwoods | \n",
" 43.753259 | \n",
" -79.329656 | \n",
" Variety Store | \n",
" 43.751974 | \n",
" -79.333114 | \n",
" Food & Drink Shop | \n",
"
\n",
" \n",
" 2 | \n",
" Victoria Village | \n",
" 43.725882 | \n",
" -79.315572 | \n",
" Victoria Village Arena | \n",
" 43.723481 | \n",
" -79.315635 | \n",
" Hockey Arena | \n",
"
\n",
" \n",
" 3 | \n",
" Victoria Village | \n",
" 43.725882 | \n",
" -79.315572 | \n",
" Portugril | \n",
" 43.725819 | \n",
" -79.312785 | \n",
" Portuguese Restaurant | \n",
"
\n",
" \n",
" 4 | \n",
" Victoria Village | \n",
" 43.725882 | \n",
" -79.315572 | \n",
" Tim Hortons | \n",
" 43.725517 | \n",
" -79.313103 | \n",
" Coffee Shop | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Neighbourhood Neighborhood Latitude Neighborhood Longitude \\\n",
"0 Parkwoods 43.753259 -79.329656 \n",
"1 Parkwoods 43.753259 -79.329656 \n",
"2 Victoria Village 43.725882 -79.315572 \n",
"3 Victoria Village 43.725882 -79.315572 \n",
"4 Victoria Village 43.725882 -79.315572 \n",
"\n",
" Venue Venue Latitude Venue Longitude \\\n",
"0 Brookbanks Park 43.751976 -79.332140 \n",
"1 Variety Store 43.751974 -79.333114 \n",
"2 Victoria Village Arena 43.723481 -79.315635 \n",
"3 Portugril 43.725819 -79.312785 \n",
"4 Tim Hortons 43.725517 -79.313103 \n",
"\n",
" Venue Category \n",
"0 Park \n",
"1 Food & Drink Shop \n",
"2 Hockey Arena \n",
"3 Portuguese Restaurant \n",
"4 Coffee Shop "
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(toronto_venues.shape)\n",
"toronto_venues.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Checking how many venues there are for each venue"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Neighborhood Latitude | \n",
" Neighborhood Longitude | \n",
" Venue | \n",
" Venue Latitude | \n",
" Venue Longitude | \n",
" Venue Category | \n",
"
\n",
" \n",
" Neighbourhood | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Agincourt | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" Alderwood, Long Branch | \n",
" 8 | \n",
" 8 | \n",
" 8 | \n",
" 8 | \n",
" 8 | \n",
" 8 | \n",
"
\n",
" \n",
" Bathurst Manor, Wilson Heights, Downsview North | \n",
" 21 | \n",
" 21 | \n",
" 21 | \n",
" 21 | \n",
" 21 | \n",
" 21 | \n",
"
\n",
" \n",
" Bayview Village | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" Bedford Park, Lawrence Manor East | \n",
" 25 | \n",
" 25 | \n",
" 25 | \n",
" 25 | \n",
" 25 | \n",
" 25 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" Willowdale, Willowdale East | \n",
" 34 | \n",
" 34 | \n",
" 34 | \n",
" 34 | \n",
" 34 | \n",
" 34 | \n",
"
\n",
" \n",
" Willowdale, Willowdale West | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
"
\n",
" \n",
" Woburn | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" Woodbine Heights | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
"
\n",
" \n",
" York Mills West | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
96 rows × 6 columns
\n",
"
"
],
"text/plain": [
" Neighborhood Latitude \\\n",
"Neighbourhood \n",
"Agincourt 4 \n",
"Alderwood, Long Branch 8 \n",
"Bathurst Manor, Wilson Heights, Downsview North 21 \n",
"Bayview Village 4 \n",
"Bedford Park, Lawrence Manor East 25 \n",
"... ... \n",
"Willowdale, Willowdale East 34 \n",
"Willowdale, Willowdale West 6 \n",
"Woburn 4 \n",
"Woodbine Heights 9 \n",
"York Mills West 3 \n",
"\n",
" Neighborhood Longitude \\\n",
"Neighbourhood \n",
"Agincourt 4 \n",
"Alderwood, Long Branch 8 \n",
"Bathurst Manor, Wilson Heights, Downsview North 21 \n",
"Bayview Village 4 \n",
"Bedford Park, Lawrence Manor East 25 \n",
"... ... \n",
"Willowdale, Willowdale East 34 \n",
"Willowdale, Willowdale West 6 \n",
"Woburn 4 \n",
"Woodbine Heights 9 \n",
"York Mills West 3 \n",
"\n",
" Venue Venue Latitude \\\n",
"Neighbourhood \n",
"Agincourt 4 4 \n",
"Alderwood, Long Branch 8 8 \n",
"Bathurst Manor, Wilson Heights, Downsview North 21 21 \n",
"Bayview Village 4 4 \n",
"Bedford Park, Lawrence Manor East 25 25 \n",
"... ... ... \n",
"Willowdale, Willowdale East 34 34 \n",
"Willowdale, Willowdale West 6 6 \n",
"Woburn 4 4 \n",
"Woodbine Heights 9 9 \n",
"York Mills West 3 3 \n",
"\n",
" Venue Longitude \\\n",
"Neighbourhood \n",
"Agincourt 4 \n",
"Alderwood, Long Branch 8 \n",
"Bathurst Manor, Wilson Heights, Downsview North 21 \n",
"Bayview Village 4 \n",
"Bedford Park, Lawrence Manor East 25 \n",
"... ... \n",
"Willowdale, Willowdale East 34 \n",
"Willowdale, Willowdale West 6 \n",
"Woburn 4 \n",
"Woodbine Heights 9 \n",
"York Mills West 3 \n",
"\n",
" Venue Category \n",
"Neighbourhood \n",
"Agincourt 4 \n",
"Alderwood, Long Branch 8 \n",
"Bathurst Manor, Wilson Heights, Downsview North 21 \n",
"Bayview Village 4 \n",
"Bedford Park, Lawrence Manor East 25 \n",
"... ... \n",
"Willowdale, Willowdale East 34 \n",
"Willowdale, Willowdale West 6 \n",
"Woburn 4 \n",
"Woodbine Heights 9 \n",
"York Mills West 3 \n",
"\n",
"[96 rows x 6 columns]"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the non-null entries of every column per neighbourhood.\n",
"toronto_venues.groupby('Neighbourhood').count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many categories can we find?"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Neighbourhood | \n",
" Accessories Store | \n",
" Afghan Restaurant | \n",
" Airport | \n",
" Airport Food Court | \n",
" Airport Lounge | \n",
" Airport Service | \n",
" Airport Terminal | \n",
" American Restaurant | \n",
" Antique Shop | \n",
" ... | \n",
" Vegetarian / Vegan Restaurant | \n",
" Video Game Store | \n",
" Video Store | \n",
" Vietnamese Restaurant | \n",
" Warehouse Store | \n",
" Wine Bar | \n",
" Wine Shop | \n",
" Wings Joint | \n",
" Women's Store | \n",
" Yoga Studio | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Parkwoods | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" Parkwoods | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" Victoria Village | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" Victoria Village | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" Victoria Village | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 270 columns
\n",
"
"
],
"text/plain": [
" Neighbourhood Accessories Store Afghan Restaurant Airport \\\n",
"0 Parkwoods 0 0 0 \n",
"1 Parkwoods 0 0 0 \n",
"2 Victoria Village 0 0 0 \n",
"3 Victoria Village 0 0 0 \n",
"4 Victoria Village 0 0 0 \n",
"\n",
" Airport Food Court Airport Lounge Airport Service Airport Terminal \\\n",
"0 0 0 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"\n",
" American Restaurant Antique Shop ... Vegetarian / Vegan Restaurant \\\n",
"0 0 0 ... 0 \n",
"1 0 0 ... 0 \n",
"2 0 0 ... 0 \n",
"3 0 0 ... 0 \n",
"4 0 0 ... 0 \n",
"\n",
" Video Game Store Video Store Vietnamese Restaurant Warehouse Store \\\n",
"0 0 0 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"\n",
" Wine Bar Wine Shop Wings Joint Women's Store Yoga Studio \n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"\n",
"[5 rows x 270 columns]"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# one hot encoding: one indicator column per venue category\n",
"toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix=\"\", prefix_sep=\"\")\n",
"\n",
"# append the neighbourhood label as the last column ...\n",
"toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood']\n",
"\n",
"# ... then rotate it to the front, keeping the category columns in their existing order\n",
"fixed_columns = toronto_onehot.columns[-1:].tolist() + toronto_onehot.columns[:-1].tolist()\n",
"toronto_onehot = toronto_onehot.loc[:, fixed_columns]\n",
"\n",
"toronto_onehot.head()"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2153, 270)"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the one-hot encoded frame\n",
"toronto_onehot.shape"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Neighbourhood | \n",
" Accessories Store | \n",
" Afghan Restaurant | \n",
" Airport | \n",
" Airport Food Court | \n",
" Airport Lounge | \n",
" Airport Service | \n",
" Airport Terminal | \n",
" American Restaurant | \n",
" Antique Shop | \n",
" ... | \n",
" Vegetarian / Vegan Restaurant | \n",
" Video Game Store | \n",
" Video Store | \n",
" Vietnamese Restaurant | \n",
" Warehouse Store | \n",
" Wine Bar | \n",
" Wine Shop | \n",
" Wings Joint | \n",
" Women's Store | \n",
" Yoga Studio | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Agincourt | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" Alderwood, Long Branch | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" Bathurst Manor, Wilson Heights, Downsview North | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" Bayview Village | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" Bedford Park, Lawrence Manor East | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.04 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.04 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 270 columns
\n",
"
"
],
"text/plain": [
" Neighbourhood Accessories Store \\\n",
"0 Agincourt 0.0 \n",
"1 Alderwood, Long Branch 0.0 \n",
"2 Bathurst Manor, Wilson Heights, Downsview North 0.0 \n",
"3 Bayview Village 0.0 \n",
"4 Bedford Park, Lawrence Manor East 0.0 \n",
"\n",
" Afghan Restaurant Airport Airport Food Court Airport Lounge \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"\n",
" Airport Service Airport Terminal American Restaurant Antique Shop ... \\\n",
"0 0.0 0.0 0.00 0.0 ... \n",
"1 0.0 0.0 0.00 0.0 ... \n",
"2 0.0 0.0 0.00 0.0 ... \n",
"3 0.0 0.0 0.00 0.0 ... \n",
"4 0.0 0.0 0.04 0.0 ... \n",
"\n",
" Vegetarian / Vegan Restaurant Video Game Store Video Store \\\n",
"0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 \n",
"\n",
" Vietnamese Restaurant Warehouse Store Wine Bar Wine Shop Wings Joint \\\n",
"0 0.0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" Women's Store Yoga Studio \n",
"0 0.00 0.0 \n",
"1 0.00 0.0 \n",
"2 0.00 0.0 \n",
"3 0.00 0.0 \n",
"4 0.04 0.0 \n",
"\n",
"[5 rows x 270 columns]"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# average every indicator column per neighbourhood, keeping 'Neighbourhood' as a regular column\n",
"toronto_grouped = toronto_onehot.groupby('Neighbourhood', as_index=False).mean()\n",
"toronto_grouped.head()"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(96, 270)"
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the per-neighbourhood frame\n",
"toronto_grouped.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get the 10 most common venue categories for each neighbourhood"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"def return_most_common_venues(row, num_top_venues):\n",
"    \"\"\"Return the index labels of the largest values in `row`, ignoring its first entry.\n",
"\n",
"    row: pandas Series; position 0 is skipped and the remaining values are ranked.\n",
"    num_top_venues: maximum number of labels to return.\n",
"\n",
"    Returns an array of labels ordered from the highest value to the lowest.\n",
"    \"\"\"\n",
"    frequencies = row.iloc[1:]\n",
"    ranked_labels = frequencies.sort_values(ascending=False).index.values\n",
"    return ranked_labels[:num_top_venues]"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Neighbourhood | \n",
" 1st Most Common Venue | \n",
" 2nd Most Common Venue | \n",
" 3rd Most Common Venue | \n",
" 4th Most Common Venue | \n",
" 5th Most Common Venue | \n",
" 6th Most Common Venue | \n",
" 7th Most Common Venue | \n",
" 8th Most Common Venue | \n",
" 9th Most Common Venue | \n",
" 10th Most Common Venue | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Agincourt | \n",
" Lounge | \n",
" Latin American Restaurant | \n",
" Skating Rink | \n",
" Breakfast Spot | \n",
" Donut Shop | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
"
\n",
" \n",
" 1 | \n",
" Alderwood, Long Branch | \n",
" Pizza Place | \n",
" Pharmacy | \n",
" Sandwich Place | \n",
" Dance Studio | \n",
" Coffee Shop | \n",
" Pub | \n",
" Gym | \n",
" Airport Terminal | \n",
" Falafel Restaurant | \n",
" Event Space | \n",
"
\n",
" \n",
" 2 | \n",
" Bathurst Manor, Wilson Heights, Downsview North | \n",
" Coffee Shop | \n",
" Bank | \n",
" Frozen Yogurt Shop | \n",
" Bridal Shop | \n",
" Sandwich Place | \n",
" Diner | \n",
" Restaurant | \n",
" Deli / Bodega | \n",
" Middle Eastern Restaurant | \n",
" Supermarket | \n",
"
\n",
" \n",
" 3 | \n",
" Bayview Village | \n",
" Café | \n",
" Bank | \n",
" Chinese Restaurant | \n",
" Japanese Restaurant | \n",
" Yoga Studio | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
"
\n",
" \n",
" 4 | \n",
" Bedford Park, Lawrence Manor East | \n",
" Sandwich Place | \n",
" Restaurant | \n",
" Italian Restaurant | \n",
" Coffee Shop | \n",
" Liquor Store | \n",
" Thai Restaurant | \n",
" Café | \n",
" Pub | \n",
" Butcher | \n",
" Sushi Restaurant | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Neighbourhood 1st Most Common Venue \\\n",
"0 Agincourt Lounge \n",
"1 Alderwood, Long Branch Pizza Place \n",
"2 Bathurst Manor, Wilson Heights, Downsview North Coffee Shop \n",
"3 Bayview Village Café \n",
"4 Bedford Park, Lawrence Manor East Sandwich Place \n",
"\n",
" 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n",
"0 Latin American Restaurant Skating Rink Breakfast Spot \n",
"1 Pharmacy Sandwich Place Dance Studio \n",
"2 Bank Frozen Yogurt Shop Bridal Shop \n",
"3 Bank Chinese Restaurant Japanese Restaurant \n",
"4 Restaurant Italian Restaurant Coffee Shop \n",
"\n",
" 5th Most Common Venue 6th Most Common Venue 7th Most Common Venue \\\n",
"0 Donut Shop Diner Discount Store \n",
"1 Coffee Shop Pub Gym \n",
"2 Sandwich Place Diner Restaurant \n",
"3 Yoga Studio Diner Discount Store \n",
"4 Liquor Store Thai Restaurant Café \n",
"\n",
" 8th Most Common Venue 9th Most Common Venue 10th Most Common Venue \n",
"0 Distribution Center Dog Run Doner Restaurant \n",
"1 Airport Terminal Falafel Restaurant Event Space \n",
"2 Deli / Bodega Middle Eastern Restaurant Supermarket \n",
"3 Distribution Center Dog Run Doner Restaurant \n",
"4 Pub Butcher Sushi Restaurant "
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"num_top_venues = 10\n",
"\n",
"indicators = ['st', 'nd', 'rd']\n",
"\n",
"# create columns according to number of top venues\n",
"columns = ['Neighbourhood']\n",
"for ind in range(num_top_venues):\n",
"    # ranks 1-3 take 'st'/'nd'/'rd'; every later rank falls back to 'th'\n",
"    # (explicit bounds check instead of catching every exception)\n",
"    suffix = indicators[ind] if ind < len(indicators) else 'th'\n",
"    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))\n",
"\n",
"# collect one record per neighbourhood, then build the dataframe in a single step\n",
"top_venue_rows = [\n",
"    [row['Neighbourhood'], *return_most_common_venues(row, num_top_venues)]\n",
"    for _, row in toronto_grouped.iterrows()\n",
"]\n",
"neighborhoods_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns)\n",
"\n",
"neighborhoods_venues_sorted.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Clustering Neighborhoods"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# import k-means from clustering stage\n",
"from sklearn.cluster import KMeans\n",
"\n",
"# set number of clusters\n",
"kclusters = 5\n",
"\n",
"# keep only the category frequencies; the label column is not a feature.\n",
"# `columns=` is used because pandas 2.0 no longer accepts the axis as a positional argument.\n",
"toronto_grouped_clustering = toronto_grouped.drop(columns='Neighbourhood')\n",
"\n",
"# run k-means clustering\n",
"# n_init is pinned so the result does not depend on the scikit-learn version's default\n",
"kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)\n",
"\n",
"# check cluster labels generated for the first ten rows in the dataframe\n",
"kmeans.labels_[0:10]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Merge the dataframe with the top 10 and the cluster for each neighbourhood"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Postal Code | \n",
" Borough | \n",
" Neighbourhood | \n",
" Latitude | \n",
" Longitude | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" M3A | \n",
" North York | \n",
" Parkwoods | \n",
" 43.753259 | \n",
" -79.329656 | \n",
"
\n",
" \n",
" 1 | \n",
" M4A | \n",
" North York | \n",
" Victoria Village | \n",
" 43.725882 | \n",
" -79.315572 | \n",
"
\n",
" \n",
" 2 | \n",
" M5A | \n",
" Downtown Toronto | \n",
" Regent Park, Harbourfront | \n",
" 43.654260 | \n",
" -79.360636 | \n",
"
\n",
" \n",
" 3 | \n",
" M6A | \n",
" North York | \n",
" Lawrence Manor, Lawrence Heights | \n",
" 43.718518 | \n",
" -79.464763 | \n",
"
\n",
" \n",
" 4 | \n",
" M7A | \n",
" Downtown Toronto | \n",
" Queen's Park, Ontario Provincial Government | \n",
" 43.662301 | \n",
" -79.389494 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 98 | \n",
" M8X | \n",
" Etobicoke | \n",
" The Kingsway, Montgomery Road, Old Mill North | \n",
" 43.653654 | \n",
" -79.506944 | \n",
"
\n",
" \n",
" 99 | \n",
" M4Y | \n",
" Downtown Toronto | \n",
" Church and Wellesley | \n",
" 43.665860 | \n",
" -79.383160 | \n",
"
\n",
" \n",
" 100 | \n",
" M7Y | \n",
" East Toronto | \n",
" Business reply mail Processing Centre, South C... | \n",
" 43.662744 | \n",
" -79.321558 | \n",
"
\n",
" \n",
" 101 | \n",
" M8Y | \n",
" Etobicoke | \n",
" Old Mill South, King's Mill Park, Sunnylea, Hu... | \n",
" 43.636258 | \n",
" -79.498509 | \n",
"
\n",
" \n",
" 102 | \n",
" M8Z | \n",
" Etobicoke | \n",
" Mimico NW, The Queensway West, South of Bloor,... | \n",
" 43.628841 | \n",
" -79.520999 | \n",
"
\n",
" \n",
"
\n",
"
103 rows × 5 columns
\n",
"
"
],
"text/plain": [
" Postal Code Borough \\\n",
"0 M3A North York \n",
"1 M4A North York \n",
"2 M5A Downtown Toronto \n",
"3 M6A North York \n",
"4 M7A Downtown Toronto \n",
".. ... ... \n",
"98 M8X Etobicoke \n",
"99 M4Y Downtown Toronto \n",
"100 M7Y East Toronto \n",
"101 M8Y Etobicoke \n",
"102 M8Z Etobicoke \n",
"\n",
" Neighbourhood Latitude Longitude \n",
"0 Parkwoods 43.753259 -79.329656 \n",
"1 Victoria Village 43.725882 -79.315572 \n",
"2 Regent Park, Harbourfront 43.654260 -79.360636 \n",
"3 Lawrence Manor, Lawrence Heights 43.718518 -79.464763 \n",
"4 Queen's Park, Ontario Provincial Government 43.662301 -79.389494 \n",
".. ... ... ... \n",
"98 The Kingsway, Montgomery Road, Old Mill North 43.653654 -79.506944 \n",
"99 Church and Wellesley 43.665860 -79.383160 \n",
"100 Business reply mail Processing Centre, South C... 43.662744 -79.321558 \n",
"101 Old Mill South, King's Mill Park, Sunnylea, Hu... 43.636258 -79.498509 \n",
"102 Mimico NW, The Queensway West, South of Bloor,... 43.628841 -79.520999 \n",
"\n",
"[103 rows x 5 columns]"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# attach each neighbourhood's k-means label; labels follow toronto_grouped's row order\n",
"cluster_labels = pd.Series(kmeans.labels_, index=toronto_grouped['Neighbourhood'], name='Cluster Labels')\n",
"\n",
"# left-join the label and the top venues onto the postal-code table;\n",
"# neighbourhoods that were never clustered end up with NaN in the added columns.\n",
"# Nothing is mutated in place, so the cell can be re-run safely.\n",
"toronto_merged = (\n",
"    df\n",
"    .join(cluster_labels, on='Neighbourhood')\n",
"    .join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')\n",
")\n",
"\n",
"toronto_merged.head()"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Postal Code | \n",
" Borough | \n",
" Neighbourhood | \n",
" Latitude | \n",
" Longitude | \n",
" Cluster Labels | \n",
" 1st Most Common Venue | \n",
" 2nd Most Common Venue | \n",
" 3rd Most Common Venue | \n",
" 4th Most Common Venue | \n",
" 5th Most Common Venue | \n",
" 6th Most Common Venue | \n",
" 7th Most Common Venue | \n",
" 8th Most Common Venue | \n",
" 9th Most Common Venue | \n",
" 10th Most Common Venue | \n",
"
\n",
" \n",
" \n",
" \n",
" 5 | \n",
" M9A | \n",
" Etobicoke | \n",
" Islington Avenue, Humber Valley Village | \n",
" 43.667856 | \n",
" -79.532242 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 45 | \n",
" M2L | \n",
" North York | \n",
" York Mills, Silver Hills | \n",
" 43.757490 | \n",
" -79.374714 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 95 | \n",
" M1X | \n",
" Scarborough | \n",
" Upper Rouge | \n",
" 43.836125 | \n",
" -79.205636 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Postal Code Borough Neighbourhood \\\n",
"5 M9A Etobicoke Islington Avenue, Humber Valley Village \n",
"45 M2L North York York Mills, Silver Hills \n",
"95 M1X Scarborough Upper Rouge \n",
"\n",
" Latitude Longitude Cluster Labels 1st Most Common Venue \\\n",
"5 43.667856 -79.532242 NaN NaN \n",
"45 43.757490 -79.374714 NaN NaN \n",
"95 43.836125 -79.205636 NaN NaN \n",
"\n",
" 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n",
"5 NaN NaN NaN \n",
"45 NaN NaN NaN \n",
"95 NaN NaN NaN \n",
"\n",
" 5th Most Common Venue 6th Most Common Venue 7th Most Common Venue \\\n",
"5 NaN NaN NaN \n",
"45 NaN NaN NaN \n",
"95 NaN NaN NaN \n",
"\n",
" 8th Most Common Venue 9th Most Common Venue 10th Most Common Venue \n",
"5 NaN NaN NaN \n",
"45 NaN NaN NaN \n",
"95 NaN NaN NaN "
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# rows that did not receive a cluster label\n",
"toronto_merged.loc[toronto_merged['Cluster Labels'].isna()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot the clusters on the map"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Make this Notebook Trusted to load map: File -> Trust Notebook
"
],
"text/plain": [
""
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import matplotlib.cm as cm\n",
"import matplotlib.colors as colors\n",
"\n",
"# create map\n",
"map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)\n",
"\n",
"# set color scheme for the clusters: one evenly spaced rainbow colour per cluster label\n",
"colors_array = cm.rainbow(np.linspace(0, 1, kclusters))\n",
"rainbow = [colors.rgb2hex(i) for i in colors_array]\n",
"\n",
"# rows whose 'Cluster Labels' is NaN cannot be coloured, so they are left off the map\n",
"toronto_merged_nonan = toronto_merged.dropna(subset=['Cluster Labels'])\n",
"\n",
"# add markers to the map\n",
"for lat, lon, poi, cluster in zip(toronto_merged_nonan['Latitude'], toronto_merged_nonan['Longitude'], toronto_merged_nonan['Neighbourhood'], toronto_merged_nonan['Cluster Labels']):\n",
"    # the label column is float (it held NaN before dropna); use it as an int\n",
"    cluster_id = int(cluster)\n",
"    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster_id), parse_html=True)\n",
"    # index the palette with the label itself rather than label-1, which only\n",
"    # worked for label 0 through negative-index wraparound\n",
"    folium.CircleMarker(\n",
"        [lat, lon],\n",
"        radius=5,\n",
"        popup=label,\n",
"        color=rainbow[cluster_id],\n",
"        fill=True,\n",
"        fill_color=rainbow[cluster_id],\n",
"        fill_opacity=0.7).add_to(map_clusters)\n",
"\n",
"map_clusters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cluster 1"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Borough | \n",
" Cluster Labels | \n",
" 1st Most Common Venue | \n",
" 2nd Most Common Venue | \n",
" 3rd Most Common Venue | \n",
" 4th Most Common Venue | \n",
" 5th Most Common Venue | \n",
" 6th Most Common Venue | \n",
" 7th Most Common Venue | \n",
" 8th Most Common Venue | \n",
" 9th Most Common Venue | \n",
" 10th Most Common Venue | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" North York | \n",
" 0.0 | \n",
" Park | \n",
" Food & Drink Shop | \n",
" Yoga Studio | \n",
" Donut Shop | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
" Drugstore | \n",
"
\n",
" \n",
" 21 | \n",
" York | \n",
" 0.0 | \n",
" Park | \n",
" Women's Store | \n",
" Pool | \n",
" Yoga Studio | \n",
" Doner Restaurant | \n",
" Dim Sum Restaurant | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
"
\n",
" \n",
" 35 | \n",
" East York | \n",
" 0.0 | \n",
" Park | \n",
" Convenience Store | \n",
" Yoga Studio | \n",
" Donut Shop | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
" Dumpling Restaurant | \n",
"
\n",
" \n",
" 52 | \n",
" North York | \n",
" 0.0 | \n",
" Park | \n",
" Yoga Studio | \n",
" Donut Shop | \n",
" Dim Sum Restaurant | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
" Drugstore | \n",
"
\n",
" \n",
" 64 | \n",
" York | \n",
" 0.0 | \n",
" Park | \n",
" Convenience Store | \n",
" Yoga Studio | \n",
" Donut Shop | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
" Dumpling Restaurant | \n",
"
\n",
" \n",
" 66 | \n",
" North York | \n",
" 0.0 | \n",
" Park | \n",
" Construction & Landscaping | \n",
" Convenience Store | \n",
" Yoga Studio | \n",
" Donut Shop | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
"
\n",
" \n",
" 85 | \n",
" Scarborough | \n",
" 0.0 | \n",
" Park | \n",
" Playground | \n",
" Coffee Shop | \n",
" Yoga Studio | \n",
" Doner Restaurant | \n",
" Dim Sum Restaurant | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
"
\n",
" \n",
" 91 | \n",
" Downtown Toronto | \n",
" 0.0 | \n",
" Park | \n",
" Trail | \n",
" Playground | \n",
" Yoga Studio | \n",
" Dessert Shop | \n",
" Dim Sum Restaurant | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Borough Cluster Labels 1st Most Common Venue \\\n",
"0 North York 0.0 Park \n",
"21 York 0.0 Park \n",
"35 East York 0.0 Park \n",
"52 North York 0.0 Park \n",
"64 York 0.0 Park \n",
"66 North York 0.0 Park \n",
"85 Scarborough 0.0 Park \n",
"91 Downtown Toronto 0.0 Park \n",
"\n",
" 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n",
"0 Food & Drink Shop Yoga Studio Donut Shop \n",
"21 Women's Store Pool Yoga Studio \n",
"35 Convenience Store Yoga Studio Donut Shop \n",
"52 Yoga Studio Donut Shop Dim Sum Restaurant \n",
"64 Convenience Store Yoga Studio Donut Shop \n",
"66 Construction & Landscaping Convenience Store Yoga Studio \n",
"85 Playground Coffee Shop Yoga Studio \n",
"91 Trail Playground Yoga Studio \n",
"\n",
" 5th Most Common Venue 6th Most Common Venue 7th Most Common Venue \\\n",
"0 Diner Discount Store Distribution Center \n",
"21 Doner Restaurant Dim Sum Restaurant Diner \n",
"35 Diner Discount Store Distribution Center \n",
"52 Diner Discount Store Distribution Center \n",
"64 Diner Discount Store Distribution Center \n",
"66 Donut Shop Diner Discount Store \n",
"85 Doner Restaurant Dim Sum Restaurant Diner \n",
"91 Dessert Shop Dim Sum Restaurant Diner \n",
"\n",
" 8th Most Common Venue 9th Most Common Venue 10th Most Common Venue \n",
"0 Dog Run Doner Restaurant Drugstore \n",
"21 Discount Store Distribution Center Dog Run \n",
"35 Dog Run Doner Restaurant Dumpling Restaurant \n",
"52 Dog Run Doner Restaurant Drugstore \n",
"64 Dog Run Doner Restaurant Dumpling Restaurant \n",
"66 Distribution Center Dog Run Doner Restaurant \n",
"85 Discount Store Distribution Center Dog Run \n",
"91 Discount Store Distribution Center Dog Run "
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# label 0: show Borough (column position 1) plus every column from position 5 onward\n",
"toronto_merged_nonan[toronto_merged_nonan['Cluster Labels'] == 0].iloc[\n",
"    :, [1, *range(5, toronto_merged_nonan.shape[1])]\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cluster 2"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Borough | \n",
" Cluster Labels | \n",
" 1st Most Common Venue | \n",
" 2nd Most Common Venue | \n",
" 3rd Most Common Venue | \n",
" 4th Most Common Venue | \n",
" 5th Most Common Venue | \n",
" 6th Most Common Venue | \n",
" 7th Most Common Venue | \n",
" 8th Most Common Venue | \n",
" 9th Most Common Venue | \n",
" 10th Most Common Venue | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" North York | \n",
" 1.0 | \n",
" Hockey Arena | \n",
" Pizza Place | \n",
" Coffee Shop | \n",
" Portuguese Restaurant | \n",
" Yoga Studio | \n",
" Dim Sum Restaurant | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
"
\n",
" \n",
" 2 | \n",
" Downtown Toronto | \n",
" 1.0 | \n",
" Coffee Shop | \n",
" Bakery | \n",
" Pub | \n",
" Park | \n",
" Breakfast Spot | \n",
" Café | \n",
" Theater | \n",
" Yoga Studio | \n",
" Mexican Restaurant | \n",
" Shoe Store | \n",
"
\n",
" \n",
" 3 | \n",
" North York | \n",
" 1.0 | \n",
" Clothing Store | \n",
" Accessories Store | \n",
" Coffee Shop | \n",
" Boutique | \n",
" Miscellaneous Shop | \n",
" Event Space | \n",
" Furniture / Home Store | \n",
" Women's Store | \n",
" Vietnamese Restaurant | \n",
" Convenience Store | \n",
"
\n",
" \n",
" 4 | \n",
" Downtown Toronto | \n",
" 1.0 | \n",
" Coffee Shop | \n",
" Diner | \n",
" Yoga Studio | \n",
" Bar | \n",
" Beer Bar | \n",
" Smoothie Shop | \n",
" Sandwich Place | \n",
" Burrito Place | \n",
" Café | \n",
" Park | \n",
"
\n",
" \n",
" 6 | \n",
" Scarborough | \n",
" 1.0 | \n",
" Print Shop | \n",
" Fast Food Restaurant | \n",
" Yoga Studio | \n",
" Dim Sum Restaurant | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
" Donut Shop | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 97 | \n",
" Downtown Toronto | \n",
" 1.0 | \n",
" Coffee Shop | \n",
" Café | \n",
" Hotel | \n",
" Restaurant | \n",
" Gym | \n",
" Japanese Restaurant | \n",
" American Restaurant | \n",
" Steakhouse | \n",
" Asian Restaurant | \n",
" Seafood Restaurant | \n",
"
\n",
" \n",
" 98 | \n",
" Etobicoke | \n",
" 1.0 | \n",
" River | \n",
" Doner Restaurant | \n",
" Dessert Shop | \n",
" Dim Sum Restaurant | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Yoga Studio | \n",
" Department Store | \n",
"
\n",
" \n",
" 99 | \n",
" Downtown Toronto | \n",
" 1.0 | \n",
" Coffee Shop | \n",
" Sushi Restaurant | \n",
" Japanese Restaurant | \n",
" Gay Bar | \n",
" Restaurant | \n",
" Yoga Studio | \n",
" Bubble Tea Shop | \n",
" Dance Studio | \n",
" Mediterranean Restaurant | \n",
" Men's Store | \n",
"
\n",
" \n",
" 100 | \n",
" East Toronto | \n",
" 1.0 | \n",
" Light Rail Station | \n",
" Yoga Studio | \n",
" Auto Workshop | \n",
" Smoke Shop | \n",
" Brewery | \n",
" Spa | \n",
" Farmers Market | \n",
" Fast Food Restaurant | \n",
" Burrito Place | \n",
" Restaurant | \n",
"
\n",
" \n",
" 102 | \n",
" Etobicoke | \n",
" 1.0 | \n",
" Grocery Store | \n",
" Tanning Salon | \n",
" Convenience Store | \n",
" Discount Store | \n",
" Burrito Place | \n",
" Burger Joint | \n",
" Sandwich Place | \n",
" Kids Store | \n",
" Supplement Shop | \n",
" Bakery | \n",
"
\n",
" \n",
"
\n",
"
87 rows × 12 columns
\n",
"
"
],
"text/plain": [
" Borough Cluster Labels 1st Most Common Venue \\\n",
"1 North York 1.0 Hockey Arena \n",
"2 Downtown Toronto 1.0 Coffee Shop \n",
"3 North York 1.0 Clothing Store \n",
"4 Downtown Toronto 1.0 Coffee Shop \n",
"6 Scarborough 1.0 Print Shop \n",
".. ... ... ... \n",
"97 Downtown Toronto 1.0 Coffee Shop \n",
"98 Etobicoke 1.0 River \n",
"99 Downtown Toronto 1.0 Coffee Shop \n",
"100 East Toronto 1.0 Light Rail Station \n",
"102 Etobicoke 1.0 Grocery Store \n",
"\n",
" 2nd Most Common Venue 3rd Most Common Venue 4th Most Common Venue \\\n",
"1 Pizza Place Coffee Shop Portuguese Restaurant \n",
"2 Bakery Pub Park \n",
"3 Accessories Store Coffee Shop Boutique \n",
"4 Diner Yoga Studio Bar \n",
"6 Fast Food Restaurant Yoga Studio Dim Sum Restaurant \n",
".. ... ... ... \n",
"97 Café Hotel Restaurant \n",
"98 Doner Restaurant Dessert Shop Dim Sum Restaurant \n",
"99 Sushi Restaurant Japanese Restaurant Gay Bar \n",
"100 Yoga Studio Auto Workshop Smoke Shop \n",
"102 Tanning Salon Convenience Store Discount Store \n",
"\n",
" 5th Most Common Venue 6th Most Common Venue 7th Most Common Venue \\\n",
"1 Yoga Studio Dim Sum Restaurant Diner \n",
"2 Breakfast Spot Café Theater \n",
"3 Miscellaneous Shop Event Space Furniture / Home Store \n",
"4 Beer Bar Smoothie Shop Sandwich Place \n",
"6 Diner Discount Store Distribution Center \n",
".. ... ... ... \n",
"97 Gym Japanese Restaurant American Restaurant \n",
"98 Diner Discount Store Distribution Center \n",
"99 Restaurant Yoga Studio Bubble Tea Shop \n",
"100 Brewery Spa Farmers Market \n",
"102 Burrito Place Burger Joint Sandwich Place \n",
"\n",
" 8th Most Common Venue 9th Most Common Venue 10th Most Common Venue \n",
"1 Discount Store Distribution Center Dog Run \n",
"2 Yoga Studio Mexican Restaurant Shoe Store \n",
"3 Women's Store Vietnamese Restaurant Convenience Store \n",
"4 Burrito Place Café Park \n",
"6 Dog Run Doner Restaurant Donut Shop \n",
".. ... ... ... \n",
"97 Steakhouse Asian Restaurant Seafood Restaurant \n",
"98 Dog Run Yoga Studio Department Store \n",
"99 Dance Studio Mediterranean Restaurant Men's Store \n",
"100 Fast Food Restaurant Burrito Place Restaurant \n",
"102 Kids Store Supplement Shop Bakery \n",
"\n",
"[87 rows x 12 columns]"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 1, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cluster 3"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Borough | \n",
" Cluster Labels | \n",
" 1st Most Common Venue | \n",
" 2nd Most Common Venue | \n",
" 3rd Most Common Venue | \n",
" 4th Most Common Venue | \n",
" 5th Most Common Venue | \n",
" 6th Most Common Venue | \n",
" 7th Most Common Venue | \n",
" 8th Most Common Venue | \n",
" 9th Most Common Venue | \n",
" 10th Most Common Venue | \n",
"
\n",
" \n",
" \n",
" \n",
" 32 | \n",
" Scarborough | \n",
" 2.0 | \n",
" Pizza Place | \n",
" Playground | \n",
" Doner Restaurant | \n",
" Dessert Shop | \n",
" Dim Sum Restaurant | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Donut Shop | \n",
"
\n",
" \n",
" 50 | \n",
" North York | \n",
" 2.0 | \n",
" Pizza Place | \n",
" Department Store | \n",
" Event Space | \n",
" Ethiopian Restaurant | \n",
" Electronics Store | \n",
" Eastern European Restaurant | \n",
" Dumpling Restaurant | \n",
" Drugstore | \n",
" Donut Shop | \n",
" Doner Restaurant | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Borough Cluster Labels 1st Most Common Venue 2nd Most Common Venue \\\n",
"32 Scarborough 2.0 Pizza Place Playground \n",
"50 North York 2.0 Pizza Place Department Store \n",
"\n",
" 3rd Most Common Venue 4th Most Common Venue 5th Most Common Venue \\\n",
"32 Doner Restaurant Dessert Shop Dim Sum Restaurant \n",
"50 Event Space Ethiopian Restaurant Electronics Store \n",
"\n",
" 6th Most Common Venue 7th Most Common Venue 8th Most Common Venue \\\n",
"32 Diner Discount Store Distribution Center \n",
"50 Eastern European Restaurant Dumpling Restaurant Drugstore \n",
"\n",
" 9th Most Common Venue 10th Most Common Venue \n",
"32 Dog Run Donut Shop \n",
"50 Donut Shop Doner Restaurant "
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 2, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cluster 4"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Borough | \n",
" Cluster Labels | \n",
" 1st Most Common Venue | \n",
" 2nd Most Common Venue | \n",
" 3rd Most Common Venue | \n",
" 4th Most Common Venue | \n",
" 5th Most Common Venue | \n",
" 6th Most Common Venue | \n",
" 7th Most Common Venue | \n",
" 8th Most Common Venue | \n",
" 9th Most Common Venue | \n",
" 10th Most Common Venue | \n",
"
\n",
" \n",
" \n",
" \n",
" 57 | \n",
" North York | \n",
" 3.0 | \n",
" Fabric Shop | \n",
" Baseball Field | \n",
" Yoga Studio | \n",
" Donut Shop | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
" Drugstore | \n",
"
\n",
" \n",
" 101 | \n",
" Etobicoke | \n",
" 3.0 | \n",
" Baseball Field | \n",
" Yoga Studio | \n",
" Donut Shop | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
" Drugstore | \n",
" Farmers Market | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Borough Cluster Labels 1st Most Common Venue 2nd Most Common Venue \\\n",
"57 North York 3.0 Fabric Shop Baseball Field \n",
"101 Etobicoke 3.0 Baseball Field Yoga Studio \n",
"\n",
" 3rd Most Common Venue 4th Most Common Venue 5th Most Common Venue \\\n",
"57 Yoga Studio Donut Shop Diner \n",
"101 Donut Shop Diner Discount Store \n",
"\n",
" 6th Most Common Venue 7th Most Common Venue 8th Most Common Venue \\\n",
"57 Discount Store Distribution Center Dog Run \n",
"101 Distribution Center Dog Run Doner Restaurant \n",
"\n",
" 9th Most Common Venue 10th Most Common Venue \n",
"57 Doner Restaurant Drugstore \n",
"101 Drugstore Farmers Market "
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 3, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cluster 5"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Borough | \n",
" Cluster Labels | \n",
" 1st Most Common Venue | \n",
" 2nd Most Common Venue | \n",
" 3rd Most Common Venue | \n",
" 4th Most Common Venue | \n",
" 5th Most Common Venue | \n",
" 6th Most Common Venue | \n",
" 7th Most Common Venue | \n",
" 8th Most Common Venue | \n",
" 9th Most Common Venue | \n",
" 10th Most Common Venue | \n",
"
\n",
" \n",
" \n",
" \n",
" 11 | \n",
" Etobicoke | \n",
" 4.0 | \n",
" Jewelry Store | \n",
" Yoga Studio | \n",
" Diner | \n",
" Discount Store | \n",
" Distribution Center | \n",
" Dog Run | \n",
" Doner Restaurant | \n",
" Donut Shop | \n",
" Drugstore | \n",
" Farmers Market | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Borough Cluster Labels 1st Most Common Venue 2nd Most Common Venue \\\n",
"11 Etobicoke 4.0 Jewelry Store Yoga Studio \n",
"\n",
" 3rd Most Common Venue 4th Most Common Venue 5th Most Common Venue \\\n",
"11 Diner Discount Store Distribution Center \n",
"\n",
" 6th Most Common Venue 7th Most Common Venue 8th Most Common Venue \\\n",
"11 Dog Run Doner Restaurant Donut Shop \n",
"\n",
" 9th Most Common Venue 10th Most Common Venue \n",
"11 Drugstore Farmers Market "
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 4, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}