{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Meetup clustering sample.ipynb","version":"0.3.2","provenance":[]},"kernelspec":{"display_name":"Python [Root]","language":"python","name":"Python [Root]"}},"cells":[{"metadata":{"id":"yDJ_wyzrShx_","colab_type":"text"},"cell_type":"markdown","source":["# Designing ML - Week 4!\n","michelle.carney@berkeley.edu\n","\n","This is a Jupyter notebook on clustering meetup.com data! In this notebook, I have used the location and group information to cluster the members into 6 clusters - but it is up to you to figure out what they mean!"]},{"metadata":{"id":"T2pfi6LPShyB","colab_type":"code","colab":{}},"cell_type":"code","source":["#these are all of the libraries I'll be using - and I load the groups.csv data\n","import pandas as pd\n","import numpy as np\n","import random\n","import sklearn\n","from sklearn.datasets import make_blobs\n","from sklearn.cluster import KMeans\n","from sklearn.metrics import silhouette_samples, silhouette_score\n","import matplotlib.pyplot as plt\n","import matplotlib.cm as cm\n","import numpy as np\n","import matplotlib\n","%matplotlib inline\n","matplotlib.style.use('ggplot')\n","df = pd.read_csv('groups.csv')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"-FGh1jP-ShyI","colab_type":"text"},"cell_type":"markdown","source":["# Data Step\n","In this step, we're going to look at our dataframes and become familiar with what's in them.\n","This data was collected via the meetup.com API in December 2017."]},{"metadata":{"id":"wdlubD0_ShyJ","colab_type":"code","colab":{},"outputId":"3a7a217f-15d9-466c-f56c-bc70b078cffa"},"cell_type":"code","source":["#this is what the groups.csv looks like as a dataframe - it is about the groups\n","df.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
group_idcategory_idcategory.namecategory.shortnamecity_idcitycountrycreateddescriptiongroup_photo.base_url...organizer.photo.photo_linkorganizer.photo.thumb_linkorganizer.photo.typeratingstatetimezoneurlnameutc_offsetvisibilitywho
0638814health/wellbeinghealth-wellbeing10001New YorkUS2002-11-21 16:50:46Those who practice or hold a strong interest i...https://secure.meetupstatic.com...https://secure.meetupstatic.com/photos/member/...https://secure.meetupstatic.com/photos/member/...member4.39NYUS/Easternalternative-health-nyc-14400publicExplorers of Health
165104community/environmentcommunity-environment10001New YorkUS2003-05-20 14:48:54The New York Alternative Energy Meetupis for t...https://secure.meetupstatic.com...https://secure.meetupstatic.com/photos/member/...https://secure.meetupstatic.com/photos/member/...member4.31NYUS/Easternalternative-energy-meetup-14400publicClean Energy Supporters
2845826pets/animalspets-animals10001New YorkUS2004-03-27 09:55:41not_foundhttps://secure.meetupstatic.com...https://secure.meetupstatic.com/photos/member/...https://secure.meetupstatic.com/photos/member/...member4.84NYUS/EasternAnimals-14400publicAnimal Voices
3894029sci-fi/fantasysci-fi-fantasy10001New YorkUS2002-11-16 04:49:16Welcome to the The New York City Anime Meetup ...https://secure.meetupstatic.com...https://secure.meetupstatic.com/photos/member/...https://secure.meetupstatic.com/photos/member/...member4.46NYUS/EasternNYC-Anime-14400publicAnime Fans
41010426pets/animalspets-animals10001New YorkUS2003-10-22 21:39:49We welcome those who support pits, even if you...https://secure.meetupstatic.com...https://secure.meetupstatic.com/photos/member/...https://secure.meetupstatic.com/photos/member/...member4.09NYUS/EasternNYC-Pitbull-14400public_limitedNYC Pits & People, Dog Lovers
\n","

5 rows × 36 columns

\n","
"],"text/plain":[" group_id category_id category.name category.shortname \\\n","0 6388 14 health/wellbeing health-wellbeing \n","1 6510 4 community/environment community-environment \n","2 8458 26 pets/animals pets-animals \n","3 8940 29 sci-fi/fantasy sci-fi-fantasy \n","4 10104 26 pets/animals pets-animals \n","\n"," city_id city country created \\\n","0 10001 New York US 2002-11-21 16:50:46 \n","1 10001 New York US 2003-05-20 14:48:54 \n","2 10001 New York US 2004-03-27 09:55:41 \n","3 10001 New York US 2002-11-16 04:49:16 \n","4 10001 New York US 2003-10-22 21:39:49 \n","\n"," description \\\n","0 Those who practice or hold a strong interest i... \n","1 The New York Alternative Energy Meetupis for t... \n","2 not_found \n","3 Welcome to the The New York City Anime Meetup ... \n","4 We welcome those who support pits, even if you... \n","\n"," group_photo.base_url ... \\\n","0 https://secure.meetupstatic.com ... \n","1 https://secure.meetupstatic.com ... \n","2 https://secure.meetupstatic.com ... \n","3 https://secure.meetupstatic.com ... \n","4 https://secure.meetupstatic.com ... \n","\n"," organizer.photo.photo_link \\\n","0 https://secure.meetupstatic.com/photos/member/... \n","1 https://secure.meetupstatic.com/photos/member/... \n","2 https://secure.meetupstatic.com/photos/member/... \n","3 https://secure.meetupstatic.com/photos/member/... \n","4 https://secure.meetupstatic.com/photos/member/... \n","\n"," organizer.photo.thumb_link organizer.photo.type \\\n","0 https://secure.meetupstatic.com/photos/member/... member \n","1 https://secure.meetupstatic.com/photos/member/... member \n","2 https://secure.meetupstatic.com/photos/member/... member \n","3 https://secure.meetupstatic.com/photos/member/... member \n","4 https://secure.meetupstatic.com/photos/member/... 
member \n","\n"," rating state timezone urlname utc_offset \\\n","0 4.39 NY US/Eastern alternative-health-nyc -14400 \n","1 4.31 NY US/Eastern alternative-energy-meetup -14400 \n","2 4.84 NY US/Eastern Animals -14400 \n","3 4.46 NY US/Eastern NYC-Anime -14400 \n","4 4.09 NY US/Eastern NYC-Pitbull -14400 \n","\n"," visibility who \n","0 public Explorers of Health \n","1 public Clean Energy Supporters \n","2 public Animal Voices \n","3 public Anime Fans \n","4 public_limited NYC Pits & People, Dog Lovers \n","\n","[5 rows x 36 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"metadata":{"id":"YmobRscRShyQ","colab_type":"code","colab":{}},"cell_type":"code","source":["#this is the df about the members\n","df2 = pd.read_csv('members.csv', encoding = \"ISO-8859-1\")"],"execution_count":0,"outputs":[]},{"metadata":{"id":"MQaxs9PfShyS","colab_type":"code","colab":{},"outputId":"0a641218-58d3-4620-b62c-1ed9a6509f55"},"cell_type":"code","source":["df2.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
member_idbiocitycountryhometownjoinedlatlinklonmember_namestatemember_statusvisitedgroup_id
03not_foundNew YorkusNew York, NY2007-05-01 22:04:3740.72http://www.meetup.com/members/3-74.0Matt MeekerNYactive2009-09-18 18:32:23490552
13not_foundNew YorkusNew York, NY2011-01-23 14:13:1740.72http://www.meetup.com/members/3-74.0Matt MeekerNYactive2011-03-20 01:02:111474611
23Hi, I'm Matt. I'm an entrepreneur who has star...New YorkusNew York, NY2010-12-30 18:47:3440.72http://www.meetup.com/members/3-74.0Matt MeekerNYactive2011-01-18 20:37:231490492
33Hi, I'm Matt. I'm an entrepreneur who has star...New YorkusNew York, NY2011-01-03 14:45:2140.72http://www.meetup.com/members/3-74.0Matt MeekerNYactive2011-07-23 03:42:281515830
43Hi, I'm Matt. I'm an entrepreneur who has star...New YorkusNew York, NY2010-12-30 18:34:5040.72http://www.meetup.com/members/3-74.0Matt MeekerNYactive2011-06-13 18:33:231574965
\n","
"],"text/plain":[" member_id bio city \\\n","0 3 not_found New York \n","1 3 not_found New York \n","2 3 Hi, I'm Matt. I'm an entrepreneur who has star... New York \n","3 3 Hi, I'm Matt. I'm an entrepreneur who has star... New York \n","4 3 Hi, I'm Matt. I'm an entrepreneur who has star... New York \n","\n"," country hometown joined lat \\\n","0 us New York, NY 2007-05-01 22:04:37 40.72 \n","1 us New York, NY 2011-01-23 14:13:17 40.72 \n","2 us New York, NY 2010-12-30 18:47:34 40.72 \n","3 us New York, NY 2011-01-03 14:45:21 40.72 \n","4 us New York, NY 2010-12-30 18:34:50 40.72 \n","\n"," link lon member_name state member_status \\\n","0 http://www.meetup.com/members/3 -74.0 Matt Meeker NY active \n","1 http://www.meetup.com/members/3 -74.0 Matt Meeker NY active \n","2 http://www.meetup.com/members/3 -74.0 Matt Meeker NY active \n","3 http://www.meetup.com/members/3 -74.0 Matt Meeker NY active \n","4 http://www.meetup.com/members/3 -74.0 Matt Meeker NY active \n","\n"," visited group_id \n","0 2009-09-18 18:32:23 490552 \n","1 2011-03-20 01:02:11 1474611 \n","2 2011-01-18 20:37:23 1490492 \n","3 2011-07-23 03:42:28 1515830 \n","4 2011-06-13 18:33:23 1574965 "]},"metadata":{"tags":[]},"execution_count":4}]},{"metadata":{"id":"X0X_dnbUShyW","colab_type":"text"},"cell_type":"markdown","source":["## Data Cleaning Phase\n","In this phase, we'll sample a % of the data, and then use \"one hot\" encoding to turn string features into numbers for our mathematical models! (read more here: http://www.insightsbot.com/blog/zuyVu/python-one-hot-encoding-with-pandas-made-simple)"]},{"metadata":{"id":"dzpLJcnXShyW","colab_type":"code","colab":{}},"cell_type":"code","source":["# I am sampling to 50,000 random samples from the dataframe since it is just SOOO big! 
This will help my code run faster. NOTE: no random_state is passed to sample(), so every run draws a different set of rows.\n","df2_sample = df2.sample(n=50000)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"0FlDG5pYShyY","colab_type":"code","colab":{},"outputId":"01b5a776-d74e-4c6d-927b-ae2e41237c97"},"cell_type":"code","source":["#let's group the sample by member id and look at the first entry of each column for every member - what are the features we want to use?\n","df2_sample.groupby(['member_id']).first()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
biocitycountryhometownjoinedlatlinklonmember_namestatemember_statusvisitedgroup_id
member_id
3not_foundNew YorkusNew York, NY2007-05-01 22:04:3740.72http://www.meetup.com/members/3-74.00Matt MeekerNYactive2009-09-18 18:32:23490552
6Community organizerNew Yorkusnot_found2013-09-11 00:42:0640.73http://www.meetup.com/members/6-74.00Scott HeifermanNYactive2014-09-20 12:28:38113455
36not_foundNew YorkusNew York2010-07-27 18:44:2440.80http://www.meetup.com/members/36-73.97Mark HurstNYactive2013-06-26 13:31:37703741
65I work on Go at Google.San FranciscousPortland2012-03-20 05:29:1037.74http://www.meetup.com/members/65-122.44Brad FitzpatrickCAactive2017-06-03 06:22:282701562
82I write code for a living and occasionally dab...San FranciscousNY / SF2014-05-16 23:24:4437.78http://www.meetup.com/members/82-122.42Maggie NelsonCAactive2014-05-16 23:24:441811614
117not_foundNew Yorkusnot_found2002-06-16 17:10:3240.75http://www.meetup.com/members/117-73.99DaveVockellNYactive2004-08-05 02:48:10131291
150not_foundNew YorkusNew York2008-05-12 20:29:1240.71http://www.meetup.com/members/150-74.02Rex SorgatzNYactive2017-01-06 21:16:26272793
176not_foundSan Franciscousnot_found2006-04-01 20:36:2737.79http://www.meetup.com/members/176-122.40Cal HendersonCAactive2014-05-12 21:18:22120903
210Chris Kramer - I've been running affiliate pro...New Yorkusnot_found2008-03-12 00:11:2740.74http://www.meetup.com/members/210-74.00chris kramerNYactive2013-09-28 11:38:07255307
227not_foundSan Franciscousnot_found2003-07-10 22:08:4737.79http://www.meetup.com/members/227-122.41Patrick BreitenbachCAactive2011-07-26 21:34:3354659
335not_foundSan FranciscousGrass Valley, CA2013-04-22 19:02:3637.78http://www.meetup.com/members/335-122.42BarakCAactive2016-07-10 17:30:51107592
428not_foundSan FranciscousSan Francisco2012-01-16 00:15:2037.72http://www.meetup.com/members/428-122.44David PippengerCAactive2012-01-16 00:15:201060260
819not_foundChicagousChicago2015-03-19 04:00:4242.01http://www.meetup.com/members/819-87.74JamesILactive2015-03-19 04:00:42514628
848not_foundNew Yorkusnot_found2015-03-25 21:19:2940.75http://www.meetup.com/members/848-73.99alex chanNYactive2016-06-02 14:00:1187095
883not_foundSan FranciscousBoston2014-09-18 21:43:4437.80http://www.meetup.com/members/883-122.44Todd AgulnickCAactive2014-10-10 19:42:1417009192
887I'm a software developer consultant with Thoug...New YorkusDallas2012-10-30 17:43:5940.76http://www.meetup.com/members/887-73.97KrisNYactive2013-02-07 00:37:161777521
1230Hi, I'm Ryan... I like hacking hardware, 3d pr...San FranciscousSan Francisco2013-12-15 19:59:5937.78http://www.meetup.com/members/1230-122.46ryan nelsonCAactive2014-03-04 22:28:341240980
1502not_foundNew YorkusNew York2009-10-08 06:31:3140.72http://www.meetup.com/members/1502-73.98AnilNYactive2009-10-08 06:31:311282709
1581not_foundSan Franciscousnot_found2012-08-06 18:52:5937.78http://www.meetup.com/members/1581-122.44Mark BallewCAactive2017-05-23 05:21:551788730
1945not_foundChicagousChicago2005-06-30 13:15:4641.94http://www.meetup.com/members/1945-87.65BenILactive2008-04-14 21:52:45107575
2629Co-founder of awe.sm. I do databases and anyth...San Franciscousnot_found2012-03-15 23:01:2237.75http://www.meetup.com/members/2629-122.42SeldoCAactive2017-09-28 20:38:26107604
2889I've done a little bit of everything in the di...New Yorkusnot_found2010-08-26 00:25:2240.74http://www.meetup.com/members/2889-73.99LisaNYactive2010-09-08 18:42:581642043
3045Hello there. I'm an entrepreneur and developer...San FranciscousUSA2011-03-30 00:39:3637.79http://www.meetup.com/members/3045-122.41Neil MansillaCAactive2013-05-28 21:15:1754659
3402not_foundNew YorkusNYC2016-06-10 20:00:5140.79http://www.meetup.com/members/3402-73.95Chaz AntonelliNYactive2017-07-15 14:56:01107592
3588not_foundSan Franciscousnot_found2015-03-16 18:14:2837.78http://www.meetup.com/members/3588-122.42David GustafsonCAactive2015-10-03 20:59:39230033
3705not_foundChicagousnot_found2011-06-06 15:54:0841.97http://www.meetup.com/members/3705-87.70Jeremy McMillanILactive2015-02-15 16:08:22192016
3735Frequent commuter and occasional weekend riderSan Franciscousnot_found2013-05-05 02:06:4237.79http://www.meetup.com/members/3735-122.40ChrisCAactive2013-09-29 17:55:27618694
3811not_foundSan Franciscousnot_found2016-01-08 19:32:3737.79http://www.meetup.com/members/3811-122.40David BarrCAactive2016-01-26 19:35:0119253477
3944not_foundSan Franciscousnot_found2007-06-25 09:08:3837.77http://www.meetup.com/members/3944-122.44Liz DizonCAactive2016-05-09 20:51:30228852
3999Rubyist at Goldbely.comNew YorkusNew York2011-01-19 18:13:5740.75http://www.meetup.com/members/3999-73.99Trevor StowNYactive2017-08-14 15:32:031768544
..........................................
240816906not_foundSan Franciscousnot_found2017-11-09 06:07:0937.78http://www.meetup.com/members/240816906-122.42Christi SpannCAactive2017-11-09 06:07:0925137308
240817162not_foundNew Yorkusnot_found2017-11-09 06:13:5940.75http://www.meetup.com/members/240817162-73.99GOKTUG KASALNYactive2017-11-09 06:13:5918899254
240817878not_foundSan Franciscousnot_found2017-11-09 06:32:1837.77http://www.meetup.com/members/240817878-122.40Tanay RashinkarCAactive2017-11-09 06:32:1818825676
240818081not_foundNew Yorkusnot_found2017-11-09 06:38:5640.75http://www.meetup.com/members/240818081-73.99Kevin WrightNYactive2017-11-09 06:38:5620197789
240819321not_foundNew Yorkusnot_found2017-11-09 07:08:1140.75http://www.meetup.com/members/240819321-73.99Jungyoon KimNYactive2017-11-09 07:08:1119435902
240820767not_foundNew Yorkusnot_found2017-11-09 07:47:0440.72http://www.meetup.com/members/240820767-73.98Dannah GottliebNYactive2017-11-09 07:47:0424834040
240823125not_foundNew Yorkusnot_found2017-11-09 08:42:4040.75http://www.meetup.com/members/240823125-73.99Christine PandjaitanNYactive2017-11-09 08:42:4023695230
240830560Parts Sales Rep @ Standard Equipment Company &...Chicagousnot_found2017-11-09 11:46:5641.94http://www.meetup.com/members/240830560-87.75Mike KowalczykILactive2017-11-09 11:46:5624317440
240830739not_foundChicagousnot_found2017-11-09 13:12:5441.70http://www.meetup.com/members/240830739-87.66Jalen OnoratiILactive2017-11-09 13:12:547508692
240833111not_foundChicagousnot_found2017-11-09 12:44:0641.92http://www.meetup.com/members/240833111-87.65Leaquat Hassan JunuILactive2017-11-09 12:44:0624252421
240833981not_foundNew Yorkusnot_found2017-11-09 13:04:2640.75http://www.meetup.com/members/240833981-73.99Daniel ValcourtNYactive2017-11-09 13:04:2625484015
240834173not_foundNew Yorkusnot_found2017-11-09 13:04:2240.75http://www.meetup.com/members/240834173-73.98FelixNYactive2017-11-09 13:04:2221016346
240835211not_foundNew Yorkusnot_found2017-11-09 13:25:4940.75http://www.meetup.com/members/240835211-73.99Vijay ShingalaNYactive2017-11-09 13:25:4926327411
240837395not_foundNew Yorkusnot_found2017-11-09 14:09:4740.75http://www.meetup.com/members/240837395-73.99Brooke NoellNYactive2017-11-09 14:09:4720343769
240837474not_foundNew Yorkusnot_found2017-11-09 14:15:4940.75http://www.meetup.com/members/240837474-73.99Joseph CahillNYactive2017-11-09 14:15:4923412860
240838597not_foundNew Yorkusnot_found2017-11-09 14:43:0740.75http://www.meetup.com/members/240838597-73.98Kary HerreraNYactive2017-11-09 14:43:0724834040
240838614not_foundNew Yorkusnot_found2017-11-09 14:33:2040.74http://www.meetup.com/members/240838614-73.99AndrianaNYactive2017-11-09 14:33:20860035
240840567not_foundNew Yorkusnot_found2017-11-09 15:03:1940.75http://www.meetup.com/members/240840567-73.99Aileen ZNYactive2017-11-09 15:03:1926327411
240840580not_foundNew Yorkusnot_found2017-11-09 15:13:3940.75http://www.meetup.com/members/240840580-73.99Stefaniya LexandrovnaNYactive2017-11-09 15:13:3920648888
240841318not_foundChicagousnot_found2017-11-09 15:18:1841.88http://www.meetup.com/members/240841318-87.62Altan ErdemirILactive2017-11-09 15:18:1826071452
240841346not_foundNew Yorkusnot_found2017-11-09 15:18:0440.75http://www.meetup.com/members/240841346-73.99Missy SmithNYactive2017-11-09 15:18:0425815190
240841863not_foundNew Yorkusnot_found2017-11-09 15:32:3740.75http://www.meetup.com/members/240841863-73.99Tara M.NYactive2017-11-09 15:32:3725815190
240842594not_foundNew Yorkusnot_found2017-11-09 15:38:5340.75http://www.meetup.com/members/240842594-73.99Jade WangNYactive2017-11-09 15:38:5320167049
240842680not_foundSan Franciscousnot_found2017-11-09 15:55:2437.77http://www.meetup.com/members/240842680-122.41Liviu-Marian NegrilaCAactive2017-11-09 15:55:2420234705
240842986not_foundChicagousnot_found2017-11-09 15:48:4741.94http://www.meetup.com/members/240842986-87.65Trisha OrozcoILactive2017-11-09 15:48:4723270826
240845614not_foundNew Yorkusnot_found2017-11-09 16:39:4340.84http://www.meetup.com/members/240845614-73.94PriyaNYactive2017-11-09 16:39:4323738973
240845866not_foundNew Yorkusnot_found2017-11-09 16:40:4240.75http://www.meetup.com/members/240845866-73.99Eric SeamanNYactive2017-11-09 16:40:4225783205
240846998not_foundNew Yorkusnot_found2017-11-09 16:51:5340.75http://www.meetup.com/members/240846998-73.99Janeille PitaNYactive2017-11-09 16:51:5320979932
240849026not_foundNew Yorkusnot_found2017-11-09 17:24:1440.81http://www.meetup.com/members/240849026-73.95HU YangNYactive2017-11-09 17:24:1426298738
240852081not_foundNew Yorkusnot_found2017-11-09 18:18:0540.71http://www.meetup.com/members/240852081-74.00James WeitzNYactive2017-11-09 18:18:0526226036
\n","

1087923 rows × 13 columns

\n","
"],"text/plain":[" bio city \\\n","member_id \n","3 not_found New York \n","6 Community organizer New York \n","36 not_found New York \n","65 I work on Go at Google. San Francisco \n","82 I write code for a living and occasionally dab... San Francisco \n","117 not_found New York \n","150 not_found New York \n","176 not_found San Francisco \n","210 Chris Kramer - I've been running affiliate pro... New York \n","227 not_found San Francisco \n","335 not_found San Francisco \n","428 not_found San Francisco \n","819 not_found Chicago \n","848 not_found New York \n","883 not_found San Francisco \n","887 I'm a software developer consultant with Thoug... New York \n","1230 Hi, I'm Ryan... I like hacking hardware, 3d pr... San Francisco \n","1502 not_found New York \n","1581 not_found San Francisco \n","1945 not_found Chicago \n","2629 Co-founder of awe.sm. I do databases and anyth... San Francisco \n","2889 I've done a little bit of everything in the di... New York \n","3045 Hello there. I'm an entrepreneur and developer... San Francisco \n","3402 not_found New York \n","3588 not_found San Francisco \n","3705 not_found Chicago \n","3735 Frequent commuter and occasional weekend rider San Francisco \n","3811 not_found San Francisco \n","3944 not_found San Francisco \n","3999 Rubyist at Goldbely.com New York \n","... ... ... \n","240816906 not_found San Francisco \n","240817162 not_found New York \n","240817878 not_found San Francisco \n","240818081 not_found New York \n","240819321 not_found New York \n","240820767 not_found New York \n","240823125 not_found New York \n","240830560 Parts Sales Rep @ Standard Equipment Company &... 
Chicago \n","240830739 not_found Chicago \n","240833111 not_found Chicago \n","240833981 not_found New York \n","240834173 not_found New York \n","240835211 not_found New York \n","240837395 not_found New York \n","240837474 not_found New York \n","240838597 not_found New York \n","240838614 not_found New York \n","240840567 not_found New York \n","240840580 not_found New York \n","240841318 not_found Chicago \n","240841346 not_found New York \n","240841863 not_found New York \n","240842594 not_found New York \n","240842680 not_found San Francisco \n","240842986 not_found Chicago \n","240845614 not_found New York \n","240845866 not_found New York \n","240846998 not_found New York \n","240849026 not_found New York \n","240852081 not_found New York \n","\n"," country hometown joined lat \\\n","member_id \n","3 us New York, NY 2007-05-01 22:04:37 40.72 \n","6 us not_found 2013-09-11 00:42:06 40.73 \n","36 us New York 2010-07-27 18:44:24 40.80 \n","65 us Portland 2012-03-20 05:29:10 37.74 \n","82 us NY / SF 2014-05-16 23:24:44 37.78 \n","117 us not_found 2002-06-16 17:10:32 40.75 \n","150 us New York 2008-05-12 20:29:12 40.71 \n","176 us not_found 2006-04-01 20:36:27 37.79 \n","210 us not_found 2008-03-12 00:11:27 40.74 \n","227 us not_found 2003-07-10 22:08:47 37.79 \n","335 us Grass Valley, CA 2013-04-22 19:02:36 37.78 \n","428 us San Francisco 2012-01-16 00:15:20 37.72 \n","819 us Chicago 2015-03-19 04:00:42 42.01 \n","848 us not_found 2015-03-25 21:19:29 40.75 \n","883 us Boston 2014-09-18 21:43:44 37.80 \n","887 us Dallas 2012-10-30 17:43:59 40.76 \n","1230 us San Francisco 2013-12-15 19:59:59 37.78 \n","1502 us New York 2009-10-08 06:31:31 40.72 \n","1581 us not_found 2012-08-06 18:52:59 37.78 \n","1945 us Chicago 2005-06-30 13:15:46 41.94 \n","2629 us not_found 2012-03-15 23:01:22 37.75 \n","2889 us not_found 2010-08-26 00:25:22 40.74 \n","3045 us USA 2011-03-30 00:39:36 37.79 \n","3402 us NYC 2016-06-10 20:00:51 40.79 \n","3588 us not_found 2015-03-16 18:14:28 
37.78 \n","3705 us not_found 2011-06-06 15:54:08 41.97 \n","3735 us not_found 2013-05-05 02:06:42 37.79 \n","3811 us not_found 2016-01-08 19:32:37 37.79 \n","3944 us not_found 2007-06-25 09:08:38 37.77 \n","3999 us New York 2011-01-19 18:13:57 40.75 \n","... ... ... ... ... \n","240816906 us not_found 2017-11-09 06:07:09 37.78 \n","240817162 us not_found 2017-11-09 06:13:59 40.75 \n","240817878 us not_found 2017-11-09 06:32:18 37.77 \n","240818081 us not_found 2017-11-09 06:38:56 40.75 \n","240819321 us not_found 2017-11-09 07:08:11 40.75 \n","240820767 us not_found 2017-11-09 07:47:04 40.72 \n","240823125 us not_found 2017-11-09 08:42:40 40.75 \n","240830560 us not_found 2017-11-09 11:46:56 41.94 \n","240830739 us not_found 2017-11-09 13:12:54 41.70 \n","240833111 us not_found 2017-11-09 12:44:06 41.92 \n","240833981 us not_found 2017-11-09 13:04:26 40.75 \n","240834173 us not_found 2017-11-09 13:04:22 40.75 \n","240835211 us not_found 2017-11-09 13:25:49 40.75 \n","240837395 us not_found 2017-11-09 14:09:47 40.75 \n","240837474 us not_found 2017-11-09 14:15:49 40.75 \n","240838597 us not_found 2017-11-09 14:43:07 40.75 \n","240838614 us not_found 2017-11-09 14:33:20 40.74 \n","240840567 us not_found 2017-11-09 15:03:19 40.75 \n","240840580 us not_found 2017-11-09 15:13:39 40.75 \n","240841318 us not_found 2017-11-09 15:18:18 41.88 \n","240841346 us not_found 2017-11-09 15:18:04 40.75 \n","240841863 us not_found 2017-11-09 15:32:37 40.75 \n","240842594 us not_found 2017-11-09 15:38:53 40.75 \n","240842680 us not_found 2017-11-09 15:55:24 37.77 \n","240842986 us not_found 2017-11-09 15:48:47 41.94 \n","240845614 us not_found 2017-11-09 16:39:43 40.84 \n","240845866 us not_found 2017-11-09 16:40:42 40.75 \n","240846998 us not_found 2017-11-09 16:51:53 40.75 \n","240849026 us not_found 2017-11-09 17:24:14 40.81 \n","240852081 us not_found 2017-11-09 18:18:05 40.71 \n","\n"," link lon \\\n","member_id \n","3 http://www.meetup.com/members/3 -74.00 \n","6 
http://www.meetup.com/members/6 -74.00 \n","36 http://www.meetup.com/members/36 -73.97 \n","65 http://www.meetup.com/members/65 -122.44 \n","82 http://www.meetup.com/members/82 -122.42 \n","117 http://www.meetup.com/members/117 -73.99 \n","150 http://www.meetup.com/members/150 -74.02 \n","176 http://www.meetup.com/members/176 -122.40 \n","210 http://www.meetup.com/members/210 -74.00 \n","227 http://www.meetup.com/members/227 -122.41 \n","335 http://www.meetup.com/members/335 -122.42 \n","428 http://www.meetup.com/members/428 -122.44 \n","819 http://www.meetup.com/members/819 -87.74 \n","848 http://www.meetup.com/members/848 -73.99 \n","883 http://www.meetup.com/members/883 -122.44 \n","887 http://www.meetup.com/members/887 -73.97 \n","1230 http://www.meetup.com/members/1230 -122.46 \n","1502 http://www.meetup.com/members/1502 -73.98 \n","1581 http://www.meetup.com/members/1581 -122.44 \n","1945 http://www.meetup.com/members/1945 -87.65 \n","2629 http://www.meetup.com/members/2629 -122.42 \n","2889 http://www.meetup.com/members/2889 -73.99 \n","3045 http://www.meetup.com/members/3045 -122.41 \n","3402 http://www.meetup.com/members/3402 -73.95 \n","3588 http://www.meetup.com/members/3588 -122.42 \n","3705 http://www.meetup.com/members/3705 -87.70 \n","3735 http://www.meetup.com/members/3735 -122.40 \n","3811 http://www.meetup.com/members/3811 -122.40 \n","3944 http://www.meetup.com/members/3944 -122.44 \n","3999 http://www.meetup.com/members/3999 -73.99 \n","... ... ... 
\n","240816906 http://www.meetup.com/members/240816906 -122.42 \n","240817162 http://www.meetup.com/members/240817162 -73.99 \n","240817878 http://www.meetup.com/members/240817878 -122.40 \n","240818081 http://www.meetup.com/members/240818081 -73.99 \n","240819321 http://www.meetup.com/members/240819321 -73.99 \n","240820767 http://www.meetup.com/members/240820767 -73.98 \n","240823125 http://www.meetup.com/members/240823125 -73.99 \n","240830560 http://www.meetup.com/members/240830560 -87.75 \n","240830739 http://www.meetup.com/members/240830739 -87.66 \n","240833111 http://www.meetup.com/members/240833111 -87.65 \n","240833981 http://www.meetup.com/members/240833981 -73.99 \n","240834173 http://www.meetup.com/members/240834173 -73.98 \n","240835211 http://www.meetup.com/members/240835211 -73.99 \n","240837395 http://www.meetup.com/members/240837395 -73.99 \n","240837474 http://www.meetup.com/members/240837474 -73.99 \n","240838597 http://www.meetup.com/members/240838597 -73.98 \n","240838614 http://www.meetup.com/members/240838614 -73.99 \n","240840567 http://www.meetup.com/members/240840567 -73.99 \n","240840580 http://www.meetup.com/members/240840580 -73.99 \n","240841318 http://www.meetup.com/members/240841318 -87.62 \n","240841346 http://www.meetup.com/members/240841346 -73.99 \n","240841863 http://www.meetup.com/members/240841863 -73.99 \n","240842594 http://www.meetup.com/members/240842594 -73.99 \n","240842680 http://www.meetup.com/members/240842680 -122.41 \n","240842986 http://www.meetup.com/members/240842986 -87.65 \n","240845614 http://www.meetup.com/members/240845614 -73.94 \n","240845866 http://www.meetup.com/members/240845866 -73.99 \n","240846998 http://www.meetup.com/members/240846998 -73.99 \n","240849026 http://www.meetup.com/members/240849026 -73.95 \n","240852081 http://www.meetup.com/members/240852081 -74.00 \n","\n"," member_name state member_status visited \\\n","member_id \n","3 Matt Meeker NY active 2009-09-18 18:32:23 \n","6 Scott 
Heiferman NY active 2014-09-20 12:28:38 \n","36 Mark Hurst NY active 2013-06-26 13:31:37 \n","65 Brad Fitzpatrick CA active 2017-06-03 06:22:28 \n","82 Maggie Nelson CA active 2014-05-16 23:24:44 \n","117 DaveVockell NY active 2004-08-05 02:48:10 \n","150 Rex Sorgatz NY active 2017-01-06 21:16:26 \n","176 Cal Henderson CA active 2014-05-12 21:18:22 \n","210 chris kramer NY active 2013-09-28 11:38:07 \n","227 Patrick Breitenbach CA active 2011-07-26 21:34:33 \n","335 Barak CA active 2016-07-10 17:30:51 \n","428 David Pippenger CA active 2012-01-16 00:15:20 \n","819 James IL active 2015-03-19 04:00:42 \n","848 alex chan NY active 2016-06-02 14:00:11 \n","883 Todd Agulnick CA active 2014-10-10 19:42:14 \n","887 Kris NY active 2013-02-07 00:37:16 \n","1230 ryan nelson CA active 2014-03-04 22:28:34 \n","1502 Anil NY active 2009-10-08 06:31:31 \n","1581 Mark Ballew CA active 2017-05-23 05:21:55 \n","1945 Ben IL active 2008-04-14 21:52:45 \n","2629 Seldo CA active 2017-09-28 20:38:26 \n","2889 Lisa NY active 2010-09-08 18:42:58 \n","3045 Neil Mansilla CA active 2013-05-28 21:15:17 \n","3402 Chaz Antonelli NY active 2017-07-15 14:56:01 \n","3588 David Gustafson CA active 2015-10-03 20:59:39 \n","3705 Jeremy McMillan IL active 2015-02-15 16:08:22 \n","3735 Chris CA active 2013-09-29 17:55:27 \n","3811 David Barr CA active 2016-01-26 19:35:01 \n","3944 Liz Dizon CA active 2016-05-09 20:51:30 \n","3999 Trevor Stow NY active 2017-08-14 15:32:03 \n","... ... ... ... ... 
\n","240816906 Christi Spann CA active 2017-11-09 06:07:09 \n","240817162 GOKTUG KASAL NY active 2017-11-09 06:13:59 \n","240817878 Tanay Rashinkar CA active 2017-11-09 06:32:18 \n","240818081 Kevin Wright NY active 2017-11-09 06:38:56 \n","240819321 Jungyoon Kim NY active 2017-11-09 07:08:11 \n","240820767 Dannah Gottlieb NY active 2017-11-09 07:47:04 \n","240823125 Christine Pandjaitan NY active 2017-11-09 08:42:40 \n","240830560 Mike Kowalczyk IL active 2017-11-09 11:46:56 \n","240830739 Jalen Onorati IL active 2017-11-09 13:12:54 \n","240833111 Leaquat Hassan Junu IL active 2017-11-09 12:44:06 \n","240833981 Daniel Valcourt NY active 2017-11-09 13:04:26 \n","240834173 Felix NY active 2017-11-09 13:04:22 \n","240835211 Vijay Shingala NY active 2017-11-09 13:25:49 \n","240837395 Brooke Noell NY active 2017-11-09 14:09:47 \n","240837474 Joseph Cahill NY active 2017-11-09 14:15:49 \n","240838597 Kary Herrera NY active 2017-11-09 14:43:07 \n","240838614 Andriana NY active 2017-11-09 14:33:20 \n","240840567 Aileen Z NY active 2017-11-09 15:03:19 \n","240840580 Stefaniya Lexandrovna NY active 2017-11-09 15:13:39 \n","240841318 Altan Erdemir IL active 2017-11-09 15:18:18 \n","240841346 Missy Smith NY active 2017-11-09 15:18:04 \n","240841863 Tara M. 
NY active 2017-11-09 15:32:37 \n","240842594 Jade Wang NY active 2017-11-09 15:38:53 \n","240842680 Liviu-Marian Negrila CA active 2017-11-09 15:55:24 \n","240842986 Trisha Orozco IL active 2017-11-09 15:48:47 \n","240845614 Priya NY active 2017-11-09 16:39:43 \n","240845866 Eric Seaman NY active 2017-11-09 16:40:42 \n","240846998 Janeille Pita NY active 2017-11-09 16:51:53 \n","240849026 HU Yang NY active 2017-11-09 17:24:14 \n","240852081 James Weitz NY active 2017-11-09 18:18:05 \n","\n"," group_id \n","member_id \n","3 490552 \n","6 113455 \n","36 703741 \n","65 2701562 \n","82 1811614 \n","117 131291 \n","150 272793 \n","176 120903 \n","210 255307 \n","227 54659 \n","335 107592 \n","428 1060260 \n","819 514628 \n","848 87095 \n","883 17009192 \n","887 1777521 \n","1230 1240980 \n","1502 1282709 \n","1581 1788730 \n","1945 107575 \n","2629 107604 \n","2889 1642043 \n","3045 54659 \n","3402 107592 \n","3588 230033 \n","3705 192016 \n","3735 618694 \n","3811 19253477 \n","3944 228852 \n","3999 1768544 \n","... ... 
\n","240816906 25137308 \n","240817162 18899254 \n","240817878 18825676 \n","240818081 20197789 \n","240819321 19435902 \n","240820767 24834040 \n","240823125 23695230 \n","240830560 24317440 \n","240830739 7508692 \n","240833111 24252421 \n","240833981 25484015 \n","240834173 21016346 \n","240835211 26327411 \n","240837395 20343769 \n","240837474 23412860 \n","240838597 24834040 \n","240838614 860035 \n","240840567 26327411 \n","240840580 20648888 \n","240841318 26071452 \n","240841346 25815190 \n","240841863 25815190 \n","240842594 20167049 \n","240842680 20234705 \n","240842986 23270826 \n","240845614 23738973 \n","240845866 25783205 \n","240846998 20979932 \n","240849026 26298738 \n","240852081 26226036 \n","\n","[1087923 rows x 13 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"metadata":{"id":"YBlnYiCiShya","colab_type":"code","colab":{}},"cell_type":"code","source":["#One feature I want to use is the GROUP ID - one thing that we can do is \"get dummies\" or \"one-hot encoding\" to \n","#turn string variables into numbers! look at it below\n","df2_sample_dummies = pd.get_dummies(df2_sample['group_id'], prefix = 'group_id')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"ITFFW-1WShyb","colab_type":"code","colab":{}},"cell_type":"code","source":["# df2_sample_dummies_first = df2_sample_dummies.groupby(['member_id']).first()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"klPBHaYaShyc","colab_type":"code","colab":{},"outputId":"abcdb5bf-857c-45ba-f424-2ded26a8fc9c"},"cell_type":"code","source":["#this is what it looks like to have \"dummies\" or one-hot encoded variables! \n","#http://www.insightsbot.com/blog/zuyVu/python-one-hot-encoding-with-pandas-made-simple \n","# show the frame built from the sample two cells above, so this cell works on a fresh kernel\n","df2_sample_dummies.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
group_id_6388group_id_6510group_id_8458group_id_8940group_id_12542group_id_12907group_id_14573group_id_15324group_id_16620group_id_17921...group_id_26371769group_id_26372763group_id_26373602group_id_26374579group_id_26374655group_id_26375445group_id_26376543group_id_26377698group_id_26378067group_id_26378128
00000000000...0000000000
10000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000000
\n","

5 rows × 12546 columns

\n","
"],"text/plain":[" group_id_6388 group_id_6510 group_id_8458 group_id_8940 group_id_12542 \\\n","0 0 0 0 0 0 \n","1 0 0 0 0 0 \n","2 0 0 0 0 0 \n","3 0 0 0 0 0 \n","4 0 0 0 0 0 \n","\n"," group_id_12907 group_id_14573 group_id_15324 group_id_16620 \\\n","0 0 0 0 0 \n","1 0 0 0 0 \n","2 0 0 0 0 \n","3 0 0 0 0 \n","4 0 0 0 0 \n","\n"," group_id_17921 ... group_id_26371769 group_id_26372763 \\\n","0 0 ... 0 0 \n","1 0 ... 0 0 \n","2 0 ... 0 0 \n","3 0 ... 0 0 \n","4 0 ... 0 0 \n","\n"," group_id_26373602 group_id_26374579 group_id_26374655 group_id_26375445 \\\n","0 0 0 0 0 \n","1 0 0 0 0 \n","2 0 0 0 0 \n","3 0 0 0 0 \n","4 0 0 0 0 \n","\n"," group_id_26376543 group_id_26377698 group_id_26378067 group_id_26378128 \n","0 0 0 0 0 \n","1 0 0 0 0 \n","2 0 0 0 0 \n","3 0 0 0 0 \n","4 0 0 0 0 \n","\n","[5 rows x 12546 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"metadata":{"id":"KpdqG1nRShye","colab_type":"code","colab":{}},"cell_type":"code","source":["#Let's combine it back to our original dataframe \n","df2_sample_dummies_concat = pd.concat([df2_sample, df2_sample_dummies], axis=1)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"_LkLYWMlShyf","colab_type":"code","colab":{},"outputId":"356c59a7-6fb3-47d1-b265-6c007f18770f"},"cell_type":"code","source":["df2_sample_dummies_concat.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
member_idbiocitycountryhometownjoinedlatlinklonmember_name...group_id_26344309group_id_26347789group_id_26350181group_id_26350972group_id_26352410group_id_26355546group_id_26361954group_id_26365189group_id_26371769group_id_26372763
5715312234880949not_foundSan Franciscousnot_found2017-08-29 18:43:2637.78http://www.meetup.com/members/234880949-122.42Justine Jennings...0000000000
493951831033not_foundSan Franciscousnot_found2011-04-29 05:36:1837.77http://www.meetup.com/members/1831033-122.40Ines Sombra...0000000000
117748815422371looking forward to playing more soccerChicagousChicago2013-02-01 05:41:4741.92http://www.meetup.com/members/15422371-87.70Enrique...0000000000
2243458101653742not_foundNew Yorkusnot_found2013-07-10 18:53:4540.76http://www.meetup.com/members/101653742-73.99M...0000000000
5738270235589417not_foundNew Yorkusnot_found2017-09-08 02:03:2240.80http://www.meetup.com/members/235589417-73.97Yuanyuan (Yoannie) Lei...0000000000
\n","

5 rows × 8200 columns

\n","
"],"text/plain":[" member_id bio city \\\n","5715312 234880949 not_found San Francisco \n","49395 1831033 not_found San Francisco \n","1177488 15422371 looking forward to playing more soccer Chicago \n","2243458 101653742 not_found New York \n","5738270 235589417 not_found New York \n","\n"," country hometown joined lat \\\n","5715312 us not_found 2017-08-29 18:43:26 37.78 \n","49395 us not_found 2011-04-29 05:36:18 37.77 \n","1177488 us Chicago 2013-02-01 05:41:47 41.92 \n","2243458 us not_found 2013-07-10 18:53:45 40.76 \n","5738270 us not_found 2017-09-08 02:03:22 40.80 \n","\n"," link lon \\\n","5715312 http://www.meetup.com/members/234880949 -122.42 \n","49395 http://www.meetup.com/members/1831033 -122.40 \n","1177488 http://www.meetup.com/members/15422371 -87.70 \n","2243458 http://www.meetup.com/members/101653742 -73.99 \n","5738270 http://www.meetup.com/members/235589417 -73.97 \n","\n"," member_name ... group_id_26344309 group_id_26347789 \\\n","5715312 Justine Jennings ... 0 0 \n","49395 Ines Sombra ... 0 0 \n","1177488 Enrique ... 0 0 \n","2243458 M ... 0 0 \n","5738270 Yuanyuan (Yoannie) Lei ... 
0 0 \n","\n"," group_id_26350181 group_id_26350972 group_id_26352410 \\\n","5715312 0 0 0 \n","49395 0 0 0 \n","1177488 0 0 0 \n","2243458 0 0 0 \n","5738270 0 0 0 \n","\n"," group_id_26355546 group_id_26361954 group_id_26365189 \\\n","5715312 0 0 0 \n","49395 0 0 0 \n","1177488 0 0 0 \n","2243458 0 0 0 \n","5738270 0 0 0 \n","\n"," group_id_26371769 group_id_26372763 \n","5715312 0 0 \n","49395 0 0 \n","1177488 0 0 \n","2243458 0 0 \n","5738270 0 0 \n","\n","[5 rows x 8200 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"metadata":{"id":"f0lUks5DShyh","colab_type":"code","colab":{}},"cell_type":"code","source":["#let's repeat the same process for the \"cities\" feature\n","df2_sample_dummies_cities = pd.get_dummies(df2_sample_dummies_concat['city'], prefix = 'cities_')\n","df2_sample_dummies_concat_cities2 = pd.concat([df2_sample_dummies_concat, df2_sample_dummies_cities], axis=1)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"f96MSfpvShyj","colab_type":"code","colab":{},"outputId":"b67a56e8-3f33-4674-95a2-d02bc755dece"},"cell_type":"code","source":["df2_sample_dummies_concat_cities2.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
member_idbiocitycountryhometownjoinedlatlinklonmember_name...cities__Chicagocities__Chicago Heightscities__Chicago Ridgecities__East Chicagocities__New Yorkcities__North Chicagocities__San Franciscocities__South San Franciscocities__West Chicagocities__West New York
5715312234880949not_foundSan Franciscousnot_found2017-08-29 18:43:2637.78http://www.meetup.com/members/234880949-122.42Justine Jennings...0000001000
493951831033not_foundSan Franciscousnot_found2011-04-29 05:36:1837.77http://www.meetup.com/members/1831033-122.40Ines Sombra...0000001000
117748815422371looking forward to playing more soccerChicagousChicago2013-02-01 05:41:4741.92http://www.meetup.com/members/15422371-87.70Enrique...1000000000
2243458101653742not_foundNew Yorkusnot_found2013-07-10 18:53:4540.76http://www.meetup.com/members/101653742-73.99M...0000100000
5738270235589417not_foundNew Yorkusnot_found2017-09-08 02:03:2240.80http://www.meetup.com/members/235589417-73.97Yuanyuan (Yoannie) Lei...0000100000
\n","

5 rows × 8210 columns

\n","
"],"text/plain":[" member_id bio city \\\n","5715312 234880949 not_found San Francisco \n","49395 1831033 not_found San Francisco \n","1177488 15422371 looking forward to playing more soccer Chicago \n","2243458 101653742 not_found New York \n","5738270 235589417 not_found New York \n","\n"," country hometown joined lat \\\n","5715312 us not_found 2017-08-29 18:43:26 37.78 \n","49395 us not_found 2011-04-29 05:36:18 37.77 \n","1177488 us Chicago 2013-02-01 05:41:47 41.92 \n","2243458 us not_found 2013-07-10 18:53:45 40.76 \n","5738270 us not_found 2017-09-08 02:03:22 40.80 \n","\n"," link lon \\\n","5715312 http://www.meetup.com/members/234880949 -122.42 \n","49395 http://www.meetup.com/members/1831033 -122.40 \n","1177488 http://www.meetup.com/members/15422371 -87.70 \n","2243458 http://www.meetup.com/members/101653742 -73.99 \n","5738270 http://www.meetup.com/members/235589417 -73.97 \n","\n"," member_name ... cities__Chicago cities__Chicago Heights \\\n","5715312 Justine Jennings ... 0 0 \n","49395 Ines Sombra ... 0 0 \n","1177488 Enrique ... 1 0 \n","2243458 M ... 0 0 \n","5738270 Yuanyuan (Yoannie) Lei ... 0 0 \n","\n"," cities__Chicago Ridge cities__East Chicago cities__New York \\\n","5715312 0 0 0 \n","49395 0 0 0 \n","1177488 0 0 0 \n","2243458 0 0 1 \n","5738270 0 0 1 \n","\n"," cities__North Chicago cities__San Francisco \\\n","5715312 0 1 \n","49395 0 1 \n","1177488 0 0 \n","2243458 0 0 \n","5738270 0 0 \n","\n"," cities__South San Francisco cities__West Chicago \\\n","5715312 0 0 \n","49395 0 0 \n","1177488 0 0 \n","2243458 0 0 \n","5738270 0 0 \n","\n"," cities__West New York \n","5715312 0 \n","49395 0 \n","1177488 0 \n","2243458 0 \n","5738270 0 \n","\n","[5 rows x 8210 columns]"]},"metadata":{"tags":[]},"execution_count":16}]},{"metadata":{"id":"BfC2_RYgShyk","colab_type":"code","colab":{}},"cell_type":"code","source":["#write it to a CSV before your kernel dies! 
this could be helpful if you want to use the same sample again in the future\n","# df2_sample_dummies_concat_cities2.to_csv('members2.csv')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Oz117YECShyl","colab_type":"code","colab":{}},"cell_type":"code","source":["#print(list(df2_sample_dummies_concat_cities2.columns.values))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"GOzttMhJShym","colab_type":"text"},"cell_type":"markdown","source":["## Training the model\n","I am deciding that we use groupIDs and Cities as our main features to train the model - and since we want the data itself to group and tell us what the major groups are - we are going to use a technique called \"clustering\" (or k-means clustering, where k= the number of clusters). I don't know in advance what the best number of clusters will be, so I will try a bunch of different k (k= 2,4,6,8) and then see how well our clusters are performing. See below :)"]},{"metadata":{"id":"Eaue1gYtShyn","colab_type":"code","colab":{},"outputId":"8bb074a1-ae57-46cf-c09a-ea454ce52f0b"},"cell_type":"code","source":["# Train only on the one-hot columns (group dummies followed by city dummies).\n","# Joining the two dummy frames avoids hard-coding the first/last column labels,\n","# which depend on which groups and cities happen to be in the sample.\n","df2_sample_dummies_concat_cities2_train = pd.concat([df2_sample_dummies, df2_sample_dummies_cities], axis=1)\n","df2_sample_dummies_concat_cities2_train.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
group_id_6388group_id_6510group_id_8458group_id_8940group_id_12542group_id_12907group_id_14573group_id_15324group_id_17921group_id_18843...cities__Chicagocities__Chicago Heightscities__Chicago Ridgecities__East Chicagocities__New Yorkcities__North Chicagocities__San Franciscocities__South San Franciscocities__West Chicagocities__West New York
57153120000000000...0000001000
493950000000000...0000001000
11774880000000000...1000000000
22434580000000000...0000100000
57382700000000000...0000100000
\n","

5 rows × 8196 columns

\n","
"],"text/plain":[" group_id_6388 group_id_6510 group_id_8458 group_id_8940 \\\n","5715312 0 0 0 0 \n","49395 0 0 0 0 \n","1177488 0 0 0 0 \n","2243458 0 0 0 0 \n","5738270 0 0 0 0 \n","\n"," group_id_12542 group_id_12907 group_id_14573 group_id_15324 \\\n","5715312 0 0 0 0 \n","49395 0 0 0 0 \n","1177488 0 0 0 0 \n","2243458 0 0 0 0 \n","5738270 0 0 0 0 \n","\n"," group_id_17921 group_id_18843 ... cities__Chicago \\\n","5715312 0 0 ... 0 \n","49395 0 0 ... 0 \n","1177488 0 0 ... 1 \n","2243458 0 0 ... 0 \n","5738270 0 0 ... 0 \n","\n"," cities__Chicago Heights cities__Chicago Ridge cities__East Chicago \\\n","5715312 0 0 0 \n","49395 0 0 0 \n","1177488 0 0 0 \n","2243458 0 0 0 \n","5738270 0 0 0 \n","\n"," cities__New York cities__North Chicago cities__San Francisco \\\n","5715312 0 0 1 \n","49395 0 0 1 \n","1177488 0 0 0 \n","2243458 1 0 0 \n","5738270 1 0 0 \n","\n"," cities__South San Francisco cities__West Chicago \\\n","5715312 0 0 \n","49395 0 0 \n","1177488 0 0 \n","2243458 0 0 \n","5738270 0 0 \n","\n"," cities__West New York \n","5715312 0 \n","49395 0 \n","1177488 0 \n","2243458 0 \n","5738270 0 \n","\n","[5 rows x 8196 columns]"]},"metadata":{"tags":[]},"execution_count":21}]},{"metadata":{"id":"1hDEqd7MShyo","colab_type":"code","colab":{},"outputId":"d15099ff-8537-42a0-fdb1-0ccff0e88268"},"cell_type":"code","source":["#k = 8 training model\n","# random_state fixes the centroid initialisation so re-running gives the same clusters and silhouette score\n","km = KMeans(n_clusters=8, random_state=42)\n","%time km.fit(df2_sample_dummies_concat_cities2_train)\n","clusters = km.labels_.tolist()\n","silhouette_k8 = silhouette_score(df2_sample_dummies_concat_cities2_train[0:50000], clusters[0:50000])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["CPU times: user 6min 12s, sys: 2min 54s, total: 9min 7s\n","Wall time: 10min 23s\n"],"name":"stdout"}]},{"metadata":{"collapsed":true,"id":"BZvrdfmJShyp","colab_type":"code","colab":{}},"cell_type":"code","source":["# km = KMeans(n_clusters=7)\n","# %time km.fit(df2_sample_dummies_concat_cities2_train)\n","# clusters = 
km.labels_.tolist()\n","# silhouette_k7 = silhouette_score(df2_sample_dummies_concat_cities2_train[0:10000], clusters[0:10000])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"vxvSkXJ0Shyq","colab_type":"code","colab":{},"outputId":"bf0b317c-dfaa-452f-dbc6-d48556a0e828"},"cell_type":"code","source":["#k=6 training model\n","# random_state fixes the centroid initialisation so re-running gives the same clusters and silhouette score\n","km = KMeans(n_clusters=6, random_state=42)\n","%time km.fit(df2_sample_dummies_concat_cities2_train)\n","clusters = km.labels_.tolist()\n","silhouette_k6 = silhouette_score(df2_sample_dummies_concat_cities2_train[0:50000], clusters[0:50000])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["CPU times: user 5min 21s, sys: 2min 37s, total: 7min 59s\n","Wall time: 8min 54s\n"],"name":"stdout"}]},{"metadata":{"collapsed":true,"id":"1MTUbrFOShys","colab_type":"code","colab":{}},"cell_type":"code","source":["# km = KMeans(n_clusters=5)\n","# %time km.fit(df2_sample_dummies_concat_cities2_train)\n","# clusters = km.labels_.tolist()\n","# silhouette_k5 = silhouette_score(df2_sample_dummies_concat_cities2_train[0:10000], clusters[0:10000])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"aaoO3OhQShyt","colab_type":"code","colab":{},"outputId":"989cab2e-6d57-4250-b2ae-799037ad409e"},"cell_type":"code","source":["# k = 4 training model\n","# random_state fixes the centroid initialisation so re-running gives the same clusters and silhouette score\n","km = KMeans(n_clusters=4, random_state=42)\n","%time km.fit(df2_sample_dummies_concat_cities2_train)\n","clusters = km.labels_.tolist()\n","silhouette_k4 = silhouette_score(df2_sample_dummies_concat_cities2_train[0:50000], clusters[0:50000])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["CPU times: user 4min 34s, sys: 2min 21s, total: 6min 55s\n","Wall time: 8min\n"],"name":"stdout"}]},{"metadata":{"collapsed":true,"id":"7tFlqpt0Shyu","colab_type":"code","colab":{}},"cell_type":"code","source":["# km = KMeans(n_clusters=3)\n","# %time km.fit(df2_sample_dummies_concat_cities2_train)\n","# clusters = km.labels_.tolist()\n","# silhouette_k3 = silhouette_score(df2_sample_dummies_concat_cities2_train[0:10000], 
clusters[0:10000])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Fag4o2tNShyv","colab_type":"code","colab":{},"outputId":"8d5e0b39-5980-41a7-ca54-b44c945db2d2"},"cell_type":"code","source":["# k = 2 training model\n","# random_state fixes the centroid initialisation so re-running gives the same clusters and silhouette score\n","km = KMeans(n_clusters=2, random_state=42)\n","%time km.fit(df2_sample_dummies_concat_cities2_train)\n","clusters = km.labels_.tolist()\n","silhouette_k2 = silhouette_score(df2_sample_dummies_concat_cities2_train[0:50000], clusters[0:50000])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["CPU times: user 3min 51s, sys: 2min 43s, total: 6min 34s\n","Wall time: 8min 23s\n"],"name":"stdout"}]},{"metadata":{"id":"Ovxld8dBShyw","colab_type":"text"},"cell_type":"markdown","source":["## Model Evaluation\n","Ok now that we've trained 4 models on different size clusters (different k) and we calculated a silhouette coefficient - The silhouette value is a measure of how similar an object is to its own cluster (cohesion) compared to other clusters (separation).\n","\n","We can plot the number of clusters by the silhouette score and use the elbow method (visually looking at the data to see where's an \"elbow\") to see which cluster does the best. 
The Elbow method is a method of interpretation and validation of consistency within cluster analysis designed to help finding the appropriate number of clusters in a dataset.\n","\n","http://www.awesomestats.in/python-cluster-validation/"]},{"metadata":{"id":"LesQETsvShyx","colab_type":"code","colab":{},"outputId":"768f401d-ce78-4dc5-ec85-5d76b602c43b"},"cell_type":"code","source":["# silhouette = [silhouette_k2, silhouette_k3, silhouette_k4, silhouette_k5, silhouette_k6, silhouette_k7, silhouette_k8]\n","# count_k = [2, 3, 4, 5, 6, 7, 8]\n","\n","silhouette = [silhouette_k2, silhouette_k4, silhouette_k6, silhouette_k8]\n","count_k = [2, 4, 6, 8]\n","\n","count_silhouette = list(zip(count_k, silhouette))\n","print(count_silhouette)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[(2, 0.20347691177588109), (4, 0.23169483895092327), (6, 0.075606478197469767), (8, 0.0028336940867501044)]\n"],"name":"stdout"}]},{"metadata":{"id":"PanSR9spShyy","colab_type":"code","colab":{},"outputId":"41291288-2c03-4a57-cf8b-3c05dd08047a"},"cell_type":"code","source":["plt.plot(*zip(*count_silhouette))"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[]"]},"metadata":{"tags":[]},"execution_count":27},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAX4AAAD8CAYAAABw1c+bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8FeXd/vHPPSdsAQLkhMWwCIQEBC2goVIUKyZi3ZDH\nUkXrghFRFlHrwlIUCqKRXRQEBbHiUutKsTyVptriD9oKArIpEEQUA8Yk7BAgmfv3x8Egj2j2zMmc\n6/1Pe14zc871bfq6MrlnmGOstRYREYkYjtcBRESkaqn4RUQijIpfRCTCqPhFRCKMil9EJMKo+EVE\nIoyKX0Qkwqj4RUQijIpfRCTCqPhFRCJMlNcBfkxWVlaZjouLiyMnJ6eC03jDL7P4ZQ7QLOHIL3NA\n+WaJj48v8b464xcRiTAqfhGRCKPiFxGJMCp+EZEIo+IXEYkwKn4RkQij4hcRiTBhex+/VH82Lwe7\ndSNH6tSBn/3c6zgicoKKXyqEtRa+ycJu3QhbN2K3bITcbAD2A2bAcJwLUr0NKSKAil/KyLqFsHMH\nduvGE2W/CfbvDW2s3wASO2FS+2ASOxH1l5c59socbOtETPMzvQ0uIip+KRlbcBy+yDxR9Jsg81M4\ncii0MdgE07ErJHXCJHaEps0xxhQdG3PfOHLuvRl3zhM4v5+KqV3HoylEBFT88iPs0XzY
9tnJov98\nMxw/Ftp4RktMt56Q2BGT2AkTbPyT7xVoGIsz8H7caY9gX34G0u475ReDiFQtFb8AYA8dgK2bThb9\njkxwXTAOtGqL+eXlmKRO0K4jpn5Mqd/fdPgZpk9/7KJXIOlsTM/elTCFiJSEij9C2T25Jy/Ebt0E\nX+8IbYiqAW0SMb/qF1q2SeiAqRNdIZ9prvgNdusm7KvPYtskYlq0qZD3FZHSUfFHAGstZO8KFf2W\njdjMTfDt7tDG2nWg3VmYbj0xiZ1CpV+jZqXkME4A5/bf4Y6/F3fOJJwxUzG1K+aXioiUnIrfh6zr\nwtc7Ti36fXtCG+vFhNbmL7kSk3g2tGiNCQSqLJuJaYhzxwO4U8dgF86GgfdrvV+kiqn4fcAWHIcd\n20Lr81s2wrZP4fCJO25iG2M6/OzEHTedoFkLz4vWtD8bc82N2HdegvZnYy76lad5RCKNir8askfz\n4fPNJ4t++2Y4duKOm2YtMMkXfu+Omybehv0R5vJ+ofyvPodtnYRp1dbrSCIRQ8VfDdhDByFz08mi\n/3IbFBaG7rhp2QZz0a9CF2LbdcTENPQ6bokYxzmx3n8P7twncMZMr7CLyCLy01T8YcjuzcVu3cT+\nrz6ncN0qyPoSrIWoKGidhOn9P6FbKxPOqtZlaeo3wLnjQdypv8cunAV3POD5MpRIJFDxe8xaC9/u\nPvUZNyfuuMmvXQfadsAkXxgq+jZJlXbHjVdMUidM35uwb70Yug5x8RVeRxLxPRV/FbOuC1lfFj3f\nxm7ZCPvyQhvr1Yd2ofIzSZ2I69qN3D17vQ1cBcxl12K3bMS+Ng/bpj3mzASvI4n4moq/ktmCAvhy\n28l/Ebt1Exw+GNrYKA7T/pzQhdikE3fcOCe/IsEEIuPHYxwHJ+0+3An3nlzvj67rdSwR34qMZqlC\n9uhR2L45dAabuQm2fQbHjoY2Nm2OOa9H6MmViR1DDzfTmjYApn4MzqAHcCePxn3xKZw7R+h/G5FK\nouIvJ3v4IGR+erLov8iEwgIwJvSPo3r2DpV8YkdMTCOv44Y1064j5tpbsG+8gP3nEkyvK72OJOJL\nKv5Ssvv2FF2EDT3j5ovQHTeBqNDjDnpfE/oXsQkdtFxRBubSvtjNG7B/no9t2x5zZjuvI4n4jor/\nJ1hrIeebk48+2LoJsrNCG2vVhrbtMX1uCBV9m0RMzVreBvaB0Hr/vaH1/jlP4Dw8HRNdz+tYIr6i\n4v8e67qw66tTi35vbmhj3fqhh5n98rLQow9atsVE6X++ymDqxeAMegh38ijcPz6Fc9dIrfeLVKCI\nbi5bUABffX7yX8RmfgqHDoQ2NgyG1uaTOoXO6M849Y4bqVwmoUNovf/1Bdj338WkXO11JBHfiKji\nt8eOwvYtJ4v+881wND+0sUk8psv5Jx9mFtdUZ5keM5f2DV1LeX1BaL2/TZLXkUR8wdfFbw8fgm2f\nniz6799x07w15oLUE3fcdMI00B034cYYg3PbPbgT7sOdOwnn4RmYulrvFykvXxW/3b+H/C3rcFf/\nF7tlA+z84sQdNwFonYhJ7XPi6wPP0gXDasLUrY8z6EHcSaNwX3gSZ8ho/SUmUk4lKv61a9eyYMEC\nXNclJSWFvn37nrL9ww8/ZNGiRVhrqVOnDgMHDqR169YlOrai2OPHcUfczr6CAqhZK3Q75VX9Tzzj\npj2mlu64qa5M2/aYfrdiX5uPzfgL5tJrvI4kUq0VW/yu6zJ//nzGjBlDMBhk1KhRJCcn06JFi6J9\nmjRpwrhx46hXrx5r1qzh2Wef5bHHHivRsRXF1KiBufVuGiV1ZG9MUHfc+IxJ6YPdvBH75gvYhA6Y\ntu29jiRSbRV7m0pmZibNmjWjadOmREVF0aNHD1auXHnKPu3bt6devdDSSWJiIrm5uSU+tiI53XtR\nI6mTSt+HjDE4A4ZDwyDu3EnY7+6+EpFSK7b48/Ly
CAaDRa+DwSB5eXk/uv/7779P165dy3SsyE8x\ndevh3DkC9u3BXfBk6B/YiUipVeip8YYNG/jggw8YP358qY/NyMggIyMDgPT0dOLi4sqUISoqqszH\nhhu/zFKhc8TFcXjAMA7Mn0H0ir9T95obK+Z9S8gvPxPwzyx+mQOqbpZiiz82NrZo6QYgNzeX2NjY\nH+y3Y8cO5s6dy6hRo6hfv36pjgVITU0lNTW16HVOTk7Jp/ieuLi4Mh8bbvwyS0XPYc/vBWs+4uDC\nZzjcrBUmoUOFvXdx/PIzAf/M4pc5oHyzxMfHl3jfYpd6EhIS2LVrF9nZ2RQUFLBixQqSk5NP2Scn\nJ4cpU6YwbNiwUz68JMeKlJYxBufWuyG2Me6zk7AH93sdSaRaKfaMPxAIkJaWxsSJE3Fdl169etGy\nZUuWLl0KQO/evXnjjTc4ePAg8+bNKzomPT39R48VKS8TXRfnzodw0x/CfX4GzrAxeqSGSAkZG6ZX\nyLKyssp0nP7sCz+VOYf7wV+xr8zF/PpWnF/9ulI+4/v88jMB/8zilzkgjJZ6RMKZufgKzHkXYN9e\nGHqaqogUS8Uv1ZoxBnPLMAg2wX12MvaA1vtFiqPil2rPRNfFuWsEHNyH+/y00PcqiMiPUvGLL5hW\nCZjrB8KG1di/vel1HJGwpuIX3zC/vBzTrSf2nZdDj+EWkdNS8YtvGGMwNw+Fxs1wn5uM3b/X60gi\nYUnFL75i6kTj3PkQHDyAO3+61vtFTkPFL75jWrXF3HAHbFqD/d83vI4jEnZU/OJLpudlmJ//Ervo\nFezm9V7HEQkrKn7xpdB6/2Bocgbuc1Ow+/d4HUkkbKj4xbdM7Wicux6Cw4dw503DuoVeRxIJCyp+\n8TXTog3mhkHw6SfYv77udRyRsKDiF98zF16K6X4xdvGr2E8/8TqOiOdU/OJ7xhjMbwdD0+a486Zi\n92m9XyKbil8igqldJ/Q8n/zDoYu9Wu+XCKbil4hhmp+JufEu2Lwe++5rXscR8YyKXyKKc0Eq5heX\nYN99DbtprddxRDyh4peIY357FzRrEVrv35vndRyRKqfil4hjatUOrfcfzQ+t9xdqvV8ii4pfIpKJ\nbxW602fLBuziV72OI1KlVPwSsZwel2AuSMEueR27cY3XcUSqjIpfIpq54S6IbxVa79+T63UckSqh\n4peIZmrVwrlzBBw/FvryFq33SwRQ8UvEM2e0wNw0GLZuwi562es4IpVOxS8CON17YXr2xv7vG9gN\nH3sdR6RSqfhFTjD974DmZ+LOn4bNy/E6jkilUfGLnGBq1grd33+8ILTeX1DgdSSRSqHiF/ke06wF\n5uYhkPmp1vvFt1T8Iv+Hc/4vMRddhv3bm9h1K72OI1LhVPwip2GuHwgt2uA+PwOb+63XcUQqlIpf\n5DSK1vsLtN4v/qPiF/kRpmk85tZhsO0z7NsLvY4jUmFU/CI/wenWE3Px5dilb2M/+cjrOCIVQsUv\nUgxz3e3Qqi3u8zMozN7ldRyRcosqyU5r165lwYIFuK5LSkoKffv2PWX7119/zezZs9m+fTv9+/en\nT58+RduGDh1K7dq1cRyHQCBAenp6xU4gUslMjZo4dz6EO+E+9k19BHvfeExUDa9jiZRZscXvui7z\n589nzJgxBINBRo0aRXJyMi1atCjap169etx2222sXHn6W9/Gjh1LTExMxaUWqWKmSTzOgOEcn/ME\n5q0XQ38FiFRTxS71ZGZm0qxZM5o2bUpUVBQ9evT4QcE3aNCAdu3aEQgEKi2oiNfMeRdQ54p+2L8v\nwq79j9dxRMqs2OLPy8sjGAwWvQ4Gg+Tlle57SidMmMCIESPIyMgofUKRMFJ/wDA4sx3ugiex3+72\nOo5ImZRojb88JkyYQGxsLPv27ePRRx8lPj6ejh07/mC/jIyMol8M6enpxMXFlenzoqKiynxsuPHL\nLH6ZA0KzBEc+
Tt79A3Cen07sY3MwNarner9ffi5+mQOqbpZiiz82Npbc3JPfTJSbm0tsbGyJP+C7\nfRs0aEC3bt3IzMw8bfGnpqaSmppa9Donp2xPR4yLiyvzseHGL7P4ZQ4IzbI3qhbmlrspmJPOt3On\n4PS/w+tYZeKXn4tf5oDyzRIfH1/ifYtd6klISGDXrl1kZ2dTUFDAihUrSE5OLtGb5+fnc+TIkaL/\nvm7dOlq1alXicCLhypzXA5NyNfYfi7GrV3gdR6RUij3jDwQCpKWlMXHiRFzXpVevXrRs2ZKlS5cC\n0Lt3b/bu3cvIkSM5cuQIxhiWLFnCtGnTOHDgAFOmTAGgsLCQCy+8kC5dulTuRCJVxPQbgN32Ge4L\nT+G0bItp3MzrSCIlYqy11usQp5OVlVWm4/RnX/jxyxzww1nst7txH70PGp+BM+KJarXe75efi1/m\ngDBa6hGRH2caN8MZcA/syMS+/rzXcURKRMUvUk6ma3dM6jXYD/6K/Xi513FEiqXiF6kA5te3QJsk\n3D8+hdXzfCTMqfhFKoCJqoFz50NgHNy5T2CPH/M6ksiPUvGLVBATbIKTdi98+Tn2z1rvl/Cl4hep\nQKbzzzG9+2L/uQR35YdexxE5LRW/SAUz/3MLJHTAvvg09puy3ZYsUplU/CIVzERF4dzxIASicOdo\nvV/Cj4pfpBKYYOPQev/O7dg/zfM6jsgpVPwilcT8rBvmsmuxy/6G+99/eR1HpIiKX6QSmb43hdb7\nF87G7t7pdRwRQMUvUqlMVBTOoIegRhTu3EnYY0e9jiSi4hepbCY2Diftd7DzC+yfnvM6joiKX6Qq\nmHPOw1z+a+yHS3H/84HXcSTCqfhFqoi55iZI7Ih96RnsLq33i3dU/CJVxAQCofv7a9QMPc/nqNb7\nxRsqfpEqZBoFcW7/HWR9iX11rtdxJEKp+EWqmDn7XMzlv8Euz8Bd8b7XcSQCqfhFPGD63ABJZ2Nf\nfgab9aXXcSTCqPhFPBBa778fatUOPc/naL7XkSSCqPhFPGIaBnEG3g+7d2JfnuN1HIkgKn4RD5mO\nXTBXXo/99/u4y//hdRyJECp+EY+Zq6+H9udgX3kG+/UOr+NIBFDxi3jMOAGcOx6A2tGh9f78I15H\nEp9T8YuEAdOgUWi9/5uvQ3f6WOt1JPExFb9ImDBndcZc1R/7n39i/9/fvY4jPqbiFwkj5qrr4KzO\n2Fefxe7c7nUc8SkVv0gYMU4AZ+DvILou7pxJ2PzDXkcSH1Lxi4QZE9ModLE3exd2odb7peKp+EXC\nkGl/DqbPDdiP/oX98D2v44jPqPhFwpS5oh907IJ99TnsV1rvl4qj4hcJU8YJhB7hXK9+6P7+I1rv\nl4qh4hcJYyamYWi9/9vd2IWztN4vFULFLxLmTNLZmGtuxK78EPuvv3kdR3wgqiQ7rV27lgULFuC6\nLikpKfTt2/eU7V9//TWzZ89m+/bt9O/fnz59+pT4WBEpnrm8HzZzE/a1edi2SZhWCV5Hkmqs2DN+\n13WZP38+o0ePZvr06SxfvpydO0/9ouh69epx2223cfXVV5f6WBEpnnEcnLT7oF5MaL3/8CGvI0k1\nVmzxZ2Zm0qxZM5o2bUpUVBQ9evRg5cqVp+zToEED2rVrRyAQKPWxIlIypn4DnEEPQm429sWntd4v\nZVZs8efl5REMBoteB4NB8vLySvTm5TlWRH7IJHbE9L0Z+/Fy7D+XeB1HqqkSrfFXhYyMDDIyMgBI\nT08nLi6uTO8TFRVV5mPDjV9m8cscEB6z2N/ewd4dWzj25+dpcO751EjoUKb3CYdZKoJf5oCqm6XY\n4o+NjSU3N7fodW5uLrGxsSV689Icm5qaSmpqatHrnJycEn3G/xUXF1fmY8ONX2bxyxwQPrPY3w6F\nz+8lL30UzsPTMdH1Sv0e4TJLefllDijfLPHx8SXet9ilnoSEBHbt2kV2djYFBQ
WsWLGC5OTkEr15\neY4VkR9n6seE1vvzvsX941Na75dSKfaMPxAIkJaWxsSJE3Fdl169etGyZUuWLl0KQO/evdm7dy8j\nR47kyJEjGGNYsmQJ06ZNIzo6+rTHikj5mXZnYa69BfvGC9j3/4pJucrrSFJNGBumpwpZWVllOk5/\n9oUfv8wB4TeLdV3cWRNh4xqcEU9g2iSW+Nhwm6Ws/DIHhNFSj4iEr9D9/fdCg0a4c5/AHjrodSSp\nBlT8ItWcqVs/tN6/Nxf3hZla75diqfhFfMAkdMBceyus/Q/2H3/xOo6EORW/iE+YS6+Bzj8PXez9\nfLPXcSSMqfhFfMIYg3PbvdAwiPvsZOyhA15HkjCl4hfxEVO3Hs6dD8HePNwFT2q9X05LxS/iM6ZN\nEqbfAPjkI+zf3/E6joQhFb+ID5mUq6Frd+xbL2K3feZ1HAkzKn4RHzLG4AwYfmK9fxL24H6vI0kY\nUfGL+JSJrodz5wjYtxf3+RlY1/U6koQJFb+Ij5k2iZjfpMH6VVrvlyIqfhGfM5dcCef2CK33Z27y\nOo6EARW/iM8ZY3BuvRuCTXDnTsYe0Hp/pFPxi0QAE103tN5/cB/u89O13h/hVPwiEcKcmYC5biBs\n+Bj73ltexxEPhc137opI5TMXXw6b12PfeYkjrdtiO3TFGON1LKliOuMXiSDGGMytd0OrBPZPG4c7\nczz2291ex5IqpuIXiTCmTjTOyEnUS7sHtm7CHTsMd8nr2ILjXkeTKqLiF4lAJhCg7tXX44yfBeec\nh317Ie74e7FbNnodTaqAil8kgpnYOAKDR+EMexiO5uNOHhX6Fi894sHXdHFXRDCdu+F0OAe7+E/Y\njEXYT/6L6ZeG6XGJLv76kM74RQQAU6s2Tr8BOGOmQ9Pm2BeexJ0yGrvrK6+jSQVT8YvIKUyL1jgP\npWNuHgo7d+D+4R7ct1/CHjvqdTSpICp+EfkB4zg4F12GM2E2pltP7JI/4467G7thtdfRpAKo+EXk\nR5mYhji334fzuwngBHCfHBf6Pt+9eV5Hk3JQ8YtIscxZnXHGzsT0uRG75j+4jwzB/eCvWLfQ62hS\nBip+ESkRU6MGztX9ccY9Ba0Tsa/MxX38IeyObV5Hk1JS8YtIqZim8Tj3jccMvB9ys3En3o/72jxs\n/mGvo0kJ6T5+ESk1Ywzm/F9izz4P+/aL2H8sxq5ajnPDHdD1F7r3P8zpjF9EyszUrYdz0xCcEU9A\nvfq4z6TjPv0oNucbr6PJT1Dxi0i5mYQOOGOmY35zG2xeH3rw29/exBYUeB1NTkPFLyIVwgQCOL3/\nB+cPs6BjF+ybf8R99D59z28YUvGLSIUywcYEhv4eZ+hoOHII94mRuC8+jT10wOtocoIu7opIpTBd\nuuN06Ixd/Co24y/Ytf/F/CYN0/1iXfz1mM74RaTSmNp1cH6TFnrwW1xT7PPTcaeOwe7e6XW0iFai\nM/61a9eyYMECXNclJSWFvn37nrLdWsuCBQtYs2YNtWrVYsiQIbRt2xaAoUOHUrt2bRzHIRAIkJ6e\nXvFTiEhYMy3b4IychF32HvatF3H/MBzzq36YK/phatT0Ol7EKbb4Xddl/vz5jBkzhmAwyKhRo0hO\nTqZFixZF+6xZs4bdu3czc+ZMtm7dyrx583jssceKto8dO5aYmJjKmUBEqgXjOJiLL8ee2x372vPY\nd/+E/ehfOL+9C9Oxq9fxIkqxSz2ZmZk0a9aMpk2bEhUVRY8ePVi5cuUp+6xatYqLLroIYwxJSUkc\nOnSIPXv2VFpoEam+TEwjnDvux7lvPADu9LG4z03B7lNnVJViz/jz8vIIBoNFr4PBIFu3bv3BPnFx\ncafsk5eXR6NGjQCYMGECjuNw6aWXkpqaetrPycjIICMjA4D09PRT3q80oqKiynxsuPHLLH6ZAzRL\nhbooFdu9J4feXMihtxbChtXUvXkwdXpfg3
FKfvnR8zkqUFXNUul39UyYMIHY2Fj27dvHo48+Snx8\nPB07dvzBfqmpqaf8UsjJySnT58XFxZX52HDjl1n8MgdolkpxaV+cc5JxX3qGA3Mnc2DpIpybh2Ja\ntinR4WEzRwUozyzx8fEl3rfYX6uxsbHk5uYWvc7NzSU2NvYH+3w/7Pf3+e4/GzRoQLdu3cjMzCxx\nOBGJDKZZC5z7H8Xcfh/kfIP76H24rz+PzT/idTRfKrb4ExIS2LVrF9nZ2RQUFLBixQqSk5NP2Sc5\nOZlly5ZhrWXLli1ER0fTqFEj8vPzOXIk9IPLz89n3bp1tGrVqnImEZFqzRiD071X6Fu/LkjFLn0H\n95Gh2DX/8Tqa7xS71BMIBEhLS2PixIm4rkuvXr1o2bIlS5cuBaB379507dqV1atXM3z4cGrWrMmQ\nIUMA2LdvH1OmTAGgsLCQCy+8kC5dulTiOCJS3Zm69TG3DMP2SMF9aTbu7Meg889xbrgTE2zsdTxf\nMNZa63WI08nKyirTcVrvCz9+mQM0S1WzBQXYjEXYxa8CBtPnRkzK1Ziok+es1WGOkgqbNX4REa+Y\nqCicX/069OC3Dj/DvrEAd+LvsNs+8zpatabiF5GwZ+Ka4gwbgzN4FBw8gPvECNyFs7GHDnodrVrS\nQ9pEpFowxsC5v8Dp2Bm76NXQt36t+TdHbr8X2/FcPfitFHTGLyLViqkdjXP97ThjpkKwCftn/AF3\n+iPYb8p2XTASqfhFpFoyrRJwRk2i/qD74YutuOPuxv3Lq9jjx72OFvZU/CJSbRknQPTlv8YZPxvT\ntTt28au4fxiO/fQTr6OFNRW/iFR7pmEszqAHce4ZB24h7rSHcedPw+7f63W0sKTiFxHfMGefizPu\nKcyV12FX/j/chwfjLvsb1nW9jhZWVPwi4iumZi2cvjfhjH0SWrTBLpyNO2kkducXXkcLGyp+EfEl\nc0ZLnAcmYm67B775GnfCvbhvLMAezfc6mud0H7+I+JYxBtMjBfuzbtg3XsC+9zZ21fLQc386d/M6\nnmd0xi8ivmfqxeAMGI7z4ONQsxbu0xMonP0YNs8fz/gpLRW/iEQMk9QJ55EZmGtvgY2rcR8Zivv3\nRdjCQq+jVSkVv4hEFBNVA+fyfjjjnobEjtg/zw89+G37Fq+jVRkVv4hEJNO4Gc7wR3DuGgEH9uE+\n/iDuy3Owhw95Ha3S6eKuiEQsYwycdwFOx67Yd17CfrAEu+bfmOtux3Tr6dsHv+mMX0QinqkTjXPD\nIJzRk6FhEPvcFNwZ47DZ/nzwm4pfROQE0zoRZ/RkTP9B8PlnuGPvxn33Nd89+E3FLyLyPcYJ4KRc\nFXrwW+efYxe9jDv+Huzm9V5HqzAqfhGR0zCNgjh3jcAZ/ggcP4Y75fe4z8/AHtjndbRyU/GLiPwE\nc04yzh9mYS7vh/3oX7gPD8H9cGm1fvCbil9EpBimVi2ca2/BefhJOKMl9sWncSePxn79pdfRykTF\nLyJSQqZ5K5wHH8Pcejfs+gp3wj24b/0Re/So19FKRcUvIlIKxnFwLrwUZ8IzmPMvxv7vm7hjh2LX\nr/I6Womp+EVEysDUj8G57R6cBx6DGjVxZ46ncE46dk+u19GKpeIXESkH0/5snEeexPS9Cdatwn1k\nCO4/FmPd8H3wm4pfRKScTI0aOFdehzPuKWjbAfun53AfexD7xVavo52Wil9EpIKYJmfg3DsOM+hB\n2JuL+9gDuK/MDbsHv+khbSIiFcgYg+nWE9vpXOw7C7H/XIJd/W+c/gPhvAvC4sFvOuMXEakEJrou\nzo134YyaDDENcOdOwp05Hvvtbq+jqfhFRCqTaZOE8/tpmOtvh62bcMcOw13yOrbAuwe/qfhFRCqZ\nCQRwUq/BGT8LzjkP+/ZC3PH3Yrds9CSPil9EpIqY2DgCg0fhDHsYjh3FnTwK94WZ2AP7qzSHLu6K\niFQx07
kbTodzsIv/hM1YhP3kv5h+adg+11XJ55eo+NeuXcuCBQtwXZeUlBT69u17ynZrLQsWLGDN\nmjXUqlWLIUOG0LZt2xIdKyISiUyt2ph+A7DdL8Z9aTb2hSfZ89G/sENGY2rVrtTPLnapx3Vd5s+f\nz+jRo5k+fTrLly9n586dp+yzZs0adu/ezcyZMxk0aBDz5s0r8bEiIpHMtGiN81A65pZhBOJbVnrp\nQwnO+DMzM2nWrBlNmzYFoEePHqxcuZIWLVoU7bNq1SouuugijDEkJSVx6NAh9uzZw7ffflvssSIi\nkc44DqZnbxrExZGTk1Ppn1ds8efl5REMBoteB4NBtm7d+oN94uLiTtknLy+vRMd+JyMjg4yMDADS\n09NPeb/SiIqKKvOx4cYvs/hlDtAs4cgvc0DVzRI2F3dTU1NJTU0tel3W33pxVfQbsyr4ZRa/zAGa\nJRz5ZQ4o3yzx8fEl3rfY4o+NjSU39+RjRnNzc4mNjf3BPt8P+90+hYWFxR4rIiJVq9iLuwkJCeza\ntYvs7GwKCgpYsWIFycnJp+yTnJzMsmXLsNayZcsWoqOjadSoUYmOFRGRqlXsGX8gECAtLY2JEyfi\nui69evX5pAvIAAAFIklEQVSiZcuWLF26FIDevXvTtWtXVq9ezfDhw6lZsyZDhgz5yWNFRMQ7xlpr\nvQ5xOllZWWU6Tut94ccvc4BmCUd+mQOqbo1fj2wQEYkwKn4RkQgTtks9IiJSOXx3xj9y5EivI1QY\nv8zilzlAs4Qjv8wBVTeL74pfRER+mopfRCTCBMaNGzfO6xAV7btHQvuBX2bxyxygWcKRX+aAqplF\nF3dFRCKMlnpERCJM2DydszxycnKYNWsWe/fuxRhDamoqV1xxhdexyuTYsWOMHTuWgoICCgsL6d69\nO9ddVzVfx1YZXNdl5MiRxMbGVvu7L4YOHUrt2rVxHIdAIEB6errXkcrk0KFDzJkzh6+++gpjDIMH\nDyYpKcnrWKWWlZXF9OnTi15nZ2dz3XXXceWVV3qYquzeffdd3n//fYwxtGzZkiFDhlCzZs3K+TDr\nA3l5eXbbtm3WWmsPHz5shw8fbr/66iuPU5WN67r2yJEj1lprjx8/bkeNGmU3b97scaqyW7x4sZ0x\nY4Z9/PHHvY5SbkOGDLH79u3zOka5PfXUUzYjI8NaG/r/2MGDBz1OVH6FhYV24MCBNjs72+soZZKb\nm2uHDBlijx49aq21durUqfaDDz6otM/zxVJPo0aNii6I1KlTh+bNm5OXl+dxqrIxxlC7duir1woL\nCyksLMQY43GqssnNzWX16tWkpKR4HUVOOHz4MJ9++imXXHIJEPrij7p163qcqvzWr19Ps2bNaNy4\nsddRysx1XY4dO0ZhYSHHjh2jUaNGlfZZvljq+b7s7Gy2b99Ou3btvI5SZq7rMmLECHbv3s1ll11G\nYmKi15HK5IUXXuCmm27iyJEjXkepMBMmTMBxHC699NJTvjiousjOziYmJobZs2ezY8cO2rZty4AB\nA4pONqqr5cuXc8EFF3gdo8xiY2O5+uqrGTx4MDVr1qRz58507ty50j7PF2f838nPz2fq1KkMGDCA\n6Ohor+OUmeM4TJ48mTlz5rBt2za+/PJLryOV2scff0yDBg18dZvdhAkTmDx5MqNHj+a9995j06ZN\nXkcqtcLCQrZv307v3r2ZNGkStWrV4p133vE6VrkUFBTw8ccf0717d6+jlNnBgwdZuXIls2bNYu7c\nueTn57Ns2bJK+zzfFH9BQQFTp06lZ8+enH/++V7HqRB169alU6dOrF271usopbZ582ZWrVrF0KFD\nmTFjBhs2bGDmzJlexyqX7749rkGDBnTr1o3MzEyPE5VeMBgkGAwW/RXZvXt3tm/f7nGq8lmzZg1t\n2rShYcOGXkcps/Xr19OkSRNiYmKIiori/PPPZ8uWLZX2eb5Y6rHWMmfO
HJo3b85VV13ldZxy2b9/\nP4FAgLp163Ls2DHWrVvHNddc43WsUrvxxhu58cYbAdi4cSOLFy9m+PDhHqcqu/z8fKy11KlTh/z8\nfNatW0e/fv28jlVqDRs2JBgMkpWVRXx8POvXr6dFixZexyqX6r7MA6Hn8G/dupWjR49Ss2ZN1q9f\nT0JCQqV9ni+Kf/PmzSxbtoxWrVrx4IMPAnDDDTdw7rnnepys9Pbs2cOsWbNwXRdrLb/4xS8477zz\nvI4V8fbt28eUKVOA0HLJhRdeSJcuXTxOVTZpaWnMnDmTgoICmjRpUvSNedXRd7+EBw0a5HWUcklM\nTKR79+6MGDGCQCBA69atK/Uakv7lrohIhPHNGr+IiJSMil9EJMKo+EVEIoyKX0Qkwqj4RUQijIpf\nRCTCqPhFRCKMil9EJML8f40nwuxRX6K9AAAAAElFTkSuQmCC\n","text/plain":[""]},"metadata":{"tags":[]}}]},{"metadata":{"id":"JIESDzalShy0","colab_type":"text"},"cell_type":"markdown","source":["### From this plot - I am going to go with k=6 being the \"elbow\" of the data - it is doing the best in terms of clustering and not significantly better than k=8 clusters. "]},{"metadata":{"id":"P1w_i4UtShy1","colab_type":"code","colab":{},"outputId":"d2563879-6495-447f-ed27-c564745103f3"},"cell_type":"code","source":["km = KMeans(n_clusters=6)\n","%time km.fit(df2_sample_dummies_concat_cities2_train)\n","clusters6 = km.labels_.tolist()\n","silhouette_k6 = silhouette_score(df2_sample_dummies_concat_cities2_train[0:50000], clusters[0:50000])"],"execution_count":0,"outputs":[{"output_type":"stream","text":["CPU times: user 5min 8s, sys: 2min 39s, total: 7min 47s\n","Wall time: 8min 40s\n"],"name":"stdout"}]},{"metadata":{"id":"ha8OHiedShy3","colab_type":"code","colab":{}},"cell_type":"code","source":["#Let's assign these clusters back to the original df and take a look!\n","df2_sample_dummies_concat_cities2_train.loc[:, \"cluster_number\"] = clusters6"],"execution_count":0,"outputs":[]},{"metadata":{"id":"yrKfuwDdShy4","colab_type":"code","colab":{},"outputId":"fb528d0a-2217-445f-bed2-e85471322d70"},"cell_type":"code","source":["df2_sample_dummies_concat_cities2_train.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
group_id_6388group_id_6510group_id_8458group_id_8940group_id_12542group_id_12907group_id_14573group_id_15324group_id_17921group_id_18843...cities__Chicago Heightscities__Chicago Ridgecities__East Chicagocities__New Yorkcities__North Chicagocities__San Franciscocities__South San Franciscocities__West Chicagocities__West New Yorkcluster_number
57153120000000000...0000010001
493950000000000...0000010001
11774880000000000...0000000002
22434580000000000...0001000000
57382700000000000...0001000000
\n","

5 rows × 8197 columns

\n","
"],"text/plain":[" group_id_6388 group_id_6510 group_id_8458 group_id_8940 \\\n","5715312 0 0 0 0 \n","49395 0 0 0 0 \n","1177488 0 0 0 0 \n","2243458 0 0 0 0 \n","5738270 0 0 0 0 \n","\n"," group_id_12542 group_id_12907 group_id_14573 group_id_15324 \\\n","5715312 0 0 0 0 \n","49395 0 0 0 0 \n","1177488 0 0 0 0 \n","2243458 0 0 0 0 \n","5738270 0 0 0 0 \n","\n"," group_id_17921 group_id_18843 ... cities__Chicago Heights \\\n","5715312 0 0 ... 0 \n","49395 0 0 ... 0 \n","1177488 0 0 ... 0 \n","2243458 0 0 ... 0 \n","5738270 0 0 ... 0 \n","\n"," cities__Chicago Ridge cities__East Chicago cities__New York \\\n","5715312 0 0 0 \n","49395 0 0 0 \n","1177488 0 0 0 \n","2243458 0 0 1 \n","5738270 0 0 1 \n","\n"," cities__North Chicago cities__San Francisco \\\n","5715312 0 1 \n","49395 0 1 \n","1177488 0 0 \n","2243458 0 0 \n","5738270 0 0 \n","\n"," cities__South San Francisco cities__West Chicago \\\n","5715312 0 0 \n","49395 0 0 \n","1177488 0 0 \n","2243458 0 0 \n","5738270 0 0 \n","\n"," cities__West New York cluster_number \n","5715312 0 1 \n","49395 0 1 \n","1177488 0 2 \n","2243458 0 0 \n","5738270 0 0 \n","\n","[5 rows x 8197 columns]"]},"metadata":{"tags":[]},"execution_count":34}]},{"metadata":{"scrolled":true,"id":"1yuqRVmTShy5","colab_type":"code","colab":{},"outputId":"a627377c-7bb4-46fa-f641-3a66d521493b"},"cell_type":"code","source":["#it is important to investigate how many samples are in each of your clusters - we can seee here that the first 3 \n","#clusters have WAAAY more samples than the last 3! 
So, when we plot our visualizations, let's see what makes them\n","#so different!\n","df2_sample_dummies_concat_cities2_train[\"cluster_number\"].value_counts()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 27013\n","1 12500\n","2 10266\n","3 106\n","4 91\n","5 24\n","Name: cluster_number, dtype: int64"]},"metadata":{"tags":[]},"execution_count":36}]},{"metadata":{"id":"VXc_3zIcShy7","colab_type":"text"},"cell_type":"markdown","source":["## Model Output\n","Now that we've decided on k=6 clusters, let's assign the cluster labels back to the original data, and make it interpretable!"]},{"metadata":{"id":"fTgtn29HShy7","colab_type":"code","colab":{},"outputId":"b422a740-f293-447a-9337-a295ccb6c89f"},"cell_type":"code","source":["df2_sample.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
member_idbiocitycountryhometownjoinedlatlinklonmember_namestatemember_statusvisitedgroup_id
5715312234880949not_foundSan Franciscousnot_found2017-08-29 18:43:2637.78http://www.meetup.com/members/234880949-122.42Justine JenningsCAactive2017-09-05 17:34:494260482
493951831033not_foundSan Franciscousnot_found2011-04-29 05:36:1837.77http://www.meetup.com/members/1831033-122.40Ines SombraCAactive2015-11-29 04:23:381811614
117748815422371looking forward to playing more soccerChicagousChicago2013-02-01 05:41:4741.92http://www.meetup.com/members/15422371-87.70EnriqueILactive2017-09-06 23:02:43565564
2243458101653742not_foundNew Yorkusnot_found2013-07-10 18:53:4540.76http://www.meetup.com/members/101653742-73.99MNYactive2016-11-07 20:56:572662432
5738270235589417not_foundNew Yorkusnot_found2017-09-08 02:03:2240.80http://www.meetup.com/members/235589417-73.97Yuanyuan (Yoannie) LeiNYactive2017-09-08 02:03:228639012
\n","
"],"text/plain":[" member_id bio city \\\n","5715312 234880949 not_found San Francisco \n","49395 1831033 not_found San Francisco \n","1177488 15422371 looking forward to playing more soccer Chicago \n","2243458 101653742 not_found New York \n","5738270 235589417 not_found New York \n","\n"," country hometown joined lat \\\n","5715312 us not_found 2017-08-29 18:43:26 37.78 \n","49395 us not_found 2011-04-29 05:36:18 37.77 \n","1177488 us Chicago 2013-02-01 05:41:47 41.92 \n","2243458 us not_found 2013-07-10 18:53:45 40.76 \n","5738270 us not_found 2017-09-08 02:03:22 40.80 \n","\n"," link lon \\\n","5715312 http://www.meetup.com/members/234880949 -122.42 \n","49395 http://www.meetup.com/members/1831033 -122.40 \n","1177488 http://www.meetup.com/members/15422371 -87.70 \n","2243458 http://www.meetup.com/members/101653742 -73.99 \n","5738270 http://www.meetup.com/members/235589417 -73.97 \n","\n"," member_name state member_status visited \\\n","5715312 Justine Jennings CA active 2017-09-05 17:34:49 \n","49395 Ines Sombra CA active 2015-11-29 04:23:38 \n","1177488 Enrique IL active 2017-09-06 23:02:43 \n","2243458 M NY active 2016-11-07 20:56:57 \n","5738270 Yuanyuan (Yoannie) Lei NY active 2017-09-08 02:03:22 \n","\n"," group_id \n","5715312 4260482 \n","49395 1811614 \n","1177488 565564 \n","2243458 2662432 \n","5738270 8639012 "]},"metadata":{"tags":[]},"execution_count":39}]},{"metadata":{"id":"bBE0gbuaShy9","colab_type":"code","colab":{},"outputId":"c8109dff-2f35-4809-e804-a2e43bd9de63"},"cell_type":"code","source":["df2_sample_dummies_concat_cities2_train.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
group_id_6388group_id_6510group_id_8458group_id_8940group_id_12542group_id_12907group_id_14573group_id_15324group_id_17921group_id_18843...cities__Chicago Heightscities__Chicago Ridgecities__East Chicagocities__New Yorkcities__North Chicagocities__San Franciscocities__South San Franciscocities__West Chicagocities__West New Yorkcluster_number
57153120000000000...0000010001
493950000000000...0000010001
11774880000000000...0000000002
22434580000000000...0001000000
57382700000000000...0001000000
\n","

5 rows × 8197 columns

\n","
"],"text/plain":[" group_id_6388 group_id_6510 group_id_8458 group_id_8940 \\\n","5715312 0 0 0 0 \n","49395 0 0 0 0 \n","1177488 0 0 0 0 \n","2243458 0 0 0 0 \n","5738270 0 0 0 0 \n","\n"," group_id_12542 group_id_12907 group_id_14573 group_id_15324 \\\n","5715312 0 0 0 0 \n","49395 0 0 0 0 \n","1177488 0 0 0 0 \n","2243458 0 0 0 0 \n","5738270 0 0 0 0 \n","\n"," group_id_17921 group_id_18843 ... cities__Chicago Heights \\\n","5715312 0 0 ... 0 \n","49395 0 0 ... 0 \n","1177488 0 0 ... 0 \n","2243458 0 0 ... 0 \n","5738270 0 0 ... 0 \n","\n"," cities__Chicago Ridge cities__East Chicago cities__New York \\\n","5715312 0 0 0 \n","49395 0 0 0 \n","1177488 0 0 0 \n","2243458 0 0 1 \n","5738270 0 0 1 \n","\n"," cities__North Chicago cities__San Francisco \\\n","5715312 0 1 \n","49395 0 1 \n","1177488 0 0 \n","2243458 0 0 \n","5738270 0 0 \n","\n"," cities__South San Francisco cities__West Chicago \\\n","5715312 0 0 \n","49395 0 0 \n","1177488 0 0 \n","2243458 0 0 \n","5738270 0 0 \n","\n"," cities__West New York cluster_number \n","5715312 0 1 \n","49395 0 1 \n","1177488 0 2 \n","2243458 0 0 \n","5738270 0 0 \n","\n","[5 rows x 8197 columns]"]},"metadata":{"tags":[]},"execution_count":40}]},{"metadata":{"collapsed":true,"id":"Gf-V3eGcShy-","colab_type":"code","colab":{}},"cell_type":"code","source":["df2_sample.loc[:, \"cluster_number\"] = clusters6"],"execution_count":0,"outputs":[]},{"metadata":{"collapsed":true,"id":"9o2DAHqKShy_","colab_type":"code","colab":{}},"cell_type":"code","source":["df2_sample_merged = df2_sample.merge(df[['group_id', 'category.shortname']], on=['group_id'])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"_UNjXbGPShzA","colab_type":"text"},"cell_type":"markdown","source":["## Tadah! 
We have a merged dataframe of members, clustered by their city and the groups they're interested in, with each group's category joined in on group_id (from the original groups df), and we can now export this and explore!"]},{"metadata":{"scrolled":true,"id":"apCh9PpqShzA","colab_type":"code","colab":{},"outputId":"53379f29-62c4-4eee-de57-8c639be7d6ab"},"cell_type":"code","source":["df2_sample_merged.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
member_idbiocitycountryhometownjoinedlatlinklonmember_namestatemember_statusvisitedgroup_idcluster_numbercategory.shortname
0234880949not_foundSan Franciscousnot_found2017-08-29 18:43:2637.78http://www.meetup.com/members/234880949-122.42Justine JenningsCAactive2017-09-05 17:34:4942604821socializing
1204944223not_foundSan Franciscousnot_found2016-05-11 14:15:3637.76http://www.meetup.com/members/204944223-122.48Leslie WCAactive2017-05-05 23:19:3842604821socializing
2235052959not_foundSan Franciscousnot_found2017-08-28 03:03:1037.72http://www.meetup.com/members/235052959-122.44Lauren WatermanCAactive2017-08-28 03:03:1042604821socializing
3118747522Love walking!San Franciscousnot_found2014-06-05 01:12:0337.78http://www.meetup.com/members/118747522-122.42Hannah KCAactive2014-11-30 21:56:5042604821socializing
481700682not_foundSan Franciscousnot_found2015-05-23 21:43:2337.76http://www.meetup.com/members/81700682-122.44CharlotteCAactive2017-09-24 06:09:2542604821socializing
\n","
"],"text/plain":[" member_id bio city country hometown \\\n","0 234880949 not_found San Francisco us not_found \n","1 204944223 not_found San Francisco us not_found \n","2 235052959 not_found San Francisco us not_found \n","3 118747522 Love walking! San Francisco us not_found \n","4 81700682 not_found San Francisco us not_found \n","\n"," joined lat link \\\n","0 2017-08-29 18:43:26 37.78 http://www.meetup.com/members/234880949 \n","1 2016-05-11 14:15:36 37.76 http://www.meetup.com/members/204944223 \n","2 2017-08-28 03:03:10 37.72 http://www.meetup.com/members/235052959 \n","3 2014-06-05 01:12:03 37.78 http://www.meetup.com/members/118747522 \n","4 2015-05-23 21:43:23 37.76 http://www.meetup.com/members/81700682 \n","\n"," lon member_name state member_status visited \\\n","0 -122.42 Justine Jennings CA active 2017-09-05 17:34:49 \n","1 -122.48 Leslie W CA active 2017-05-05 23:19:38 \n","2 -122.44 Lauren Waterman CA active 2017-08-28 03:03:10 \n","3 -122.42 Hannah K CA active 2014-11-30 21:56:50 \n","4 -122.44 Charlotte CA active 2017-09-24 06:09:25 \n","\n"," group_id cluster_number category.shortname \n","0 4260482 1 socializing \n","1 4260482 1 socializing \n","2 4260482 1 socializing \n","3 4260482 1 socializing \n","4 4260482 1 socializing "]},"metadata":{"tags":[]},"execution_count":43}]},{"metadata":{"id":"TOHE9tZtShzB","colab_type":"code","colab":{}},"cell_type":"code","source":["# This is the final file you will be using for this assignment to explore :) \n","df2_sample_merged.to_csv('members_cluster_group.csv')"],"execution_count":0,"outputs":[]}]}