diff --git a/week 3/.ipynb_checkpoints/Week 3 - Pandas-checkpoint.ipynb b/week 3/.ipynb_checkpoints/Week 3 - Pandas-checkpoint.ipynb index 2fd6442..23ea750 100644 --- a/week 3/.ipynb_checkpoints/Week 3 - Pandas-checkpoint.ipynb +++ b/week 3/.ipynb_checkpoints/Week 3 - Pandas-checkpoint.ipynb @@ -1,6 +1,2410 @@ { - "cells": [], - "metadata": {}, + "cells": [ + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Week 3 - Pandas\n", + "Pandas, a library written by Wes McKinney, is a great tool for data manipulation and analysis. It provides two classes:\n", + "* a Series object, which handles a single column of data;\n", + "* a DataFrame object, which handles multiple columns (like an Excel spreadsheet).\n", + "\n", + "You can build your own DataFrames or read in from other sources like CSVs or JSON. Pandas handles missing data beautifully; lets you sort, operate on and merge datasets; provides plotting capabilities; and handles time series data (among other advantages)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.1 Creating Series and DataFrames" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Cardiff\n", + "1 Swansea\n", + "2 Abergavenny\n", + "3 Machynlleth\n", + "dtype: object" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a series by passing a list\n", + "\n", + "towns = pd.Series(['Cardiff', 'Swansea', 'Abergavenny','Machynlleth'])\n", + "towns" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "towns = ['Cardiff', 'Swansea', 'Abergavenny','Machynlleth']\n", + "populations = [335145, 230300, 12515, 2235]\n", + "number_of_pubs = [2100, 1680, 198, 48]" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "# Create an empty DataFrame, and add new columns to it\n", + "\n", + "towns_df = pd.DataFrame()\n", + "towns_df['name'] = towns\n", + "towns_df['population'] = populations" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name object\n", + "population int64\n", + "dtype: object" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The columns have different dtypes\n", + "\n", + "towns_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['name', 'population'], dtype='object')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can access the column names list as so:\n", + "towns_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ 
+ "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationn_pubs
0Cardiff3351452100
1Swansea2303001680
2Abergavenny12515198
3Machynlleth223548
\n", + "
" + ], + "text/plain": [ + " name population n_pubs\n", + "0 Cardiff 335145 2100\n", + "1 Swansea 230300 1680\n", + "2 Abergavenny 12515 198\n", + "3 Machynlleth 2235 48" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a DataFrame using dictionaries to specify column name and data\n", + "\n", + "towns_df = pd.DataFrame({'name': towns,\n", + " 'population': populations,\n", + " 'n_pubs': number_of_pubs})\n", + "\n", + "towns_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.2 View and select data" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationn_pubs
0Cardiff3351452100
1Swansea2303001680
\n", + "
" + ], + "text/plain": [ + " name population n_pubs\n", + "0 Cardiff 335145 2100\n", + "1 Swansea 230300 1680" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# the .head() method shows the top rows\n", + "\n", + "towns_df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 3)" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check how many rows and columns\n", + "towns_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 335145\n", + "1 230300\n", + "2 12515\n", + "3 2235\n", + "Name: population, dtype: int64" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Inspect only one series using square bracket notation\n", + "\n", + "towns_df['population']" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 335145\n", + "1 230300\n", + "2 12515\n", + "3 2235\n", + "Name: population, dtype: int64" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Or dot notation WARNING: not available in all circumstances, e.g. when defining a new column\n", + "\n", + "towns_df.population" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationn_pubs
0Cardiff3351452100
1Swansea2303001680
2Abergavenny12515198
\n", + "
" + ], + "text/plain": [ + " name population n_pubs\n", + "0 Cardiff 335145 2100\n", + "1 Swansea 230300 1680\n", + "2 Abergavenny 12515 198" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Standard Python indexing works in the row direction\n", + "\n", + "towns_df[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2100\n", + "1 1680\n", + "2 198\n", + "3 48\n", + "Name: n_pubs, dtype: int64" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# typically, column indexing should come first\n", + "towns_df['n_pubs']" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2100\n", + "1 1680\n", + "2 198\n", + "Name: n_pubs, dtype: int64" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "towns_df['n_pubs'][:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.3 Select and manipulate data" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationn_pubs
2Abergavenny12515198
\n", + "
" + ], + "text/plain": [ + " name population n_pubs\n", + "2 Abergavenny 12515 198" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Use Boolean indexing to inspect values based on a condition\n", + "\n", + "towns_df.loc[towns_df.name == 'Abergavenny']" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a new column with math outputs\n", + "\n", + "towns_df['pubs_per_capita'] = towns_df.n_pubs / towns_df.population\n", + "towns_df['people_per_pub'] = towns_df.population / towns_df.n_pubs" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationn_pubspubs_per_capitapeople_per_pub
1Swansea23030016800.007295137.083333
2Abergavenny125151980.01582163.207071
3Machynlleth2235480.02147746.562500
\n", + "
" + ], + "text/plain": [ + " name population n_pubs pubs_per_capita people_per_pub\n", + "1 Swansea 230300 1680 0.007295 137.083333\n", + "2 Abergavenny 12515 198 0.015821 63.207071\n", + "3 Machynlleth 2235 48 0.021477 46.562500" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Use a single column's value to select data\n", + "\n", + "towns_df.loc[towns_df.people_per_pub < 150]" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "# Use the .sort_values() method\n", + "# Helpful parameter: set inplace = True if you want to modify your df\n", + "\n", + "towns_df.sort_values(by = 'people_per_pub', inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAFPCAYAAABNkrmkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XuYFOWd9vHvLaAgCKKoK4IOUYwCKiIHE4znGNEYjKsREkWUFU9kV2Ny6eY1r0ZNXteY7KoxsmoUPOHZDZ6i0VXxjagMLIwgMaKijhJEjAdEFOS3f9Qz2Iw9TM9MD0WP9+e6+pruqqeqf90z03fXU1VPKSIwMzPbKO8CzMxsw+BAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhm1gySviHpxRLa/VTSdeujJms5B4IVJWkfSU9Lel/Su5L+LGlI3nWtT5ImSbq4kTYhaaf1VVNTSNpfUm1rrDsinoqIr5bQ7pcR8U+pnqr0frVvjZqs5fyLsS+Q1BW4HzgNuAPYGPgG8EmedTWVpPYRsSrvOswqRkT45ttaN2Aw8N465m8EnAe8BrwN3Ah0S/OqgABOBN4A/g6cCgwBaoD3gN/WW99JwPzU9mFgh3U895j0vEuBnwELgYPTvAuAu4CbgQ+Af0q1ngu8nJa5A9iiYH13An8D3gemAf3T9PHASuBTYBlwX5FapqXX+lFqc2yafjKwAHgXmAr0TNN/DlyZ7ndIy12aHncCVgDdC97DE4DXgXeA/1PwvEOB6vQaFwO/KVJbZ+BjYHWqbRnQE9gE+A/grXT7D2CTIstvkn5XAwqmbZXWuTWwP1BbMO8c4E3gQ+BF4KCC38nN6f7r6XXV1fO1vP/Wfav3e8+7AN82vBvQNX14TgZGAN3rzT8pfeB9BegC3APclObVfZhNBDoCh6QPuv9KHyTbkYXIfqn9kWldu5JtsZ4HPN1AXf3SB8k+ZFstl6UP7cJAWJ
nWuVH6kD0TeAbolT7k/hOYUu+1bFbwQTm7YN4k4OJG3qsAdip4fGD6AB+U1nklMK1g3vPp/tfJQurZgnlz6r2H16bXsAfZ1tmuaf504Ph0vwuwdwO1rfWhnaZdmN6Prck+4J8GLmpg+euBXxQ8PgP4Y/11A18lC/+eBfXvWPA7ubne62qf99+4bw38PeddgG8b5i19QE8CaoFVZN90t0nzHgNOL2j71fRB3L7gn367gvlLSd+e0+O7gTPT/YeAcQXzNgKWU2QrAfi/9T7MNyX7Bl8YCNPqLTOf9G01Pd62rtYi69881V63tTOJpgfC70nf+tPjLun5qvh8K2BLsq2Wn6b3twvZ1sMVaZm697BXwXqeA0al+9NS+x6N1FYsEF4GDit4/C1gYQPLHwy8UvD4z8CY+usGdiIL+YOBDvXW4UCooJt3KltRETE/IsZGRC9gAFl3w3+k2T3Jum3qvEYWBtsUTFtccP/jIo+7pPs7AJdLek/Se2TdLCLbkqivJ9k30boal5OFTaE36j3eAbi3YP3zgc+AbSS1k3SJpJclfUDW/QTQo8hzl2qt9yYilqUat4uIj8m6evYD9gWeJPuGPjxNe7Leuv5WcH85n79n44Cdgb9ImiHp282tL93v2UDb/wY6SRomaQdgIHBv/UYRsYBsS+wC4G1Jt0lqaJ22AXMgWKMi4i9k35YHpElvkX3Q1tmebCtiMU33BnBKRGxecOsUEU8XabuIrOsHAEmdyL5tr1VukfWPqLf+jhHxJvB9YCTZN9tuZN9gIQukYusqxVrvjaTOqcY306QnybqH9gRmpMffItsvMK2UJ4iIlyJiNFm3z78Bd6Xn+ULTxuoj+9291cDzrCbb5zKa7L26PyI+bKDtrRGxT1p3pLpKqcc2IA4E+wJJu0g6W1Kv9Lg32YfCM6nJFOAsSX0kdQF+CdwezTuiZyLwr5L6p+fqJumYBtreBRwh6euSNibrNlEDbQvX/4v0DRdJW0kameZtRtY3v5Ss++mX9ZZdTLafZF3qt7kVOFHSQEmbpHU+GxEL0/wnyXaMvxARnwJPkO38fjUiljTyXKTXcJykrdIH9ntp8mcN1LalpG4F06YA56X3oQdZN9zN63i6W4FjgR+k+8Xq+aqkA9PrXUG2BVisniVkO7kbe08tJw4EK+ZDYBjwrKSPyIJgLnB2mn89cBPZN9pXyT4EfticJ4qIe8m+Td6Wum3mku3ILtZ2Xnqe28i2Fj4k67te1+Gwl5Pt/3hE0ofptQxL824k6zJ5E3iBzwOvzu+Bfqm76b8aWP8FwOTU5nsR8RjZ0U93pxp3BEYVtH+abF9C3dbAC2TvX0lbB8mhwDxJy9LrGxURK+o3Slt2U4BXUn09gYvJuq1qgOeBWWlaURHxLNnRUD3J9vcUswlwCdnO9L+Rbbn8tMi6lgO/AP6c6tm7tJdr64sivBVnlSltnbwH9I2IV/Oux6zSeQvBKoqkIyRtmvrMLyP7lrsw36rM2gYHglWakXx+UlVfsu4Sb+aalYG7jMzMDPAWgpmZJQ4EMzMDKmy00x49ekRVVVXeZZiZVZSZM2e+ExFbNdauogKhqqqK6urqvMswM6sokl5rvJW7jMzMLHEgmJkZ4EAwM7OkovYhmFn5rFy5ktraWlas+MIwSFahOnbsSK9evejQoUOzlncgmH1J1dbWstlmm1FVVYXU2KCxtqGLCJYuXUptbS19+vRp1jrcZWT2JbVixQq23HJLh0EbIYktt9yyRVt8DgSzLzGHQdvS0t+nA8HMzADvQ6Dq3AfyLqEkCy85PO8SrI0r9/9Ca/zNXnDBBXTp0oUf//jHZV/3hmDixIlsuummjBkzhkmTJnHIIYfQs+f6uzz1lz4QzMxKtWrVKtq3b72PzVNPPXXN/UmTJjFgwID1GgjuMjKz3CxcuJBddtmFE044gd13352jjz6a5cuXU1VVxTvvvANAdXU1+++//5pl5syZw4EHHkjfvn259tprAVi0aB
H77rsvAwcOZMCAATz11FMNPmeXLl04++yzGTRoEAcddBBLlmSXsn755Zc59NBD2WuvvfjGN77BX/7yFwDGjh3Lj370Iw444ADOOeecoutctmwZJ554Irvtthu77747d999NwCnnXYagwcPpn///px//vlr2ldVVXHOOecwdOhQhg4dyoIFC4BsC+iyyy7jrrvuorq6mh/84AcMHDiQjz/+mAsvvJAhQ4YwYMAAxo8fT2tcusCBYGa5evHFFxk/fjw1NTV07dqV3/3ud+tsX1NTwwMPPMD06dO58MILeeutt7j11lv51re+xezZs5kzZw4DBw5scPmPPvqIQYMGMWvWLPbbbz9+/vOfAzB+/HiuvPJKZs6cyWWXXcbpp5++Zpm//vWvPProo/z6178uus6LLrqIbt268fzzz1NTU8OBBx4IwC9+8Quqq6upqanhySefpKamZs0yXbt25bnnnmPChAmceeaZa63v6KOPZvDgwdxyyy3Mnj2bTp06MWHCBGbMmMHcuXP5+OOPuf/++9f9xjaDu4zMLFe9e/dm+PDhABx33HFcccUV62w/cuRIOnXqRKdOnTjggAN47rnnGDJkCCeddBIrV67kyCOPXGcgbLTRRhx77LFrnu+oo45i2bJlPP300xxzzDFr2n3yySdr7h9zzDG0a9euwXU++uij3HbbbWsed+/eHYA77riDa665hlWrVrFo0SJeeOEFdt99dwBGjx695udZZ521ztcM8Pjjj3PppZeyfPly3n33Xfr3788RRxzR6HJN4UAws1zVP1RSEu3bt2f16tUAXziuvlj7fffdl2nTpvHAAw9w/PHH85Of/IQxY8aU/PyrV69m8803Z/bs2UXbdO7ceZ3riIgv1PXqq69y2WWXMWPGDLp3787YsWPXei2F7Rs7XHTFihWcfvrpVFdX07t3by644IJWOcPcXUZmlqvXX3+d6dOnAzBlyhT22WcfqqqqmDlzJsCa/vg6f/jDH1ixYgVLly7liSeeYMiQIbz22mtsvfXWnHzyyYwbN45Zs2Y1+HyrV6/mrrvuAuDWW29ln332oWvXrvTp04c777wTyD7g58yZU/JrOOSQQ/jtb3+75vHf//53PvjgAzp37ky3bt1YvHgxDz300FrL3H777Wt+fu1rX/vCOjfbbDM+/PBD4PNQ7NGjB8uWLVtTf7l5C8HMgPwObd51112ZPHkyp5xyCn379uW0005j6NChjBs3jl/+8pcMGzZsrfZDhw7l8MMP5/XXX+dnP/sZPXv2ZPLkyfzqV7+iQ4cOdOnShRtvvLHB5+vcuTPz5s1jr732olu3bms+mG+55RZOO+00Lr74YlauXMmoUaPYY489SnoN5513HmeccQYDBgygXbt2nH/++Rx11FHsueee9O/fn6985StrusXqfPLJJwwbNozVq1czZcqUL6xz7NixnHrqqXTq1Inp06dz8skns9tuu1FVVcWQIUNKqqup1Bp7qlvL4MGDo9wXyPF5CPZlNX/+fHbddddca1i4cCHf/va3mTt37np7zi5durBs2bL19nzF1F3sq0ePHmVfd7Hfq6SZETG4sWXdZWRmZoC7jMwsR1VVVa22dTBs2LC1jhQCuOmmm1q0dXDDDTdw+eWXrzVt+PDhXHXVVU1az8KFC5tdQ2sqKRAkHQpcDrQDrouIS+rNV5p/GLAcGBsRsyT1Bm4E/gFYDVwTEZenZbYAbgeqgIXA9yLi72V4TWZmPPvss2Vf54knnsiJJ55Y9vVuKBrtMpLUDrgKGAH0A0ZL6lev2Qigb7qNB65O01cBZ0fErsDewBkFy54LPBYRfYHH0mMzW48qaR+iNa6lv89S9iEMBRZExCsR8SlwGzCyXpuRwI2ReQbYXNK2EbEoImalQj8E5gPbFSwzOd2fDBzZoldiZk3SsWNHli5d6lBoI+oukNOxY8dmr6OULqPtgDcKHtcCw0posx2wqG6CpCpgT6BuO26biFgEEBGLJG1d7MkljS
fb6mD77bcvoVwzK0WvXr2ora1dM5aPVb66S2g2VymBUOwUuvpfKdbZRlIX4G7gzIj4oPTyICKuAa6B7LDTpixrZg3r0KFDsy+1aG1TKV1GtUDvgse9gLdKbSOpA1kY3BIR9xS0WSxp29RmW+DtppVuZmblVEogzAD6SuojaWNgFDC1XpupwBhl9gbeT91AAn4PzI+I3xRZ5oR0/wTgD81+FWZm1mKNdhlFxCpJE4CHyQ47vT4i5kk6Nc2fCDxIdsjpArLDTuuOyxoOHA88L6lu1KifRsSDwCXAHZLGAa8Dnw8zaGZm611J5yGkD/AH602bWHA/gDOKLPf/Kb5/gYhYChzUlGLNzKz1eOgKMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAAeCmZklDgQzMwMcCGZmljgQzMwMcCCYmVniQDAzM8CBYGZmiQPBzMwAB4KZmSUOBDMzAxwIZmaWOBDMzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAAeCmZklDgQzMwMcCGZmljgQzMwMgPZ5F2BtS9W5D+RdQqMWXnJ43iWYbZC8hWBmZoADwczMEgeCmZkBJQaCpEMlvShpgaRzi8yXpCvS/BpJgwrmXS/pbUlz6y1zgaQ3Jc1Ot8Na/nLMzKy5Gg0ESe2Aq4ARQD9gtKR+9ZqNAPqm23jg6oJ5k4BDG1j9v0fEwHR7sIm1m5lZGZWyhTAUWBARr0TEp8BtwMh6bUYCN0bmGWBzSdsCRMQ04N1yFm1mZuVXSiBsB7xR8Lg2TWtqm2ImpC6m6yV1L6G9mZm1klICQUWmRTPa1Hc1sCMwEFgE/Lrok0vjJVVLql6yZEljtZqZWTOVEgi1QO+Cx72At5rRZi0RsTgiPouI1cC1ZF1TxdpdExGDI2LwVlttVUK5ZmbWHKUEwgygr6Q+kjYGRgFT67WZCoxJRxvtDbwfEYvWtdK6fQzJd4G5DbU1M7PW1+jQFRGxStIE4GGgHXB9RMyTdGqaPxF4EDgMWAAsB06sW17SFGB/oIekWuD8iPg9cKmkgWRdSwuBU8r4uszMrIlKGssoHRL6YL1pEwvuB3BGA8uObmD68aWXaWZmrc1nKpuZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRngQDAzs8SBYGZmgAPBzMwSB4KZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBpQYCJIOlfSipAWSzi0yX5KuSPNrJA0qmHe9pLclza23zBaS/iTppfSze8tfjpmZNVejgSCpHXAVMALoB4yW1K9esxFA33QbD1xdMG8ScGiRVZ8LPBYRfYHH0mMzM8tJKVsIQ4EFEfFKRHwK3AaMrNdmJHBjZJ4BNpe0LUBETAPeLbLekcDkdH8ycGRzXoCZmZVHKYGwHfBGwePaNK2pberbJiIWAaSfW5dQi5mZtZJSAkFFpkUz2jSLpPGSqiVVL1mypByrNDOzIkoJhFqgd8HjXsBbzWhT3+K6bqX08+1ijSLimogYHBGDt9pqqxLKNTOz5iglEGYAfSX1kbQxMAqYWq/NVGBMOtpob+D9uu6gdZgKnJDunwD8oQl1m5lZmTUaCBGxCpgAPAzMB+6IiHmSTpV0amr2IPAKsAC4Fji9bnlJU4DpwFcl1Uoal2ZdAnxT0kvAN9NjMzPLSftSGkXEg2Qf+oXTJhbcD+CMBpYd3cD0pcBBJVdqZmatymcqm5kZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRngQD
Azs8SBYGZmgAPBzMwSB4KZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAdA+7wLMrLiqcx/Iu4SSLLzk8LxLsDLxFoKZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAT5T2cy+JHzmd+O8hWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRlQYiBIOlTSi5IWSDq3yHxJuiLNr5E0qLFlJV0g6U1Js9PtsPK8JDMza45GA0FSO+AqYATQDxgtqV+9ZiOAvuk2Hri6xGX/PSIGptuDLX0xZmbWfKVsIQwFFkTEKxHxKXAbMLJem5HAjZF5Bthc0rYlLmtmZhuAUgJhO+CNgse1aVopbRpbdkLqYrpeUveSqzYzs7IrJRBUZFqU2GZdy14N7AgMBBYBvy765NJ4SdWSqpcsWVJCuWZm1hylBEIt0LvgcS/grRLbNLhsRCyOiM8iYjVwLVn30hdExDURMTgiBm+11VYllGtmZs1RSiDMAPpK6iNpY2AUMLVem6nAmHS00d7A+xGxaF3Lpn0Mdb4LzG3hazEzsxZodPjriFglaQLwMNAOuD4i5kk6Nc2fCDwIHAYsAJYDJ65r2bTqSyUNJOtCWgicUs4XZmZmTVPS9RDSIaEP1ps2seB+AGeUumyafnyTKjUzs1blM5XNzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAAeCmZklDgQzMwMcCGZmljgQzMwMcCCYmVniQDAzM8CBYGZmiQPBzMwAB4KZmSUOBDMzAxwIZmaWOBDMzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAAeCmZklDgQzMwMcCGZmlpQUCJIOlfSipAWSzi0yX5KuSPNrJA1qbFlJW0j6k6SX0s/u5XlJZmbWHI0GgqR2wFXACKAfMFpSv3rNRgB90208cHUJy54LPBYRfYHH0mMzM8tJKVsIQ4EFEfFKRHwK3AaMrNdmJHBjZJ4BNpe0bSPLjgQmp/uTgSNb+FrMzKwF2pfQZjvgjYLHtcCwEtps18iy20TEIoCIWCRp62JPLmk82VYHwDJJL5ZQc956AO+Uc4X6t3KureKU9f30e+m/zTKqlPdzh1IalRIIKjItSmxTyrLrFBHXANc0ZZm8SaqOiMF519FW+P0sH7+X5dXW3s9Suoxqgd4Fj3sBb5XYZl3LLk7dSqSfb5detpmZlVspgTAD6Cupj6SNgVHA1HptpgJj0tFGewPvp+6gdS07FTgh3T8B+EMLX4uZmbVAo11GEbFK0gTgYaAdcH1EzJN0apo/EXgQOAxYACwHTlzXsmnVlwB3SBoHvA4cU9ZXlq+K6uKqAH4/y8fvZXm1qfdTEU3q0jczszbKZyqbmRngQDAzs8SBYGZmgAPBNkCStsi7BrNCkm5KP/8l71pak3cql1Eau2kbCo7eiojX86uoMkl6CZgN3AA8FP4jbTFJhwP9gY510yLiwvwqqiySXiAbk20qsD/1TrqNiHdzKKvsSjlT2Uog6YfA+cBiYHWaHMDuuRVVuXYGDgZOAq6UdDswKSL+mm9ZlUnSRGBT4ADgOuBo4Llci6o8E4E/Al8BZrJ2IESaXvG8hVAmkhYAwyJiad61tCWSDgBuBjoDc4BzI2J6vlVVFkk1EbF7wc8uwD0RcUjetVUKSX0i4lVJV0fEaXnX01q8D6F83gDez7uItkDSlpL+RVI18GPgh2SDiJ0N3JprcZXp4/RzuaSewEqgT471VKK70s+dc62ilbnLqIUk/SjdfQV4QtIDwCd18yPiN7kUVtmmAzcBR0ZEbcH06tT9YU1zv6TNgV8Bs8i6OK7Lt6SKs5Gk84GdC/7n12gr/+cOhJbbLP
18Pd02Tjdo4siutsZXG9qRHBFf7sGWmyEiLkp375Z0P9AxIrw12zSjyK7Z0p7P/+fbHO9DKBNJx0TEnY1Ns8ZJ2pmsq6iKtY/YOjCvmiqZpE3Jutu2j4iTJfUlC937cy6t4kgaEREP5V1Ha3EglImkWRExqLFp1jhJc8iO6pgJfFY3PSJm5lZUBUtHac0ExkTEAEmdgOkRMTDn0iqGpOMi4mZJZ1Nky99dRgZk3xjIRnrdTtIVBbO6AqvyqarirYqIq/Muog3ZMSKOlTQaICI+llTs4lXWsM7pZ5dcq2hlDoSWewuoBr5D9i2szofAWblUVPnuk3Q6cC9r76BvEyf/5ODTtFUQAJJ2pOB9tcZFxH+mnz/Pu5bW5C6jMpHUgSxgt4+ISrju8wZL0qtFJkdEtImTf9Y3Sd8EzgP6AY8Aw4GxEfFEnnVVknpb/18QEf+8vmppTQ6EMpF0BHAZsHFE9JE0ELgwIr6Tc2lmSNoS2JvsDNtnIqKsF4Zv6yTVXd1xOFmw3p4eHwPMjIg20RvgQCgTSTOBA4EnImLPNK0mIjx0RTNI+jpfPMroxtwKqmCShgOzI+IjSccBg4DLI+K1nEurOJIeBw6JiJXpcQfgkYg4IN/KysNnKpfPKh/bXR5pZMnLgH2AIek2ONeiKtvVZGcp7wH8BHgNcLg2T0/WPg+hS5rWJnincvnMlfR9oF06zvufgadzrqlSDQb6eZTTslkVESFpJHBFRPy+oAvEmuYS4H/SlgLAfsAF+ZVTXt5CKJ8fkg0v/AkwBfgAODPXiirXXOAf8i6iDflQ0r8CxwEPpGHaO+RcU8VJh+o+CgwjOwLuXuBrETE518LKyPsQbIOTvn0NJBuiufCwU++gbwZJ/wB8H5gREU9J2h7Y3/tkmk7SzIjYK+86WosDoYUk3cc6xizyh1jTSdqv2PSIeHJ912JWSNJVZNfmmJF3La3BgdBCDX141fGHWPNI2gHoGxGPprF42kXEh3nXVYkkHQX8G7A12WGnIjuvo2uuhVWgdOW0ncl2zH/E5+9lmzia0IFgGxxJJwPjgS0iYse0k35iRByUc2kVKV286YiImJ93LZUufVH5grZyCK+PMmohSc+z7i6jNvHNYT07AxgKPAsQES9J2jrfkiraYodBedR98Ke/x46NNK84DoSW+3beBbRBn0TEp3Xjr0lqj68t0RLVacTT/2LtnfT35FdSZZL0HeDXZOcevA3sAMwnO8Kw4jkQWqitbCpuYJ6U9FOgUxqH53TgvpxrqmRdgeVA4TWUA3AgNN1FZEOAPBoRe6Zrfo/Ouaay8T6EMvGOu/KRtBEwjuwDTMDDwHU+Uc3yJqk6Igana3bsGRGrJT0XEUPzrq0cvIVQPpfiHXflMhK4MSKuzbuQtkBSR7KA7U9Bv3dEnJRbUZXrPUldgGnALZLepg1d98RnKpePd9yVz3eAv0q6SdLhaR+CNd9NZGd+fwt4EuhFdr0OK5GkndIggSPJut/OAv4ILCUbpaBNcJdRmUi6nOyfzjvuyiCNIjkCOJZskLs/RcQ/5VtVZZL0P6m/uyYidk/v7cO+RnXpJN0P/DQiaupNHwycHxFH5FNZefmbV/l4x10ZRcRKSQ+RvYedyL6ZORCaZ2X6+Z6kAcDfyIYWt9JV1Q8DgIiollS1/stpHQ6E8jnbl3gsD0mHAqOAA4AngOuA7+VZU4W7RlJ34GfAVLIhm3+Wb0kVZ13nHHRab1W0MncZlYmkl4DZwA3AQz4ipvkk3QbcRvY++tq/ljtJU4D/rn+gg6RxZBfMOTafysrLgVAmaWjcg4GTyM6yvZ1sEKy/5lqYfelJehl4BngKmBYRL+RcUsWRtA3ZcNefAjPT5MHAxsB3I+JvedVWTg6EVpBOVrkZ6AzMAc6NiOn5VlU5JO0NXAnsSvYP1w74yOd0NI+kTcjG8P8G2TWBdwHmRMR3cy2sAqX/7QHp4byI+O886yk370Mok3QR8+OA44HFZIeiTSUb1/
9OoE9+1VWc35LtQ7iT7FvYGGCnXCuqbJ+R7Vj+DFhN9vf5dq4VVaiIeBx4vNGGFcqBUD7TyY73PjIiagumV0uamFNNFSsiFkhqFxGfATdI8uVIm+8D4HngN8AeZPkRAAAGBUlEQVS1EbE053psA+UuozKRJO9ILg9J08j2x1xHdojkImBsROyRa2EVKl1LeR+yfVufkl3re1pEPJZrYbbBcSCUiaSdgR+THd+9ZsvLJ/80XRpz/m2y6/6eBXQDfhcRC3ItrMJJ2oXsZL8zga0jos0cLmnl4UAokzTY1USyIxA+q5seETMbXMhsPZB0N9m+rAWkI42A5yJiRa6F2QbH+xDKZ1VEXJ13EW1BAxcdeh+oBi52H3hpJA0B3gAuAWaRHfTwj2Qj8s4FHAi2Fm8htJCkLdLdfybr5riXtccy8tnLTSTpUrKtrFvTpFFkw2C/D+zTVsaNaW2SZgEHR8S7kvYlO9nvh2RbC7tGxNG5FmgbHAdCC0l6lezbrIrMjoj4ynouqeJJ+nNEDC82TdLzEbFbXrVVEklz6nbES7oKWBIRF6THsyNiYJ712YbHXUYtFBE+v6D8ukgaFhHPAkgaSjb+DrShsefXg3aS2kfEKuAgYHzBPP/v2xf4j6JMJJ0B3BIR76XH3YHREfG7fCurSOPIzj2oC4EPgXGSOgP/L7+yKs4UssuRvgN8TLZDGUk7kXW/ma3FXUZlUmwTvG4c+rxqqkTp8plHR8QdkrqR/Y2+l3ddlSoNA7It8EhEfJSm7Qx0iYhZuRZnGxwHQplIqgH2qDs5TVI7oCYi+udbWeWRNC0i9s27DrMvG19Cs3weBu6QdJCkA8k21/+Yc02V6k+Sfiypt6Qt6m55F2XW1nkLoUxSV8cpZDvvBDwCXJfG4rEmSEdu1ecjtsxamQPBzMwAdxmVjaS+ku6S9IKkV+pueddViSRtKuk8Sdekx30lfTvvuszaOgdC+dwAXE12nPwBwI1kw2Fb091ANirn19PjWuDi/Mox+3JwIJRPpzScsCLitXRGqEc6bZ4dI+JSsou6EBEfU/xMcDMrI5+YVj4r0o7llyRNAN4kG0TMmu5TSZ1IA9xJ2pGC8aHMrHV4p3KZpJEl5wObAxeRjeF/aUQ8k2thFUjSN4HzgH5kR2sNJ7tAzhN51mXW1jkQbIOUrlG9N1lX0TMR8U7OJZm1ee4yaiFJU9c1PyK+s75qaWP2I7vsY5BdOe3efMsxa/u8hdBCkpaQXYRkCvAs9XZ+RsSTedRVyST9DtiJ7D0FOBZ4OSLOyK8qs7bPgdBCacyibwKjgd2BB4ApETEv18IqmKR5wICCcaE2Ap73uFBmrcuHnbZQRHwWEX+MiBPI+rwXAE9I+mHOpVWyF4HtCx73BmpyqsXsS8P7EMpA0ibA4WRbCVXAFcA9edZUiSTdR7bPoBswX9JzadYQYHpuhZl9SbjLqIUkTQYGAA8Bt0XE3JxLqliS9is2mWzn8mh3GZm1LgdCC0laDXyUHha+mSIbobPr+q+q8kkaCHwf+B7wKnBPRFyZb1VmbZu7jFooIrwfpkzSlbxGkXW9LQVuJ/vSckCuhZl9SXgLwTYYaWvrKWBcRCxI017xdRDM1g9/u7UNyT8CfwMel3StpLqLDZnZeuAtBNvgSOoMHEnWdXQgMBm4NyIeybUwszbOgWAbtHQt5WOAYyPCw4mbtSIHgpmZAd6HYGZmiQPBzMwAB4KZmSUOBDMzAxwIZl8gqUrS/HQuxDxJj0jqJOlkSTMkzZF0t6RNU/tJkq6W9LikVyTtJ+n6tI5JBes9RNJ0SbMk3SmpS24v0qwIB4JZcX2Bq9KAeu+RnTR3T0QMiYg9yK6fPa6gfXeycybOAu4D/h3oD+wmaaCkHmTXiT44IgYB1cCP1turMSuBxzIyK+7ViJid7s8kG9Z8gKSLgc2BLsDDBe3vi4iQ9DywOCKehzUX+6kCegH9gD9LAtgYD+ltGxgHgllxnxTc/wzoBEwCjo
yIOZLGAvsXab+63rKryf7PPgP+FBGjW6lesxZzl5FZ6TYDFknqAPygics+AwyXtBOApE3T6K5mGwwHglnpfgY8C/wJ+EtTFoyIJcBYYIqkGrKA2KXcBZq1hIeuMDMzwFsIZmaWOBDMzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMD4H8Be5sHHvGFObgAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Plot charts using the .plot() method \n", + "\n", + "towns_df.plot(x = 'name', y = 'pubs_per_capita', kind = 'bar', title = 'Some great towns to visit')" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\charl\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " after removing the cwd from sys.path.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationn_pubspubs_per_capitapeople_per_pubto_visit
3Machynlleth2235480.02147746.562500yes!
2Abergavenny125151980.01582163.207071no
1Swansea23030016800.007295137.083333no
0Cardiff33514521000.006266159.592857no
\n", + "
" + ], + "text/plain": [ + " name population n_pubs pubs_per_capita people_per_pub to_visit\n", + "3 Machynlleth 2235 48 0.021477 46.562500 yes!\n", + "2 Abergavenny 12515 198 0.015821 63.207071 no\n", + "1 Swansea 230300 1680 0.007295 137.083333 no\n", + "0 Cardiff 335145 2100 0.006266 159.592857 no" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Modify values\n", + "\n", + "towns_df['to_visit'] = 'no'\n", + "towns_df.to_visit[towns_df.people_per_pub <50] = 'yes!'\n", + "towns_df" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2100\n", + "Name: n_pubs, dtype: int64" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# More on .loc and .iloc\n", + "# .loc is a very flexible indexer. You can pass it pairs of (row, col) indexers to get a specific value:\n", + "towns_df.loc[towns_df['name'] == 'Cardiff', 'n_pubs']" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationn_pubspubs_per_capitapeople_per_pubto_visit
3Machynlleth2235480.02147746.562500yes!
2Abergavenny125151980.01582163.207071no
1Swansea23030016800.007295137.083333no
0Cardiff33514500.006266159.592857no
\n", + "
" + ], + "text/plain": [ + " name population n_pubs pubs_per_capita people_per_pub to_visit\n", + "3 Machynlleth 2235 48 0.021477 46.562500 yes!\n", + "2 Abergavenny 12515 198 0.015821 63.207071 no\n", + "1 Swansea 230300 1680 0.007295 137.083333 no\n", + "0 Cardiff 335145 0 0.006266 159.592857 no" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can also use this to set values:\n", + "towns_df.loc[towns_df['name'] == 'Cardiff', 'n_pubs'] = 0\n", + "towns_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "# .iloc selects rows by integer position (0 is the first row), not by the index labels shown in the leftmost column.\n", + "# It only works with integer indexers, unlike .loc" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name Cardiff\n", + "population 335145\n", + "n_pubs 0\n", + "pubs_per_capita 0.00626594\n", + "people_per_pub 159.593\n", + "to_visit no\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "towns_df.iloc[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "# you can change this index column by setting a new one:" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationn_pubspubs_per_capitapeople_per_pubto_visit
name
Machynlleth2235480.02147746.562500yes!
Abergavenny125151980.01582163.207071no
Swansea23030016800.007295137.083333no
Cardiff33514500.006266159.592857no
\n", + "
" + ], + "text/plain": [ + " population n_pubs pubs_per_capita people_per_pub to_visit\n", + "name \n", + "Machynlleth 2235 48 0.021477 46.562500 yes!\n", + "Abergavenny 12515 198 0.015821 63.207071 no\n", + "Swansea 230300 1680 0.007295 137.083333 no\n", + "Cardiff 335145 0 0.006266 159.592857 no" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "towns_df = towns_df.set_index('name')\n", + "towns_df" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Cannot index by location index with a non-integer key", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtowns_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Cardiff'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1498\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1499\u001b[0m \u001b[0mmaybe_callable\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1500\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmaybe_callable\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1501\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1502\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m 2224\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2225\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2226\u001b[1;33m raise TypeError(\"Cannot index by location index with a \"\n\u001b[0m\u001b[0;32m 2227\u001b[0m \"non-integer key\")\n\u001b[0;32m 2228\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mTypeError\u001b[0m: Cannot index by location index with a non-integer key" + ] + } + ], + "source": [ + "# this will fail as Cardiff is not an indexer\n", + "towns_df.iloc['Cardiff']" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "335145" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# reformatting the towns_df DataFrame to make 'name' the index allows me to make calls like this using loc:\n", + "towns_df.loc['Cardiff','population']" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "# indexes can be reset at any time:\n", + "towns_df = 
towns_df.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "# You can also select multiple columns at a time:" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulation
0Machynlleth2235
1Abergavenny12515
2Swansea230300
3Cardiff335145
\n", + "
" + ], + "text/plain": [ + " name population\n", + "0 Machynlleth 2235\n", + "1 Abergavenny 12515\n", + "2 Swansea 230300\n", + "3 Cardiff 335145" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "towns_df[['name','population']]" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=4, step=1)" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can see the index by calling it directly:\n", + "towns_df.index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.2 Pandas II" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [], + "source": [ + "# You can join pandas dataframes together in many ways" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [], + "source": [ + "df_A = pd.DataFrame({\n", + " 'name':towns,\n", + " 'population':populations\n", + "})\n", + "\n", + "df_B = pd.DataFrame({\n", + " 'name':towns,\n", + " 'pubs':number_of_pubs\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationnamepubs
0Cardiff335145Cardiff2100
1Swansea230300Swansea1680
2Abergavenny12515Abergavenny198
3Machynlleth2235Machynlleth48
\n", + "
" + ], + "text/plain": [ + " name population name pubs\n", + "0 Cardiff 335145 Cardiff 2100\n", + "1 Swansea 230300 Swansea 1680\n", + "2 Abergavenny 12515 Abergavenny 198\n", + "3 Machynlleth 2235 Machynlleth 48" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df_A, df_B], axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\charl\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", + "of pandas will change to not sort by default.\n", + "\n", + "To accept the future behavior, pass 'sort=False'.\n", + "\n", + "To retain the current behavior and silence the warning, pass 'sort=True'.\n", + "\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationpubs
0Cardiff335145.0NaN
1Swansea230300.0NaN
2Abergavenny12515.0NaN
3Machynlleth2235.0NaN
0CardiffNaN2100.0
1SwanseaNaN1680.0
2AbergavennyNaN198.0
3MachynllethNaN48.0
\n", + "
" + ], + "text/plain": [ + " name population pubs\n", + "0 Cardiff 335145.0 NaN\n", + "1 Swansea 230300.0 NaN\n", + "2 Abergavenny 12515.0 NaN\n", + "3 Machynlleth 2235.0 NaN\n", + "0 Cardiff NaN 2100.0\n", + "1 Swansea NaN 1680.0\n", + "2 Abergavenny NaN 198.0\n", + "3 Machynlleth NaN 48.0" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df_A, df_B], axis = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [], + "source": [ + "# These don't look correct! (N.B. - but they would be if each DF contained the same columns). \n", + "# The way to get around this is to assign a common index that both frames share" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [], + "source": [ + "df_A = df_A.set_index('name')" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [], + "source": [ + "df_B = df_B.set_index('name')" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
population
name
Cardiff335145
Swansea230300
Abergavenny12515
Machynlleth2235
\n", + "
" + ], + "text/plain": [ + " population\n", + "name \n", + "Cardiff 335145\n", + "Swansea 230300\n", + "Abergavenny 12515\n", + "Machynlleth 2235" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_A" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pubs
name
Cardiff2100
Swansea1680
Abergavenny198
Machynlleth48
\n", + "
" + ], + "text/plain": [ + " pubs\n", + "name \n", + "Cardiff 2100\n", + "Swansea 1680\n", + "Abergavenny 198\n", + "Machynlleth 48" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_B" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [], + "source": [ + "df_A['pubs'] = df_B['pubs']" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationpubs
name
Cardiff3351452100
Swansea2303001680
Abergavenny12515198
Machynlleth223548
\n", + "
" + ], + "text/plain": [ + " population pubs\n", + "name \n", + "Cardiff 335145 2100\n", + "Swansea 230300 1680\n", + "Abergavenny 12515 198\n", + "Machynlleth 2235 48" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_A" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationpubs
03351452100
12303001680
212515198
3223548
\n", + "
" + ], + "text/plain": [ + " population pubs\n", + "0 335145 2100\n", + "1 230300 1680\n", + "2 12515 198\n", + "3 2235 48" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You can also combine frames with '.merge', a method of DataFrames. N.B. by default it joins on the columns the two frames share (here 'pubs'), not on the index, so the 'name' index is dropped in the result below; pass left_index=True, right_index=True to merge on a shared index instead\n", + "df_A.merge(df_B, how = 'inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [], + "source": [ + "# What if these dataFrames aren't the same size?" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [], + "source": [ + "df_C = pd.DataFrame({\n", + " 'name':towns,\n", + " 'population':populations\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [], + "source": [ + "df_D = pd.DataFrame({'name':'Winchester','population':40005}, index = [4])" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulation
4Winchester40005
\n", + "
" + ], + "text/plain": [ + " name population\n", + "4 Winchester 40005" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_D" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [], + "source": [ + "df_C = df_C.append(df_D)" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulation
0Cardiff335145
1Swansea230300
2Abergavenny12515
3Machynlleth2235
4Winchester40005
\n", + "
" + ], + "text/plain": [ + " name population\n", + "0 Cardiff 335145\n", + "1 Swansea 230300\n", + "2 Abergavenny 12515\n", + "3 Machynlleth 2235\n", + "4 Winchester 40005" + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [], + "source": [ + "df_C = df_C.set_index('name')" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
population
name
Cardiff335145
Swansea230300
Abergavenny12515
Machynlleth2235
Winchester40005
\n", + "
" + ], + "text/plain": [ + " population\n", + "name \n", + "Cardiff 335145\n", + "Swansea 230300\n", + "Abergavenny 12515\n", + "Machynlleth 2235\n", + "Winchester 40005" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pubs
name
Cardiff2100
Swansea1680
Abergavenny198
Machynlleth48
\n", + "
" + ], + "text/plain": [ + " pubs\n", + "name \n", + "Cardiff 2100\n", + "Swansea 1680\n", + "Abergavenny 198\n", + "Machynlleth 48" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_B" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "df_C['pubs'] = df_B['pubs']" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationpubs
name
Cardiff3351452100.0
Swansea2303001680.0
Abergavenny12515198.0
Machynlleth223548.0
Winchester40005NaN
\n", + "
" + ], + "text/plain": [ + " population pubs\n", + "name \n", + "Cardiff 335145 2100.0\n", + "Swansea 230300 1680.0\n", + "Abergavenny 12515 198.0\n", + "Machynlleth 2235 48.0\n", + "Winchester 40005 NaN" + ] + }, + "execution_count": 176, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nan" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C['pubs'].loc['Winchester']" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.float64" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df_C['pubs'].loc['Winchester'])" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [], + "source": [ + "df_C['pubs'] = df_C['pubs'].fillna(850)" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationpubs
name
Cardiff3351452100.0
Swansea2303001680.0
Abergavenny12515198.0
Machynlleth223548.0
Winchester40005850.0
\n", + "
" + ], + "text/plain": [ + " population pubs\n", + "name \n", + "Cardiff 335145 2100.0\n", + "Swansea 230300 1680.0\n", + "Abergavenny 12515 198.0\n", + "Machynlleth 2235 48.0\n", + "Winchester 40005 850.0" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.1 Read data from files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a dataframe using read_csv()\n", + "# Here, we would use os.path.join() to build the file path if the CSV were stored in another directory\n", + "df = pd.read_csv('pluto_18v2_1.csv')\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = df[::20]\n", + "df.shape\n", + "df.to_csv('pluto_shortened.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('pluto_shortened.csv')\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.2 Save data back to files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(os.path.join( [ your file location here!! 
]) )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, "nbformat": 4, "nbformat_minor": 2 } diff --git a/week 3/Week 3 - Pandas.ipynb b/week 3/Week 3 - Pandas.ipynb index 8b9e3a0..23ea750 100644 --- a/week 3/Week 3 - Pandas.ipynb +++ b/week 3/Week 3 - Pandas.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 54, "metadata": {}, "outputs": [ { @@ -44,7 +44,7 @@ "dtype: object" ] }, - "execution_count": 5, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 57, "metadata": {}, "outputs": [ { @@ -93,7 +93,7 @@ "dtype: object" ] }, - "execution_count": 128, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -106,7 +106,28 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['name', 'population'], dtype='object')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can access the column names list as so:\n", + "towns_df.columns" + ] + }, + { + "cell_type": "code", + 
"execution_count": 59, "metadata": {}, "outputs": [ { @@ -172,7 +193,7 @@ "3 Machynlleth 2235 48" ] }, - "execution_count": 129, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } @@ -196,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -248,7 +269,7 @@ "1 Swansea 230300 1680" ] }, - "execution_count": 130, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -261,16 +282,16 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(4, 5)" + "(4, 3)" ] }, - "execution_count": 154, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -282,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 62, "metadata": {}, "outputs": [ { @@ -295,7 +316,7 @@ "Name: population, dtype: int64" ] }, - "execution_count": 131, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -308,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 63, "metadata": {}, "outputs": [ { @@ -321,52 +342,20 @@ "Name: population, dtype: int64" ] }, - "execution_count": 132, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Or dot notation\n", + "# Or dot notation WARNING: not available in all circumstances, e.g. 
when defining a new column\n", "\n", "towns_df.population" ] }, { "cell_type": "code", - "execution_count": 133, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2100\n", - "1 1680\n", - "2 198\n", - "Name: n_pubs, dtype: int64" - ] - }, - "execution_count": 133, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Standard Python indexing works\n", - "\n", - "towns_df.n_pubs[:3]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1.3 Select and manipulate data" - ] - }, - { - "cell_type": "code", - "execution_count": 134, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -397,6 +386,18 @@ " \n", " \n", " \n", + " 0\n", + " Cardiff\n", + " 335145\n", + " 2100\n", + " \n", + " \n", + " 1\n", + " Swansea\n", + " 230300\n", + " 1680\n", + " \n", + " \n", " 2\n", " Abergavenny\n", " 12515\n", @@ -408,23 +409,80 @@ ], "text/plain": [ " name population n_pubs\n", + "0 Cardiff 335145 2100\n", + "1 Swansea 230300 1680\n", "2 Abergavenny 12515 198" ] }, - "execution_count": 134, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Use Boolean indexing to inspect values based on a condition\n", + "# Standard Python indexing works in the row direction\n", "\n", - "towns_df[towns_df.name == 'Abergavenny']" + "towns_df[:3]" ] }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2100\n", + "1 1680\n", + "2 198\n", + "3 48\n", + "Name: n_pubs, dtype: int64" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# typically, column indexing should come first\n", + "towns_df['n_pubs']" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2100\n", + "1 1680\n", + "2 198\n", + "Name: n_pubs, dtype: int64" + ] + }, + 
"execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "towns_df['n_pubs'][:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.3 Select and manipulate data" + ] + }, + { + "cell_type": "code", + "execution_count": 67, "metadata": {}, "outputs": [ { @@ -451,72 +509,50 @@ " name\n", " population\n", " n_pubs\n", - " pubs_per_capita\n", - " people_per_pub\n", " \n", " \n", " \n", " \n", - " 0\n", - " Cardiff\n", - " 335145\n", - " 2100\n", - " 0.006266\n", - " 159.592857\n", - " \n", - " \n", - " 1\n", - " Swansea\n", - " 230300\n", - " 1680\n", - " 0.007295\n", - " 137.083333\n", - " \n", - " \n", " 2\n", " Abergavenny\n", " 12515\n", " 198\n", - " 0.015821\n", - " 63.207071\n", - " \n", - " \n", - " 3\n", - " Machynlleth\n", - " 2235\n", - " 48\n", - " 0.021477\n", - " 46.562500\n", " \n", " \n", "\n", "" ], "text/plain": [ - " name population n_pubs pubs_per_capita people_per_pub\n", - "0 Cardiff 335145 2100 0.006266 159.592857\n", - "1 Swansea 230300 1680 0.007295 137.083333\n", - "2 Abergavenny 12515 198 0.015821 63.207071\n", - "3 Machynlleth 2235 48 0.021477 46.562500" + " name population n_pubs\n", + "2 Abergavenny 12515 198" ] }, - "execution_count": 135, + "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "# Use Boolean indexing to inspect values based on a condition\n", + "\n", + "towns_df.loc[towns_df.name == 'Abergavenny']" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], "source": [ "# Create a new column with math outputs\n", "\n", "towns_df['pubs_per_capita'] = towns_df.n_pubs / towns_df.population\n", - "towns_df['people_per_pub'] = towns_df.population / towns_df.n_pubs\n", - "\n", - "towns_df" + "towns_df['people_per_pub'] = towns_df.population / towns_df.n_pubs" ] }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -583,7 
+619,7 @@ "3 Machynlleth 2235 48 0.021477 46.562500" ] }, - "execution_count": 136, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } @@ -591,12 +627,12 @@ "source": [ "# Use a single column's value to select data\n", "\n", - "towns_df[towns_df.people_per_pub < 150]" + "towns_df.loc[towns_df.people_per_pub < 150]" ] }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 70, "metadata": {}, "outputs": [], "source": [ @@ -608,22 +644,22 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 151, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAFPCAYAAABNkrmkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XmcFPWd//HXWw5BEDSg2SDoEMGooKJymOCtQV01uK6uGI0XK/HKrkbz0OSn0Xjkp8Zs1tv1BFFBxbjBKxrXAzciMhhA8UhQUScYD7xARUE++0d9hzRDD9Mz0zM1Pbyfj0c/prvqW9Wf7pnpd9e3qr6liMDMzGydvAswM7O2wYFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMysCSTtIumVEtr9TNKNrVGTNZ8DwYqStLOkpyV9LOkDSX+UNCzvulqTpPGSLmygTUga0Fo1NYak3SXVtMS6I+KpiPhWCe1+GRH/muqpSu9Xx5aoyZrPvxhbjaQewP3AicBdQGdgF+CLPOtqLEkdI2J53nWYVYyI8M23VW7AUOCjNcxfBzgbeAN4F7gV6JnmVQEBHAu8BXwInAAMA+YCHwFX1VnfccBLqe3DwGZreO6j0vMuAs4BFgB7p3nnAVOA24BPgH9NtZ4FvJqWuQv4WsH67gb+BnwMTAMGpenjgGXAl8AS4L4itUxLr/XT1OawNP14YD7wATAV6JOm/wK4Mt3vlJa7ND3uCiwFNix4D48G3gTeB/5fwfMOB6rTa3wH+I8itXUDPgdWpNqWAH2AdYH/BBam238C6xZZft30uxpcMG2jtM6Ngd2BmoJ5ZwJ/BRYDrwB7FfxObkv330yvq7aeb+f9t+5bnd973gX41vZuQI/04TkB2A/YsM7849IH3jeB7sBvgYlpXu2H2XVAF2BU+qD77/RBsglZiOyW2h+U1rUV2Rbr2cDT9dS1dfog2Zlsq+Wy9KFdGAjL0jrXSR+ypwLPAH3Th9x/AZPqvJb1Cz4oZxfMGw9c2MB7FcCAgsd7pg/wHdI6rwSmFcx7Pt3/DllIzSiYN6fOe3hDeg3bkW2dbZXmTwd+kO53B3aqp7ZVPrTTtPPT+7Ex2Qf808AF9Sx/M3BRweOTgd/XXTfwLbLw71NQ/+YFv5Pb6ryujnn/jftWz99z3gX41jZv6QN6PFADLCf7pvv1NO9/gJMK2n4rfRB3LPin36Rg/iLS
t+f0+B7g1HT/IWBswbx1gM8ospUA/LzOh/l6ZN/gCwNhWp1lXiJ9W02Pv1Fba5H1b5Bqr93aGU/jA+Em0rf+9Lh7er4q/r4V0Itsq+Vn6f3tTrb1cEVapvY97FuwnmeBMen+tNS+dwO1FQuEV4F/LHi8D7CgnuX3Bl4rePxH4Ki66wYGkIX83kCnOutwIFTQzTuVraiIeCkijomIvsBgsu6G/0yz+5B129R6gywMvl4w7Z2C+58Xedw93d8MuFzSR5I+IutmEdmWRF19yL6J1tb4GVnYFHqrzuPNgHsL1v8S8BXwdUkdJF0s6VVJn5B1PwH0LvLcpVrlvYmIJanGTSLic7Kunt2AXYEnyb6hj0zTnqyzrr8V3P+Mv79nY4EtgJclzZR0QFPrS/f71NP2MaCrpBGSNgOGAPfWbRQR88m2xM4D3pU0WVJ967Q2zIFgDYqIl8m+LQ9OkxaSfdDW2pRsK+IdGu8t4IcRsUHBrWtEPF2k7dtkXT8ASOpK9m17lXKLrH+/OuvvEhF/Bb4PjCb7ZtuT7BssZIFUbF2lWOW9kdQt1fjXNOlJsu6h7YGZ6fE+ZPsFppXyBBHxl4g4nKzb5xJgSnqe1Zo2VB/Z725hPc+zgmyfy+Fk79X9EbG4nrZ3RMTOad2R6iqlHmtDHAi2GklbSjpdUt/0uB/Zh8Izqckk4DRJ/SV1B34J3BlNO6LnOuCnkgal5+op6dB62k4BDpT0HUmdybpNVE/bwvVflL7hImkjSaPTvPXJ+uYXkXU//bLOsu+Q7SdZk7pt7gCOlTRE0rppnTMiYkGa/yTZjvEXI+JL4Amynd+vR8R7DTwX6TUcKWmj9IH9UZr8VT219ZLUs2DaJODs9D70JuuGu20NT3cHcBhwRLpfrJ5vSdozvd6lZFuAxep5j2wnd0PvqeXEgWDFLAZGADMkfUoWBC8Ap6f5NwMTyb7Rvk72IfCjpjxRRNxL9m1ycuq2eYFsR3axtvPS80wm21pYTNZ3vabDYS8n2//xiKTF6bWMSPNuJesy+SvwIn8PvFo3AVun7qb/rmf95wETUpt/iYj/ITv66Z5U4+bAmIL2T5PtS6jdGniR7P0raesg2ReYJ2lJen1jImJp3UZpy24S8Fqqrw9wIVm31VzgeeC5NK2oiJhBdjRUH7L9PcWsC1xMtjP9b2RbLj8rsq7PgIuAP6Z6dirt5VprUYS34qwypa2Tj4CBEfF63vWYVTpvIVhFkXSgpPVSn/llZN9yF+RblVn74ECwSjOav59UNZCsu8SbuWZl4C4jMzMDvIVgZmaJA8HMzIAKG+20d+/eUVVVlXcZZmYVZdasWe9HxEYNtauoQKiqqqK6ujrvMszMKoqkNxpu5S4jMzNLHAhmZgY4EMzMLKmofQhmVj7Lli2jpqaGpUtXGwbJKlSXLl3o27cvnTp1atLyDgSztVRNTQ3rr78+VVVVSA0NGmttXUSwaNEiampq6N+/f5PW4S4js7XU0qVL6dWrl8OgnZBEr169mrXF50AwW4s5DNqX5v4+HQhmZgZ4HwJVZz2QdwkNWnDx/nmXYGuBcv8vtMTf7XnnnUf37t0544wzyr7utuC6665jvfXW46ijjmL8+PGMGjWKPn1a7/LUa30gmJmVavny5XTs2HIfmyeccMLK++PHj2fw4MGtGgjuMjKz3CxYsIAtt9ySo48+mm233ZZDDjmEzz77jKqqKt5//30Aqqur2X333VcuM2fOHPbcc08GDhzIDTfcAMDbb7/NrrvuypAhQxg8eDBPPfVUvc/ZvXt3Tj/9dHbYYQf22msv3nsvu5T1q6++yr777suOO+7ILrvswssvvwzAMcccw49//GP22GMPzjzzzKLrXLJkCcceeyzbbLMN2267Lffccw8AJ554IkOHDmXQoEGce+65K9tXVVVx5plnMnz4cIYPH878+fOBbAvosssuY8qUKVRXV3PEEUcwZMgQPv/8c84//3yGDRvG4MGDGTduHC1x6QIH
gpnl6pVXXmHcuHHMnTuXHj16cM0116yx/dy5c3nggQeYPn06559/PgsXLuSOO+5gn332Yfbs2cyZM4chQ4bUu/ynn37KDjvswHPPPcduu+3GL37xCwDGjRvHlVdeyaxZs7jssss46aSTVi7z5z//mUcffZRf//rXRdd5wQUX0LNnT55//nnmzp3LnnvuCcBFF11EdXU1c+fO5cknn2Tu3Lkrl+nRowfPPvssp5xyCqeeeuoq6zvkkEMYOnQot99+O7Nnz6Zr166ccsopzJw5kxdeeIHPP/+c+++/f81vbBO4y8jMctWvXz9GjhwJwJFHHskVV1yxxvajR4+ma9eudO3alT322INnn32WYcOGcdxxx7Fs2TIOOuigNQbCOuusw2GHHbby+Q4++GCWLFnC008/zaGHHrqy3RdffLHy/qGHHkqHDh3qXeejjz7K5MmTVz7ecMMNAbjrrru4/vrrWb58OW+//TYvvvgi2267LQCHH374yp+nnXbaGl8zwOOPP86ll17KZ599xgcffMCgQYM48MADG1yuMRwIZparuodKSqJjx46sWLECYLXj6ou133XXXZk2bRoPPPAAP/jBD/jJT37CUUcdVfLzr1ixgg022IDZs2cXbdOtW7c1riMiVqvr9ddf57LLLmPmzJlsuOGGHHPMMau8lsL2DR0uunTpUk466SSqq6vp168f5513XoucYe4uIzPL1Ztvvsn06dMBmDRpEjvvvDNVVVXMmjULYGV/fK3f/e53LF26lEWLFvHEE08wbNgw3njjDTbeeGOOP/54xo4dy3PPPVfv861YsYIpU6YAcMcdd7DzzjvTo0cP+vfvz9133w1kH/Bz5swp+TWMGjWKq666auXjDz/8kE8++YRu3brRs2dP3nnnHR566KFVlrnzzjtX/vz2t7+92jrXX399Fi9eDPw9FHv37s2SJUtW1l9u3kIwMyC/w5u32morJkyYwA9/+EMGDhzIiSeeyPDhwxk7diy//OUvGTFixCrthw8fzv7778+bb77JOeecQ58+fZgwYQK/+tWv6NSpE927d+fWW2+t9/m6devGvHnz2HHHHenZs+fKD+bbb7+dE088kQsvvJBly5YxZswYtttuu5Jew9lnn83JJ5/M4MGD6dChA+eeey4HH3ww22+/PYMGDeKb3/zmym6xWl988QUjRoxgxYoVTJo0abV1HnPMMZxwwgl07dqV6dOnc/zxx7PNNttQVVXFsGHDSqqrsdQSe6pbytChQ6PcF8jxeQi2tnrppZfYaqutcq1hwYIFHHDAAbzwwgut9pzdu3dnyZIlrfZ8xdRe7Kt3795lX3ex36ukWRExtKFl3WVkZmaAu4zMLEdVVVUttnUwYsSIVY4UApg4cWKztg5uueUWLr/88lWmjRw5kquvvrpR61mwYEGTa2hJJQWCpH2By4EOwI0RcXGd+esCtwI7AouAwyJigaTvAhcDnYEvgZ9ExGNpmR2B8UBX4EHg36OS+q/MrE2bMWNG2dd57LHHcuyxx5Z9vW1Fg11GkjoAVwP7AVsDh0vauk6zscCHETEA+A1wSZr+PnBgRGwDHA1MLFjmWmAcMDDd9m3G6zCzJvB3sPalub/PUvYhDAfmR8RrEfElMBkYXafNaGBCuj8F2EuSIuJPEbEwTZ8HdJG0rqRvAD0iYnraKrgVOKhZr8TMGqVLly4sWrTIodBO1F4gp0uXLk1eRyldRpsAbxU8rgFG1NcmIpZL+hjoRbaFUOufgT9FxBeSNknrKVznJsWeXNI4si0JNt100xLKNbNS9O3bl5qampVj+Vjlq72EZlOVEgjFTqGr+5VijW0kDSLrRhrViHVmEyOuB66H7LDThoo1s9J06tSpyZdatPaplC6jGqBfweO+wML62kjqCPQEPkiP+wL3AkdFxKsF7QtjrNg6zcysFZUSCDOBgZL6S+oMjAGm1mkzlWynMcAhwGMREZI2AB4AfhoRf6xtHBFvA4sl7aRsEI+jgN81
87WYmVkzNBgIEbEcOAV4GHgJuCsi5kk6X9L3UrObgF6S5gM/Bs5K008BBgDnSJqdbhuneScCNwLzgVeBVQf6MDOzVlXSeQgR8SDZuQKF035ecH8pcGiR5S4ELqxnndXA4MYUa2ZmLcdDV5iZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRngQDAzs8SBYGZmgAPBzMwSB4KZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZgB0zLsAaz+qznog7xJKsuDi/fMuwaxN8haCmZkBDgQzM0scCGZmBpQYCJL2lfSKpPmSzioyf11Jd6b5MyRVpem9JD0uaYmkq+os80Ra5+x027gcL8jMzJqmwZ3KkjoAVwPfBWqAmZKmRsSLBc3GAh9GxABJY4BLgMOApcA5wOB0q+uIiKhu5mswM7MyKGULYTgwPyJei4gvgcnA6DptRgMT0v0pwF6SFBGfRsT/kgWDmZm1YaUEwibAWwWPa9K0om0iYjnwMdCrhHXfkrqLzpGkEtqbmVkLKSUQin1QRxPa1HVERGwD7JJuPyj65NI4SdWSqt97770GizUzs6YpJRBqgH4Fj/sCC+trI6kj0BP4YE0rjYi/pp+LgTvIuqaKtbs+IoZGxNCNNtqohHLNzKwpSgmEmcBASf0ldQbGAFPrtJkKHJ3uHwI8FhH1biFI6iipd7rfCTgAeKGxxZuZWfk0eJRRRCyXdArwMNABuDki5kk6H6iOiKnATcBESfPJtgzG1C4vaQHQA+gs6SBgFPAG8HAKgw7Ao8ANZX1lZmbWKCWNZRQRDwIP1pn284L7S4FD61m2qp7V7lhaiWZm1hp8prKZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRngQDAzs8SBYGZmgAPBzMwSB4KZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZkCJgSBpX0mvSJov6awi89eVdGeaP0NSVZreS9LjkpZIuqrOMjtKej4tc4UkleMFmZlZ0zQYCJI6AFcD+wFbA4dL2rpOs7HAhxExAPgNcEmavhQ4BzijyKqvBcYBA9Nt36a8ADMzK49SthCGA/Mj4rWI+BKYDIyu02Y0MCHdnwLsJUkR8WlE/C9ZMKwk6RtAj4iYHhEB3Aoc1JwXYmZmzVNKIGwCvFXwuCZNK9omIpYDHwO9GlhnTQPrNDOzVlRKIBTr248mtGlSe0njJFVLqn7vvffWsEozM2uOUgKhBuhX8LgvsLC+NpI6Aj2BDxpYZ98G1glARFwfEUMjYuhGG21UQrlmZtYUpQTCTGCgpP6SOgNjgKl12kwFjk73DwEeS/sGioqIt4HFknZKRxcdBfyu0dWbmVnZdGyoQUQsl3QK8DDQAbg5IuZJOh+ojoipwE3AREnzybYMxtQuL2kB0APoLOkgYFREvAicCIwHugIPpZuZmeWkwUAAiIgHgQfrTPt5wf2lwKH1LFtVz/RqYHCphZqZWcvymcpmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRngQDAzs8SBYGZmgAPBzMwSB4KZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmYAdMy7ADMrruqsB/IuoSQLLt4/7xKsTLyFYGZmgAPBzMwSB4KZmQEOBDMzSxwIZmYGOBDMzCxx
IJiZGeBAMDOzxIFgZmaAz1Q2s7WEz/xumLcQzMwMcCCYmVniQDAzM8CBYGZmiQPBzMwAB4KZmSUOBDMzA0oMBEn7SnpF0nxJZxWZv66kO9P8GZKqCub9NE1/RdI+BdMXSHpe0mxJ1eV4MWZm1nQNnpgmqQNwNfBdoAaYKWlqRLxY0Gws8GFEDJA0BrgEOEzS1sAYYBDQB3hU0hYR8VVabo+IeL+Mr8fMzJqolC2E4cD8iHgtIr4EJgOj67QZDUxI96cAe0lSmj45Ir6IiNeB+Wl9ZmbWxpQSCJsAbxU8rknTiraJiOXAx0CvBpYN4BFJsySNa3zpZmZWTqWMZaQi06LENmtadmRELJS0MfAHSS9HxLTVnjwLi3EAm266aQnlmplZU5SyhVAD9Ct43BdYWF8bSR2BnsAHa1o2Imp/vgvcSz1dSRFxfUQMjYihG220UQnlmplZU5QSCDOBgZL6S+pMtpN4ap02U4Gj0/1DgMciItL0MekopP7AQOBZSd0krQ8gqRswCnih+S/HzMyaqsEuo4hYLukU4GGgA3BzRMyTdD5QHRFTgZuAiZLmk20ZjEnLzpN0F/AisBw4OSK+kvR14N5svzMdgTsi4vct8PrMzKxEJV0PISIeBB6sM+3nBfeXAofWs+xFwEV1pr0GbNfYYs3MrOX4TGUzMwMcCGZmljgQzMwMcCCYmVniQDAzM8CBYGZmiQPBzMwAB4KZmSUOBDMzAxwIZmaWOBDMzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAAeCmZklDgQzMwMcCGZmljgQzMwMcCCYmVniQDAzM8CBYGZmiQPBzMwAB4KZmSUOBDMzAxwIZmaWOBDMzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJSUFgqR9Jb0iab6ks4rMX1fSnWn+DElVBfN+mqa/ImmfUtdpZmatq8FAkNQBuBrYD9gaOFzS1nWajQU+jIgBwG+AS9KyWwNjgEHAvsA1kjqUuE4zM2tFpWwhDAfmR8RrEfElMBkYXafNaGBCuj8F2EuS0vTJEfFFRLwOzE/rK2WdZmbWijqW0GYT4K2CxzXAiPraRMRySR8DvdL0Z+osu0m639A6AZA0DhiXHi6R9EoJNeetN/B+uVamS8q1popU1vcS/H7i97OcKuX93KyURqUEgopMixLb1De92JZJ3XVmEyOuB65fU4FtjaTqiBiadx3tgd/L8vL7WV7t7f0spcuoBuhX8LgvsLC+NpI6Aj2BD9awbCnrNDOzVlRKIMwEBkrqL6kz2U7iqXXaTAWOTvcPAR6LiEjTx6SjkPoDA4FnS1ynmZm1oga7jNI+gVOAh4EOwM0RMU/S+UB1REwFbgImSppPtmUwJi07T9JdwIvAcuDkiPgKoNg6y//yclNRXVxtnN/L8vL7WV7t6v1U9kXezMzWdj5T2czMAAeCmZklDgQzMwMcCNbGSPpa3jWY1SVpYvr573nX0pK8U7mM0hhNX6fg6K2IeDO/iiqPpL8As4FbgIfCf6BlIWl/sjHFutROi4jz86uoskh6kWzstanA7tQ56TYiPsihrLIr5UxlK4GkHwHnAu8AK9LkALbNrajKtAWwN3AccKWkO4HxEfHnfMuqXJKuA9YD9gBuJDtX6Nlci6o81wG/B74JzGLVQIg0veJ5C6FM0jkYIyJiUd61tBeS9gBuA7oBc4CzImJ6vlVVHklzI2Lbgp/dgd9GxKi8a6sUkvpHxOuSro2IE/Oup6V4H0L5vAV8nHcRlU5SL0n/LqkaOAP4EdkAYqcDd+RaXOX6PP38TFIfYBnQP8d6KtGU9HOLXKtoYe4yaiZJP053XwOekPQA8EXt/Ij4j1wKq1zTgYnAQRFRUzC9OnV9WOPdL2kD4FfAc2RdHDfmW1LFWUfSucAWBf/zK7WX/3MHQvOtn36+mW6d0w3qGcHV1uhb9e1I
joi1e6DlJoqIC9LdeyTdD3SJCG/NNs4Y4CCyz8z1G2hbsbwPoUwkHRoRdzc0zdZM0hZkXUVVrHq01p551VTpJK1H1uW2aUQcL2kgWfDen3NpFUfSfhHxUN51tBQHQplIei4idmhomq2ZpDlkR3TMAr6qnR4Rs3IrqsKlI7VmAUdFxGBJXYHpETEk59IqhqQjI+I2SadTZMvfXUYGZN8YgH8ENpF0RcGsHmQjvFrjLI+Ia/Muop3ZPCIOk3Q4QER8ni5xa6Xrln52z7WKFuZAaL6FQDXwPbJvYbUWA6flUlFlu0/SScC9rLpzvl2c+JOTL9NWQQBI2pyC99YaFhH/lX7+Iu9aWpK7jMpEUieygN00Iirhus9tkqTXi0yOiGgXJ/7kQdJ3gbOBrYFHgJHAMRHxRJ51VZI6W/+riYh/a61aWpIDoUwkHQhcBnSOiP6ShgDnR8T3ci7NDEm9gJ3IzrB9JiLKemH49k5S7RUhR5IF653p8aHArIhoF70BDoQykTQL2BN4IiK2T9PmRoSHrmgkSd9h9aOMbs2toAonaSQwOyI+lXQksANweUS8kXNpFUfS48CoiFiWHncCHomIPfKtrDx8pnL5LPex3c2XRpW8DNgZGJZuQ3MtqvJdS3aW8nbAT4A3AAds0/Rh1fMQuqdp7YJ3KpfPC5K+D3RIx3n/G/B0zjVVoqHA1h7ltKyWR0RIGg1cERE3FXSBWONcDPwpbSkA7Aacl1855eUthPL5Ednwwl8Ak4BPgFNzragyvQD8Q95FtDOLJf0UOBJ4IA3T3innmipOOlT3UWAE2VFw9wLfjogJuRZWRt6HYG1K+uY1hGx45sLDTr1zvokk/QPwfWBmRDwlaVNgd++XaTxJsyJix7zraCkOhGaSdB9rGLPIH2SNI2m3YtMj4snWrsWsLklXk12fY2betbQEB0Iz1fcBVssfZI0naTNgYEQ8msbh6RARi/Ouq1JJOhi4BNiY7LBTkZ3b0SPXwipQunLaFmQ75j/l7+9luzia0IFgbYqk44FxwNciYvO0g/66iNgr59IqVrp404ER8VLetVS69GVlNe3lEF4fZdRMkp5nzV1G7eKbQys6GRgOzACIiL9I2jjfkireOw6D8qj94E9/k10aaF5xHAjNd0DeBbQzX0TEl7Vjr0nqiK8r0VzVacTT/2bVHfW/za+kyiTpe8Cvyc49eBfYDHiJ7AjDiudAaKb2sqnYhjwp6WdA1zQGz0nAfTnXVOl6AJ8BhddQDsCB0HgXkA0B8mhEbJ+u+314zjWVjfchlIl33JWHpHWAsWQfXgIeBm70iWrWFkiqjoih6bod20fECknPRsTwvGsrB28hlM+leMddOYwGbo2IG/IupL2Q1IUsZAdR0O8dEcflVlTl+khSd2AacLukd2lH1z3xmcrl4x135fE94M+SJkraP+1DsOaZSHb29z7Ak0Bfsut1WIkkDUiDBI4m6347Dfg9sIhslIJ2wV1GZSLpcrJ/Ou+4a6Y0guR+wGFkg9z9ISL+Nd+qKpekP6X+7rkRsW16fx/2dapLJ+l+4GcRMbfO9KHAuRFxYD6VlZe/fZWPd9yVSUQsk/QQ2fvXlexbmQOh6Zalnx9JGgz8jWx4cStdVd0wAIiIaklVrV9Oy3AglM/pvsxj80naFxgD7AE8AdwI/EueNbUD10vaEDgHmEo2ZPM5+ZZUcdZ0zkHXVquihbnLqEwk/QWYDdwCPOSjYppG0mRgMtl76Ov+WpsgaRLwWN2DHSSNJbtgzmH5VFZeDoQySUPj7g0cR3am7Z1kg2D9OdfCbK0n6VXgGeApYFpEvJhzSRVH0tfJhrv+EpiVJg8FOgP/FBF/y6u2cnIgtIB0ssptQDdgDnBWREzPt6rKIGlVYrnkAAAGpElEQVQn4EpgK7J/tg7Apz6fo+kkrUs2hv8uZNcE3hKYExH/lGthFSj9bw9OD+dFxGN51lNu3odQJuki5kcCPwDeITsUbSrZ2P53A/3zq66i
XEW2D+Fusm9gRwEDcq2o8n1FtmP5K2AF2d/nu7lWVKEi4nHg8QYbVigHQvlMJzve+6CIqCmYXi3pupxqqkgRMV9Sh4j4CrhFki9F2jyfAM8D/wHcEBGLcq7H2ih3GZWJJHlHcvNJmka2L+ZGssMj3waOiYjtci2sgqVrKe9Mtm/rS7JrfU+LiP/JtTBrcxwIZSJpC+AMsuO7V255+eSfxknjzb9Lds3f04CewDURMT/XwtoBSVuSnfB3KrBxRLSbwyWtPBwIZZIGu7qO7AiEr2qnR8SsehcyawWS7iHblzWfdKQR8GxELM21MGtzvA+hfJZHxLV5F1Hp6rng0MdANXCh+79LJ2kY8BZwMfAc2UEP/0w2Iu8LgAPBVuEthGaS9LV099/IujruZdWxjHz2ciNIupRsC+uONGkM2TDYHwM7t5cxY1qDpOeAvSPiA0m7kp3w9yOyrYWtIuKQXAu0NseB0EySXif7RqsisyMivtnKJVU0SX+MiJHFpkl6PiK2yau2SiNpTu3OeElXA+9FxHnp8eyIGJJnfdb2uMuomSLC5xeUV3dJIyJiBoCk4WRj70A7Gne+lXSQ1DEilgN7AeMK5vl/31bjP4oykXQycHtEfJQebwgcHhHX5FtZxRlLdu5BbQgsBsZK6gb8//zKqkiTyC5J+j7wOdkOZSQNIOuCM1uFu4zKpNgmeO049HnVVGnS5TMPiYi7JPUk+/v8KO+6KlkaCuQbwCMR8WmatgXQPSKey7U4a3McCGUiaS6wXe3JaZI6AHMjYlC+lVUWSdMiYte86zBbG/kSmuXzMHCXpL0k7Um2uf77nGuqRH+QdIakfpK+VnvLuyiztYG3EMokdXf8kGznnYBHgBvTeDxWonTUVl0+WsusFTgQzMwMcJdR2UgaKGmKpBclvVZ7y7uuSiNpPUlnS7o+PR4o6YC86zJbGzgQyucW4FqyY+X3AG4lGw7bGucWshE5v5Me1wAX5leO2drDgVA+XdNwwoqIN9IZoR7ptPE2j4hLyS7oQkR8TvGzwM2szHxiWvksTTuW/yLpFOCvZIOIWeN8KakraYA7SZtTMDaUmbUc71QukzSy5EvABsAFZOP4XxoRz+RaWIWR9F3gbGBrsiO1RpJdIOeJPOsyWxs4EKzNSden3omsq+iZiHg/55LM1gruMmomSVPXND8ivtdatbQju5Fd8jHIrpx2b77lmK0dvIXQTJLeI7sIySRgBnV2gEbEk3nUVakkXQMMIHs/AQ4DXo2Ik/Orymzt4EBopjRm0XeBw4FtgQeASRExL9fCKpSkecDggjGh1gGe95hQZi3Ph502U0R8FRG/j4ijyfq95wNPSPpRzqVVqleATQse9wPm5lSL2VrF+xDKQNK6wP5kWwlVwBXAb/OsqdJIuo9sn0FP4CVJz6ZZw4DpuRVmthZxl1EzSZoADAYeAiZHxAs5l1SRJO1WbDLZzuXD3WVk1vIcCM0kaQXwaXpY+GaKbJTOHq1fVWWTNAT4PvAvwOvAbyPiynyrMmv/3GXUTBHh/TBlkK7iNYas220RcCfZF5Y9ci3MbC3iLQRrE9KW1lPA2IiYn6a95usgmLUef7u1tuKfgb8Bj0u6QVLthYbMrJV4C8HaFEndgIPIuo72BCYA90bEI7kWZrYWcCBYm5WupXwocFhEeChxsxbmQDAzM8D7EMzMLHEgmJkZ4EAwM7PEgWBmZoADwWw1kqokvZTOh5gn6RFJXSUdL2mmpDmS7pG0Xmo/XtK1kh6X9Jqk3STdnNYxvmC9oyRNl/ScpLsldc/tRZoV4UAwK24gcHUaVO8jshPnfhsRwyJiO7LrZ48taL8h2XkTpwH3Ab8BBgHbSBoiqTfZtaL3jogdgGrgx632asxK4LGMzIp7PSJmp/uzyIY1HyzpQmADoDvwcEH7+yIiJD0PvBMRz8PKC/5UAX2BrYE/SgLojIf1tjbGgWBW3BcF978CugLjgYMiYo6kY4Ddi7RfUWfZFWT/Z18B
f4iIw1uoXrNmc5eRWenWB96W1Ak4opHLPgOMlDQAQNJ6aYRXszbDgWBWunOAGcAfgJcbs2BEvAccA0ySNJcsILYsd4FmzeGhK8zMDPAWgpmZJQ4EMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAPg/zqIXjG4NUuMAAAAASUVORK5CYII=\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAFPCAYAAABNkrmkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XuYFOWd9vHvLaAgCKKoK4IOUYwCKiIHE4znGNEYjKsREkWUFU9kV2Ny6eY1r0ZNXteY7KoxsmoUPOHZDZ6i0VXxjagMLIwgMaKijhJEjAdEFOS3f9Qz2Iw9TM9MD0WP9+e6+pruqqeqf90z03fXU1VPKSIwMzPbKO8CzMxsw+BAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhm1gySviHpxRLa/VTSdeujJms5B4IVJWkfSU9Lel/Su5L+LGlI3nWtT5ImSbq4kTYhaaf1VVNTSNpfUm1rrDsinoqIr5bQ7pcR8U+pnqr0frVvjZqs5fyLsS+Q1BW4HzgNuAPYGPgG8EmedTWVpPYRsSrvOswqRkT45ttaN2Aw8N465m8EnAe8BrwN3Ah0S/OqgABOBN4A/g6cCgwBaoD3gN/WW99JwPzU9mFgh3U895j0vEuBnwELgYPTvAuAu4CbgQ+Af0q1ngu8nJa5A9iiYH13An8D3gemAf3T9PHASuBTYBlwX5FapqXX+lFqc2yafjKwAHgXmAr0TNN/DlyZ7ndIy12aHncCVgDdC97DE4DXgXeA/1PwvEOB6vQaFwO/KVJbZ+BjYHWqbRnQE9gE+A/grXT7D2CTIstvkn5XAwqmbZXWuTWwP1BbMO8c4E3gQ+BF4KCC38nN6f7r6XXV1fO1vP/Wfav3e8+7AN82vBvQNX14TgZGAN3rzT8pfeB9BegC3APclObVfZhNBDoCh6QPuv9KHyTbkYXIfqn9kWldu5JtsZ4HPN1AXf3SB8k+ZFstl6UP7cJAWJnWuVH6kD0TeAbolT7k/hOYUu+1bFbwQTm7YN4k4OJG3qsAdip4fGD6AB+U1nklMK1g3vPp/tfJQurZgnlz6r2H16bXsAfZ1tmuaf504Ph0vwuwdwO1rfWhnaZdmN6Prck+4J8GLmpg+euBXxQ8PgP4Y/11A18lC/+eBfXvWPA7ubne62qf99+4bw38PeddgG8b5i19QE8CaoFVZN90t0nzHgNOL2j71fRB3L7gn367gvlLSd+e0+O7gTPT/YeAcQXzNgKWU2QrAfi/9T7MNyX7Bl8YCNPqLTOf9G01Pd62rtYi69881V63tTOJpgfC70nf+tPjLun5qvh8K2BLsq2Wn6b3twvZ1sMVaZm697BXwXqeA0al+9NS+x6N1FYsEF4GDit4/C1gYQPLHwy8UvD4z8CY+usGdiIL+YOBDvXW4UCooJt3KltRETE/IsZGRC9gAFl3w3+k2T3Jum3qvEYWBtsUTFtccP/jIo+7pPs7AJdLek/Se2TdLCLbkqivJ9k30boal5OFTaE36j3eAbi3YP3zgc+AbSS1k3SJpJclfUDW/QTQo8hzl2qt9yYilqUat4uIj8m6evYD9gWeJPuGPjxNe7Leuv5WcH85n79n44Cdgb9ImiHp282tL93v2UDb/wY6SRomaQdgIHBv/UYRsYBsS+wC4G1Jt0lqaJ22AXMgWKMi4i9k35YHpElvkX3Q1tmebCtiMU33BnBKRGxecOsUEU8XabuIrOsHAEmdyL5tr1VukfWPqLf+jhHxJvB9YCTZN9tuZN9gIQukYusqxVr
vjaTOqcY306QnybqH9gRmpMffItsvMK2UJ4iIlyJiNFm3z78Bd6Xn+ULTxuoj+9291cDzrCbb5zKa7L26PyI+bKDtrRGxT1p3pLpKqcc2IA4E+wJJu0g6W1Kv9Lg32YfCM6nJFOAsSX0kdQF+CdwezTuiZyLwr5L6p+fqJumYBtreBRwh6euSNibrNlEDbQvX/4v0DRdJW0kameZtRtY3v5Ss++mX9ZZdTLafZF3qt7kVOFHSQEmbpHU+GxEL0/wnyXaMvxARnwJPkO38fjUiljTyXKTXcJykrdIH9ntp8mcN1LalpG4F06YA56X3oQdZN9zN63i6W4FjgR+k+8Xq+aqkA9PrXUG2BVisniVkO7kbe08tJw4EK+ZDYBjwrKSPyIJgLnB2mn89cBPZN9pXyT4EfticJ4qIe8m+Td6Wum3mku3ILtZ2Xnqe28i2Fj4k67te1+Gwl5Pt/3hE0ofptQxL824k6zJ5E3iBzwOvzu+Bfqm76b8aWP8FwOTU5nsR8RjZ0U93pxp3BEYVtH+abF9C3dbAC2TvX0lbB8mhwDxJy9LrGxURK+o3Slt2U4BXUn09gYvJuq1qgOeBWWlaURHxLNnRUD3J9vcUswlwCdnO9L+Rbbn8tMi6lgO/AP6c6tm7tJdr64sivBVnlSltnbwH9I2IV/Oux6zSeQvBKoqkIyRtmvrMLyP7lrsw36rM2gYHglWakXx+UlVfsu4Sb+aalYG7jMzMDPAWgpmZJQ4EMzMDKmy00x49ekRVVVXeZZiZVZSZM2e+ExFbNdauogKhqqqK6urqvMswM6sokl5rvJW7jMzMLHEgmJkZ4EAwM7OkovYhmFn5rFy5ktraWlas+MIwSFahOnbsSK9evejQoUOzlncgmH1J1dbWstlmm1FVVYXU2KCxtqGLCJYuXUptbS19+vRp1jrcZWT2JbVixQq23HJLh0EbIYktt9yyRVt8DgSzLzGHQdvS0t+nA8HMzADvQ6Dq3AfyLqEkCy85PO8SrI0r9/9Ca/zNXnDBBXTp0oUf//jHZV/3hmDixIlsuummjBkzhkmTJnHIIYfQs+f6uzz1lz4QzMxKtWrVKtq3b72PzVNPPXXN/UmTJjFgwID1GgjuMjKz3CxcuJBddtmFE044gd13352jjz6a5cuXU1VVxTvvvANAdXU1+++//5pl5syZw4EHHkjfvn259tprAVi0aBH77rsvAwcOZMCAATz11FMNPmeXLl04++yzGTRoEAcddBBLlmSXsn755Zc59NBD2WuvvfjGN77BX/7yFwDGjh3Lj370Iw444ADOOeecoutctmwZJ554Irvtthu77747d999NwCnnXYagwcPpn///px//vlr2ldVVXHOOecwdOhQhg4dyoIFC4BsC+iyyy7jrrvuorq6mh/84AcMHDiQjz/+mAsvvJAhQ4YwYMAAxo8fT2tcusCBYGa5evHFFxk/fjw1NTV07dqV3/3ud+tsX1NTwwMPPMD06dO58MILeeutt7j11lv51re+xezZs5kzZw4DBw5scPmPPvqIQYMGMWvWLPbbbz9+/vOfAzB+/HiuvPJKZs6cyWWXXcbpp5++Zpm//vWvPProo/z6178uus6LLrqIbt268fzzz1NTU8OBBx4IwC9+8Quqq6upqanhySefpKamZs0yXbt25bnnnmPChAmceeaZa63v6KOPZvDgwdxyyy3Mnj2bTp06MWHCBGbMmMHcuXP5+OOPuf/++9f9xjaDu4zMLFe9e/dm+PDhABx33HFcccUV62w/cuRIOnXqRKdOnTjggAN47rnnGDJkCCeddBIrV67kyCOPXGcgbLTRRhx77LFrnu+oo45i2bJlPP300xxzzDFr2n3yySdr7h9zzDG0a9euwXU++uij3HbbbWsed+/eHYA77riDa665hlWrVrFo0SJeeOEFdt99dwBGjx695udZZ521ztcM8Pjjj3PppZeyfPly3n33Xfr3788RRxzR6HJN4UAws1zVP1R
SEu3bt2f16tUAXziuvlj7fffdl2nTpvHAAw9w/PHH85Of/IQxY8aU/PyrV69m8803Z/bs2UXbdO7ceZ3riIgv1PXqq69y2WWXMWPGDLp3787YsWPXei2F7Rs7XHTFihWcfvrpVFdX07t3by644IJWOcPcXUZmlqvXX3+d6dOnAzBlyhT22WcfqqqqmDlzJsCa/vg6f/jDH1ixYgVLly7liSeeYMiQIbz22mtsvfXWnHzyyYwbN45Zs2Y1+HyrV6/mrrvuAuDWW29ln332oWvXrvTp04c777wTyD7g58yZU/JrOOSQQ/jtb3+75vHf//53PvjgAzp37ky3bt1YvHgxDz300FrL3H777Wt+fu1rX/vCOjfbbDM+/PBD4PNQ7NGjB8uWLVtTf7l5C8HMgPwObd51112ZPHkyp5xyCn379uW0005j6NChjBs3jl/+8pcMGzZsrfZDhw7l8MMP5/XXX+dnP/sZPXv2ZPLkyfzqV7+iQ4cOdOnShRtvvLHB5+vcuTPz5s1jr732olu3bms+mG+55RZOO+00Lr74YlauXMmoUaPYY489SnoN5513HmeccQYDBgygXbt2nH/++Rx11FHsueee9O/fn6985StrusXqfPLJJwwbNozVq1czZcqUL6xz7NixnHrqqXTq1Inp06dz8skns9tuu1FVVcWQIUNKqqup1Bp7qlvL4MGDo9wXyPF5CPZlNX/+fHbddddca1i4cCHf/va3mTt37np7zi5durBs2bL19nzF1F3sq0ePHmVfd7Hfq6SZETG4sWXdZWRmZoC7jMwsR1VVVa22dTBs2LC1jhQCuOmmm1q0dXDDDTdw+eWXrzVt+PDhXHXVVU1az8KFC5tdQ2sqKRAkHQpcDrQDrouIS+rNV5p/GLAcGBsRsyT1Bm4E/gFYDVwTEZenZbYAbgeqgIXA9yLi72V4TWZmPPvss2Vf54knnsiJJ55Y9vVuKBrtMpLUDrgKGAH0A0ZL6lev2Qigb7qNB65O01cBZ0fErsDewBkFy54LPBYRfYHH0mMzW48qaR+iNa6lv89S9iEMBRZExCsR8SlwGzCyXpuRwI2ReQbYXNK2EbEoImalQj8E5gPbFSwzOd2fDBzZoldiZk3SsWNHli5d6lBoI+oukNOxY8dmr6OULqPtgDcKHtcCw0posx2wqG6CpCpgT6BuO26biFgEEBGLJG1d7MkljSfb6mD77bcvoVwzK0WvXr2ora1dM5aPVb66S2g2VymBUOwUuvpfKdbZRlIX4G7gzIj4oPTyICKuAa6B7LDTpixrZg3r0KFDsy+1aG1TKV1GtUDvgse9gLdKbSOpA1kY3BIR9xS0WSxp29RmW+DtppVuZmblVEogzAD6SuojaWNgFDC1XpupwBhl9gbeT91AAn4PzI+I3xRZ5oR0/wTgD81+FWZm1mKNdhlFxCpJE4CHyQ47vT4i5kk6Nc2fCDxIdsjpArLDTuuOyxoOHA88L6lu1KifRsSDwCXAHZLGAa8Dnw8zaGZm611J5yGkD/AH602bWHA/gDOKLPf/Kb5/gYhYChzUlGLNzKz1eOgKMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAAeCmZklDgQzMwMcCGZmljgQzMwMcCCYmVniQDAzM8CBYGZmiQPBzMwAB4KZmSUOBDMzAxwIZmaWOBDMzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAAeCmZklDgQzMwMcCGZmljgQzMwMgPZ5F2BtS9W5D+RdQqMWXnJ43iWYbZC8hWBmZoADwczMEgeCmZkBJQaCpEMlvShpgaRzi8yXpCvS/BpJgwrmXS/pbUlz6y1zgaQ3Jc1Ot8Na/nLMzKy5Gg0ESe2Aq4ARQD9gtKR+9ZqNAPqm23jg6oJ5k4BDG1j9v0fEwHR7sIm1m5lZGZWyhTAUWBARr0TEp8BtwMh
6bUYCN0bmGWBzSdsCRMQ04N1yFm1mZuVXSiBsB7xR8Lg2TWtqm2ImpC6m6yV1L6G9mZm1klICQUWmRTPa1Hc1sCMwEFgE/Lrok0vjJVVLql6yZEljtZqZWTOVEgi1QO+Cx72At5rRZi0RsTgiPouI1cC1ZF1TxdpdExGDI2LwVlttVUK5ZmbWHKUEwgygr6Q+kjYGRgFT67WZCoxJRxvtDbwfEYvWtdK6fQzJd4G5DbU1M7PW1+jQFRGxStIE4GGgHXB9RMyTdGqaPxF4EDgMWAAsB06sW17SFGB/oIekWuD8iPg9cKmkgWRdSwuBU8r4uszMrIlKGssoHRL6YL1pEwvuB3BGA8uObmD68aWXaWZmrc1nKpuZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRngQDAzs8SBYGZmgAPBzMwSB4KZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAQ4EMzNLHAhmZgY4EMzMLHEgmJkZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBpQYCJIOlfSipAWSzi0yX5KuSPNrJA0qmHe9pLclza23zBaS/iTppfSze8tfjpmZNVejgSCpHXAVMALoB4yW1K9esxFA33QbD1xdMG8ScGiRVZ8LPBYRfYHH0mMzM8tJKVsIQ4EFEfFKRHwK3AaMrNdmJHBjZJ4BNpe0LUBETAPeLbLekcDkdH8ycGRzXoCZmZVHKYGwHfBGwePaNK2pberbJiIWAaSfW5dQi5mZtZJSAkFFpkUz2jSLpPGSqiVVL1mypByrNDOzIkoJhFqgd8HjXsBbzWhT3+K6bqX08+1ijSLimogYHBGDt9pqqxLKNTOz5iglEGYAfSX1kbQxMAqYWq/NVGBMOtpob+D9uu6gdZgKnJDunwD8oQl1m5lZmTUaCBGxCpgAPAzMB+6IiHmSTpV0amr2IPAKsAC4Fji9bnlJU4DpwFcl1Uoal2ZdAnxT0kvAN9NjMzPLSftSGkXEg2Qf+oXTJhbcD+CMBpYd3cD0pcBBJVdqZmatymcqm5kZ4EAwM7PEgWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRngQDAzs8SBYGZmgAPBzMwSB4KZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAdA+7wLMrLiqcx/Iu4SSLLzk8LxLsDLxFoKZmQEOBDMzSxwIZmYGOBDMzCxxIJiZGeBAMDOzxIFgZmaAA8HMzBIHgpmZAT5T2cy+JHzmd+O8hWBmZoADwczMEgeCmZkBDgQzM0scCGZmBjgQzMwscSCYmRlQYiBIOlTSi5IWSDq3yHxJuiLNr5E0qLFlJV0g6U1Js9PtsPK8JDMza45GA0FSO+AqYATQDxgtqV+9ZiOAvuk2Hri6xGX/PSIGptuDLX0xZmbWfKVsIQwFFkTEKxHxKXAbMLJem5HAjZF5Bthc0rYlLmtmZhuAUgJhO+CNgse1aVopbRpbdkLqYrpeUveSqzYzs7IrJRBUZFqU2GZdy14N7AgMBBYBvy765NJ4SdWSqpcsWVJCuWZm1hylBEIt0LvgcS/grRLbNLhsRCyOiM8iYjVwLVn30hdExDURMTgiBm+11VYllGtmZs1RSiDMAPpK6iNpY2AUMLVem6nAmHS00d7A+xGxaF3Lpn0Mdb4LzG3hazEzsxZodPjriFglaQLwMNAOuD4i5kk6Nc2fCDwIHAYsAJYDJ65r2bTqSyUNJOtCWgicUs4XZmZmTVPS9RDSIaEP1ps2seB+AGeUumyafnyTKjUzs1blM5XNzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWB
mZokDwczMAAeCmZklDgQzMwMcCGZmljgQzMwMcCCYmVniQDAzM8CBYGZmiQPBzMwAB4KZmSUOBDMzAxwIZmaWOBDMzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMDHAhmZpY4EMzMDHAgmJlZ4kAwMzPAgWBmZokDwczMAAeCmZklDgQzMwMcCGZmlpQUCJIOlfSipAWSzi0yX5KuSPNrJA1qbFlJW0j6k6SX0s/u5XlJZmbWHI0GgqR2wFXACKAfMFpSv3rNRgB90208cHUJy54LPBYRfYHH0mMzM8tJKVsIQ4EFEfFKRHwK3AaMrNdmJHBjZJ4BNpe0bSPLjgQmp/uTgSNb+FrMzKwF2pfQZjvgjYLHtcCwEtps18iy20TEIoCIWCRp62JPLmk82VYHwDJJL5ZQc956AO+Uc4X6t3KureKU9f30e+m/zTKqlPdzh1IalRIIKjItSmxTyrLrFBHXANc0ZZm8SaqOiMF519FW+P0sH7+X5dXW3s9Suoxqgd4Fj3sBb5XYZl3LLk7dSqSfb5detpmZlVspgTAD6Cupj6SNgVHA1HptpgJj0tFGewPvp+6gdS07FTgh3T8B+EMLX4uZmbVAo11GEbFK0gTgYaAdcH1EzJN0apo/EXgQOAxYACwHTlzXsmnVlwB3SBoHvA4cU9ZXlq+K6uKqAH4/y8fvZXm1qfdTEU3q0jczszbKZyqbmRngQDAzs8SBYGZmgAPBNkCStsi7BrNCkm5KP/8l71pak3cql1Eau2kbCo7eiojX86uoMkl6CZgN3AA8FP4jbTFJhwP9gY510yLiwvwqqiySXiAbk20qsD/1TrqNiHdzKKvsSjlT2Uog6YfA+cBiYHWaHMDuuRVVuXYGDgZOAq6UdDswKSL+mm9ZlUnSRGBT4ADgOuBo4Llci6o8E4E/Al8BZrJ2IESaXvG8hVAmkhYAwyJiad61tCWSDgBuBjoDc4BzI2J6vlVVFkk1EbF7wc8uwD0RcUjetVUKSX0i4lVJV0fEaXnX01q8D6F83gDez7uItkDSlpL+RVI18GPgh2SDiJ0N3JprcZXp4/RzuaSewEqgT471VKK70s+dc62ilbnLqIUk/SjdfQV4QtIDwCd18yPiN7kUVtmmAzcBR0ZEbcH06tT9YU1zv6TNgV8Bs8i6OK7Lt6SKs5Gk84GdC/7n12gr/+cOhJbbLP18Pd02Tjdo4siutsZXG9qRHBFf7sGWmyEiLkp375Z0P9AxIrw12zSjyK7Z0p7P/+fbHO9DKBNJx0TEnY1Ns8ZJ2pmsq6iKtY/YOjCvmiqZpE3Jutu2j4iTJfUlC937cy6t4kgaEREP5V1Ha3EglImkWRExqLFp1jhJc8iO6pgJfFY3PSJm5lZUBUtHac0ExkTEAEmdgOkRMTDn0iqGpOMi4mZJZ1Nky99dRgZk3xjIRnrdTtIVBbO6AqvyqarirYqIq/Muog3ZMSKOlTQaICI+llTs4lXWsM7pZ5dcq2hlDoSWewuoBr5D9i2szofAWblUVPnuk3Q6cC9r76BvEyf/5ODTtFUQAJJ2pOB9tcZFxH+mnz/Pu5bW5C6jMpHUgSxgt4+ISrju8wZL0qtFJkdEtImTf9Y3Sd8EzgP6AY8Aw4GxEfFEnnVVknpb/18QEf+8vmppTQ6EMpF0BHAZsHFE9JE0ELgwIr6Tc2lmSNoS2JvsDNtnIqKsF4Zv6yTVXd1xOFmw3p4eHwPMjIg20RvgQCgTSTOBA4EnImLPNK0mIjx0RTNI+jpfPMroxtwKqmCShgOzI+IjSccBg4DLI+K1nEurOJIeBw6JiJXpcQfgkYg4IN/KysNnKpfPKh/bXR5pZMnLgH2AIek2ONeiKtvVZGcp7wH8BHgNcLg2T0/WPg+hS5rWJnincvnMlfR9oF06zvufgadzrqlSDQb6eZTTslkVESFpJHBFRPy+oAvEmuYS4H/SlgLAfsAF+ZVTXt5CKJ8fkg0v/AkwBfgAODPXiir
XXOAf8i6iDflQ0r8CxwEPpGHaO+RcU8VJh+o+CgwjOwLuXuBrETE518LKyPsQbIOTvn0NJBuiufCwU++gbwZJ/wB8H5gREU9J2h7Y3/tkmk7SzIjYK+86WosDoYUk3cc6xizyh1jTSdqv2PSIeHJ912JWSNJVZNfmmJF3La3BgdBCDX141fGHWPNI2gHoGxGPprF42kXEh3nXVYkkHQX8G7A12WGnIjuvo2uuhVWgdOW0ncl2zH/E5+9lmzia0IFgGxxJJwPjgS0iYse0k35iRByUc2kVKV286YiImJ93LZUufVH5grZyCK+PMmohSc+z7i6jNvHNYT07AxgKPAsQES9J2jrfkiraYodBedR98Ke/x46NNK84DoSW+3beBbRBn0TEp3Xjr0lqj68t0RLVacTT/2LtnfT35FdSZZL0HeDXZOcevA3sAMwnO8Kw4jkQWqitbCpuYJ6U9FOgUxqH53TgvpxrqmRdgeVA4TWUA3AgNN1FZEOAPBoRe6Zrfo/Ouaay8T6EMvGOu/KRtBEwjuwDTMDDwHU+Uc3yJqk6Igana3bsGRGrJT0XEUPzrq0cvIVQPpfiHXflMhK4MSKuzbuQtkBSR7KA7U9Bv3dEnJRbUZXrPUldgGnALZLepg1d98RnKpePd9yVz3eAv0q6SdLhaR+CNd9NZGd+fwt4EuhFdr0OK5GkndIggSPJut/OAv4ILCUbpaBNcJdRmUi6nOyfzjvuyiCNIjkCOJZskLs/RcQ/5VtVZZL0P6m/uyYidk/v7cO+RnXpJN0P/DQiaupNHwycHxFH5FNZefmbV/l4x10ZRcRKSQ+RvYedyL6ZORCaZ2X6+Z6kAcDfyIYWt9JV1Q8DgIiollS1/stpHQ6E8jnbl3gsD0mHAqOAA4AngOuA7+VZU4W7RlJ34GfAVLIhm3+Wb0kVZ13nHHRab1W0MncZlYmkl4DZwA3AQz4ipvkk3QbcRvY++tq/ljtJU4D/rn+gg6RxZBfMOTafysrLgVAmaWjcg4GTyM6yvZ1sEKy/5lqYfelJehl4BngKmBYRL+RcUsWRtA3ZcNefAjPT5MHAxsB3I+JvedVWTg6EVpBOVrkZ6AzMAc6NiOn5VlU5JO0NXAnsSvYP1w74yOd0NI+kTcjG8P8G2TWBdwHmRMR3cy2sAqX/7QHp4byI+O886yk370Mok3QR8+OA44HFZIeiTSUb1/9OoE9+1VWc35LtQ7iT7FvYGGCnXCuqbJ+R7Vj+DFhN9vf5dq4VVaiIeBx4vNGGFcqBUD7TyY73PjIiagumV0uamFNNFSsiFkhqFxGfATdI8uVIm+8D4HngN8AeZPkRAAAGBUlEQVS1EbE053psA+UuozKRJO9ILg9J08j2x1xHdojkImBsROyRa2EVKl1LeR+yfVufkl3re1pEPJZrYbbBcSCUiaSdgR+THd+9ZsvLJ/80XRpz/m2y6/6eBXQDfhcRC3ItrMJJ2oXsZL8zga0jos0cLmnl4UAokzTY1USyIxA+q5seETMbXMhsPZB0N9m+rAWkI42A5yJiRa6F2QbH+xDKZ1VEXJ13EW1BAxcdeh+oBi52H3hpJA0B3gAuAWaRHfTwj2Qj8s4FHAi2Fm8htJCkLdLdfybr5riXtccy8tnLTSTpUrKtrFvTpFFkw2C/D+zTVsaNaW2SZgEHR8S7kvYlO9nvh2RbC7tGxNG5FmgbHAdCC0l6lezbrIrMjoj4ynouqeJJ+nNEDC82TdLzEbFbXrVVEklz6nbES7oKWBIRF6THsyNiYJ712YbHXUYtFBE+v6D8ukgaFhHPAkgaSjb+DrShsefXg3aS2kfEKuAgYHzBPP/v2xf4j6JMJJ0B3BIR76XH3YHREfG7fCurSOPIzj2oC4EPgXGSOgP/L7+yKs4UssuRvgN8TLZDGUk7kXW/ma3FXUZlUmwTvG4c+rxqqkTp8plHR8QdkrqR/Y2+l3ddlSoNA7It8EhEfJSm7Qx0iYhZuRZnGxwHQplIqgH
2qDs5TVI7oCYi+udbWeWRNC0i9s27DrMvG19Cs3weBu6QdJCkA8k21/+Yc02V6k+Sfiypt6Qt6m55F2XW1nkLoUxSV8cpZDvvBDwCXJfG4rEmSEdu1ecjtsxamQPBzMwAdxmVjaS+ku6S9IKkV+pueddViSRtKuk8Sdekx30lfTvvuszaOgdC+dwAXE12nPwBwI1kw2Fb091ANirn19PjWuDi/Mox+3JwIJRPpzScsCLitXRGqEc6bZ4dI+JSsou6EBEfU/xMcDMrI5+YVj4r0o7llyRNAN4kG0TMmu5TSZ1IA9xJ2pGC8aHMrHV4p3KZpJEl5wObAxeRjeF/aUQ8k2thFUjSN4HzgH5kR2sNJ7tAzhN51mXW1jkQbIOUrlG9N1lX0TMR8U7OJZm1ee4yaiFJU9c1PyK+s75qaWP2I7vsY5BdOe3efMsxa/u8hdBCkpaQXYRkCvAs9XZ+RsSTedRVyST9DtiJ7D0FOBZ4OSLOyK8qs7bPgdBCacyibwKjgd2BB4ApETEv18IqmKR5wICCcaE2Ap73uFBmrcuHnbZQRHwWEX+MiBPI+rwXAE9I+mHOpVWyF4HtCx73BmpyqsXsS8P7EMpA0ibA4WRbCVXAFcA9edZUiSTdR7bPoBswX9JzadYQYHpuhZl9SbjLqIUkTQYGAA8Bt0XE3JxLqliS9is2mWzn8mh3GZm1LgdCC0laDXyUHha+mSIbobPr+q+q8kkaCHwf+B7wKnBPRFyZb1VmbZu7jFooIrwfpkzSlbxGkXW9LQVuJ/vSckCuhZl9SXgLwTYYaWvrKWBcRCxI017xdRDM1g9/u7UNyT8CfwMel3StpLqLDZnZeuAtBNvgSOoMHEnWdXQgMBm4NyIeybUwszbOgWAbtHQt5WOAYyPCw4mbtSIHgpmZAd6HYGZmiQPBzMwAB4KZmSUOBDMzAxwIZl8gqUrS/HQuxDxJj0jqJOlkSTMkzZF0t6RNU/tJkq6W9LikVyTtJ+n6tI5JBes9RNJ0SbMk3SmpS24v0qwIB4JZcX2Bq9KAeu+RnTR3T0QMiYg9yK6fPa6gfXeycybOAu4D/h3oD+wmaaCkHmTXiT44IgYB1cCP1turMSuBxzIyK+7ViJid7s8kG9Z8gKSLgc2BLsDDBe3vi4iQ9DywOCKehzUX+6kCegH9gD9LAtgYD+ltGxgHgllxnxTc/wzoBEwCjoyIOZLGAvsXab+63rKryf7PPgP+FBGjW6lesxZzl5FZ6TYDFknqAPygics+AwyXtBOApE3T6K5mGwwHglnpfgY8C/wJ+EtTFoyIJcBYYIqkGrKA2KXcBZq1hIeuMDMzwFsIZmaWOBDMzAxwIJiZWeJAMDMzwIFgZmaJA8HMzAAHgpmZJQ4EMzMD4H8Be5sHHvGFObgAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] @@ -642,14 +678,14 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 72, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/nicholasjones/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n", + "C:\\Users\\charl\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", @@ -687,12 +723,21 @@ " \n", " \n", " \n", - " 0\n", - " Cardiff\n", - " 335145\n", - " 2100\n", - " 0.006266\n", - " 159.592857\n", + " 3\n", + " Machynlleth\n", + " 2235\n", + " 48\n", + " 0.021477\n", + " 46.562500\n", + " yes!\n", + " \n", + " \n", + " 2\n", + " Abergavenny\n", + " 12515\n", + " 198\n", + " 0.015821\n", + " 63.207071\n", " no\n", " \n", " \n", @@ -705,36 +750,27 @@ " no\n", " \n", " \n", - " 2\n", - " Abergavenny\n", - " 12515\n", - " 183\n", - " 0.014622\n", - " 68.387978\n", + " 0\n", + " Cardiff\n", + " 335145\n", + " 2100\n", + " 0.006266\n", + " 159.592857\n", " no\n", " \n", - " \n", - " 3\n", - " Machynlleth\n", - " 2235\n", - " 48\n", - " 0.021477\n", - " 46.562500\n", - " yes!\n", - " \n", " \n", "\n", "" ], "text/plain": [ " name population n_pubs pubs_per_capita people_per_pub to_visit\n", - "0 Cardiff 335145 2100 0.006266 159.592857 no\n", + "3 Machynlleth 2235 48 0.021477 46.562500 yes!\n", + "2 Abergavenny 12515 198 0.015821 63.207071 no\n", "1 Swansea 230300 1680 0.007295 137.083333 no\n", - "2 Abergavenny 12515 183 0.014622 68.387978 no\n", - "3 Machynlleth 2235 48 0.021477 46.562500 yes!" 
+ "0 Cardiff 335145 2100 0.006266 159.592857 no" ] }, - "execution_count": 125, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -747,79 +783,32 @@ "towns_df" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2.1 Read data from files" - ] - }, - { - "cell_type": "code", - "execution_count": 153, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/nicholasjones/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3049: DtypeWarning: Columns (16,17,18,20,22,77) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " interactivity=interactivity, compiler=compiler, result=result)\n" - ] - }, - { - "data": { - "text/plain": [ - "(858982, 96)" - ] - }, - "execution_count": 153, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Create a dataframe using read_csv()\n", - "\n", - "df = pd.read_csv('pluto_18v2_1.csv')\n", - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "metadata": {}, - "outputs": [], - "source": [ - "df = df[::20]\n", - "df.shape\n", - "df.to_csv('pluto_shortened.csv')" - ] - }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(108, 97)" + "0 2100\n", + "Name: n_pubs, dtype: int64" ] }, - "execution_count": 159, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df = pd.read_csv('pluto_shortened.csv')\n", - "df.shape" + "# More on .loc and .iloc\n", + "# .loc is a very flexible indexer. 
You can pass it pairs of (row, col) indexers to get a specific value:\n", + "towns_df.loc[towns_df['name'] == 'Cardiff', 'n_pubs']" ] }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 74, "metadata": {}, "outputs": [ { @@ -843,188 +832,1558 @@ " \n", " \n", " \n", - " Unnamed: 0\n", - " borough\n", - " block\n", - " lot\n", - " cd\n", - " ct2010\n", - " cb2010\n", - " schooldist\n", - " council\n", - " zipcode\n", - " ...\n", - " firm07_flag\n", - " pfirm15_flag\n", - " rpaddate\n", - " dcasdate\n", - " zoningdate\n", - " landmkdate\n", - " basempdate\n", - " masdate\n", - " polidate\n", - " edesigdate\n", + " name\n", + " population\n", + " n_pubs\n", + " pubs_per_capita\n", + " people_per_pub\n", + " to_visit\n", " \n", " \n", " \n", " \n", - " 0\n", - " 0\n", - " BX\n", - " 5641.0\n", - " 670.0\n", - " 210.0\n", - " 516.00\n", - " 2.0\n", - " NaN\n", - " 13.0\n", - " NaN\n", - " ...\n", - " 1.0\n", - " 1.0\n", - " 12/6/2018\n", - " 12/20/2018\n", - " 12/21/2018\n", - " 12/20/2018\n", - " 12/21/2018\n", - " NaN\n", - " NaN\n", - " 12/20/2018\n", - " \n", - " \n", - " 1\n", - " 8000\n", - " SI\n", - " 7864.0\n", - " 67.0\n", - " 503.0\n", - " 244.02\n", - " 1012.0\n", - " 31.0\n", - " 51.0\n", - " 10307.0\n", - " ...\n", - " NaN\n", - " NaN\n", - " 12/6/2018\n", - " 12/20/2018\n", - " 12/21/2018\n", - " 12/20/2018\n", - " 12/21/2018\n", + " 3\n", + " Machynlleth\n", + " 2235\n", + " 48\n", + " 0.021477\n", + " 46.562500\n", + " yes!\n", + " \n", + " \n", + " 2\n", + " Abergavenny\n", + " 12515\n", + " 198\n", + " 0.015821\n", + " 63.207071\n", + " no\n", + " \n", + " \n", + " 1\n", + " Swansea\n", + " 230300\n", + " 1680\n", + " 0.007295\n", + " 137.083333\n", + " no\n", + " \n", + " \n", + " 0\n", + " Cardiff\n", + " 335145\n", + " 0\n", + " 0.006266\n", + " 159.592857\n", + " no\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " name population n_pubs pubs_per_capita people_per_pub to_visit\n", + "3 Machynlleth 2235 48 0.021477 
46.562500 yes!\n", + "2 Abergavenny 12515 198 0.015821 63.207071 no\n", + "1 Swansea 230300 1680 0.007295 137.083333 no\n", + "0 Cardiff 335145 0 0.006266 159.592857 no" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can also use this to set values:\n", + "towns_df.loc[towns_df['name'] == 'Cardiff', 'n_pubs'] = 0\n", + "towns_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "#.iloc is used to get the row by its integer position (0 is the first row), not by the label shown in the index - the special column to the furthest left.\n", + "# It only works with integer indexers, unlike .loc" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "name Cardiff\n", + "population 335145\n", + "n_pubs 0\n", + "pubs_per_capita 0.00626594\n", + "people_per_pub 159.593\n", + "to_visit no\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "towns_df.iloc[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "# you can change this index column by setting a new one:" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationn_pubspubs_per_capitapeople_per_pubto_visit
name
Machynlleth2235480.02147746.562500yes!
Abergavenny125151980.01582163.207071no
Swansea23030016800.007295137.083333no
Cardiff33514500.006266159.592857no
\n", + "
" + ], + "text/plain": [ + " population n_pubs pubs_per_capita people_per_pub to_visit\n", + "name \n", + "Machynlleth 2235 48 0.021477 46.562500 yes!\n", + "Abergavenny 12515 198 0.015821 63.207071 no\n", + "Swansea 230300 1680 0.007295 137.083333 no\n", + "Cardiff 335145 0 0.006266 159.592857 no" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "towns_df = towns_df.set_index('name')\n", + "towns_df" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Cannot index by location index with a non-integer key", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtowns_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Cardiff'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1498\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1499\u001b[0m \u001b[0mmaybe_callable\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1500\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmaybe_callable\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1501\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1502\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m 2224\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2225\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2226\u001b[1;33m raise TypeError(\"Cannot index by location index with a \"\n\u001b[0m\u001b[0;32m 2227\u001b[0m \"non-integer key\")\n\u001b[0;32m 2228\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mTypeError\u001b[0m: Cannot index by location index with a non-integer key" + ] + } + ], + "source": [ + "# this will fail as Cardiff is not an indexer\n", + "towns_df.iloc['Cardiff']" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "335145" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# reformatting the towns_df DataFrame to make 'name' the index allows me to make calls like this using loc:\n", + "towns_df.loc['Cardiff','population']" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "# indexes can be reset at any time:\n", + "towns_df = 
towns_df.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "# You can also select multiple columns at a time:" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulation
0Machynlleth2235
1Abergavenny12515
2Swansea230300
3Cardiff335145
\n", + "
" + ], + "text/plain": [ + " name population\n", + "0 Machynlleth 2235\n", + "1 Abergavenny 12515\n", + "2 Swansea 230300\n", + "3 Cardiff 335145" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "towns_df[['name','population']]" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=4, step=1)" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can see the index by calling it directly:\n", + "towns_df.index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.2 Pandas II" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [], + "source": [ + "# You can join pandas dataframes together in many ways" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [], + "source": [ + "df_A = pd.DataFrame({\n", + " 'name':towns,\n", + " 'population':populations\n", + "})\n", + "\n", + "df_B = pd.DataFrame({\n", + " 'name':towns,\n", + " 'pubs':number_of_pubs\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationnamepubs
0Cardiff335145Cardiff2100
1Swansea230300Swansea1680
2Abergavenny12515Abergavenny198
3Machynlleth2235Machynlleth48
\n", + "
" + ], + "text/plain": [ + " name population name pubs\n", + "0 Cardiff 335145 Cardiff 2100\n", + "1 Swansea 230300 Swansea 1680\n", + "2 Abergavenny 12515 Abergavenny 198\n", + "3 Machynlleth 2235 Machynlleth 48" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df_A, df_B], axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\charl\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", + "of pandas will change to not sort by default.\n", + "\n", + "To accept the future behavior, pass 'sort=False'.\n", + "\n", + "To retain the current behavior and silence the warning, pass 'sort=True'.\n", + "\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulationpubs
0Cardiff335145.0NaN
1Swansea230300.0NaN
2Abergavenny12515.0NaN
3Machynlleth2235.0NaN
0CardiffNaN12/20/20182100.0
1SwanseaNaN1680.0
2AbergavennyNaN198.0
3MachynllethNaN48.0
\n", + "
" + ], + "text/plain": [ + " name population pubs\n", + "0 Cardiff 335145.0 NaN\n", + "1 Swansea 230300.0 NaN\n", + "2 Abergavenny 12515.0 NaN\n", + "3 Machynlleth 2235.0 NaN\n", + "0 Cardiff NaN 2100.0\n", + "1 Swansea NaN 1680.0\n", + "2 Abergavenny NaN 198.0\n", + "3 Machynlleth NaN 48.0" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df_A, df_B], axis = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [], + "source": [ + "# These don't look correct! (N.B. - but they would be if each DF contained the same columns). \n", + "# The way to get around this is to assign a common index that both frames share" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [], + "source": [ + "df_A = df_A.set_index('name')" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [], + "source": [ + "df_B = df_B.set_index('name')" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
population
name
Cardiff335145
Swansea230300
Abergavenny12515
Machynlleth2235
\n", + "
" + ], + "text/plain": [ + " population\n", + "name \n", + "Cardiff 335145\n", + "Swansea 230300\n", + "Abergavenny 12515\n", + "Machynlleth 2235" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_A" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pubs
name
Cardiff2100
Swansea1680
Abergavenny198
Machynlleth48
\n", + "
" + ], + "text/plain": [ + " pubs\n", + "name \n", + "Cardiff 2100\n", + "Swansea 1680\n", + "Abergavenny 198\n", + "Machynlleth 48" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_B" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [], + "source": [ + "df_A['pubs'] = df_B['pubs']" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationpubs
name
Cardiff3351452100
Swansea2303001680
Abergavenny12515198
Machynlleth223548
\n", + "
" + ], + "text/plain": [ + " population pubs\n", + "name \n", + "Cardiff 335145 2100\n", + "Swansea 230300 1680\n", + "Abergavenny 12515 198\n", + "Machynlleth 2235 48" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_A" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationpubs
03351452100
12303001680
212515198
3223548
\n", + "
" + ], + "text/plain": [ + " population pubs\n", + "0 335145 2100\n", + "1 230300 1680\n", + "2 12515 198\n", + "3 2235 48" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You can also combine frames with '.merge', a method of DataFrames. With no 'on' argument it joins on the columns the frames share (here 'pubs'), and the 'name' index is not kept in the result\n", + "df_A.merge(df_B, how = 'inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [], + "source": [ + "# What if these DataFrames aren't the same size?" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [], + "source": [ + "df_C = pd.DataFrame({\n", + " 'name':towns,\n", + " 'population':populations\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [], + "source": [ + "df_D = pd.DataFrame({'name':'Winchester','population':40005}, index = [4])" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulation
4Winchester40005
\n", + "
" + ], + "text/plain": [ + " name population\n", + "4 Winchester 40005" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_D" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [], + "source": [ + "df_C = df_C.append(df_D)" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namepopulation
0Cardiff335145
1Swansea230300
2Abergavenny12515
3Machynlleth2235
4Winchester40005
\n", + "
" + ], + "text/plain": [ + " name population\n", + "0 Cardiff 335145\n", + "1 Swansea 230300\n", + "2 Abergavenny 12515\n", + "3 Machynlleth 2235\n", + "4 Winchester 40005" + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [], + "source": [ + "df_C = df_C.set_index('name')" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
population
name
Cardiff335145
Swansea230300
Abergavenny12515
Machynlleth2235
Winchester40005
\n", + "
" + ], + "text/plain": [ + " population\n", + "name \n", + "Cardiff 335145\n", + "Swansea 230300\n", + "Abergavenny 12515\n", + "Machynlleth 2235\n", + "Winchester 40005" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
pubs
216000QN11551.0110.0410.088.001009.027.032.0NaN...NaNNaN12/6/201812/20/201812/21/201812/20/201812/21/2018NaNNaN12/20/2018name
324000BK6094.014.0310.0152.002002.020.043.011228.0...NaNNaN12/6/201812/20/201812/21/201812/20/201812/21/2018NaNNaN12/20/2018Cardiff2100
432000QN7791.02.0411.01291.021001.026.023.011364.0...NaNNaN12/6/201812/20/201812/21/201812/20/201812/21/2018NaNNaN12/20/2018Swansea1680
Abergavenny198
Machynlleth48
\n", - "

5 rows × 97 columns

\n", "
" ], "text/plain": [ - " Unnamed: 0 borough block lot cd ct2010 cb2010 schooldist \\\n", - "0 0 BX 5641.0 670.0 210.0 516.00 2.0 NaN \n", - "1 8000 SI 7864.0 67.0 503.0 244.02 1012.0 31.0 \n", - "2 16000 QN 11551.0 110.0 410.0 88.00 1009.0 27.0 \n", - "3 24000 BK 6094.0 14.0 310.0 152.00 2002.0 20.0 \n", - "4 32000 QN 7791.0 2.0 411.0 1291.02 1001.0 26.0 \n", + " pubs\n", + "name \n", + "Cardiff 2100\n", + "Swansea 1680\n", + "Abergavenny 198\n", + "Machynlleth 48" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_B" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "df_C['pubs'] = df_B['pubs']" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationpubs
name
Cardiff3351452100.0
Swansea2303001680.0
Abergavenny12515198.0
Machynlleth223548.0
Winchester40005NaN
\n", + "
" + ], + "text/plain": [ + " population pubs\n", + "name \n", + "Cardiff 335145 2100.0\n", + "Swansea 230300 1680.0\n", + "Abergavenny 12515 198.0\n", + "Machynlleth 2235 48.0\n", + "Winchester 40005 NaN" + ] + }, + "execution_count": 176, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nan" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_C['pubs'].loc['Winchester']" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.float64" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df_C['pubs'].loc['Winchester'])" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [], + "source": [ + "df_C['pubs'] = df_C['pubs'].fillna(850)" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
populationpubs
name
Cardiff3351452100.0
Swansea2303001680.0
Abergavenny12515198.0
Machynlleth223548.0
Winchester40005850.0
\n", + "
" + ], + "text/plain": [ + " population pubs\n", + "name \n", + "Cardiff 335145 2100.0\n", + "Swansea 230300 1680.0\n", + "Abergavenny 12515 198.0\n", + "Machynlleth 2235 48.0\n", + "Winchester 40005 850.0" ] }, - "execution_count": 160, + "execution_count": 181, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "df_C" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.1 Read data from files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a dataframe using read_csv()\n", + "# Here, we would use os.path.join() to build the file path if the CSV were stored in another folder\n", + "df = pd.read_csv('pluto_18v2_1.csv')\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = df[::20]\n", + "df.shape\n", + "df.to_csv('pluto_shortened.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('pluto_shortened.csv')\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "df.head()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.2 Save data back to files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(os.path.join( [ your file location here!! ]) )" + ] + } ], "metadata": {