From 33ee7afed7581028e386faee95e4cdcfc7dd980f Mon Sep 17 00:00:00 2001
From: nsuberi
Date: Thu, 31 May 2018 16:36:14 -0400
Subject: [PATCH] adding back in climate watch api files

---
 cli_047_cw_api_ndc_ratification/.gitignore    |   9 +
 cli_047_cw_api_ndc_ratification/Dockerfile    |  31 ++++
 cli_047_cw_api_ndc_ratification/README.md     |   0
 .../contents/main.py                          |   4 +
 .../contents/src/__init__.py                  | 178 ++++++++++++++++++
 cli_047_cw_api_ndc_ratification/start.sh      |  12 ++
 cli_047_cw_api_ndc_ratification/time.cron     |   1 +
 7 files changed, 235 insertions(+)
 create mode 100644 cli_047_cw_api_ndc_ratification/.gitignore
 create mode 100644 cli_047_cw_api_ndc_ratification/Dockerfile
 create mode 100644 cli_047_cw_api_ndc_ratification/README.md
 create mode 100644 cli_047_cw_api_ndc_ratification/contents/main.py
 create mode 100644 cli_047_cw_api_ndc_ratification/contents/src/__init__.py
 create mode 100644 cli_047_cw_api_ndc_ratification/start.sh
 create mode 100644 cli_047_cw_api_ndc_ratification/time.cron

Review note: Dockerfile, contents/src/__init__.py and start.sh were amended
in review; the blob ids on their "index" lines are those of the original commit.

diff --git a/cli_047_cw_api_ndc_ratification/.gitignore b/cli_047_cw_api_ndc_ratification/.gitignore
new file mode 100644
index 00000000..ba1ec090
--- /dev/null
+++ b/cli_047_cw_api_ndc_ratification/.gitignore
@@ -0,0 +1,9 @@
+.env
+*.py[c|o]
+.DS_Store
+
+data/*
+__pycache__
+
+*.csv
+*#
\ No newline at end of file
diff --git a/cli_047_cw_api_ndc_ratification/Dockerfile b/cli_047_cw_api_ndc_ratification/Dockerfile
new file mode 100644
index 00000000..b3bc7543
--- /dev/null
+++ b/cli_047_cw_api_ndc_ratification/Dockerfile
@@ -0,0 +1,31 @@
+FROM python:3.6
+MAINTAINER Nathan Suberi
+
+# install core libraries
+RUN apt-get update
+RUN apt-get install -y git
+
+# install application libraries
+RUN pip install --upgrade pip
+RUN pip install pandas
+RUN pip install requests \
+    cartoframes
+
+RUN pip install -e git+https://github.com/fgassert/cartosql.py.git#egg=cartosql
+
+# set name
+ARG NAME=climatewatchapi
+ENV NAME ${NAME}
+
+# copy the application folder inside the container
+RUN mkdir -p /opt/$NAME/data
+WORKDIR /opt/$NAME/
+COPY contents/ .
+VOLUME ./data
+
+# Set user for container security
+RUN useradd $NAME
+RUN chown -R $NAME:$NAME .
+USER $NAME
+
+CMD ["python", "main.py"]
diff --git a/cli_047_cw_api_ndc_ratification/README.md b/cli_047_cw_api_ndc_ratification/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/cli_047_cw_api_ndc_ratification/contents/main.py b/cli_047_cw_api_ndc_ratification/contents/main.py
new file mode 100644
index 00000000..eecdfb9a
--- /dev/null
+++ b/cli_047_cw_api_ndc_ratification/contents/main.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+if __name__ == '__main__':
+    import src
+    src.main()
diff --git a/cli_047_cw_api_ndc_ratification/contents/src/__init__.py b/cli_047_cw_api_ndc_ratification/contents/src/__init__.py
new file mode 100644
index 00000000..ce3d5bbe
--- /dev/null
+++ b/cli_047_cw_api_ndc_ratification/contents/src/__init__.py
@@ -0,0 +1,178 @@
+import os
+import logging
+import sys
+import requests
+from collections import OrderedDict
+from datetime import datetime, timedelta
+from functools import reduce
+import cartoframes
+import cartosql
+import pandas as pd
+
+# Constants
+SOURCE_URL = 'https://www.climatewatchdata.org/api/v1/ndcs/'
+CLEAR_TABLE_FIRST = False
+DATE_FORMAT = '%Y-%m-%d'
+COUNTRY_CODE = 'country_code'
+LOG_LEVEL = logging.INFO
+
+# Configure logging before the first logging call below. A module-level
+# logging.info() on an unconfigured root logger implicitly runs
+# basicConfig() at the default WARNING level, which turns the later
+# basicConfig(level=LOG_LEVEL) into a no-op and drops every INFO message.
+logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
+
+CARTO_KEY_WRIRW = os.environ.get('CARTO_KEY_WRIRW')
+CARTO_KEY_RWNRT = os.environ.get('CARTO_KEY')
+
+cc_wrirw = cartoframes.CartoContext(base_url='https://wri-rw.carto.com/',
+                                    api_key=CARTO_KEY_WRIRW)
+cc_rwnrt = cartoframes.CartoContext(base_url='https://rw-nrt.carto.com/',
+                                    api_key=CARTO_KEY_RWNRT)
+
+# Need to drop the alias column b/c otherwise get multiple matches
+ISO_ALIAS_INFO = cc_wrirw.read('country_aliases_extended')
+ISO_ALIAS_INFO = ISO_ALIAS_INFO.drop(['alias', 'index', 'the_geom'], axis=1).drop_duplicates()
+logging.info('Alias table shape: {}'.format(ISO_ALIAS_INFO.shape))
+
+CARTO_TABLE = 'cli_047_ndc_ratification'
+CARTO_SCHEMA = OrderedDict([
+    ("country_code", "text"),
+    ("ratification_status", "text"),
+    ("last_update", "timestamp"),
+    ("rw_country_code", "text"),
+    ("rw_country_name", "text"),
+])
+UID_FIELD = 'country_code'
+TIME_FIELD = 'last_update'
+DATA_DIR = 'data'
+
+def genRow(obs):
+    '''Build one row, ordered like CARTO_SCHEMA, from an observation dict'''
+    last_update = datetime.today().strftime(DATE_FORMAT)
+    row = []
+    for field in CARTO_SCHEMA.keys():
+        if field == 'country_code':
+            row.append(obs['country_code'])
+        elif field == 'ratification_status':
+            row.append(obs['ratification_status'])
+        elif field == 'last_update':
+            row.append(last_update)
+        else:
+            # Placeholder for rw_country_code and rw_country_name
+            row.append(' ')
+    return row
+
+def keep_rat_stat(item):
+    '''Return True only for the indicator named "Status of ratification"'''
+    return item['name'] == 'Status of ratification'
+
+def make_obs(agg, elem):
+    '''Reducer: append the observation built from one (code, values) pair'''
+    obs = {
+        'country_code':elem[0],
+        'ratification_status':elem[1][0]['value']
+    }
+    agg.append(obs)
+    return agg
+
+def georef_by_ccode(df, ccode):
+    '''
+    Left-join df to ISO_ALIAS_INFO on the ccode column, log unmatched codes,
+    fill rw_country_code / rw_country_name and return the CARTO_SCHEMA columns
+    '''
+    # Weird behavior of globals in a local scope here:
+    # https://stackoverflow.com/questions/10851906/python-3-unboundlocalerror-local-variable-referenced-before-assignment
+    df.index = list(range(df.shape[0]))
+    data_with_alias = df.merge(ISO_ALIAS_INFO,
+                               left_on=ccode,
+                               right_on='iso',
+                               how='left')
+    # merge() suffixes clashing column names with _x/_y, hence the fallbacks
+    try:
+        null_isos = pd.isnull(data_with_alias['iso'])
+    except KeyError:
+        null_isos = pd.isnull(data_with_alias['iso_y'])
+
+    if sum(null_isos):
+        no_iso_match = data_with_alias[null_isos]
+        logging.info('no match for these isos in the data being processed: ')
+        try:
+            missed_isos = no_iso_match[ccode].unique()
+            logging.info(missed_isos)
+        except KeyError:
+            ccode = ccode +'_x'
+            missed_isos = no_iso_match[ccode].unique()
+            logging.info(missed_isos)
+
+    logging.info('df shape: {}'.format(df.shape))
+    logging.info('data_with_alias shape: {}'.format(data_with_alias.shape))
+
+    try:
+        df['rw_country_code'] = data_with_alias['iso'].values
+    except KeyError:
+        df['rw_country_code'] = data_with_alias['iso_y'].values
+    try:
+        df['rw_country_name'] = data_with_alias['name']
+    except KeyError:
+        df['rw_country_name'] = data_with_alias['name_y']
+
+    # Enforce correct ordering of columns here
+    return df[list(CARTO_SCHEMA.keys())]
+
+def processNewData():
+    '''
+    Fetch, parse and post the ratification data; return the number of rows
+    '''
+    response = requests.get(SOURCE_URL)
+    # Surface HTTP errors here rather than as a confusing JSON/KeyError below
+    response.raise_for_status()
+    data = response.json()['indicators']
+    rat_stat = list(filter(keep_rat_stat, data))
+    country_rat_stat = rat_stat[0]['locations']
+    country_rat_stat_obs = reduce(make_obs, country_rat_stat.items(), [])
+    parsed_rows = pd.DataFrame(list(map(genRow, country_rat_stat_obs)))
+    logging.info(parsed_rows)
+    parsed_rows.columns = CARTO_SCHEMA.keys()
+    georeffed_rows = georef_by_ccode(parsed_rows, COUNTRY_CODE)
+    num_new = len(georeffed_rows)
+    if num_new:
+        logging.info('Pushing {} new rows'.format(num_new))
+        #cc_rwnrt.write(georeffed_rows, CARTO_TABLE, overwrite=True)
+        cartosql.truncateTable(CARTO_TABLE)
+        cartosql.blockInsertRows(CARTO_TABLE, CARTO_SCHEMA.keys(),
+                                 CARTO_SCHEMA.values(), list(georeffed_rows.values))
+
+    return num_new
+
+
+##############################################################
+# General logic for Carto
+# should be the same for most tabular datasets
+##############################################################
+
+def createTableWithIndex(table, schema, id_field, time_field=''):
+    '''Create table, a unique index on id_field and, if given, one on time_field'''
+    cartosql.createTable(table, schema)
+    cartosql.createIndex(table, id_field, unique=True)
+    if time_field:
+        cartosql.createIndex(table, time_field)
+
+def main():
+    '''Create the Carto table if it is missing, then refresh its contents'''
+    # No-op when logging was already configured at import time
+    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
+    logging.info('STARTING')
+
+    # 1. Check if table exists and create table
+    if cartosql.tableExists(CARTO_TABLE):
+        logging.info('Table {} already exists'.format(CARTO_TABLE))
+    else:
+        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
+        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)
+
+    # 2. Fetch and post new data
+    num_new = processNewData()
+    logging.info('Total rows: {}'.format(num_new))
+
+    logging.info('SUCCESS')
diff --git a/cli_047_cw_api_ndc_ratification/start.sh b/cli_047_cw_api_ndc_ratification/start.sh
new file mode 100644
index 00000000..07b9c96a
--- /dev/null
+++ b/cli_047_cw_api_ndc_ratification/start.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+NAME=$(basename $(pwd))
+LOG=${LOG:-udp://localhost}
+
+# CARTO_KEY_WRIRW and CARTO_KEY are read from the git-ignored .env file;
+# never hard-code API keys here (previously committed keys must be rotated).
+docker build -t $NAME --build-arg NAME=$NAME .
+docker run --log-driver=syslog --log-opt syslog-address=$LOG \
+           --log-opt tag=$NAME \
+           --env-file .env \
+           --rm $NAME python main.py
diff --git a/cli_047_cw_api_ndc_ratification/time.cron b/cli_047_cw_api_ndc_ratification/time.cron
new file mode 100644
index 00000000..4d4845f5
--- /dev/null
+++ b/cli_047_cw_api_ndc_ratification/time.cron
@@ -0,0 +1 @@
+0 2 30 * *