From fdbea8ff01fb28496c8dfafe36490004ea53d0cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Kek=C3=A4l=C3=A4inen?= Date: Wed, 15 Apr 2020 01:30:06 +0300 Subject: [PATCH 1/2] Add country parameter to API endpoint 'stats' --- api/fevermap/api/stats.py | 85 +++++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 12 deletions(-) diff --git a/api/fevermap/api/stats.py b/api/fevermap/api/stats.py index 5186838..434fb9f 100644 --- a/api/fevermap/api/stats.py +++ b/api/fevermap/api/stats.py @@ -3,6 +3,7 @@ from datetime import datetime, timedelta from flask_restful import Resource +from flask import request from fevermap.db import db_session from fevermap.db.models.submission import Submission @@ -13,13 +14,25 @@ app = Flask('fevermap') class StatsResource(Resource): + def get(self, **kwargs): """Get stats.""" - # @TODO: Add cache headers so Nginx will cache the respons and not # hit the Flask server for all requests - app.logger.info('Serving stats') + # Define filter array. Empty by default, which means no filtering. + filters = [] + + # If request has "?country=FI" then stats will apply only to submission + # data with location_country_code="FI". Otherwise server global stats. + if 'country' in request.args: + app.logger.info( + 'Serving stats for country %s' % request.args['country']) + filters.append( + Submission.location_country_code.__eq__( + request.args['country'])) + else: + app.logger.info('Serving stats') data = { 'generated': datetime.now().isoformat(timespec='seconds'), @@ -39,9 +52,10 @@ class StatsResource(Resource): # First check if the database is empty (e.g new development install) # and bail out quickly with all zeros response in that case - submitters_count = db_session.query(Submitter).count() + submission_count = db_session.\ + query(Submission).filter(*filters).count() - if submitters_count == 0: + if submission_count == 0: return { 'success': True, 'data': data @@ -50,18 +64,65 @@ class StatsResource(Resource): midnight = datetime.combine(datetime.now(), datetime.min.time()) midnight_7_days_ago = midnight - timedelta(days=7) + # Analyze submissions, but group by submitter id to see how many unique + # submitters submitted data['submitters'] = { - 'since': db_session.query(Submitter).order_by(Submitter.timestamp_created).limit(1).one().timestamp_created.isoformat(timespec='seconds'), - 'total': submitters_count, - 'today': db_session.query(Submitter).filter(Submitter.timestamp_modified > midnight).count(), - 'past_7_days': db_session.query(Submitter).filter(Submitter.timestamp_modified > midnight_7_days_ago).count(), + # When was the first submitter created + 'since': + db_session.query(Submitter). + outerjoin(Submission, Submitter.id == Submission.submitter_id). + filter(*filters). + order_by(Submitter.timestamp_created). + limit(1). + one(). + timestamp_created.isoformat(timespec='seconds'), + # How many submitters have submitted in total + 'total': + db_session.query(Submission). + filter(*filters). + group_by(Submission.submitter_id). + count(), + # How many submitters have submitted today + 'today': + db_session.query(Submission). + filter(*filters). + filter(Submission.timestamp_modified > midnight). + group_by(Submission.submitter_id). + count(), + # How many submitters have submitted in past week + 'past_7_days': + db_session.query(Submission). + filter(*filters). + filter(Submission.timestamp_modified > midnight_7_days_ago). + group_by(Submission.submitter_id). + count(), } data['submissions'] = { - 'since': db_session.query(Submission).order_by(Submission.timestamp_created).limit(1).one().timestamp_created.isoformat(timespec='seconds'), - 'total': db_session.query(Submission).count(), - 'today': db_session.query(Submission).filter(Submission.timestamp_modified > midnight).count(), - 'past_7_days': db_session.query(Submission).filter(Submission.timestamp_modified > midnight_7_days_ago).count(), + # When was the first submission created + 'since': + db_session.query(Submission). + filter(*filters). + order_by(Submission.timestamp_created). + limit(1). + one().timestamp_created.isoformat(timespec='seconds'), + # How many submission has there been in total + 'total': + db_session.query(Submission). + filter(*filters). + count(), + # How many submission has there been today + 'today': + db_session.query(Submission). + filter(*filters). + filter(Submission.timestamp_modified > midnight). + count(), + # How many submission has there been in past week + 'past_7_days': + db_session.query(Submission). + filter(*filters). + filter(Submission.timestamp_modified > midnight_7_days_ago). + count(), } return { -- GitLab From 065f8ef0ae7017b56cb992ea3906d78871227b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Kek=C3=A4l=C3=A4inen?= Date: Sun, 19 Apr 2020 16:21:49 +0300 Subject: [PATCH 2/2] Add new API endpoint 'location' for stats per country or post code - Update README to show an SQL example of how to fetch all submissions with a JOIN on submitter data. - Split API tests into submitting and fetching tests, introduce 'make test-api-stats' for ease of use. - Add new route /api/v0/location/ to simply fetch how many submissions there are. - Implement API examples that both use built-int SQLAlchemy functions as well as shows how to run plain SQL queries for maximum flexibility. Document both examples heavily --- Makefile | 13 ++-- api/README.md | 21 ++++++ api/fevermap/api/__init__.py | 12 ++++ api/fevermap/api/location.py | 85 +++++++++++++++++++++++++ api/test-api-location.sh | 25 ++++++++ api/{test-api.sh => test-api-submit.sh} | 3 - 6 files changed, 152 insertions(+), 7 deletions(-) create mode 100644 api/fevermap/api/location.py create mode 100755 api/test-api-location.sh rename api/{test-api.sh => test-api-submit.sh} (98%) diff --git a/Makefile b/Makefile index ceb8142..3f5b607 100644 --- a/Makefile +++ b/Makefile @@ -26,13 +26,18 @@ test-extra: @echo "----------- Extra tests completed successfully -----------" test-api: - # Test common API requests (requires API server to be running) - # This always tests http://localhost:9000 and is intended only for local testing. - ./api/test-api.sh + # Test API submissions and responses + # Intended only for local testing and expects http://localhost:9000 to be running. + ./api/test-api-submit.sh + +test-api-stats: + # Test read-only API requests + # Intended only for local testing and expects http://localhost:9000 to be running. + ./api/test-api-location.sh test-data-generate: # Run test data generator inside the container, where it automatically has - # corrent permissions to access the database. + # correct permissions to access the database directly (bypassing API). docker exec -it fevermap_api_1 /app/test-data-generator.py update-master: diff --git a/api/README.md b/api/README.md index d698f80..cc78c6b 100644 --- a/api/README.md +++ b/api/README.md @@ -227,3 +227,24 @@ MariaDB [fevermap]> select * from submissions; +---------+---------------------+---------------------+--------------+------------+-----------------------------+---------------+---------------------+---------------------+-------------------+-----------------------+----------------------+--------------+--------------+--------------+ | 3937597 | 2020-04-13 07:18:45 | 2020-04-13 07:18:45 | 0 | NULL | NULL | NULL | NULL | NULL | 0 | US | 70-17710 | 22 | 60 | 187580 | +---------+---------------------+---------------------+--------------+------------+-----------------------------+---------------+---------------------+---------------------+-------------------+-----------------------+----------------------+--------------+--------------+--------------+ + +MariaDB [fevermap]> SELECT submissions.timestamp_created,fever_status,fever_temp,diagnosed_covid19,location_country_code,location_postal_code,location_lng,location_lat,submitter_id,device_id,birth_year,gender FROM submissions LEFT JOIN submitters ON submissions.submitter_id=submitters.id; ++---------------------+--------------+------------+-------------------+-----------------------+----------------------+--------------+--------------+--------------+---------------+------------+--------+ +| timestamp_created | fever_status | fever_temp | diagnosed_covid19 | location_country_code | location_postal_code | location_lng | location_lat | submitter_id | device_id | birth_year | gender | ++---------------------+--------------+------------+-------------------+-----------------------+----------------------+--------------+--------------+--------------+---------------+------------+--------+ +| 2020-04-16 16:12:14 | 0 | NULL | 0 | FI | 20100 | 22 | 60 | 1 | 1584694478111 | 2000 | F | +| 2020-04-16 16:12:14 | 0 | NULL | 0 | SE | 7017710 | 22 | 60 | 2 | 1584694478222 | 2000 | F | +| 2020-04-16 16:12:15 | 1 | 38 | 1 | FI | 20100 | 22 | 60 | 3 | 1584694478333 | 2000 | M | +| 2020-04-16 16:12:15 | 0 | NULL | 0 | US | 70-17710 | 22 | 60 | 4 | 1584694478444 | 2000 | M | +| 2020-04-16 16:12:15 | 0 | 37 | 1 | IE | H91 E2K3 | -9 | 53 | 5 | 1584694478555 | 1980 | M | +| 2020-04-16 16:12:15 | 0 | 37.3 | 0 | FI | 33100 | 24 | 61 | 6 | 1584694478666 | 1980 | M | +| 2020-04-16 16:12:15 | 0 | NULL | 0 | US | 70-17710 | 22 | 60 | 7 | 1587053535437 | 2000 | M | +| 2020-04-19 11:08:33 | 0 | NULL | 0 | FI | 20100 | 22 | 60 | 1 | 1584694478111 | 2000 | F | +| 2020-04-19 11:08:33 | 0 | NULL | 0 | SE | 7017710 | 22 | 60 | 2 | 1584694478222 | 2000 | F | +| 2020-04-19 11:08:33 | 1 | 38 | 1 | FI | 20100 | 22 | 60 | 3 | 1584694478333 | 2000 | M | +| 2020-04-19 11:08:33 | 0 | NULL | 0 | US | 70-17710 | 22 | 60 | 4 | 1584694478444 | 2000 | M | +| 2020-04-19 11:08:33 | 0 | 37 | 1 | IE | H91 E2K3 | -9 | 53 | 5 | 1584694478555 | 1980 | M | +| 2020-04-19 11:08:33 | 0 | 37.3 | 0 | FI | 33100 | 24 | 61 | 6 | 1584694478666 | 1980 | M | +| 2020-04-19 11:08:33 | 0 | NULL | 0 | US | 70-17710 | 22 | 60 | 8 | 1587294513836 | 2000 | M | +| 2020-04-19 11:08:40 | 0 | NULL | 0 | US | 70-17710 | 22 | 60 | 9 | 1587294520558 | 2000 | M | ++---------------------+--------------+------------+-------------------+-----------------------+----------------------+--------------+--------------+--------------+---------------+------------+--------+ diff --git a/api/fevermap/api/__init__.py b/api/fevermap/api/__init__.py index d684ff0..3240e9f 100644 --- a/api/fevermap/api/__init__.py +++ b/api/fevermap/api/__init__.py @@ -7,8 +7,12 @@ from flask_restful import Api from .ping import PingResource from .submission import SubmissionResource from .stats import StatsResource +from .location import LocationResource v0_blueprint = Blueprint('v0', __name__) + +# Use Flask API object +# https://flask.palletsprojects.com/en/1.1.x/api/ api = Api(v0_blueprint) api.add_resource(PingResource, '/ping') @@ -16,3 +20,11 @@ api.add_resource(PingResource, '/ping') api.add_resource(SubmissionResource, '/submit') api.add_resource(StatsResource, '/stats') + +# Arguments 'country_code' and 'postal_code' will be set automatically as +# arguments to the get() function in LocationResource +# https://flask.palletsprojects.com/en/1.1.x/api/#url-route-registrations +api.add_resource( + LocationResource, + '/location', + '/location/') diff --git a/api/fevermap/api/location.py b/api/fevermap/api/location.py new file mode 100644 index 0000000..678cf7c --- /dev/null +++ b/api/fevermap/api/location.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + +from datetime import datetime + +from flask_restful import Resource + +from fevermap.db import db_session +from fevermap.db.models.submission import Submission + +from sqlalchemy import func + +from flask import Flask +app = Flask('fevermap') + + +class LocationResource(Resource): + + def get(self, country_code=None, **kwargs): + """Get data by location.""" + # @TODO: Add cache headers so Nginx will cache the respons and not + # hit the Flask server for all requests + + data = { + 'generated': datetime.now().isoformat(timespec='seconds'), + 'submissions': {} + } + + # If a country_code is given, return counts by postal codes + # within that country + if country_code: + + # Construct plain SQL query manually + # Note placeholder :country_code inside query, which does not need + # to have any quotes as SQLAlchemy will later handle it. + # Use only first 3 characters from postal code to have larger + # clusters of results and improve privacy as listing exact postal + # codes would yield lots of data points with very low values. + sql = """ + SELECT + SUBSTRING(location_postal_code,1,3) as post_code_prefix, + COUNT(*) AS count + FROM submissions + WHERE location_country_code = :country_code + GROUP BY post_code_prefix + ORDER BY post_code_prefix ASC + """ + + # Execute SQL query passing the variable parameters to it + # Load the full result set into a variable (beware that if the + # result set is big, this will be slow and consume a lot of + # memory, so try to make the SQL query as specific as possible) + result = db_session.execute( + sql, + {'country_code': country_code} + ).fetchall() + + # Iterate over result set and store the result items in a Python + # dict which later is automatically turned into the JSON response + for r in result: + data['submissions'][r.post_code_prefix] = r.count + + return { + 'success': True, + 'data': data, + 'country_code': country_code + # Include country so result set self-documents what country the + # postal codes listed apply for + } + + # Default response if no country_code was given, + # return counts by countries as a global list + result = db_session.query( + Submission.location_country_code, + func.count(Submission.location_country_code) + ).group_by(Submission.location_country_code).all() + + # Iterate over result set and store the result items in a dict + for item, count in result: + data['submissions'][item] = count + + # Python dict that becomes the response JSON + return { + 'success': True, + 'data': data, + } diff --git a/api/test-api-location.sh b/api/test-api-location.sh new file mode 100755 index 0000000..580d1f3 --- /dev/null +++ b/api/test-api-location.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e +# +# This runs dummy API queries (using curl) against the API server. +# +# The script is hard-coded to only run against a local development environment. +API_URL="http://localhost:9000" + +function api_test(){ + echo "-----------------------------------------------------------------------" + echo "==> Request endpoint: $1" + echo "<== Response:" + curl -iLsS "$API_URL/api/v0/$1" > /tmp/response + tail -n +7 /tmp/response + if ! grep --quiet "200 OK" /tmp/response + then + echo "^ Error, fix it!" + exit 1 + fi +} + +api_test stats +api_test stats?country=fi +api_test location +api_test location/fi diff --git a/api/test-api.sh b/api/test-api-submit.sh similarity index 98% rename from api/test-api.sh rename to api/test-api-submit.sh index 8710ff4..0fe64da 100755 --- a/api/test-api.sh +++ b/api/test-api-submit.sh @@ -167,6 +167,3 @@ api_test '{ "location_lng":"22.28", "location_lat":"60.45" }' - -# Also test the stats API -curl -iLsS "$API_URL/api/v0/stats" -- GitLab