From 117f72abb7ce74d75b05a8c5338245c926a39193 Mon Sep 17 00:00:00 2001 From: Gagan Date: Thu, 18 Jul 2024 19:21:28 +0530 Subject: [PATCH] feat(pg-usage-data): Add cache to batch tracking data (#4308) --- api/app/settings/common.py | 1 + api/app_analytics/cache.py | 33 +++++++++ api/app_analytics/middleware.py | 23 ++++-- .../migrations/0004_apiusageraw_count.py | 18 +++++ api/app_analytics/models.py | 1 + api/app_analytics/tasks.py | 7 +- .../unit/app_analytics/test_middleware.py | 57 +++++++++++++-- .../test_unit_app_analytics_cache.py | 73 +++++++++++++++++++ 8 files changed, 197 insertions(+), 16 deletions(-) create mode 100644 api/app_analytics/cache.py create mode 100644 api/app_analytics/migrations/0004_apiusageraw_count.py create mode 100644 api/tests/unit/app_analytics/test_unit_app_analytics_cache.py diff --git a/api/app/settings/common.py b/api/app/settings/common.py index 3593e1579176..948165862fac 100644 --- a/api/app/settings/common.py +++ b/api/app/settings/common.py @@ -328,6 +328,7 @@ INFLUXDB_ORG = env.str("INFLUXDB_ORG", default="") USE_POSTGRES_FOR_ANALYTICS = env.bool("USE_POSTGRES_FOR_ANALYTICS", default=False) +USE_CACHE_FOR_USAGE_DATA = env.bool("USE_CACHE_FOR_USAGE_DATA", default=False) ENABLE_API_USAGE_TRACKING = env.bool("ENABLE_API_USAGE_TRACKING", default=True) diff --git a/api/app_analytics/cache.py b/api/app_analytics/cache.py new file mode 100644 index 000000000000..aea7c84f7184 --- /dev/null +++ b/api/app_analytics/cache.py @@ -0,0 +1,33 @@ +from app_analytics.tasks import track_request +from django.utils import timezone + +CACHE_FLUSH_INTERVAL = 60 # seconds + + +class APIUsageCache: + def __init__(self): + self._cache = {} + self._last_flushed_at = timezone.now() + + def _flush(self): + for key, value in self._cache.items(): + track_request.delay( + kwargs={ + "resource": key[0], + "host": key[1], + "environment_key": key[2], + "count": value, + } + ) + + self._cache = {} + self._last_flushed_at = timezone.now() + + def track_request(self, resource: int, host: str, environment_key: str): + key = (resource, host, environment_key) + if key not in self._cache: + self._cache[key] = 1 + else: + self._cache[key] += 1 + if (timezone.now() - self._last_flushed_at).seconds > CACHE_FLUSH_INTERVAL: + self._flush() diff --git a/api/app_analytics/middleware.py b/api/app_analytics/middleware.py index 65bc2df4cca4..faa9ba67a8db 100644 --- a/api/app_analytics/middleware.py +++ b/api/app_analytics/middleware.py @@ -1,5 +1,8 @@ +from app_analytics.cache import APIUsageCache +from app_analytics.tasks import track_request +from django.conf import settings + from .models import Resource -from .tasks import track_request from .track import ( TRACKED_RESOURCE_ACTIONS, get_resource_from_uri, @@ -7,6 +10,8 @@ track_request_influxdb_async, ) +api_usage_cache = APIUsageCache() + class GoogleAnalyticsMiddleware: def __init__(self, get_response): @@ -41,13 +46,15 @@ def __init__(self, get_response): def __call__(self, request): resource = get_resource_from_uri(request.path) if resource in TRACKED_RESOURCE_ACTIONS: - track_request.delay( - kwargs={ - "resource": Resource.get_from_resource_name(resource), - "host": request.get_host(), - "environment_key": request.headers.get("X-Environment-Key"), - } - ) + kwargs = { + "resource": Resource.get_from_resource_name(resource), + "host": request.get_host(), + "environment_key": request.headers.get("X-Environment-Key"), + } + if settings.USE_CACHE_FOR_USAGE_DATA: + api_usage_cache.track_request(**kwargs) + else: + track_request.delay(kwargs=kwargs) response = self.get_response(request) diff --git a/api/app_analytics/migrations/0004_apiusageraw_count.py b/api/app_analytics/migrations/0004_apiusageraw_count.py new file mode 100644 index 000000000000..a997f44f0835 --- /dev/null +++ b/api/app_analytics/migrations/0004_apiusageraw_count.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.25 on 2024-07-08 09:12 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('app_analytics', '0003_add_feature_name_index'), + ] + + operations = [ + migrations.AddField( + model_name='apiusageraw', + name='count', + field=models.PositiveIntegerField(default=1), + ), + ] diff --git a/api/app_analytics/models.py b/api/app_analytics/models.py index eada57ebd3c8..716b62dc14e4 100644 --- a/api/app_analytics/models.py +++ b/api/app_analytics/models.py @@ -32,6 +32,7 @@ class APIUsageRaw(models.Model): created_at = models.DateTimeField(auto_now_add=True) host = models.CharField(max_length=255) resource = models.IntegerField(choices=Resource.choices) + count = models.PositiveIntegerField(default=1) class Meta: index_together = (("environment_id", "created_at"),) diff --git a/api/app_analytics/tasks.py b/api/app_analytics/tasks.py index c1493ac5209f..dcc47478f101 100644 --- a/api/app_analytics/tasks.py +++ b/api/app_analytics/tasks.py @@ -3,7 +3,7 @@ from app_analytics.constants import ANALYTICS_READ_BUCKET_SIZE from django.conf import settings -from django.db.models import Count, Q, Sum +from django.db.models import Q, Sum from django.utils import timezone from task_processor.decorators import ( register_recurring_task, @@ -97,7 +97,7 @@ def track_feature_evaluation( @register_task_handler() -def track_request(resource: int, host: str, environment_key: str): +def track_request(resource: int, host: str, environment_key: str, count: int = 1): environment = Environment.get_from_cache(environment_key) if environment is None: return @@ -105,6 +105,7 @@ def track_request(resource: int, host: str, environment_key: str): environment_id=environment.id, resource=resource, host=host, + count=count, ) @@ -187,7 +188,7 @@ def _get_api_usage_source_data( return ( APIUsageRaw.objects.filter(filters) .values("environment_id", "resource") - .annotate(count=Count("id")) + .annotate(count=Sum("count")) ) diff --git a/api/tests/unit/app_analytics/test_middleware.py b/api/tests/unit/app_analytics/test_middleware.py index a1cfaabf274b..b792abc7b80a 100644 --- a/api/tests/unit/app_analytics/test_middleware.py +++ b/api/tests/unit/app_analytics/test_middleware.py @@ -1,6 +1,9 @@ import pytest from app_analytics.middleware import APIUsageMiddleware from app_analytics.models import Resource +from django.test import RequestFactory +from pytest_django.fixtures import SettingsWrapper +from pytest_mock import MockerFixture @pytest.mark.parametrize( @@ -12,13 +15,56 @@ ("/api/v1/environment-document", Resource.ENVIRONMENT_DOCUMENT), ], ) -def test_APIUsageMiddleware_calls_track_request_correctly( - rf, mocker, path, enum_resource_value -): +def test_APIUsageMiddleware_calls_track_request_correctly_with_cache( + rf: RequestFactory, + mocker: MockerFixture, + path: str, + enum_resource_value: int, + settings: SettingsWrapper, +) -> None: # Given environment_key = "test" headers = {"HTTP_X-Environment-Key": environment_key} request = rf.get(path, **headers) + settings.USE_CACHE_FOR_USAGE_DATA = True + + mocked_api_usage_cache = mocker.patch( + "app_analytics.middleware.api_usage_cache", autospec=True + ) + + mocked_get_response = mocker.MagicMock() + middleware = APIUsageMiddleware(mocked_get_response) + + # When + middleware(request) + + # Then + mocked_api_usage_cache.track_request.assert_called_once_with( + resource=enum_resource_value, host="testserver", environment_key=environment_key + ) + + +@pytest.mark.parametrize( + "path, enum_resource_value", + [ + ("/api/v1/flags", Resource.FLAGS), + ("/api/v1/traits", Resource.TRAITS), + ("/api/v1/identities", Resource.IDENTITIES), + ("/api/v1/environment-document", Resource.ENVIRONMENT_DOCUMENT), + ], +) +def test_APIUsageMiddleware_calls_track_request_correctly_without_cache( + rf: RequestFactory, + mocker: MockerFixture, + path: str, + enum_resource_value: int, + settings: SettingsWrapper, +) -> None: + # Given + environment_key = "test" + headers = {"HTTP_X-Environment-Key": environment_key} + request = rf.get(path, **headers) + settings.USE_CACHE_FOR_USAGE_DATA = False mocked_track_request = mocker.patch("app_analytics.middleware.track_request") @@ -39,13 +85,14 @@ def test_APIUsageMiddleware_calls_track_request_correctly( def test_APIUsageMiddleware_avoids_calling_track_request_if_resoure_is_not_tracked( - rf, mocker -): + rf: RequestFactory, mocker: MockerFixture, settings: SettingsWrapper +) -> None: # Given environment_key = "test" headers = {"HTTP_X-Environment-Key": environment_key} path = "/api/v1/unknown" request = rf.get(path, **headers) + settings.USE_CACHE_FOR_USAGE_DATA = False mocked_track_request = mocker.patch("app_analytics.middleware.track_request") diff --git a/api/tests/unit/app_analytics/test_unit_app_analytics_cache.py b/api/tests/unit/app_analytics/test_unit_app_analytics_cache.py new file mode 100644 index 000000000000..de5f9114d589 --- /dev/null +++ b/api/tests/unit/app_analytics/test_unit_app_analytics_cache.py @@ -0,0 +1,73 @@ +from app_analytics.cache import CACHE_FLUSH_INTERVAL, APIUsageCache +from app_analytics.models import Resource +from django.utils import timezone +from freezegun import freeze_time +from pytest_mock import MockerFixture + + +def test_api_usage_cache(mocker: MockerFixture) -> None: + # Given + cache = APIUsageCache() + now = timezone.now() + mocked_track_request_task = mocker.patch("app_analytics.cache.track_request") + host = "host" + environment_key_1 = "environment_key_1" + environment_key_2 = "environment_key_2" + + with freeze_time(now) as frozen_time: + # Make some tracking requests + for _ in range(10): + for resource in Resource: + cache.track_request(resource, host, environment_key_1) + cache.track_request(resource, host, environment_key_2) + + # make sure track_request task was not called + assert not mocked_track_request_task.called + + # Now, let's move the time forward + frozen_time.tick(CACHE_FLUSH_INTERVAL + 1) + + # let's track another request(to trigger flush) + cache.track_request( + Resource.FLAGS, + host, + environment_key_1, + ) + + # Then - track request lambda was called for every resource and environment_key combination + expected_calls = [] + for resource in Resource: + expected_calls.append( + mocker.call( + kwargs={ + "resource": resource, + "host": host, + "environment_key": environment_key_1, + "count": 11 if resource == Resource.FLAGS else 10, + } + ) + ) + expected_calls.append( + mocker.call( + kwargs={ + "resource": resource, + "host": host, + "environment_key": environment_key_2, + "count": 10, + } + ) + ) + mocked_track_request_task.delay.assert_has_calls(expected_calls) + + # Next, let's reset the mock + mocked_track_request_task.reset_mock() + + # and track another request + cache.track_request( + Resource.FLAGS, + host, + environment_key_1, + ) + + # finally, make sure track_request task was not called + assert not mocked_track_request_task.called