Skip to content

Commit

Permalink
feat(pg-usage-data): Add cache to batch tracking data (#4308)
Browse files Browse the repository at this point in the history
  • Loading branch information
gagantrivedi authored Jul 18, 2024
1 parent 691590d commit 117f72a
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 16 deletions.
1 change: 1 addition & 0 deletions api/app/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@
INFLUXDB_ORG = env.str("INFLUXDB_ORG", default="")

USE_POSTGRES_FOR_ANALYTICS = env.bool("USE_POSTGRES_FOR_ANALYTICS", default=False)
# When true, the API usage middleware counts tracked requests in an in-memory
# cache (flushed periodically) instead of queueing one tracking task per request.
USE_CACHE_FOR_USAGE_DATA = env.bool("USE_CACHE_FOR_USAGE_DATA", default=False)

ENABLE_API_USAGE_TRACKING = env.bool("ENABLE_API_USAGE_TRACKING", default=True)

Expand Down
33 changes: 33 additions & 0 deletions api/app_analytics/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from app_analytics.tasks import track_request
from django.utils import timezone

# Minimum time, in seconds, that must elapse since the last flush before the
# cached counts are handed over to the ``track_request`` task again.
CACHE_FLUSH_INTERVAL = 60  # seconds


class APIUsageCache:
    """In-memory counter of API requests, flushed in batches to ``track_request``.

    Requests are counted per ``(resource, host, environment_key)`` combination.
    The flush condition is only evaluated when ``track_request`` is called, so
    counts stay in memory until a request arrives after the interval has elapsed.

    NOTE(review): no locking is performed here; confirm whether instances can be
    accessed concurrently.
    """

    def __init__(self):
        # Maps (resource, host, environment_key) -> number of requests seen
        # since the last flush.
        self._cache = {}
        self._last_flushed_at = timezone.now()

    def _flush(self):
        """Queue one ``track_request`` task per cached key and reset the cache."""
        for (resource, host, environment_key), count in self._cache.items():
            track_request.delay(
                kwargs={
                    "resource": resource,
                    "host": host,
                    "environment_key": environment_key,
                    "count": count,
                }
            )

        self._cache = {}
        self._last_flushed_at = timezone.now()

    def track_request(self, resource: int, host: str, environment_key: str):
        """Count one request and flush if the flush interval has elapsed.

        Args:
            resource: Resource identifier of the tracked endpoint.
            host: Host the request was made to.
            environment_key: Environment key supplied with the request.
        """
        key = (resource, host, environment_key)
        self._cache[key] = self._cache.get(key, 0) + 1

        # ``timedelta.seconds`` is only the seconds *component* (0..86399) and
        # ignores ``days``; ``total_seconds()`` is the real elapsed duration, so
        # a long idle period (or a negative delta) is handled correctly.
        elapsed = timezone.now() - self._last_flushed_at
        if elapsed.total_seconds() > CACHE_FLUSH_INTERVAL:
            self._flush()
23 changes: 15 additions & 8 deletions api/app_analytics/middleware.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from app_analytics.cache import APIUsageCache
from app_analytics.tasks import track_request
from django.conf import settings

from .models import Resource
from .tasks import track_request
from .track import (
TRACKED_RESOURCE_ACTIONS,
get_resource_from_uri,
track_request_googleanalytics_async,
track_request_influxdb_async,
)

# Module-level cache instance used by the API usage middleware when
# settings.USE_CACHE_FOR_USAGE_DATA is enabled.
api_usage_cache = APIUsageCache()


class GoogleAnalyticsMiddleware:
def __init__(self, get_response):
Expand Down Expand Up @@ -41,13 +46,15 @@ def __init__(self, get_response):
def __call__(self, request):
resource = get_resource_from_uri(request.path)
if resource in TRACKED_RESOURCE_ACTIONS:
track_request.delay(
kwargs={
"resource": Resource.get_from_resource_name(resource),
"host": request.get_host(),
"environment_key": request.headers.get("X-Environment-Key"),
}
)
kwargs = {
"resource": Resource.get_from_resource_name(resource),
"host": request.get_host(),
"environment_key": request.headers.get("X-Environment-Key"),
}
if settings.USE_CACHE_FOR_USAGE_DATA:
api_usage_cache.track_request(**kwargs)
else:
track_request.delay(kwargs=kwargs)

response = self.get_response(request)

Expand Down
18 changes: 18 additions & 0 deletions api/app_analytics/migrations/0004_apiusageraw_count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.25 on 2024-07-08 09:12

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds a ``count`` column to ``apiusageraw``; rows default to a count of 1.

    dependencies = [("app_analytics", "0003_add_feature_name_index")]

    operations = [
        migrations.AddField(
            model_name="apiusageraw",
            name="count",
            field=models.PositiveIntegerField(default=1),
        ),
    ]
1 change: 1 addition & 0 deletions api/app_analytics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class APIUsageRaw(models.Model):
created_at = models.DateTimeField(auto_now_add=True)
host = models.CharField(max_length=255)
resource = models.IntegerField(choices=Resource.choices)
count = models.PositiveIntegerField(default=1)

class Meta:
index_together = (("environment_id", "created_at"),)
Expand Down
7 changes: 4 additions & 3 deletions api/app_analytics/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from app_analytics.constants import ANALYTICS_READ_BUCKET_SIZE
from django.conf import settings
from django.db.models import Count, Q, Sum
from django.db.models import Q, Sum
from django.utils import timezone
from task_processor.decorators import (
register_recurring_task,
Expand Down Expand Up @@ -97,14 +97,15 @@ def track_feature_evaluation(


@register_task_handler()
def track_request(resource: int, host: str, environment_key: str):
def track_request(resource: int, host: str, environment_key: str, count: int = 1):
    """Store an ``APIUsageRaw`` row for ``count`` requests to ``resource``.

    Nothing is stored when no environment is found in the cache for
    ``environment_key``.
    """
    environment = Environment.get_from_cache(environment_key)
    if environment is not None:
        usage_fields = {
            "environment_id": environment.id,
            "resource": resource,
            "host": host,
            "count": count,
        }
        APIUsageRaw.objects.create(**usage_fields)


Expand Down Expand Up @@ -187,7 +188,7 @@ def _get_api_usage_source_data(
return (
APIUsageRaw.objects.filter(filters)
.values("environment_id", "resource")
.annotate(count=Count("id"))
.annotate(count=Sum("count"))
)


Expand Down
57 changes: 52 additions & 5 deletions api/tests/unit/app_analytics/test_middleware.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import pytest
from app_analytics.middleware import APIUsageMiddleware
from app_analytics.models import Resource
from django.test import RequestFactory
from pytest_django.fixtures import SettingsWrapper
from pytest_mock import MockerFixture


@pytest.mark.parametrize(
Expand All @@ -12,13 +15,56 @@
("/api/v1/environment-document", Resource.ENVIRONMENT_DOCUMENT),
],
)
def test_APIUsageMiddleware_calls_track_request_correctly(
rf, mocker, path, enum_resource_value
):
def test_APIUsageMiddleware_calls_track_request_correctly_with_cache(
    rf: RequestFactory,
    mocker: MockerFixture,
    path: str,
    enum_resource_value: int,
    settings: SettingsWrapper,
) -> None:
    # Given - caching is switched on and the module-level cache is mocked out
    settings.USE_CACHE_FOR_USAGE_DATA = True
    cache_mock = mocker.patch(
        "app_analytics.middleware.api_usage_cache", autospec=True
    )

    environment_key = "test"
    request = rf.get(path, **{"HTTP_X-Environment-Key": environment_key})
    middleware = APIUsageMiddleware(mocker.MagicMock())

    # When
    middleware(request)

    # Then - the request is recorded on the cache exactly once
    cache_mock.track_request.assert_called_once_with(
        resource=enum_resource_value,
        host="testserver",
        environment_key=environment_key,
    )


@pytest.mark.parametrize(
"path, enum_resource_value",
[
("/api/v1/flags", Resource.FLAGS),
("/api/v1/traits", Resource.TRAITS),
("/api/v1/identities", Resource.IDENTITIES),
("/api/v1/environment-document", Resource.ENVIRONMENT_DOCUMENT),
],
)
def test_APIUsageMiddleware_calls_track_request_correctly_without_cache(
rf: RequestFactory,
mocker: MockerFixture,
path: str,
enum_resource_value: int,
settings: SettingsWrapper,
) -> None:
# Given
environment_key = "test"
headers = {"HTTP_X-Environment-Key": environment_key}
request = rf.get(path, **headers)
settings.USE_CACHE_FOR_USAGE_DATA = False

mocked_track_request = mocker.patch("app_analytics.middleware.track_request")

Expand All @@ -39,13 +85,14 @@ def test_APIUsageMiddleware_calls_track_request_correctly(


def test_APIUsageMiddleware_avoids_calling_track_request_if_resoure_is_not_tracked(
rf, mocker
):
rf: RequestFactory, mocker: MockerFixture, settings: SettingsWrapper
) -> None:
# Given
environment_key = "test"
headers = {"HTTP_X-Environment-Key": environment_key}
path = "/api/v1/unknown"
request = rf.get(path, **headers)
settings.USE_CACHE_FOR_USAGE_DATA = False

mocked_track_request = mocker.patch("app_analytics.middleware.track_request")

Expand Down
73 changes: 73 additions & 0 deletions api/tests/unit/app_analytics/test_unit_app_analytics_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from app_analytics.cache import CACHE_FLUSH_INTERVAL, APIUsageCache
from app_analytics.models import Resource
from django.utils import timezone
from freezegun import freeze_time
from pytest_mock import MockerFixture


def test_api_usage_cache(mocker: MockerFixture) -> None:
    # Given
    cache = APIUsageCache()
    now = timezone.now()
    mocked_track_request_task = mocker.patch("app_analytics.cache.track_request")
    host = "host"
    environment_key_1 = "environment_key_1"
    environment_key_2 = "environment_key_2"
    environment_keys = (environment_key_1, environment_key_2)

    with freeze_time(now) as frozen_time:
        # Track ten requests for every resource / environment key combination
        for _ in range(10):
            for resource in Resource:
                for environment_key in environment_keys:
                    cache.track_request(resource, host, environment_key)

        # Nothing has been flushed yet, so the task must not have been used
        assert not mocked_track_request_task.called

        # Move the clock past the flush interval
        frozen_time.tick(CACHE_FLUSH_INTERVAL + 1)

        # One more request triggers the flush
        cache.track_request(Resource.FLAGS, host, environment_key_1)

        # Then - the task was queued once per resource / environment key
        # combination; only the combination that received the extra request
        # has a count of 11
        expected_calls = []
        for resource in Resource:
            for environment_key in environment_keys:
                got_extra_request = (
                    resource == Resource.FLAGS
                    and environment_key == environment_key_1
                )
                expected_calls.append(
                    mocker.call(
                        kwargs={
                            "resource": resource,
                            "host": host,
                            "environment_key": environment_key,
                            "count": 11 if got_extra_request else 10,
                        }
                    )
                )
        mocked_track_request_task.delay.assert_has_calls(expected_calls)

        # Reset the mock and track one more request
        mocked_track_request_task.reset_mock()
        cache.track_request(Resource.FLAGS, host, environment_key_1)

        # The interval has not elapsed again, so no new task is queued
        assert not mocked_track_request_task.called

0 comments on commit 117f72a

Please sign in to comment.