From 20fb43ee2c032ba3ebc02ac838ce5596e0953538 Mon Sep 17 00:00:00 2001
From: Kim Gustyr
Date: Thu, 13 Jun 2024 13:33:20 +0100
Subject: [PATCH] feat(analytics): Command to populate arbitrary periods of analytics data (#4155)

---
 .../management/commands/populate_buckets.py   | 35 ++++++++
 api/app_analytics/tasks.py                    |  8 +-
 api/tests/unit/app_analytics/test_commands.py | 71 ++++++++++++++++
 api/tests/unit/app_analytics/test_tasks.py    | 83 ++++++++++++++++++-
 4 files changed, 189 insertions(+), 8 deletions(-)
 create mode 100644 api/app_analytics/management/commands/populate_buckets.py
 create mode 100644 api/tests/unit/app_analytics/test_commands.py

diff --git a/api/app_analytics/management/commands/populate_buckets.py b/api/app_analytics/management/commands/populate_buckets.py
new file mode 100644
index 000000000000..14508a7db78a
--- /dev/null
+++ b/api/app_analytics/management/commands/populate_buckets.py
@@ -0,0 +1,35 @@
+import argparse
+from typing import Any
+
+from app_analytics.constants import ANALYTICS_READ_BUCKET_SIZE
+from app_analytics.tasks import (
+    populate_api_usage_bucket,
+    populate_feature_evaluation_bucket,
+)
+from django.conf import settings
+from django.core.management import BaseCommand
+
+MINUTES_IN_DAY: int = 1440
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser: argparse.ArgumentParser) -> None:
+        parser.add_argument(
+            "--days-to-populate",
+            type=int,
+            dest="days_to_populate",
+            help="Last n days to populate",
+            default=30,
+        )
+
+    def handle(self, *args: Any, days_to_populate: int, **options: Any) -> None:
+        if settings.USE_POSTGRES_FOR_ANALYTICS:
+            minutes_to_populate = MINUTES_IN_DAY * days_to_populate
+            populate_api_usage_bucket(
+                ANALYTICS_READ_BUCKET_SIZE,
+                minutes_to_populate,
+            )
+            populate_feature_evaluation_bucket(
+                ANALYTICS_READ_BUCKET_SIZE,
+                minutes_to_populate,
+            )
diff --git a/api/app_analytics/tasks.py b/api/app_analytics/tasks.py
index 0df21b9d5c4e..b7818a232e4b 100644
--- a/api/app_analytics/tasks.py
+++ b/api/app_analytics/tasks.py
@@ -145,10 +145,10 @@ def populate_api_usage_bucket(
             bucket_start_time, bucket_end_time, source_bucket_size
         )
         for row in data:
-            APIUsageBucket.objects.create(
+            APIUsageBucket.objects.update_or_create(
+                defaults={"total_count": row["count"]},
                 environment_id=row["environment_id"],
                 resource=row["resource"],
-                total_count=row["count"],
                 bucket_size=bucket_size,
                 created_at=bucket_start_time,
             )
@@ -162,10 +162,10 @@ def populate_feature_evaluation_bucket(
             bucket_start_time, bucket_end_time, source_bucket_size
         )
         for row in data:
-            FeatureEvaluationBucket.objects.create(
+            FeatureEvaluationBucket.objects.update_or_create(
+                defaults={"total_count": row["count"]},
                 environment_id=row["environment_id"],
                 feature_name=row["feature_name"],
-                total_count=row["count"],
                 bucket_size=bucket_size,
                 created_at=bucket_start_time,
             )
diff --git a/api/tests/unit/app_analytics/test_commands.py b/api/tests/unit/app_analytics/test_commands.py
new file mode 100644
index 000000000000..0915b601565a
--- /dev/null
+++ b/api/tests/unit/app_analytics/test_commands.py
@@ -0,0 +1,71 @@
+from typing import Any
+
+import pytest
+from django.core.management import call_command
+from pytest_django.fixtures import SettingsWrapper
+from pytest_mock import MockerFixture
+
+
+def test_populate_buckets__postgres_analytics_disabled__noop(
+    settings: SettingsWrapper,
+    mocker: MockerFixture,
+) -> None:
+    # Given
+    settings.USE_POSTGRES_FOR_ANALYTICS = False
+    populate_api_usage_bucket_mock = mocker.patch(
+        "app_analytics.management.commands.populate_buckets.populate_api_usage_bucket"
+    )
+    populate_feature_evaluation_bucket = mocker.patch(
+        "app_analytics.management.commands.populate_buckets.populate_feature_evaluation_bucket"
+    )
+
+    # When
+    call_command("populate_buckets")
+
+    # Then
+    populate_api_usage_bucket_mock.assert_not_called()
+    populate_feature_evaluation_bucket.assert_not_called()
+
+
+@pytest.mark.parametrize(
+    "options, expected_call_every",
+    [
+        ({}, 43200),
+        (
+            {"days_to_populate": 10},
+            14400,
+        ),
+    ],
+)
+def test_populate_buckets__postgres_analytics_enabled__calls_expected(
+    settings: SettingsWrapper,
+    mocker: MockerFixture,
+    options: dict[str, Any],
+    expected_call_every: int,
+) -> None:
+    # Given
+    settings.USE_POSTGRES_FOR_ANALYTICS = True
+    populate_api_usage_bucket_mock = mocker.patch(
+        "app_analytics.management.commands.populate_buckets.populate_api_usage_bucket"
+    )
+    populate_feature_evaluation_bucket = mocker.patch(
+        "app_analytics.management.commands.populate_buckets.populate_feature_evaluation_bucket"
+    )
+    expected_bucket_size = 15
+    mocker.patch(
+        "app_analytics.management.commands.populate_buckets.ANALYTICS_READ_BUCKET_SIZE",
+        new=expected_bucket_size,
+    )
+
+    # When
+    call_command("populate_buckets", **options)
+
+    # Then
+    populate_api_usage_bucket_mock.assert_called_once_with(
+        expected_bucket_size,
+        expected_call_every,
+    )
+    populate_feature_evaluation_bucket.assert_called_once_with(
+        expected_bucket_size,
+        expected_call_every,
+    )
diff --git a/api/tests/unit/app_analytics/test_tasks.py b/api/tests/unit/app_analytics/test_tasks.py
index b9c021192a3e..3e485aa8c5c0 100644
--- a/api/tests/unit/app_analytics/test_tasks.py
+++ b/api/tests/unit/app_analytics/test_tasks.py
@@ -17,6 +17,7 @@
 )
 from django.conf import settings
 from django.utils import timezone
+from freezegun.api import FrozenDateTimeFactory
 from pytest_django.fixtures import SettingsWrapper
 
 if "analytics" not in settings.DATABASES:
@@ -40,7 +41,7 @@ def _create_api_usage_event(environment_id: str, when: datetime):
 
 @pytest.mark.freeze_time("2023-01-19T09:09:47.325132+00:00")
 @pytest.mark.django_db(databases=["analytics"])
-def test_populate_api_usage_bucket_multiple_runs(freezer):
+def test_populate_api_usage_bucket_multiple_runs(freezer: FrozenDateTimeFactory):
     # Given
     environment_id = 1
     bucket_size = 15
@@ -111,7 +112,9 @@ def test_populate_api_usage_bucket_multiple_runs(freezer):
 )
 @pytest.mark.freeze_time("2023-01-19T09:09:47.325132+00:00")
 @pytest.mark.django_db(databases=["analytics"])
-def test_populate_api_usage_bucket(freezer, bucket_size, runs_every):
+def test_populate_api_usage_bucket(
+    freezer: FrozenDateTimeFactory, bucket_size: int, runs_every: int
+):
     # Given
     environment_id = 1
     now = timezone.now()
@@ -194,7 +197,7 @@ def test_track_feature_evaluation():
 
 @pytest.mark.freeze_time("2023-01-19T09:09:47.325132+00:00")
 @pytest.mark.django_db(databases=["analytics"])
-def test_populate_feature_evaluation_bucket_15m(freezer):
+def test_populate_feature_evaluation_bucket_15m(freezer: FrozenDateTimeFactory):
     # Given
     environment_id = 1
     bucket_size = 15
@@ -280,7 +283,79 @@ def test_populate_feature_evaluation_bucket_15m(freezer):
 
 @pytest.mark.freeze_time("2023-01-19T09:00:00+00:00")
 @pytest.mark.django_db(databases=["analytics"])
-def test_populate_api_usage_bucket_using_a_bucket(freezer):
+def test_populate_feature_evaluation_bucket__upserts_buckets(
+    freezer: FrozenDateTimeFactory,
+) -> None:
+    # Given
+    environment_id = 1
+    bucket_size = 15
+    feature_name = "feature1"
+    then = timezone.now()
+
+    _create_feature_evaluation_event(environment_id, feature_name, 1, then)
+
+    # move the time to 9:47
+    freezer.move_to(timezone.now().replace(minute=47))
+
+    # populate buckets to have an existing one
+    populate_feature_evaluation_bucket(bucket_size=bucket_size, run_every=60)
+
+    # add historical raw data
+    _create_feature_evaluation_event(environment_id, feature_name, 1, then)
+
+    # When
+    # Feature usage is populated over existing buckets
+    populate_feature_evaluation_bucket(bucket_size=bucket_size, run_every=60)
+
+    # Then
+    # Buckets are correctly set according to current raw data
+    buckets = FeatureEvaluationBucket.objects.filter(
+        environment_id=environment_id,
+        bucket_size=bucket_size,
+    ).all()
+    assert len(buckets) == 1
+    assert buckets[0].total_count == 2
+
+
+@pytest.mark.freeze_time("2023-01-19T09:00:00+00:00")
+@pytest.mark.django_db(databases=["analytics"])
+def test_populate_api_usage_bucket__upserts_buckets(
+    freezer: FrozenDateTimeFactory,
+) -> None:
+    # Given
+    environment_id = 1
+    bucket_size = 15
+
+    then = timezone.now()
+
+    _create_api_usage_event(environment_id, then)
+
+    # move the time to 9:47
+    freezer.move_to(timezone.now().replace(minute=47))
+
+    # populate buckets to have an existing one
+    populate_api_usage_bucket(bucket_size=bucket_size, run_every=60)
+
+    # add historical raw data
+    _create_api_usage_event(environment_id, then)
+
+    # When
+    # API usage is populated over existing buckets
+    populate_api_usage_bucket(bucket_size=bucket_size, run_every=60)
+
+    # Then
+    # Buckets are correctly set according to current raw data
+    buckets = APIUsageBucket.objects.filter(
+        environment_id=environment_id,
+        bucket_size=bucket_size,
+    ).all()
+    assert len(buckets) == 1
+    assert buckets[0].total_count == 2
+
+
+@pytest.mark.freeze_time("2023-01-19T09:00:00+00:00")
+@pytest.mark.django_db(databases=["analytics"])
+def test_populate_api_usage_bucket_using_a_bucket(freezer: FrozenDateTimeFactory):
     # Given
     environment_id = 1
 