Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(analytics): Command to populate arbitrary periods of analytics data #4155

Merged
merged 3 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions api/app_analytics/management/commands/populate_buckets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import argparse
from typing import Any

from app_analytics.constants import ANALYTICS_READ_BUCKET_SIZE
from app_analytics.tasks import (
populate_api_usage_bucket,
populate_feature_evaluation_bucket,
)
from django.conf import settings
from django.core.management import BaseCommand

# Number of minutes in one day (24 * 60). The command multiplies it by the
# --days-to-populate option to get the period, in minutes, that is handed to
# the bucket-populating tasks.
MINUTES_IN_DAY: int = 1440


class Command(BaseCommand):
    """Management command that populates analytics buckets for the last n days."""

    def add_arguments(self, parser: argparse.ArgumentParser) -> None:
        """Register the ``--days-to-populate`` integer option (defaults to 30)."""
        parser.add_argument(
            "--days-to-populate",
            dest="days_to_populate",
            type=int,
            default=30,
            help="Last n days to populate",
        )

    def handle(self, *args: Any, days_to_populate: int, **options: Any) -> None:
        """Populate the API usage and feature evaluation buckets.

        The requested number of days is converted to minutes and passed,
        together with ``ANALYTICS_READ_BUCKET_SIZE``, to each populate task.
        Nothing is done when ``settings.USE_POSTGRES_FOR_ANALYTICS`` is falsy.
        """
        if not settings.USE_POSTGRES_FOR_ANALYTICS:
            return

        period_in_minutes = days_to_populate * MINUTES_IN_DAY
        # The task names are resolved here, at call time, so the API usage
        # task always runs first and the feature evaluation task second.
        for populate_bucket in (
            populate_api_usage_bucket,
            populate_feature_evaluation_bucket,
        ):
            populate_bucket(ANALYTICS_READ_BUCKET_SIZE, period_in_minutes)
8 changes: 4 additions & 4 deletions api/app_analytics/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ def populate_api_usage_bucket(
bucket_start_time, bucket_end_time, source_bucket_size
)
for row in data:
APIUsageBucket.objects.create(
APIUsageBucket.objects.update_or_create(
defaults={"total_count": row["count"]},
environment_id=row["environment_id"],
resource=row["resource"],
total_count=row["count"],
bucket_size=bucket_size,
created_at=bucket_start_time,
)
Expand All @@ -162,10 +162,10 @@ def populate_feature_evaluation_bucket(
bucket_start_time, bucket_end_time, source_bucket_size
)
for row in data:
FeatureEvaluationBucket.objects.create(
FeatureEvaluationBucket.objects.update_or_create(
defaults={"total_count": row["count"]},
environment_id=row["environment_id"],
feature_name=row["feature_name"],
total_count=row["count"],
bucket_size=bucket_size,
created_at=bucket_start_time,
)
Expand Down
71 changes: 71 additions & 0 deletions api/tests/unit/app_analytics/test_commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from typing import Any

import pytest
from django.core.management import call_command
from pytest_django.fixtures import SettingsWrapper
from pytest_mock import MockerFixture


def test_populate_buckets__postgres_analytics_disabled__noop(
    settings: SettingsWrapper,
    mocker: MockerFixture,
) -> None:
    """With Postgres analytics disabled, the command calls neither populate task."""
    # Given
    api_usage_task = mocker.patch(
        "app_analytics.management.commands.populate_buckets.populate_api_usage_bucket"
    )
    feature_evaluation_task = mocker.patch(
        "app_analytics.management.commands.populate_buckets.populate_feature_evaluation_bucket"
    )
    settings.USE_POSTGRES_FOR_ANALYTICS = False

    # When
    call_command("populate_buckets")

    # Then
    for task in (api_usage_task, feature_evaluation_task):
        task.assert_not_called()


@pytest.mark.parametrize(
    "options, expected_call_every",
    [
        ({}, 43200),
        ({"days_to_populate": 10}, 14400),
    ],
)
def test_populate_buckets__postgres_analytics_enabled__calls_expected(
    settings: SettingsWrapper,
    mocker: MockerFixture,
    options: dict[str, Any],
    expected_call_every: int,
) -> None:
    """With Postgres analytics enabled, each populate task is called exactly once.

    Both tasks must receive the (patched) read bucket size and the requested
    number of days expressed in minutes.
    """
    # Given
    expected_bucket_size = 15
    mocker.patch(
        "app_analytics.management.commands.populate_buckets.ANALYTICS_READ_BUCKET_SIZE",
        new=expected_bucket_size,
    )
    api_usage_task = mocker.patch(
        "app_analytics.management.commands.populate_buckets.populate_api_usage_bucket"
    )
    feature_evaluation_task = mocker.patch(
        "app_analytics.management.commands.populate_buckets.populate_feature_evaluation_bucket"
    )
    settings.USE_POSTGRES_FOR_ANALYTICS = True

    # When
    call_command("populate_buckets", **options)

    # Then
    for task in (api_usage_task, feature_evaluation_task):
        task.assert_called_once_with(expected_bucket_size, expected_call_every)
83 changes: 79 additions & 4 deletions api/tests/unit/app_analytics/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from django.conf import settings
from django.utils import timezone
from freezegun.api import FrozenDateTimeFactory
from pytest_django.fixtures import SettingsWrapper

if "analytics" not in settings.DATABASES:
Expand All @@ -40,7 +41,7 @@ def _create_api_usage_event(environment_id: str, when: datetime):

@pytest.mark.freeze_time("2023-01-19T09:09:47.325132+00:00")
@pytest.mark.django_db(databases=["analytics"])
def test_populate_api_usage_bucket_multiple_runs(freezer):
def test_populate_api_usage_bucket_multiple_runs(freezer: FrozenDateTimeFactory):
# Given
environment_id = 1
bucket_size = 15
Expand Down Expand Up @@ -111,7 +112,9 @@ def test_populate_api_usage_bucket_multiple_runs(freezer):
)
@pytest.mark.freeze_time("2023-01-19T09:09:47.325132+00:00")
@pytest.mark.django_db(databases=["analytics"])
def test_populate_api_usage_bucket(freezer, bucket_size, runs_every):
def test_populate_api_usage_bucket(
freezer: FrozenDateTimeFactory, bucket_size: int, runs_every: int
):
# Given
environment_id = 1
now = timezone.now()
Expand Down Expand Up @@ -194,7 +197,7 @@ def test_track_feature_evaluation():

@pytest.mark.freeze_time("2023-01-19T09:09:47.325132+00:00")
@pytest.mark.django_db(databases=["analytics"])
def test_populate_feature_evaluation_bucket_15m(freezer):
def test_populate_feature_evaluation_bucket_15m(freezer: FrozenDateTimeFactory):
# Given
environment_id = 1
bucket_size = 15
Expand Down Expand Up @@ -280,7 +283,79 @@ def test_populate_feature_evaluation_bucket_15m(freezer):

@pytest.mark.freeze_time("2023-01-19T09:00:00+00:00")
@pytest.mark.django_db(databases=["analytics"])
def test_populate_api_usage_bucket_using_a_bucket(freezer):
def test_populate_feature_evaluation_bucket__upserts_buckets(
    freezer: FrozenDateTimeFactory,
) -> None:
    """Re-populating over an existing bucket updates it instead of adding a new one."""
    # Given
    environment_id = 1
    feature_name = "feature1"
    bucket_size = 15
    event_time = timezone.now()

    _create_feature_evaluation_event(environment_id, feature_name, 1, event_time)

    # advance the frozen clock to minute 47 of the current hour
    freezer.move_to(timezone.now().replace(minute=47))

    # first run creates the bucket that the second run has to update
    populate_feature_evaluation_bucket(bucket_size=bucket_size, run_every=60)

    # a second raw event lands at the same historical timestamp
    _create_feature_evaluation_event(environment_id, feature_name, 1, event_time)

    # When
    # the same period is populated again on top of the existing bucket
    populate_feature_evaluation_bucket(bucket_size=bucket_size, run_every=60)

    # Then
    # still a single bucket, and its count reflects both raw events
    matching_buckets = list(
        FeatureEvaluationBucket.objects.filter(
            environment_id=environment_id,
            bucket_size=bucket_size,
        )
    )
    assert len(matching_buckets) == 1
    assert matching_buckets[0].total_count == 2

@pytest.mark.freeze_time("2023-01-19T09:00:00+00:00")
@pytest.mark.django_db(databases=["analytics"])
def test_populate_api_usage_bucket__upserts_buckets(
    freezer: FrozenDateTimeFactory,
) -> None:
    """Re-populating over an existing bucket updates it instead of adding a new one."""
    # Given
    environment_id = 1
    bucket_size = 15
    event_time = timezone.now()

    _create_api_usage_event(environment_id, event_time)

    # advance the frozen clock to minute 47 of the current hour
    freezer.move_to(timezone.now().replace(minute=47))

    # first run creates the bucket that the second run has to update
    populate_api_usage_bucket(bucket_size=bucket_size, run_every=60)

    # a second raw event lands at the same historical timestamp
    _create_api_usage_event(environment_id, event_time)

    # When
    # the same period is populated again on top of the existing bucket
    populate_api_usage_bucket(bucket_size=bucket_size, run_every=60)

    # Then
    # still a single bucket, and its count reflects both raw events
    matching_buckets = list(
        APIUsageBucket.objects.filter(
            environment_id=environment_id,
            bucket_size=bucket_size,
        )
    )
    assert len(matching_buckets) == 1
    assert matching_buckets[0].total_count == 2


@pytest.mark.freeze_time("2023-01-19T09:00:00+00:00")
@pytest.mark.django_db(databases=["analytics"])
def test_populate_api_usage_bucket_using_a_bucket(freezer: FrozenDateTimeFactory):
# Given
environment_id = 1

Expand Down
Loading