Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(tasks-processor): Add recurring task to clean up old recurring task runs #3151

Merged
merged 3 commits into from
Dec 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/app/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,9 @@
)
TASK_DELETE_RUN_TIME = env.time("TASK_DELETE_RUN_TIME", default="01:00")
TASK_DELETE_RUN_EVERY = env.timedelta("TASK_DELETE_RUN_EVERY", default=86400)
RECURRING_TASK_RUN_RETENTION_DAYS = env.int(
"RECURRING_TASK_RUN_RETENTION_DAYS", default=30
)
Comment on lines +913 to +915
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason not to use the same retention period for both types of task?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think we probably want to control this independently, e.g: we can probably go with a lower value here because it's not as critical as tasks?


# Real time(server sent events) settings
SSE_SERVER_BASE_URL = env.str("SSE_SERVER_BASE_URL", None)
Expand Down
16 changes: 15 additions & 1 deletion api/task_processor/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
register_recurring_task,
register_task_handler,
)
from task_processor.models import HealthCheckModel, Task
from task_processor.models import HealthCheckModel, RecurringTaskRun, Task

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -50,3 +50,17 @@ def clean_up_old_tasks():
0 : settings.TASK_DELETE_BATCH_SIZE # noqa:E203
]
).delete()


@register_recurring_task(
run_every=settings.TASK_DELETE_RUN_EVERY,
first_run_time=settings.TASK_DELETE_RUN_TIME,
)
def clean_up_old_recurring_task_runs():
if not settings.ENABLE_CLEAN_UP_OLD_TASKS:
return

now = timezone.now()
delete_before = now - timedelta(days=settings.RECURRING_TASK_RUN_RETENTION_DAYS)

RecurringTaskRun.objects.filter(finished_at__lt=delete_before).delete()
Comment on lines +55 to +66
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason that we shouldn't just use the same task as the one which removes regular tasks / task runs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it's because of the name of the task mostly

83 changes: 81 additions & 2 deletions api/tests/unit/task_processor/test_unit_task_processor_tasks.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from datetime import timedelta
from typing import Callable

from django.utils import timezone
from pytest_django.fixtures import SettingsWrapper

from task_processor.models import Task
from task_processor.tasks import clean_up_old_tasks
from task_processor.models import RecurringTask, RecurringTaskRun, Task
from task_processor.tasks import (
clean_up_old_recurring_task_runs,
clean_up_old_tasks,
)

now = timezone.now()
three_days_ago = now - timedelta(days=3)
Expand All @@ -23,6 +28,17 @@ def test_clean_up_old_tasks_does_nothing_when_no_tasks(db):
assert Task.objects.count() == 0


def test_clean_up_old_recurring_task_runs_does_nothing_when_no_runs(db: None) -> None:
# Given
assert RecurringTaskRun.objects.count() == 0

# When
clean_up_old_recurring_task_runs()

# Then
assert RecurringTaskRun.objects.count() == 0


def test_clean_up_old_tasks(settings, django_assert_num_queries, db):
# Given
settings.TASK_DELETE_RETENTION_DAYS = 2
Expand Down Expand Up @@ -71,6 +87,42 @@ def test_clean_up_old_tasks(settings, django_assert_num_queries, db):
]


def test_clean_up_old_recurring_task_runs(
settings: SettingsWrapper,
django_assert_num_queries: Callable[[int], None],
db: None,
) -> None:
# Given
settings.RECURRING_TASK_RUN_RETENTION_DAYS = 2
settings.ENABLE_CLEAN_UP_OLD_TASKS = True

recurring_task = RecurringTask.objects.create(
task_identifier="some_identifier", run_every=timedelta(seconds=1)
)

# 2 task runs finished before retention period
for _ in range(2):
RecurringTaskRun.objects.create(
started_at=three_days_ago,
task=recurring_task,
finished_at=three_days_ago,
)

# a task run that is within the retention period
task_in_retention_period = RecurringTaskRun.objects.create(
task=recurring_task,
started_at=one_day_ago,
finished_at=one_day_ago,
)

# When
with django_assert_num_queries(1):
clean_up_old_recurring_task_runs()

# Then
assert list(RecurringTaskRun.objects.all()) == [task_in_retention_period]


def test_clean_up_old_tasks_include_failed_tasks(
settings, django_assert_num_queries, db
):
Expand Down Expand Up @@ -106,3 +158,30 @@ def test_clean_up_old_tasks_does_not_run_if_disabled(

# Then
assert Task.objects.filter(id=task.id).exists()


def test_clean_up_old_recurring_task_runs_does_not_run_if_disabled(
settings: SettingsWrapper,
django_assert_num_queries: Callable[[int], None],
db: None,
) -> None:
# Given
settings.RECURRING_TASK_RUN_RETENTION_DAYS = 2
settings.ENABLE_CLEAN_UP_OLD_TASKS = False

recurring_task = RecurringTask.objects.create(
task_identifier="some_identifier", run_every=timedelta(seconds=1)
)

RecurringTaskRun.objects.create(
started_at=three_days_ago,
task=recurring_task,
finished_at=three_days_ago,
)

# When
with django_assert_num_queries(0):
clean_up_old_recurring_task_runs()

# Then
assert RecurringTaskRun.objects.exists()