thoughts about pc registry; performance of the pc registry

Klaas van Schelven
2024-01-05 23:07:31 +01:00
parent 3810ba18f4
commit 5cf1445eeb
8 changed files with 279 additions and 3 deletions

View File

@@ -41,6 +41,8 @@ INSTALLED_APPS = [
    'ingest',
    'issues',
    'events',
    'performance',
]
TAILWIND_APP_NAME = 'theme'

performance/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,161 @@
from django.core.management.base import BaseCommand

import random
import time
from datetime import datetime, timezone

from django.conf import settings

from bugsink.period_counter import _prev_tup, PeriodCounter
from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time
from bugsink.registry import get_pc_registry

from projects.models import Project
from issues.models import Issue
from events.models import Event

# this file is the beginning of an approach to getting a handle on performance.


class Command(BaseCommand):
    help = "..."

    def handle(self, *args, **options):
        if "performance" not in str(settings.DATABASES["default"]["NAME"]):
            raise ValueError("This command should only be run on the performance-test database")

        print_thoughts_about_prev_tup()
        print_thoughts_about_inc()
        print_thoughts_about_event_evaluation()
        print_thoughts_about_pc_registry()


class passed_time(object):
    def __enter__(self):
        self.t0 = time.time()
        return self

    def __exit__(self, type, value, traceback):
        self.elapsed = (time.time() - self.t0) * 1_000  # milliseconds is a good unit for timing things


def print_thoughts_about_prev_tup():
    v = (2020, 1, 1, 10, 10)

    with passed_time() as t:
        for i in range(1_000):
            v = _prev_tup(v)

    print(f"""## _prev_tup()

1_000 iterations of _prev_tup in {t.elapsed:.3f}ms. The main thing we care about is not this little
private helper though, but PeriodCounter.inc(). Let's test that next.
""")


def print_thoughts_about_inc():
    random.seed(42)
    pc = PeriodCounter()

    # make sure the pc has some data before we start
    for point in buckets_to_points_in_time(
            generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
            datetime(2020, 10, 15, tzinfo=timezone.utc),
            datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
            10_000,
            ):
        pc.inc(point)

    points = buckets_to_points_in_time(
        generate_bursty_data(num_buckets=25, expected_nr_of_bursts=5),
        datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
        datetime(2021, 10, 16, 10, 5, tzinfo=timezone.utc),
        1_000)
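
    # (what the two helpers presumably do, inferred from their use here: generate_bursty_data
    # yields relative per-bucket volumes with a few bursts mixed in; buckets_to_points_in_time
    # spreads the requested total of datetime points (1_000 here) between the two endpoints,
    # proportionally to those volumes.)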

    with passed_time() as t:
        for point in points:
            pc.inc(point)

    print(f"""## PeriodCounter.inc()

1_000 iterations of PeriodCounter.inc() in {t.elapsed:.3f}ms. We care more about the evaluation of events though. Let's
test that next.
""")


def print_thoughts_about_event_evaluation():
    random.seed(42)
    pc = PeriodCounter()

    def noop():
        pass

    # Now, let's add some event-listeners. These are chosen to match a typical setup of quota for a given Issue or
    # Project. In this setup, the monthly maximum is spread out such that the smaller windows get a bit more than an
    # equal split would give them. Why? We want some flexibility for bursts of activity without using up the entire
    # budget for a longer time all at once. (worked numbers right below)
    pc.add_event_listener("day", 30, 10_000, noop, noop, initial_event_state=False)  # 1 month rolling window
    pc.add_event_listener("hour", 24, 1_000, noop, noop, initial_event_state=False)  # 1 day rolling window
    pc.add_event_listener("minute", 60, 200, noop, noop, initial_event_state=False)  # 1 hour rolling window

    # make sure the pc has some data before we start. we pick a 1-month period to match the listeners in the above.
    for point in buckets_to_points_in_time(
            generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
            datetime(2021, 10, 15, tzinfo=timezone.utc),
            datetime(2021, 11, 15, 10, 5, tzinfo=timezone.utc),
            10_000,
            ):
        pc.inc(point)

    # now we start the test: we generate a bursty data-set for a 1-day period, and see how long it takes to evaluate
    points = buckets_to_points_in_time(
        generate_bursty_data(num_buckets=25, expected_nr_of_bursts=5),
        datetime(2021, 11, 15, 10, 5, tzinfo=timezone.utc),
        datetime(2021, 11, 16, 10, 5, tzinfo=timezone.utc),
        1_000)

    with passed_time() as t:
        for point in points:
            pc.inc(point)

    print(f"""## PeriodCounter.inc()

1_000 iterations of PeriodCounter.inc() in {t.elapsed:.3f}ms (when 3 event-listeners are active). I'm not sure exactly
what a good performance would be here, but I can say the following: this means that when 1,000 events happen in a second,
the period-counter uses up 3% of the budget. A first guess would be: this is good enough.""")
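
    # (the 3% figure is the measurement restated: ~30ms spent per 1_000 inc() calls is
    # ~30ms out of every 1_000ms, i.e. ~3% of the available time at a sustained rate of
    # 1_000 events per second.)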


def print_thoughts_about_pc_registry():
    # note: in load_performance_insights we use minimal (non-data-containing) events here. this may not be
    # representative of real-world performance. having said that: this immediately triggers the thought that for real
    # initialization only timestamps and issue_ids are needed, and that we should adjust the code accordingly (a
    # sketch of that idea follows right after this function).
    with passed_time() as t:
        get_pc_registry()

    print(f"""## get_pc_registry()

getting the pc-registry takes {t.elapsed:.3f}ms (with the default fixtures, which contain
* {​ Project.objects.count() } projects,
* { Issue.objects.count() } issues,
* { Event.objects.count() } events)

This means (surprisingly) we can take our eye off optimizing this particular part of the code (for now), because:

* in the (expected) production setup where we cut ingestion and handling in 2 parts, a 6s delay on the handling server
boot is fine.
* in the debugserver (integrated ingestion/handling) we don't expect 100k events; and even if we did, a 6s delay on the
first event/request is fine.

Ways forward once we do decide to improve:

* regular saving of state (savepoint in time, with "unhandled after") (the regularity of saving is left as an exercise
for the reader)
* more granular caching/loading, e.g. load per project/issue on demand
""")

View File

@@ -0,0 +1,61 @@
from django.core.management.base import BaseCommand

import uuid
import random
from datetime import datetime, timezone

from django.conf import settings

from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time

from projects.models import Project
from issues.models import Issue
from events.models import Event


class Command(BaseCommand):
    help = "..."

    def handle(self, *args, **options):
        if "performance" not in str(settings.DATABASES["default"]["NAME"]):
            raise ValueError("This command should only be run on the performance-test database")

        Project.objects.all().delete()
        Issue.objects.all().delete()
        Event.objects.all().delete()
        # as a first approach, let's focus on a 'typical' (whatever that means) local setup (not hosted), for a small
        # team. maybe 10 people would work on max 10 projects. let's assume we have 10k per-project limits for events
        # set up. and let's assume 100 issues per project (far from inbox-zero, approaching bug-sewer territory)
        #
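        # (for scale: that is 10 x 10_000 = 100_000 events in total, spread over 10 x 100 = 1_000 issues; these are
        # the counts that show up in the thoughts output elsewhere in this commit.)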
        projects = [Project.objects.create(name="project %s" % i) for i in range(10)]

        issues_by_project = {}
        for p in projects:
            issues_by_project[p.id] = [Issue.objects.create(project=p, hash="hash %d" % i) for i in range(100)]

        # now we have 10 projects, each with 100 issues. let's create 10k events for each project.
        for p in projects:
            print("loading 10k events for project", p.name)
            points = buckets_to_points_in_time(
                generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
                datetime(2020, 10, 15, tzinfo=timezone.utc),
                datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
                10_000,
            )

            for i, point in enumerate(points):
                if i % 1_000 == 0:
                    print("loaded", i, "events")

                # note: because we use such minimal (non-data-containing) events here, the setup below may
                # actually not be representative of real-world performance.
                Event.objects.create(
                    project=p,
                    issue=random.choice(issues_by_project[p.id]),
                    server_side_timestamp=point,
                    timestamp=point,
                    event_id=uuid.uuid4().hex,
                    has_exception=True,
                    has_logentry=True,
                    data="{}",
                )
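
# Untested sketch, not part of this commit: loading 100k rows one save() at a time likely dominates the runtime of
# this command, so batching the inserts with Django's bulk_create() would be the obvious first speed-up, e.g.:
#
#     Event.objects.bulk_create(
#         [
#             Event(
#                 project=p,
#                 issue=random.choice(issues_by_project[p.id]),
#                 server_side_timestamp=point,
#                 timestamp=point,
#                 event_id=uuid.uuid4().hex,
#                 has_exception=True,
#                 has_logentry=True,
#                 data="{}",
#             )
#             for point in points
#         ],
#         batch_size=1_000,
#     )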

View File

@@ -1,16 +1,37 @@
## _prev_tup()

1_000 iterations of _prev_tup in 0.816ms. The main thing we care about is not this little
1_000 iterations of _prev_tup in 0.832ms. The main thing we care about is not this little
private helper though, but PeriodCounter.inc(). Let's test that next.

## PeriodCounter.inc()

1_000 iterations of PeriodCounter.inc() in 7.766ms. We care more about the evaluation of events though. Let's
1_000 iterations of PeriodCounter.inc() in 7.885ms. We care more about the evaluation of events though. Let's
test that next.

## PeriodCounter.inc()

1_000 iterations of PeriodCounter.inc() in 29.593ms (when 3 event-listeners are active). I'm not sure exactly
1_000 iterations of PeriodCounter.inc() in 29.567ms (when 3 event-listeners are active). I'm not sure exactly
what a good performance would be here, but I can say the following: this means that when 1,000 events happen in a second,
the period-counter uses up 3% of the budget. A first guess would be: this is good enough.

## get_pc_registry()

getting the pc-registry takes 6615.371ms (with the default fixtures, which contain
* 10 projects,
* 1000 issues,
* 100000 events)

This means (surprisingly) we can take our eye off optimizing this particular part of the code (for now), because:

* in the (expected) production setup where we cut ingestion and handling in 2 parts, a 6s delay on the handling server
boot is fine.
* in the debugserver (integrated ingestion/handling) we don't expect 100k events; and even if we did, a 6s delay on the
first event/request is fine.

Ways forward once we do decide to improve:

* regular saving of state (savepoint in time, with "unhandled after") (the regularity of saving is left as an exercise
for the reader)
* more granular caching/loading, e.g. load per project/issue on demand

View File

@@ -5,6 +5,9 @@ from datetime import datetime, timezone
from bugsink.period_counter import _prev_tup, PeriodCounter
from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time
from projects.models import Project
from issues.models import Issue
from events.models import Event
# this file is the beginning of an approach to getting a handle on performance.
@@ -109,6 +112,34 @@ what a good performance would be here, but I can say the following: this means w
the period-counter uses up 3% of the budget. A first guess would be: this is good enough.""")


def print_thoughts_about_pc_registry():
    # as a first approach, let's focus on a 'typical' (whatever that means) local setup (not hosted), for a small team.
    # maybe 10 people would work on max 10 projects. let's assume we have 10k per-project limits for events set up. and
    # let's assume 100 issues per project (far from inbox-zero, approaching bug-sewer territory)
    #
    projects = [Project.objects.create(name="project %s" % i) for i in range(10)]

    issues_by_project = {}
    for p in projects:
        issues_by_project[p.id] = [Issue.objects.create(project=p, hash="hash %d" % i) for i in range(100)]

    # now we have 10 projects, each with 100 issues. let's create 10k events for each project.
    for p in projects:
        points = buckets_to_points_in_time(
            generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
            datetime(2020, 10, 15, tzinfo=timezone.utc),
            datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
            10_000,
        )

        for point in points:
            # note: because we use such minimal (non-data-containing) events here, the setup below may actually
            # not be representative of real-world performance. having said that: this immediately triggers the thought
            # that for real initialization only timestamps and issue_ids are needed, and that we should adjust the code
            # accordingly
            Event.objects.create(project=p, issue=random.choice(issues_by_project[p.id]), server_side_timestamp=point)
print_thoughts_about_prev_tup()
print_thoughts_about_inc()
print_thoughts_about_event_evaluation()