First version of multi-tenant setup (EE)

2026-01-11 15:50:36 -06:00 · 2025-01-28 13:54:55 +01:00
parent d30c3ad704
commit 59372aba33
15 changed files with 221 additions and 3 deletions
--- a/ee/LICENSE
+++ b/ee/LICENSE
@@ -0,0 +1,5 @@
+There is currently no publically available "ee" "ee" license; if you have
+obtained a specific license from Bugsink B.V. directly, you may use that
+license to access the Software in this directory. If you have not obtained a
+specific license from Bugsink B.V., you may not use the Software in this
+directory.
--- a/ee/init.py
+++ b/ee/init.py
--- a/ee/tenants/init.py
+++ b/ee/tenants/init.py
--- a/ee/tenants/base.py
+++ b/ee/tenants/base.py
@@ -0,0 +1,40 @@
+from contextlib import contextmanager
+import threading
+
+local_storage = threading.local()
+# we use a list to allow for nesting; even though in practice nesting will always be of the same values, this allows
+# for correct tracking of the depth (such that we don't return None after push-push-pop)
+local_storage.tenant_subdomain = []
+global_tenant_subdomain = None  # cross-threads, for single-tenant usages (e.g. commands using os.get_env)
+
+
+def set_tenant_subdomain(tenant_subdomain):
+    if not hasattr(local_storage, "tenant_subdomain"):  # lazy init; I'm not 100% sure why needed
+        local_storage.tenant_subdomain = []
+
+    local_storage.tenant_subdomain.append(tenant_subdomain)
+
+
+def set_global_tenant_subdomain(tenant_subdomain):
+    global global_tenant_subdomain
+    global_tenant_subdomain = tenant_subdomain
+
+
+@contextmanager
+def use_tenant_subdomain(tenant_subdomain):
+    set_tenant_subdomain(tenant_subdomain)
+    yield
+    local_storage.tenant_subdomain.pop()
+
+
+def get_tenant_subdomain():
+    if global_tenant_subdomain is not None:
+        return global_tenant_subdomain
+
+    if not hasattr(local_storage, "tenant_subdomain"):  # lazy init; I'm not 100% sure why needed
+        local_storage.tenant_subdomain = []
+
+    if local_storage.tenant_subdomain == []:
+        return None
+
+    return local_storage.tenant_subdomain[-1]
--- a/ee/tenants/database_backend/init.py
+++ b/ee/tenants/database_backend/init.py
--- a/ee/tenants/database_backend/base.py
+++ b/ee/tenants/database_backend/base.py
@@ -0,0 +1,66 @@
+from django.http import Http404
+from django.db.backends.base.base import DEFAULT_DB_ALIAS
+
+from bugsink.timed_sqlite_backend.base import DatabaseWrapper as TimedDatabaseWrapper
+
+from ee.tenants.base import get_tenant_subdomain
+
+
+class DatabaseWrapper(TimedDatabaseWrapper):
+    """
+    DatabaseWrapper w/ TENANTS
+
+    We implement tenant-switching as a Database backend as per:
+    https://forum.djangoproject.com/t/use-database-routers-to-pick-a-database-connection-for-transaction-api-by-default/29744/7
+
+    adamchainz says:
+    > I think you would instead be best doing this within a custom database backend
+
+    roads not taken as per the forum post and other sources:
+    * routers -> probably a better fit for per-model switching; will not work well with transaction(using=...);
+      introduces new uglyness b/c separate snappea DB that the present solution avoids.
+    * overriding settings -> no, per https://github.com/django/django/blob/888b9042b359/django/test/signals.py#L163-L171
+
+    The basic implementation idea is that:
+
+    [1] the model for starting connections is quite straightforward;
+    * in request/response this is "near the beginning of the request", b/c CONN_MAX_AGE=0 (means: max between requests)
+    * in snappea it's also clearly defined, as part of `non_failing_function` which is run in a thread
+    * for the rest there is no multi-tenancy on the application level (we are single-tenant and pass in an env var)
+    the above means that the thing that determines the tenant (request, snappea task) implies get_connection. Which
+    means we just need to set the tenant at that moment. Which we do, via get_connection_params().
+
+    [2] I did a manual check on the superclasses (up to base/base.py) for `NAME` and `settings_dict`, and, there are 2
+    locations (only) worth overriding (from sqlite3/base.py): get_connection_params and is_in_memory_db.
+
+    another road-not-taken: making a _backend_ per tenant, rather than switching per-connection and relying on [1]
+    above. such an approach won't work because of the connection-specific state which would still be on the present
+    object (the "switching" object).
+    """
+
+    def __init__(self, settings_dict, alias=DEFAULT_DB_ALIAS):
+        self.tenants = settings_dict["TENANTS"]
+
+        super().__init__({k: v for k, v in settings_dict.items() if k != "TENANTS"}, alias)
+
+    def get_tenant(self):
+        subdomain = get_tenant_subdomain()
+        if subdomain is None:
+            raise Exception("Cannot determine subdomain outside of request/response loop")
+
+        if subdomain not in self.tenants:
+            # shouldn't happen 'in practice' (because there would be no certificate then)
+            raise Http404(f"No such site: {subdomain}.bugsink.com not found")
+
+        return self.tenants[subdomain]
+
+    def get_connection_params(self):
+        # We just mutate the settings_dict here (the alternative is waaay too much copy-pasting), mutating back after.
+        try:
+            self.settings_dict["NAME"] = self.get_tenant()
+            return super().get_connection_params()
+        finally:
+            del self.settings_dict["NAME"]
+
+    def is_in_memory_db(self):
+        return False  # we _know_ we don't do multi-tenant in memory, so this is simpler than some lookup.
--- a/ee/tenants/middleware.py
+++ b/ee/tenants/middleware.py
@@ -0,0 +1,16 @@
+from .base import use_tenant_subdomain
+
+
+class SelectDatabaseMiddleware:
+
+    def __init__(self, get_response):
+        self.get_response = get_response
+
+    def __call__(self, request):
+        host = request.get_host()
+        request_subdomain = host.split('.')[0].split(':')[0]
+
+        with use_tenant_subdomain(request_subdomain):
+            response = self.get_response(request)
+
+        return response
--- a/ee/tenants/utils.py
+++ b/ee/tenants/utils.py
@@ -0,0 +1,41 @@
+from contextlib import contextmanager
+
+from .base import get_tenant_subdomain, use_tenant_subdomain
+
+
+def add_tenant_subdomain_to_kwargs():
+    tenant_subdomain = get_tenant_subdomain()
+    assert tenant_subdomain is not None, "Must have tenant set to be able to pass this to snappea"
+    return {"TENANT_SUBDOMAIN": tenant_subdomain}
+
+
+@contextmanager
+def pop_tenant_subdomain_from_kwargs(args, kwargs):
+    if "TENANT_SUBDOMAIN" not in kwargs:
+        raise Exception("To run this task I need to have a tenant subdomain, can't find that")
+
+    tenant = kwargs.pop("TENANT_SUBDOMAIN")
+
+    with use_tenant_subdomain(tenant):
+        yield
+
+
+class TenantBaseURL:
+    """'lazy' evaluating drop-in for BASE_URL strings that fills in the TENANT on-demand; I've evaluated the current
+    uses of BASE_URL when writing this, forcing evaulation where needed (when not covered by __add__)."""
+
+    def __init__(self, format_domain):
+        self.format_domain = format_domain
+
+    def fmt(self):
+        return self.format_domain % get_tenant_subdomain()
+
+    def __add__(self, other):
+        return self.fmt() + other
+
+    def __str__(self):
+        return self.fmt()
+
+    def endswith(self, suffix):
+        # needed b/c BASE_URL is cleaned up w/ this helper
+        return self.fmt().endswith(suffix)
--- a/ingest/views.py
+++ b/ingest/views.py
@@ -92,7 +92,7 @@ class BaseIngestAPIView(View):
            # The dsn we show is reconstructed _as we understand it at this point in the code_, which is precisely what
            # you want to show as a first step towards debugging issues with SDKs with faulty authentication (a rather
            # common scenario).
-            dsn = build_dsn(get_settings().BASE_URL, project_pk, sentry_key)
+            dsn = build_dsn(str(get_settings().BASE_URL), project_pk, sentry_key)
            raise exceptions.PermissionDenied("Project not found or key incorrect: %s" % dsn)

    @classmethod
--- a/projects/models.py
+++ b/projects/models.py
@@ -78,7 +78,7 @@ class Project(models.Model):

    @property
    def dsn(self):
-        return build_dsn(get_settings().BASE_URL, self.id, self.sentry_key.hex)
+        return build_dsn(str(get_settings().BASE_URL), self.id, self.sentry_key.hex)

    def get_latest_release(self):
        from releases.models import ordered_releases
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,7 @@ include = [
    "bugsink*",
    "compat*",
    "events*",
+    "ee*",
    "ingest*",
    "issues*",
    "performance*",
--- a/snappea/decorators.py
+++ b/snappea/decorators.py
@@ -6,6 +6,7 @@ from performance.context_managers import time_to_logger
 from . import registry
 from .models import Task, wakeup_server
 from .settings import get_settings
+from .utils import add_task_kwargs

 performance_logger = logging.getLogger("bugsink.performance.snappea")

@@ -26,6 +27,7 @@ def shared_task(function):
            # No need for a transaction: we just write something (not connected to any other object, and we will never
            # touch it again). Counterpoint: if we'd have a transaction, we could distinguish between "wait for write
            # lock" and "actually write".
+            kwargs.update(add_task_kwargs())
            Task.objects.create(task_name=name, args=json.dumps(args), kwargs=json.dumps(kwargs))

            # not necessary: `connections["snappea"].close()`; Django does this at the end of the request and the
--- a/snappea/foreman.py
+++ b/snappea/foreman.py
@@ -22,6 +22,8 @@ from . import registry
 from .models import Task
 from .datastructures import Workers
 from .settings import get_settings
+from .utils import run_task_context
+

 logger = logging.getLogger("snappea.foreman")
 performance_logger = logging.getLogger("bugsink.performance.snappea")
@@ -170,7 +172,9 @@ class Foreman:
        def non_failing_function(*inner_args, **inner_kwargs):
            t0 = time.time()
            try:
-                function(*inner_args, **inner_kwargs)
+                with run_task_context(inner_args, inner_kwargs):
+                    function(*inner_args, **inner_kwargs)
+
            except Exception as e:
                # Potential TODO: make this configurable / depend on our existing config in bugsink/settings.py
                logger.warning("Snappea caught Exception: %s", str(e))
--- a/snappea/settings.py
+++ b/snappea/settings.py
@@ -32,6 +32,8 @@ DEFAULTS = {

    "TASK_QS_LIMIT": 100,

+    "HOOK_ADD_TASK_KWARGS": "snappea.utils.dont_add_anything",
+    "HOOK_RUN_TASK_CONTEXT": "snappea.utils.no_context",
 }


--- a/snappea/utils.py
+++ b/snappea/utils.py
@@ -0,0 +1,41 @@
+from contextlib import contextmanager
+import importlib
+
+from .settings import get_settings
+
+
+def add_task_kwargs():
+    """Hook for extending Task kwargs"""
+
+    if not hasattr(add_task_kwargs, "func"):
+        # the configured function is cached on add_task_kwargs itself
+        hook = get_settings().HOOK_ADD_TASK_KWARGS
+        module_name, function_name = hook.rsplit('.', 1)
+        module = importlib.import_module(module_name)
+        add_task_kwargs.func = getattr(module, function_name)
+
+    return add_task_kwargs.func()
+
+
+def run_task_context(task_args, task_kwargs):
+    """Hook for running a task in a context; the task's args and kwargs are passed for optional pre-processing"""
+
+    if not hasattr(add_task_kwargs, "func"):
+        # the configured function is cached on run_task_context itself
+        hook = get_settings().HOOK_RUN_TASK_CONTEXT
+        module_name, function_name = hook.rsplit('.', 1)
+        module = importlib.import_module(module_name)
+        run_task_context.func = getattr(module, function_name)
+
+    return run_task_context.func(task_args, task_kwargs)
+
+
+def dont_add_anything():
+    # no-op impl of add_task_kwargs
+    return {}
+
+
+@contextmanager
+def no_context(task_args, task_kwargs):
+    # no-op impl of run_task_context
+    yield