First version of multi-tenant setup (EE)

This commit is contained in:
Klaas van Schelven
2025-01-28 13:54:55 +01:00
parent d30c3ad704
commit 59372aba33
15 changed files with 221 additions and 3 deletions

5
ee/LICENSE Normal file
View File

@@ -0,0 +1,5 @@
There is currently no publically available "ee" "ee" license; if you have
obtained a specific license from Bugsink B.V. directly, you may use that
license to access the Software in this directory. If you have not obtained a
specific license from Bugsink B.V., you may not use the Software in this
directory.

0
ee/__init__.py Normal file
View File

0
ee/tenants/__init__.py Normal file
View File

40
ee/tenants/base.py Normal file
View File

@@ -0,0 +1,40 @@
from contextlib import contextmanager
import threading
local_storage = threading.local()
# we use a list to allow for nesting; even though in practice nesting will always be of the same values, this allows
# for correct tracking of the depth (such that we don't return None after push-push-pop)
local_storage.tenant_subdomain = []
global_tenant_subdomain = None # cross-threads, for single-tenant usages (e.g. commands using os.get_env)
def set_tenant_subdomain(tenant_subdomain):
if not hasattr(local_storage, "tenant_subdomain"): # lazy init; I'm not 100% sure why needed
local_storage.tenant_subdomain = []
local_storage.tenant_subdomain.append(tenant_subdomain)
def set_global_tenant_subdomain(tenant_subdomain):
global global_tenant_subdomain
global_tenant_subdomain = tenant_subdomain
@contextmanager
def use_tenant_subdomain(tenant_subdomain):
set_tenant_subdomain(tenant_subdomain)
yield
local_storage.tenant_subdomain.pop()
def get_tenant_subdomain():
if global_tenant_subdomain is not None:
return global_tenant_subdomain
if not hasattr(local_storage, "tenant_subdomain"): # lazy init; I'm not 100% sure why needed
local_storage.tenant_subdomain = []
if local_storage.tenant_subdomain == []:
return None
return local_storage.tenant_subdomain[-1]

View File

View File

@@ -0,0 +1,66 @@
from django.http import Http404
from django.db.backends.base.base import DEFAULT_DB_ALIAS
from bugsink.timed_sqlite_backend.base import DatabaseWrapper as TimedDatabaseWrapper
from ee.tenants.base import get_tenant_subdomain
class DatabaseWrapper(TimedDatabaseWrapper):
"""
DatabaseWrapper w/ TENANTS
We implement tenant-switching as a Database backend as per:
https://forum.djangoproject.com/t/use-database-routers-to-pick-a-database-connection-for-transaction-api-by-default/29744/7
adamchainz says:
> I think you would instead be best doing this within a custom database backend
roads not taken as per the forum post and other sources:
* routers -> probably a better fit for per-model switching; will not work well with transaction(using=...);
introduces new uglyness b/c separate snappea DB that the present solution avoids.
* overriding settings -> no, per https://github.com/django/django/blob/888b9042b359/django/test/signals.py#L163-L171
The basic implementation idea is that:
[1] the model for starting connections is quite straightforward;
* in request/response this is "near the beginning of the request", b/c CONN_MAX_AGE=0 (means: max between requests)
* in snappea it's also clearly defined, as part of `non_failing_function` which is run in a thread
* for the rest there is no multi-tenancy on the application level (we are single-tenant and pass in an env var)
the above means that the thing that determines the tenant (request, snappea task) implies get_connection. Which
means we just need to set the tenant at that moment. Which we do, via get_connection_params().
[2] I did a manual check on the superclasses (up to base/base.py) for `NAME` and `settings_dict`, and, there are 2
locations (only) worth overriding (from sqlite3/base.py): get_connection_params and is_in_memory_db.
another road-not-taken: making a _backend_ per tenant, rather than switching per-connection and relying on [1]
above. such an approach won't work because of the connection-specific state which would still be on the present
object (the "switching" object).
"""
def __init__(self, settings_dict, alias=DEFAULT_DB_ALIAS):
self.tenants = settings_dict["TENANTS"]
super().__init__({k: v for k, v in settings_dict.items() if k != "TENANTS"}, alias)
def get_tenant(self):
subdomain = get_tenant_subdomain()
if subdomain is None:
raise Exception("Cannot determine subdomain outside of request/response loop")
if subdomain not in self.tenants:
# shouldn't happen 'in practice' (because there would be no certificate then)
raise Http404(f"No such site: {subdomain}.bugsink.com not found")
return self.tenants[subdomain]
def get_connection_params(self):
# We just mutate the settings_dict here (the alternative is waaay too much copy-pasting), mutating back after.
try:
self.settings_dict["NAME"] = self.get_tenant()
return super().get_connection_params()
finally:
del self.settings_dict["NAME"]
def is_in_memory_db(self):
return False # we _know_ we don't do multi-tenant in memory, so this is simpler than some lookup.

16
ee/tenants/middleware.py Normal file
View File

@@ -0,0 +1,16 @@
from .base import use_tenant_subdomain
class SelectDatabaseMiddleware:
def __init__(self, get_response):
self.get_response = get_response
def __call__(self, request):
host = request.get_host()
request_subdomain = host.split('.')[0].split(':')[0]
with use_tenant_subdomain(request_subdomain):
response = self.get_response(request)
return response

41
ee/tenants/utils.py Normal file
View File

@@ -0,0 +1,41 @@
from contextlib import contextmanager
from .base import get_tenant_subdomain, use_tenant_subdomain
def add_tenant_subdomain_to_kwargs():
tenant_subdomain = get_tenant_subdomain()
assert tenant_subdomain is not None, "Must have tenant set to be able to pass this to snappea"
return {"TENANT_SUBDOMAIN": tenant_subdomain}
@contextmanager
def pop_tenant_subdomain_from_kwargs(args, kwargs):
if "TENANT_SUBDOMAIN" not in kwargs:
raise Exception("To run this task I need to have a tenant subdomain, can't find that")
tenant = kwargs.pop("TENANT_SUBDOMAIN")
with use_tenant_subdomain(tenant):
yield
class TenantBaseURL:
"""'lazy' evaluating drop-in for BASE_URL strings that fills in the TENANT on-demand; I've evaluated the current
uses of BASE_URL when writing this, forcing evaulation where needed (when not covered by __add__)."""
def __init__(self, format_domain):
self.format_domain = format_domain
def fmt(self):
return self.format_domain % get_tenant_subdomain()
def __add__(self, other):
return self.fmt() + other
def __str__(self):
return self.fmt()
def endswith(self, suffix):
# needed b/c BASE_URL is cleaned up w/ this helper
return self.fmt().endswith(suffix)

View File

@@ -92,7 +92,7 @@ class BaseIngestAPIView(View):
# The dsn we show is reconstructed _as we understand it at this point in the code_, which is precisely what
# you want to show as a first step towards debugging issues with SDKs with faulty authentication (a rather
# common scenario).
dsn = build_dsn(get_settings().BASE_URL, project_pk, sentry_key)
dsn = build_dsn(str(get_settings().BASE_URL), project_pk, sentry_key)
raise exceptions.PermissionDenied("Project not found or key incorrect: %s" % dsn)
@classmethod

View File

@@ -78,7 +78,7 @@ class Project(models.Model):
@property
def dsn(self):
return build_dsn(get_settings().BASE_URL, self.id, self.sentry_key.hex)
return build_dsn(str(get_settings().BASE_URL), self.id, self.sentry_key.hex)
def get_latest_release(self):
from releases.models import ordered_releases

View File

@@ -43,6 +43,7 @@ include = [
"bugsink*",
"compat*",
"events*",
"ee*",
"ingest*",
"issues*",
"performance*",

View File

@@ -6,6 +6,7 @@ from performance.context_managers import time_to_logger
from . import registry
from .models import Task, wakeup_server
from .settings import get_settings
from .utils import add_task_kwargs
performance_logger = logging.getLogger("bugsink.performance.snappea")
@@ -26,6 +27,7 @@ def shared_task(function):
# No need for a transaction: we just write something (not connected to any other object, and we will never
# touch it again). Counterpoint: if we'd have a transaction, we could distinguish between "wait for write
# lock" and "actually write".
kwargs.update(add_task_kwargs())
Task.objects.create(task_name=name, args=json.dumps(args), kwargs=json.dumps(kwargs))
# not necessary: `connections["snappea"].close()`; Django does this at the end of the request and the

View File

@@ -22,6 +22,8 @@ from . import registry
from .models import Task
from .datastructures import Workers
from .settings import get_settings
from .utils import run_task_context
logger = logging.getLogger("snappea.foreman")
performance_logger = logging.getLogger("bugsink.performance.snappea")
@@ -170,7 +172,9 @@ class Foreman:
def non_failing_function(*inner_args, **inner_kwargs):
t0 = time.time()
try:
function(*inner_args, **inner_kwargs)
with run_task_context(inner_args, inner_kwargs):
function(*inner_args, **inner_kwargs)
except Exception as e:
# Potential TODO: make this configurable / depend on our existing config in bugsink/settings.py
logger.warning("Snappea caught Exception: %s", str(e))

View File

@@ -32,6 +32,8 @@ DEFAULTS = {
"TASK_QS_LIMIT": 100,
"HOOK_ADD_TASK_KWARGS": "snappea.utils.dont_add_anything",
"HOOK_RUN_TASK_CONTEXT": "snappea.utils.no_context",
}

41
snappea/utils.py Normal file
View File

@@ -0,0 +1,41 @@
from contextlib import contextmanager
import importlib
from .settings import get_settings
def add_task_kwargs():
"""Hook for extending Task kwargs"""
if not hasattr(add_task_kwargs, "func"):
# the configured function is cached on add_task_kwargs itself
hook = get_settings().HOOK_ADD_TASK_KWARGS
module_name, function_name = hook.rsplit('.', 1)
module = importlib.import_module(module_name)
add_task_kwargs.func = getattr(module, function_name)
return add_task_kwargs.func()
def run_task_context(task_args, task_kwargs):
"""Hook for running a task in a context; the task's args and kwargs are passed for optional pre-processing"""
if not hasattr(add_task_kwargs, "func"):
# the configured function is cached on run_task_context itself
hook = get_settings().HOOK_RUN_TASK_CONTEXT
module_name, function_name = hook.rsplit('.', 1)
module = importlib.import_module(module_name)
run_task_context.func = getattr(module, function_name)
return run_task_context.func(task_args, task_kwargs)
def dont_add_anything():
# no-op impl of add_task_kwargs
return {}
@contextmanager
def no_context(task_args, task_kwargs):
# no-op impl of run_task_context
yield