PoC: Minimal open-telemetry tracing and metrics integration behind "otel" feature flag.

This commit is contained in:
Sebastian Jeltsch
2025-10-03 16:15:19 +02:00
parent 33c14552bf
commit af5a3d3b7a
5 changed files with 380 additions and 70 deletions

274
Cargo.lock generated
View File

@@ -694,6 +694,25 @@ dependencies = [
"url",
]
[[package]]
name = "axum-tracing-opentelemetry"
version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8ca2639ea8e778162fa734ddd5d77cb2fdf29ec39adb80192693834b6639ea6"
dependencies = [
"axum 0.8.7",
"futures-core",
"futures-util",
"http",
"opentelemetry 0.31.0",
"opentelemetry-semantic-conventions 0.31.0",
"pin-project-lite",
"tower 0.5.2",
"tracing",
"tracing-opentelemetry",
"tracing-opentelemetry-instrumentation-sdk",
]
[[package]]
name = "az"
version = "1.2.1"
@@ -1224,9 +1243,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.51"
version = "4.5.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5"
checksum = "aa8120877db0e5c011242f96806ce3c94e0737ab8108532a76a3300a01db2ab8"
dependencies = [
"clap_builder",
"clap_derive",
@@ -1234,9 +1253,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.51"
version = "4.5.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a"
checksum = "02576b399397b659c26064fbc92a75fede9d18ffd5f80ca1cd74ddab167016e1"
dependencies = [
"anstream",
"anstyle",
@@ -2239,11 +2258,11 @@ dependencies = [
"hyper-util",
"log",
"once_cell",
"opentelemetry",
"opentelemetry-http",
"opentelemetry-otlp",
"opentelemetry-semantic-conventions",
"opentelemetry_sdk",
"opentelemetry 0.27.1",
"opentelemetry-http 0.27.0",
"opentelemetry-otlp 0.27.0",
"opentelemetry-semantic-conventions 0.27.0",
"opentelemetry_sdk 0.27.1",
"pin-project",
"serde",
"thiserror 2.0.17",
@@ -3747,6 +3766,23 @@ dependencies = [
"cfb",
]
[[package]]
name = "init-tracing-opentelemetry"
version = "0.32.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3405b8beafca487bde01d88e05e6646c4e5c6a3c9830e7b937290ea8f7f5e5d9"
dependencies = [
"opentelemetry 0.31.0",
"opentelemetry-otlp 0.31.0",
"opentelemetry-semantic-conventions 0.31.0",
"opentelemetry-stdout",
"opentelemetry_sdk 0.31.0",
"thiserror 2.0.17",
"tracing",
"tracing-opentelemetry",
"tracing-subscriber",
]
[[package]]
name = "inout"
version = "0.1.4"
@@ -4193,6 +4229,15 @@ dependencies = [
"libc",
]
[[package]]
name = "matchers"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
dependencies = [
"regex-automata",
]
[[package]]
name = "matchit"
version = "0.7.3"
@@ -4478,6 +4523,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "nu-ansi-term"
version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "num"
version = "0.4.3"
@@ -4761,6 +4815,19 @@ dependencies = [
"tracing",
]
[[package]]
name = "opentelemetry"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0"
dependencies = [
"futures-core",
"futures-sink",
"js-sys",
"pin-project-lite",
"thiserror 2.0.17",
]
[[package]]
name = "opentelemetry-http"
version = "0.27.0"
@@ -4770,7 +4837,20 @@ dependencies = [
"async-trait",
"bytes",
"http",
"opentelemetry",
"opentelemetry 0.27.1",
]
[[package]]
name = "opentelemetry-http"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d"
dependencies = [
"async-trait",
"bytes",
"http",
"opentelemetry 0.31.0",
"reqwest",
]
[[package]]
@@ -4782,18 +4862,36 @@ dependencies = [
"async-trait",
"futures-core",
"http",
"opentelemetry",
"opentelemetry-http",
"opentelemetry-proto",
"opentelemetry_sdk",
"opentelemetry 0.27.1",
"opentelemetry-http 0.27.0",
"opentelemetry-proto 0.27.0",
"opentelemetry_sdk 0.27.1",
"prost 0.13.5",
"serde_json",
"thiserror 1.0.69",
"tokio",
"tonic",
"tonic 0.12.3",
"tracing",
]
[[package]]
name = "opentelemetry-otlp"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf"
dependencies = [
"http",
"opentelemetry 0.31.0",
"opentelemetry-http 0.31.0",
"opentelemetry-proto 0.31.0",
"opentelemetry_sdk 0.31.0",
"prost 0.14.1",
"reqwest",
"thiserror 2.0.17",
"tokio",
"tonic 0.14.2",
]
[[package]]
name = "opentelemetry-proto"
version = "0.27.0"
@@ -4801,11 +4899,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6"
dependencies = [
"hex",
"opentelemetry",
"opentelemetry_sdk",
"opentelemetry 0.27.1",
"opentelemetry_sdk 0.27.1",
"prost 0.13.5",
"serde",
"tonic",
"tonic 0.12.3",
]
[[package]]
name = "opentelemetry-proto"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f"
dependencies = [
"opentelemetry 0.31.0",
"opentelemetry_sdk 0.31.0",
"prost 0.14.1",
"tonic 0.14.2",
"tonic-prost",
]
[[package]]
@@ -4814,6 +4925,23 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc1b6902ff63b32ef6c489e8048c5e253e2e4a803ea3ea7e783914536eb15c52"
[[package]]
name = "opentelemetry-semantic-conventions"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e62e29dfe041afb8ed2a6c9737ab57db4907285d999ef8ad3a59092a36bdc846"
[[package]]
name = "opentelemetry-stdout"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811"
dependencies = [
"chrono",
"opentelemetry 0.31.0",
"opentelemetry_sdk 0.31.0",
]
[[package]]
name = "opentelemetry_sdk"
version = "0.27.1"
@@ -4825,7 +4953,7 @@ dependencies = [
"futures-executor",
"futures-util",
"glob",
"opentelemetry",
"opentelemetry 0.27.1",
"percent-encoding",
"rand 0.8.5",
"serde_json",
@@ -4835,6 +4963,23 @@ dependencies = [
"tracing",
]
[[package]]
name = "opentelemetry_sdk"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd"
dependencies = [
"futures-channel",
"futures-executor",
"futures-util",
"opentelemetry 0.31.0",
"percent-encoding",
"rand 0.9.2",
"thiserror 2.0.17",
"tokio",
"tokio-stream",
]
[[package]]
name = "ordered-float"
version = "2.10.1"
@@ -7699,6 +7844,43 @@ dependencies = [
"tracing",
]
[[package]]
name = "tonic"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203"
dependencies = [
"async-trait",
"base64",
"bytes",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-timeout",
"hyper-util",
"percent-encoding",
"pin-project",
"sync_wrapper",
"tokio",
"tokio-stream",
"tower 0.5.2",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tonic-prost"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67"
dependencies = [
"bytes",
"prost 0.14.1",
"tonic 0.14.2",
]
[[package]]
name = "tower"
version = "0.4.13"
@@ -7727,9 +7909,12 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
"futures-core",
"futures-util",
"indexmap 2.12.0",
"pin-project-lite",
"slab",
"sync_wrapper",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
@@ -7823,6 +8008,49 @@ dependencies = [
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-opentelemetry"
version = "0.32.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e6e5658463dd88089aba75c7791e1d3120633b1bfde22478b28f625a9bb1b8e"
dependencies = [
"js-sys",
"opentelemetry 0.31.0",
"opentelemetry_sdk 0.31.0",
"rustversion",
"smallvec",
"thiserror 2.0.17",
"tracing",
"tracing-core",
"tracing-log",
"tracing-subscriber",
"web-time",
]
[[package]]
name = "tracing-opentelemetry-instrumentation-sdk"
version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4aea2e5bee9df9d098d951baeccb84e614f12cbc74d01d99fb3ac99a55bf7bf7"
dependencies = [
"http",
"opentelemetry 0.31.0",
"opentelemetry-semantic-conventions 0.31.0",
"tracing",
"tracing-opentelemetry",
]
[[package]]
name = "tracing-serde"
version = "0.2.0"
@@ -7839,11 +8067,16 @@ version = "0.3.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex-automata",
"serde",
"serde_json",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-serde",
]
@@ -7860,6 +8093,7 @@ dependencies = [
"async-trait",
"axum 0.8.7",
"axum-test",
"axum-tracing-opentelemetry",
"base64",
"bytes",
"chrono",
@@ -7876,6 +8110,7 @@ dependencies = [
"hyper-util",
"indexmap 2.12.0",
"indoc",
"init-tracing-opentelemetry",
"itertools 0.14.0",
"jsonschema",
"jsonwebtoken",
@@ -7916,6 +8151,7 @@ dependencies = [
"tower-http",
"tower-service",
"tracing",
"tracing-opentelemetry-instrumentation-sdk",
"tracing-subscriber",
"trailbase-assets",
"trailbase-build",

View File

@@ -27,6 +27,7 @@ harness = false
default = ["wasm"]
v8 = ["dep:trailbase-js"]
wasm = ["dep:trailbase-wasm-runtime-host"]
otel = ["dep:axum-tracing-opentelemetry", "dep:init-tracing-opentelemetry"]
[dependencies]
aes-gcm-siv = "0.11.1"
@@ -36,6 +37,7 @@ async-channel = "2.3.1"
async-trait = "0.1.80"
axum = { workspace = true }
base64 = { workspace = true }
axum-tracing-opentelemetry = { version = "0.32.0", optional = true }
bytes = { version = "1.8.0", features = ["serde"] }
chrono = "^0.4.38"
client-ip = "0.1.1"
@@ -49,6 +51,7 @@ hyper = "1.6.0"
hyper-util = "0.1.7"
indexmap = "2.11.4"
indoc = "2.0.5"
init-tracing-opentelemetry = { version = "0.32.0", features = ["tracing_subscriber_ext", "metrics"], optional = true }
itertools = "0.14.0"
jsonschema = { version = "0.35.0", default-features = false }
jsonwebtoken = { version = "^10.2.0", default-features = false, features = ["use_pem", "rust_crypto"] }
@@ -85,6 +88,7 @@ tower-cookies = "0.11.0"
tower-http = { version = "^0.6.0", default-features = false, features = ["cors", "trace", "fs", "limit"] }
tower-service = { version = "0.3.3", default-features = false }
tracing = { workspace = true }
tracing-opentelemetry-instrumentation-sdk = "0.32.0"
tracing-subscriber = { workspace = true }
trailbase-assets = { workspace = true }
trailbase-build = { workspace = true }

View File

@@ -40,7 +40,7 @@ pub use init::{InitArgs, InitError, init_app_state};
/// A set of options to configure serving behaviors. Changing any of these options
/// requires a server restart, which makes them a natural fit for being exposed as command line
/// arguments.
#[derive(Debug, Default)]
#[derive(Clone, Debug, Default)]
pub struct ServerOptions {
/// Optional path to static assets that will be served at the HTTP root.
pub data_dir: DataDir,
@@ -84,7 +84,7 @@ pub struct ServerOptions {
/// TLS certificate path.
pub tls_cert: Option<CertificateDer<'static>>,
/// TLS key path.
pub tls_key: Option<PrivateKeyDer<'static>>,
pub tls_key: Option<Arc<PrivateKeyDer<'static>>>,
}
pub struct Server {
@@ -141,37 +141,7 @@ impl Server {
})
.await?;
// Initialize tracing subscribers/layers.
//
// A few notes in case initialization below panics. The `log` and `tracing` crates/systems are
// mostly independent. Both like to be initialized only once given their global nature. There
// is a `.try_init()`, which has not effect when already initialized.
//
// Here we specifically only initialize `tracing`, since we critically rely on the
// `SqliteLogLayer`. We leave `log` initialization to the program level.
//
// The current setup prevents users from initializing tracing themselves. This is only relevant
// for the frameworks-use-case. If we wanted to allow it, we could check that if already
// initialized, the "logging::SqliteLogLayer" is present.
//
// If the `tracing_subscriber` crate is built with the default feature `tracing-log`,
// initializing `tracing` will also initialize the `log` crate. So this approach will only
// work if built w/o `tracing-log`. Otherwise, initializing `log` before will lead to a panic
// here. We do *not* want to use a `.try_init()` here, otherwise may silently miss
// `SqliteLogLayer`.
//
// Response log events are emitted at the INFO level, see `logging.rs`
let filter_layer = filter::Targets::new()
.with_default(filter::LevelFilter::OFF)
.with_target(crate::logging::EVENT_TARGET, crate::logging::LEVEL);
tracing_subscriber::Registry::default()
.with(filter_layer)
.with(logging::SqliteLogLayer::new(
&state,
/* log-to-stdout= */ opts.log_responses,
))
.init();
Self::build_tracing(&state, opts.log_responses).init();
if new_data_dir {
on_first_init(state.clone())
@@ -209,6 +179,61 @@ impl Server {
})
}
fn build_tracing(
state: &AppState,
log_responses: bool,
) -> impl tracing_subscriber::layer::SubscriberExt {
// Initialize tracing subscribers/layers.
//
// A few notes in case initialization below panics. The `log` and `tracing` crates/systems are
// mostly independent. Both like to be initialized only once given their global nature. There
// is a `.try_init()`, which has not effect when already initialized.
//
// Here we specifically only initialize `tracing`, since we critically rely on the
// `SqliteLogLayer`. We leave `log` initialization to the program level.
//
// The current setup prevents users from initializing tracing themselves. This is only relevant
// for the frameworks-use-case. If we wanted to allow it, we could check that if already
// initialized, the "logging::SqliteLogLayer" is present.
//
// If the `tracing_subscriber` crate is built with the default feature `tracing-log`,
// initializing `tracing` will also initialize the `log` crate. So this approach will only
// work if built w/o `tracing-log`. Otherwise, initializing `log` before will lead to a panic
// here. We do *not* want to use a `.try_init()` here, otherwise may silently miss
// `SqliteLogLayer`.
//
// Response log events are emitted at the INFO level, see `logging.rs`
#[cfg(not(feature = "otel"))]
let subscriber = tracing_subscriber::Registry::default();
#[cfg(feature = "otel")]
let subscriber = {
let (subscriber, otel_guard) =
init_tracing_opentelemetry::tracing_subscriber_ext::regiter_otel_layers(
tracing_subscriber::Registry::default(),
)
.expect("startup");
// TODO: We have to keep this alive. Let's find something better than a singleton.
use std::sync::OnceLock;
static SINGLETON: OnceLock<init_tracing_opentelemetry::Guard> = OnceLock::new();
SINGLETON.get_or_init(move || init_tracing_opentelemetry::Guard::global(Some(otel_guard)));
subscriber
};
let filter_layer = filter::Targets::new()
.with_default(filter::LevelFilter::OFF)
.with_target(crate::logging::EVENT_TARGET, crate::logging::LEVEL);
return subscriber
.with(filter_layer)
.with(logging::SqliteLogLayer::new(
state,
/* log-to-stdout= */ log_responses,
));
}
pub async fn serve(self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Install a HUP/hangup signal handler to reload config.
#[cfg(unix)]
@@ -409,11 +434,16 @@ impl Server {
);
}
fn wrap_with_default_layers(
pub fn wrap_with_default_layers(
state: &AppState,
opts: &ServerOptions,
router: Router<AppState>,
) -> Router<()> {
#[cfg(feature = "otel")]
let router = router
.layer(axum_tracing_opentelemetry::middleware::OtelInResponseLayer)
.layer(axum_tracing_opentelemetry::middleware::OtelAxumLayer::default());
return router
.layer(CookieManagerLayer::new())
.layer(build_cors(opts))

View File

@@ -3,7 +3,6 @@ use axum::http::StatusCode;
use axum_test::TestServer;
use axum_test::multipart::MultipartForm;
use tower_cookies::Cookie;
use tracing_subscriber::prelude::*;
use trailbase_sqlite::params;
use trailbase::AppState;
@@ -34,13 +33,7 @@ fn integration_tests() {
async fn test_record_apis() {
let data_dir = temp_dir::TempDir::new().unwrap();
let Server {
state,
main_router,
admin_router,
tls,
} = Server::init(ServerOptions {
let options = ServerOptions {
data_dir: DataDir(data_dir.path().to_path_buf()),
address: "localhost:4041".to_string(),
admin_address: None,
@@ -49,9 +42,14 @@ async fn test_record_apis() {
cors_allowed_origins: vec![],
runtime_threads: None,
..Default::default()
})
.await
.unwrap();
};
let Server {
state,
main_router,
admin_router,
tls,
} = Server::init(options.clone()).await.unwrap();
assert!(admin_router.is_none());
assert!(tls.is_none());
@@ -93,13 +91,54 @@ async fn test_record_apis() {
add_user_to_room(conn, user_x, room).await.unwrap();
// Set up logging: declares **where** tracing is being logged to, e.g. stderr, file, sqlite.
tracing_subscriber::Registry::default()
.with(trailbase::logging::SqliteLogLayer::new(&state, false))
.set_default();
#[allow(unused_mut)]
let (_address, mut router) = main_router;
#[cfg(feature = "otel")]
{
#[tracing::instrument]
async fn trace_id() -> impl axum::response::IntoResponse {
// Publish OTEL metrics:
// https://docs.rs/tracing-opentelemetry/latest/tracing_opentelemetry/struct.MetricsLayer.html
//
// To publish a new metric, add a key-value pair to your tracing::Event that contains
// following prefixes:
//
// monotonic_counter. (non-negative numbers): Used when the counter should only ever
// increase counter.: Used when the counter can go up or down
// histogram.: Used to report arbitrary values that are likely to be statistically
// meaningful gauge.: Used to report instantaneous values that can go up or down
tracing::info!(monotonic_counter.index = 1);
return axum::Json(serde_json::json!({
"id": tracing_opentelemetry_instrumentation_sdk::find_current_trace_id().unwrap(),
}));
}
// NOTE: the wrapping is needed to have the OTEL layers touch the route.
router = router.merge(Server::wrap_with_default_layers(
&state,
&options,
axum::Router::new().route("/trace", axum::routing::get(trace_id)),
));
}
{
let server = TestServer::new(main_router.1).unwrap();
let server = TestServer::new(router).unwrap();
#[cfg(feature = "otel")]
{
let trace_response = server.get("/trace").await;
assert_eq!(trace_response.status_code(), StatusCode::OK);
#[derive(serde::Deserialize)]
struct TraceResponse {
id: String,
}
let TraceResponse { id } = trace_response.json();
assert_ne!(id, "");
}
{
// User X can post to a JSON message.

View File

@@ -1,4 +1,5 @@
use rcgen::{CertifiedKey, generate_simple_self_signed};
use std::sync::Arc;
use tokio_rustls::rustls::pki_types::{PrivateKeyDer, pem::PemObject};
use tracing::*;
use trailbase::{DataDir, Server, ServerOptions};
@@ -32,7 +33,7 @@ fn test_https_serving() {
dev: false,
cors_allowed_origins: vec![],
runtime_threads: None,
tls_key: Some(tls_key),
tls_key: Some(Arc::new(tls_key)),
tls_cert: Some(cert.der().clone()),
..Default::default()
})