From b12b7d38abb77e0c062cd6d460cf3efe86e8cc06 Mon Sep 17 00:00:00 2001 From: Marco Cadetg Date: Sun, 7 Dec 2025 11:24:58 +0100 Subject: [PATCH] fix(linux): resolve eBPF thread names to main process names (#87) Use periodic procfs PID cache to resolve thread names (e.g. 'Socket Thread') to main process names (e.g. 'firefox'). Falls back to eBPF name for short-lived processes that have already exited. --- src/network/platform/linux/enhanced.rs | 16 ++++++- src/network/platform/linux/process.rs | 65 +++++++++++++++++++------- 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/src/network/platform/linux/enhanced.rs b/src/network/platform/linux/enhanced.rs index 04adf5a..8fc5b97 100644 --- a/src/network/platform/linux/enhanced.rs +++ b/src/network/platform/linux/enhanced.rs @@ -166,8 +166,19 @@ mod ebpf_enhanced { is_tcp, ) { Some(process_info) => { + // Try to resolve the correct main process name using the PID. + // eBPF captures thread names (e.g., "Socket Thread"), but we want + // the main process name (e.g., "firefox"). The procfs cache maps + // PIDs to main process names from /proc/<pid>/comm. + // For short-lived processes (like curl), the PID won't be in the + // cache (process already exited), so we fall back to the eBPF name. 
+ let resolved_name = self + .procfs_lookup + .get_process_name_by_pid(process_info.pid) + .unwrap_or_else(|| process_info.comm.clone()); + debug!( - "eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, Comm: {}, Age: {}ns", + "eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, eBPF comm: {}, Resolved: {}, Age: {}ns", conn.local_addr.ip(), conn.local_addr.port(), conn.remote_addr.ip(), @@ -175,9 +186,10 @@ mod ebpf_enhanced { process_info.pid, process_info.uid, process_info.comm, + resolved_name, process_info.timestamp ); - Some((process_info.pid, process_info.comm)) + Some((process_info.pid, resolved_name)) } None => { debug!( diff --git a/src/network/platform/linux/process.rs b/src/network/platform/linux/process.rs index 17842b4..7d4245c 100644 --- a/src/network/platform/linux/process.rs +++ b/src/network/platform/linux/process.rs @@ -7,11 +7,20 @@ use std::collections::HashMap; use std::fs; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::sync::RwLock; -use std::time::{Duration, Instant}; +use std::time::Instant; + +/// Map of socket inode to (PID, process name) +type InodeProcessMap = HashMap; +/// Map of PID to process name +type PidNameMap = HashMap; +/// Map of connection key to (PID, process name) +type ConnectionProcessMap = HashMap; pub struct LinuxProcessLookup { // Cache: ConnectionKey -> (pid, process_name) cache: RwLock, + // Cache: PID -> process_name (for resolving eBPF thread names to main process names) + pid_names: RwLock>, } struct ProcessCache { @@ -21,20 +30,36 @@ struct ProcessCache { impl LinuxProcessLookup { pub fn new() -> Result { + // Populate the cache immediately so early connections have process names available. + // This ensures the PID→name cache is ready before packet capture starts. 
+ let (process_map, pid_names) = Self::build_process_map()?; + Ok(Self { cache: RwLock::new(ProcessCache { - lookup: HashMap::new(), - last_refresh: Instant::now() - Duration::from_secs(3600), + lookup: process_map, + last_refresh: Instant::now(), }), + pid_names: RwLock::new(pid_names), }) } - /// Build connection -> process mapping - fn build_process_map() -> Result> { + /// Get process name by PID from the cached procfs scan. + /// Returns None if PID not found (process may have exited or not yet scanned). + pub fn get_process_name_by_pid(&self, pid: u32) -> Option { + self.pid_names + .read() + .expect("pid_names lock poisoned") + .get(&pid) + .cloned() + } + + /// Build connection -> process mapping and PID -> name mapping + fn build_process_map() -> Result<(ConnectionProcessMap, PidNameMap)> + { let mut process_map = HashMap::new(); - // First, build inode -> process mapping - let inode_to_process = Self::build_inode_map()?; + // First, build inode -> process mapping and PID -> name mapping + let (inode_to_process, pid_names) = Self::build_inode_map()?; // Then, parse network files to map connections -> inodes -> processes Self::parse_and_map( @@ -62,12 +87,13 @@ impl LinuxProcessLookup { &mut process_map, )?; - Ok(process_map) + Ok((process_map, pid_names)) } - /// Build inode -> (pid, process_name) mapping - fn build_inode_map() -> Result> { + /// Build inode -> (pid, process_name) mapping and PID -> process_name mapping + fn build_inode_map() -> Result<(InodeProcessMap, PidNameMap)> { let mut inode_map = HashMap::new(); + let mut pid_names = HashMap::new(); for entry in fs::read_dir("/proc")? 
{ let entry = entry?; @@ -87,7 +113,10 @@ impl LinuxProcessLookup { .trim() .to_string(); - // Check file descriptors + // Store PID -> name mapping for all processes + pid_names.insert(pid, process_name.clone()); + + // Check file descriptors for socket inodes let fd_dir = path.join("fd"); if let Ok(fd_entries) = fs::read_dir(&fd_dir) { for fd_entry in fd_entries.flatten() { @@ -102,15 +131,15 @@ impl LinuxProcessLookup { } } - Ok(inode_map) + Ok((inode_map, pid_names)) } /// Parse /proc/net file and map connections to processes fn parse_and_map( path: &str, protocol: Protocol, - inode_map: &HashMap, - result: &mut HashMap, + inode_map: &InodeProcessMap, + result: &mut ConnectionProcessMap, ) -> Result<()> { let content = match fs::read_to_string(path) { Ok(c) => c, @@ -201,17 +230,19 @@ impl ProcessLookup for LinuxProcessLookup { // The enrichment thread (app.rs:495-500) handles periodic refresh every 5 seconds. // IMPORTANT: Do NOT refresh here as it caused high CPU usage when called for every // connection without process info (flamegraph showed this was the main bottleneck). - let cache = self.cache.read().unwrap(); + let cache = self.cache.read().expect("process cache lock poisoned"); cache.lookup.get(&key).cloned() } fn refresh(&self) -> Result<()> { - let process_map = Self::build_process_map()?; + let (process_map, pid_names) = Self::build_process_map()?; - let mut cache = self.cache.write().unwrap(); + let mut cache = self.cache.write().expect("process cache lock poisoned"); cache.lookup = process_map; cache.last_refresh = Instant::now(); + *self.pid_names.write().expect("pid_names lock poisoned") = pid_names; + Ok(()) }