fix(linux): resolve eBPF thread names to main process names

Use a periodically refreshed procfs PID cache to resolve thread names (e.g. 'Socket Thread') to main process names (e.g. 'firefox'). Fall back to the eBPF-reported name for short-lived processes that have already exited.
2026-01-18 12:30:33 -06:00 · 2025-12-07 11:09:29 +01:00
parent c31909bff8
commit 810b865841
2 changed files with 62 additions and 19 deletions
--- a/src/network/platform/linux/enhanced.rs
+++ b/src/network/platform/linux/enhanced.rs
@@ -166,8 +166,19 @@ mod ebpf_enhanced {
                is_tcp,
            ) {
                Some(process_info) => {
+                    // Try to resolve the correct main process name using the PID.
+                    // eBPF captures thread names (e.g., "Socket Thread"), but we want
+                    // the main process name (e.g., "firefox"). The procfs cache maps
+                    // PIDs to main process names from /proc/<pid>/comm.
+                    // For short-lived processes (like curl), the PID may be missing from the
+                    // cache (exited or not yet scanned), so we fall back to the eBPF name.
+                    let resolved_name = self
+                        .procfs_lookup
+                        .get_process_name_by_pid(process_info.pid)
+                        .unwrap_or_else(|| process_info.comm.clone());
+
                    debug!(
-                        "eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, Comm: {}, Age: {}ns",
+                        "eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, eBPF comm: {}, Resolved: {}, Age: {}ns",
                        conn.local_addr.ip(),
                        conn.local_addr.port(),
                        conn.remote_addr.ip(),
@@ -175,9 +186,10 @@ mod ebpf_enhanced {
                        process_info.pid,
                        process_info.uid,
                        process_info.comm,
+                        resolved_name,
                        process_info.timestamp
                    );
-                    Some((process_info.pid, process_info.comm))
+                    Some((process_info.pid, resolved_name))
                }
                None => {
                    debug!(
--- a/src/network/platform/linux/process.rs
+++ b/src/network/platform/linux/process.rs
@@ -7,11 +7,20 @@ use std::collections::HashMap;
 use std::fs;
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
 use std::sync::RwLock;
-use std::time::{Duration, Instant};
+use std::time::Instant;
+
+/// Map of socket inode to (PID, process name)
+type InodeProcessMap = HashMap<u64, (u32, String)>;
+/// Map of PID to process name
+type PidNameMap = HashMap<u32, String>;
+/// Map of connection key to (PID, process name)
+type ConnectionProcessMap = HashMap<ConnectionKey, (u32, String)>;

 pub struct LinuxProcessLookup {
    // Cache: ConnectionKey -> (pid, process_name)
    cache: RwLock<ProcessCache>,
+    // Cache: PID -> process_name (for resolving eBPF thread names to main process names)
+    pid_names: RwLock<HashMap<u32, String>>,
 }

 struct ProcessCache {
@@ -21,20 +30,36 @@ struct ProcessCache {

 impl LinuxProcessLookup {
    pub fn new() -> Result<Self> {
+        // Populate the cache immediately so early connections have process names available.
+        // This ensures the PID→name cache is ready before packet capture starts.
+        let (process_map, pid_names) = Self::build_process_map()?;
+
        Ok(Self {
            cache: RwLock::new(ProcessCache {
-                lookup: HashMap::new(),
-                last_refresh: Instant::now() - Duration::from_secs(3600),
+                lookup: process_map,
+                last_refresh: Instant::now(),
            }),
+            pid_names: RwLock::new(pid_names),
        })
    }

-    /// Build connection -> process mapping
-    fn build_process_map() -> Result<HashMap<ConnectionKey, (u32, String)>> {
+    /// Get process name by PID from the cached procfs scan.
+    /// Returns None if PID not found (process may have exited or not yet scanned).
+    pub fn get_process_name_by_pid(&self, pid: u32) -> Option<String> {
+        self.pid_names
+            .read()
+            .expect("pid_names lock poisoned")
+            .get(&pid)
+            .cloned()
+    }
+
+    /// Build connection -> process mapping and PID -> name mapping
+    fn build_process_map() -> Result<(ConnectionProcessMap, PidNameMap)>
+    {
        let mut process_map = HashMap::new();

-        // First, build inode -> process mapping
-        let inode_to_process = Self::build_inode_map()?;
+        // First, build inode -> process mapping and PID -> name mapping
+        let (inode_to_process, pid_names) = Self::build_inode_map()?;

        // Then, parse network files to map connections -> inodes -> processes
        Self::parse_and_map(
@@ -62,12 +87,13 @@ impl LinuxProcessLookup {
            &mut process_map,
        )?;

-        Ok(process_map)
+        Ok((process_map, pid_names))
    }

-    /// Build inode -> (pid, process_name) mapping
-    fn build_inode_map() -> Result<HashMap<u64, (u32, String)>> {
+    /// Build inode -> (pid, process_name) mapping and PID -> process_name mapping
+    fn build_inode_map() -> Result<(InodeProcessMap, PidNameMap)> {
        let mut inode_map = HashMap::new();
+        let mut pid_names = HashMap::new();

        for entry in fs::read_dir("/proc")? {
            let entry = entry?;
@@ -87,7 +113,10 @@ impl LinuxProcessLookup {
                    .trim()
                    .to_string();

-                // Check file descriptors
+                // Store PID -> name mapping for all processes
+                pid_names.insert(pid, process_name.clone());
+
+                // Check file descriptors for socket inodes
                let fd_dir = path.join("fd");
                if let Ok(fd_entries) = fs::read_dir(&fd_dir) {
                    for fd_entry in fd_entries.flatten() {
@@ -102,15 +131,15 @@ impl LinuxProcessLookup {
            }
        }

-        Ok(inode_map)
+        Ok((inode_map, pid_names))
    }

    /// Parse /proc/net file and map connections to processes
    fn parse_and_map(
        path: &str,
        protocol: Protocol,
-        inode_map: &HashMap<u64, (u32, String)>,
-        result: &mut HashMap<ConnectionKey, (u32, String)>,
+        inode_map: &InodeProcessMap,
+        result: &mut ConnectionProcessMap,
    ) -> Result<()> {
        let content = match fs::read_to_string(path) {
            Ok(c) => c,
@@ -201,17 +230,19 @@ impl ProcessLookup for LinuxProcessLookup {
        // The enrichment thread (app.rs:495-500) handles periodic refresh every 5 seconds.
        // IMPORTANT: Do NOT refresh here as it caused high CPU usage when called for every
        // connection without process info (flamegraph showed this was the main bottleneck).
-        let cache = self.cache.read().unwrap();
+        let cache = self.cache.read().expect("process cache lock poisoned");
        cache.lookup.get(&key).cloned()
    }

    fn refresh(&self) -> Result<()> {
-        let process_map = Self::build_process_map()?;
+        let (process_map, pid_names) = Self::build_process_map()?;

-        let mut cache = self.cache.write().unwrap();
+        let mut cache = self.cache.write().expect("process cache lock poisoned");
        cache.lookup = process_map;
        cache.last_refresh = Instant::now();

+        *self.pid_names.write().expect("pid_names lock poisoned") = pid_names;
+
        Ok(())
    }