fix(linux): resolve eBPF thread names to main process names

Use a periodically refreshed procfs PID cache to resolve thread names
(e.g. 'Socket Thread') to main process names (e.g. 'firefox'). Falls back to
the eBPF-reported name for short-lived processes that have already exited.
This commit is contained in:
Marco Cadetg
2025-12-07 11:09:29 +01:00
parent c31909bff8
commit 810b865841
2 changed files with 62 additions and 19 deletions

View File

@@ -166,8 +166,19 @@ mod ebpf_enhanced {
is_tcp,
) {
Some(process_info) => {
// Try to resolve the correct main process name using the PID.
// eBPF captures thread names (e.g., "Socket Thread"), but we want
// the main process name (e.g., "firefox"). The procfs cache maps
// PIDs to main process names from /proc/<pid>/comm.
// For short-lived processes (like curl), the PID won't be in the
// cache (process already exited), so we fall back to the eBPF name.
let resolved_name = self
.procfs_lookup
.get_process_name_by_pid(process_info.pid)
.unwrap_or_else(|| process_info.comm.clone());
debug!(
"eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, Comm: {}, Age: {}ns",
"eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, eBPF comm: {}, Resolved: {}, Age: {}ns",
conn.local_addr.ip(),
conn.local_addr.port(),
conn.remote_addr.ip(),
@@ -175,9 +186,10 @@ mod ebpf_enhanced {
process_info.pid,
process_info.uid,
process_info.comm,
resolved_name,
process_info.timestamp
);
Some((process_info.pid, process_info.comm))
Some((process_info.pid, resolved_name))
}
None => {
debug!(

View File

@@ -7,11 +7,20 @@ use std::collections::HashMap;
use std::fs;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::sync::RwLock;
use std::time::{Duration, Instant};
use std::time::Instant;
/// Map of socket inode to (PID, process name)
type InodeProcessMap = HashMap<u64, (u32, String)>;
/// Map of PID to process name
type PidNameMap = HashMap<u32, String>;
/// Map of connection key to (PID, process name)
type ConnectionProcessMap = HashMap<ConnectionKey, (u32, String)>;
/// Process lookup state holding two independently locked caches.
///
/// Each cache sits behind its own `RwLock`, so the two are locked
/// separately rather than as one unit.
pub struct LinuxProcessLookup {
// Cache: ConnectionKey -> (pid, process_name)
cache: RwLock<ProcessCache>,
// Cache: PID -> process_name (for resolving eBPF thread names to main process names)
pid_names: RwLock<HashMap<u32, String>>,
}
struct ProcessCache {
@@ -21,20 +30,36 @@ struct ProcessCache {
impl LinuxProcessLookup {
pub fn new() -> Result<Self> {
// Populate the cache immediately so early connections have process names available.
// This ensures the PID→name cache is ready before packet capture starts.
let (process_map, pid_names) = Self::build_process_map()?;
Ok(Self {
cache: RwLock::new(ProcessCache {
lookup: HashMap::new(),
last_refresh: Instant::now() - Duration::from_secs(3600),
lookup: process_map,
last_refresh: Instant::now(),
}),
pid_names: RwLock::new(pid_names),
})
}
/// Build connection -> process mapping
fn build_process_map() -> Result<HashMap<ConnectionKey, (u32, String)>> {
/// Look up the cached process name recorded for `pid`.
///
/// Returns a clone of the stored name, or `None` when the PID is not present
/// in the cached map (e.g. the process has exited or was not seen by the
/// last scan).
///
/// # Panics
///
/// Panics if the `pid_names` lock has been poisoned.
pub fn get_process_name_by_pid(&self, pid: u32) -> Option<String> {
    // Hold the read guard in a named binding; it is released on return.
    let names = self.pid_names.read().expect("pid_names lock poisoned");
    names.get(&pid).cloned()
}
/// Build connection -> process mapping and PID -> name mapping
fn build_process_map() -> Result<(ConnectionProcessMap, PidNameMap)>
{
let mut process_map = HashMap::new();
// First, build inode -> process mapping
let inode_to_process = Self::build_inode_map()?;
// First, build inode -> process mapping and PID -> name mapping
let (inode_to_process, pid_names) = Self::build_inode_map()?;
// Then, parse network files to map connections -> inodes -> processes
Self::parse_and_map(
@@ -62,12 +87,13 @@ impl LinuxProcessLookup {
&mut process_map,
)?;
Ok(process_map)
Ok((process_map, pid_names))
}
/// Build inode -> (pid, process_name) mapping
fn build_inode_map() -> Result<HashMap<u64, (u32, String)>> {
/// Build inode -> (pid, process_name) mapping and PID -> process_name mapping
fn build_inode_map() -> Result<(InodeProcessMap, PidNameMap)> {
let mut inode_map = HashMap::new();
let mut pid_names = HashMap::new();
for entry in fs::read_dir("/proc")? {
let entry = entry?;
@@ -87,7 +113,10 @@ impl LinuxProcessLookup {
.trim()
.to_string();
// Check file descriptors
// Store PID -> name mapping for all processes
pid_names.insert(pid, process_name.clone());
// Check file descriptors for socket inodes
let fd_dir = path.join("fd");
if let Ok(fd_entries) = fs::read_dir(&fd_dir) {
for fd_entry in fd_entries.flatten() {
@@ -102,15 +131,15 @@ impl LinuxProcessLookup {
}
}
Ok(inode_map)
Ok((inode_map, pid_names))
}
/// Parse /proc/net file and map connections to processes
fn parse_and_map(
path: &str,
protocol: Protocol,
inode_map: &HashMap<u64, (u32, String)>,
result: &mut HashMap<ConnectionKey, (u32, String)>,
inode_map: &InodeProcessMap,
result: &mut ConnectionProcessMap,
) -> Result<()> {
let content = match fs::read_to_string(path) {
Ok(c) => c,
@@ -201,17 +230,19 @@ impl ProcessLookup for LinuxProcessLookup {
// The enrichment thread (app.rs:495-500) handles periodic refresh every 5 seconds.
// IMPORTANT: Do NOT refresh here as it caused high CPU usage when called for every
// connection without process info (flamegraph showed this was the main bottleneck).
let cache = self.cache.read().unwrap();
let cache = self.cache.read().expect("process cache lock poisoned");
cache.lookup.get(&key).cloned()
}
fn refresh(&self) -> Result<()> {
let process_map = Self::build_process_map()?;
let (process_map, pid_names) = Self::build_process_map()?;
let mut cache = self.cache.write().unwrap();
let mut cache = self.cache.write().expect("process cache lock poisoned");
cache.lookup = process_map;
cache.last_refresh = Instant::now();
*self.pid_names.write().expect("pid_names lock poisoned") = pid_names;
Ok(())
}