fix(linux): resolve eBPF thread names to main process names (#87)

Use a periodically refreshed procfs PID cache to resolve eBPF thread names
(e.g. 'Socket Thread') to main process names (e.g. 'firefox'). Fall back to
the eBPF name for short-lived processes that have already exited and are
therefore missing from the cache.
This commit is contained in:
Marco Cadetg
2025-12-07 11:24:58 +01:00
committed by GitHub
parent c31909bff8
commit b12b7d38ab
2 changed files with 62 additions and 19 deletions

View File

@@ -166,8 +166,19 @@ mod ebpf_enhanced {
is_tcp,
) {
Some(process_info) => {
// Try to resolve the correct main process name using the PID.
// eBPF captures thread names (e.g., "Socket Thread"), but we want
// the main process name (e.g., "firefox"). The procfs cache maps
// PIDs to main process names from /proc/<pid>/comm.
// For short-lived processes (like curl), the PID won't be in the
// cache (process already exited), so we fall back to the eBPF name.
let resolved_name = self
.procfs_lookup
.get_process_name_by_pid(process_info.pid)
.unwrap_or_else(|| process_info.comm.clone());
debug!(
"eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, Comm: {}, Age: {}ns",
"eBPF lookup successful for {}:{} -> {}:{} - PID: {}, UID: {}, eBPF comm: {}, Resolved: {}, Age: {}ns",
conn.local_addr.ip(),
conn.local_addr.port(),
conn.remote_addr.ip(),
@@ -175,9 +186,10 @@ mod ebpf_enhanced {
process_info.pid,
process_info.uid,
process_info.comm,
resolved_name,
process_info.timestamp
);
Some((process_info.pid, process_info.comm))
Some((process_info.pid, resolved_name))
}
None => {
debug!(

View File

@@ -7,11 +7,20 @@ use std::collections::HashMap;
use std::fs;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::sync::RwLock;
use std::time::{Duration, Instant};
use std::time::Instant;
/// Map of socket inode to (PID, process name).
/// Produced by `build_inode_map` and consumed by `parse_and_map`.
type InodeProcessMap = HashMap<u64, (u32, String)>;
/// Map of PID to process name.
/// Produced by `build_inode_map` for every scanned process and passed on by `build_process_map`.
type PidNameMap = HashMap<u32, String>;
/// Map of connection key to (PID, process name).
/// Filled in by `parse_and_map` and returned by `build_process_map`.
type ConnectionProcessMap = HashMap<ConnectionKey, (u32, String)>;
/// Linux `ProcessLookup` implementation backed by caches built from a procfs scan.
///
/// Both caches are populated in `new()` and replaced wholesale by `refresh()`.
pub struct LinuxProcessLookup {
// Cache: ConnectionKey -> (pid, process_name), stored together with the time of the last refresh
cache: RwLock<ProcessCache>,
// Cache: PID -> process_name (for resolving eBPF thread names to main process names)
// Guarded by its own lock, separate from `cache`.
pid_names: RwLock<HashMap<u32, String>>,
}
struct ProcessCache {
@@ -21,20 +30,36 @@ struct ProcessCache {
impl LinuxProcessLookup {
pub fn new() -> Result<Self> {
// Populate the cache immediately so early connections have process names available.
// This ensures the PID→name cache is ready before packet capture starts.
let (process_map, pid_names) = Self::build_process_map()?;
Ok(Self {
cache: RwLock::new(ProcessCache {
lookup: HashMap::new(),
last_refresh: Instant::now() - Duration::from_secs(3600),
lookup: process_map,
last_refresh: Instant::now(),
}),
pid_names: RwLock::new(pid_names),
})
}
/// Build connection -> process mapping
fn build_process_map() -> Result<HashMap<ConnectionKey, (u32, String)>> {
/// Resolve a PID to its process name using the PID -> name cache from the last procfs scan.
///
/// Returns an owned copy of the cached name, or `None` when the PID is not in the
/// cache (the process may have exited, or it has not been scanned yet).
///
/// # Panics
///
/// Panics if the `pid_names` lock is poisoned.
pub fn get_process_name_by_pid(&self, pid: u32) -> Option<String> {
    let names = self.pid_names.read().expect("pid_names lock poisoned");
    names.get(&pid).cloned()
}
/// Build connection -> process mapping and PID -> name mapping
fn build_process_map() -> Result<(ConnectionProcessMap, PidNameMap)>
{
let mut process_map = HashMap::new();
// First, build inode -> process mapping
let inode_to_process = Self::build_inode_map()?;
// First, build inode -> process mapping and PID -> name mapping
let (inode_to_process, pid_names) = Self::build_inode_map()?;
// Then, parse network files to map connections -> inodes -> processes
Self::parse_and_map(
@@ -62,12 +87,13 @@ impl LinuxProcessLookup {
&mut process_map,
)?;
Ok(process_map)
Ok((process_map, pid_names))
}
/// Build inode -> (pid, process_name) mapping
fn build_inode_map() -> Result<HashMap<u64, (u32, String)>> {
/// Build inode -> (pid, process_name) mapping and PID -> process_name mapping
fn build_inode_map() -> Result<(InodeProcessMap, PidNameMap)> {
let mut inode_map = HashMap::new();
let mut pid_names = HashMap::new();
for entry in fs::read_dir("/proc")? {
let entry = entry?;
@@ -87,7 +113,10 @@ impl LinuxProcessLookup {
.trim()
.to_string();
// Check file descriptors
// Store PID -> name mapping for all processes
pid_names.insert(pid, process_name.clone());
// Check file descriptors for socket inodes
let fd_dir = path.join("fd");
if let Ok(fd_entries) = fs::read_dir(&fd_dir) {
for fd_entry in fd_entries.flatten() {
@@ -102,15 +131,15 @@ impl LinuxProcessLookup {
}
}
Ok(inode_map)
Ok((inode_map, pid_names))
}
/// Parse /proc/net file and map connections to processes
fn parse_and_map(
path: &str,
protocol: Protocol,
inode_map: &HashMap<u64, (u32, String)>,
result: &mut HashMap<ConnectionKey, (u32, String)>,
inode_map: &InodeProcessMap,
result: &mut ConnectionProcessMap,
) -> Result<()> {
let content = match fs::read_to_string(path) {
Ok(c) => c,
@@ -201,17 +230,19 @@ impl ProcessLookup for LinuxProcessLookup {
// The enrichment thread (app.rs:495-500) handles periodic refresh every 5 seconds.
// IMPORTANT: Do NOT refresh here as it caused high CPU usage when called for every
// connection without process info (flamegraph showed this was the main bottleneck).
let cache = self.cache.read().unwrap();
let cache = self.cache.read().expect("process cache lock poisoned");
cache.lookup.get(&key).cloned()
}
fn refresh(&self) -> Result<()> {
let process_map = Self::build_process_map()?;
let (process_map, pid_names) = Self::build_process_map()?;
let mut cache = self.cache.write().unwrap();
let mut cache = self.cache.write().expect("process cache lock poisoned");
cache.lookup = process_map;
cache.last_refresh = Instant::now();
*self.pid_names.write().expect("pid_names lock poisoned") = pid_names;
Ok(())
}