mirror of
https://github.com/domcyrus/rustnet.git
synced 2026-02-04 21:28:51 -06:00
392 lines
13 KiB
Python
Executable File
392 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Enrich RustNet PCAP captures with process information from sidecar JSONL.
|
|
|
|
This script correlates packets in a PCAP file with process information
|
|
from the accompanying .connections.jsonl file created by RustNet.
|
|
|
|
NOTE: If you captured using PKTAP on macOS (e.g., `--interface pktap,en0`),
|
|
the process information is already embedded in the PCAP file itself.
|
|
You can view it directly in Wireshark without using this script.
|
|
This script is only needed for regular (non-PKTAP) captures.
|
|
|
|
Usage:
|
|
# Show packets with process info
|
|
python pcap_enrich.py capture.pcap
|
|
|
|
# Export to annotated PCAPNG (requires editcap from Wireshark)
|
|
python pcap_enrich.py capture.pcap --output annotated.pcapng
|
|
|
|
# Generate TSV report
|
|
python pcap_enrich.py capture.pcap --format tsv > report.tsv
|
|
|
|
Requirements:
|
|
pip install scapy
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from scapy.all import rdpcap, IP, TCP, UDP, ICMP
|
|
except ImportError:
|
|
print("Error: scapy is required. Install with: pip install scapy", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
|
|
def parse_systemtime(st) -> float | None:
|
|
"""Parse a SystemTime serialized as {secs_since_epoch, nanos_since_epoch}."""
|
|
if st is None:
|
|
return None
|
|
if isinstance(st, dict):
|
|
secs = st.get("secs_since_epoch", 0)
|
|
nanos = st.get("nanos_since_epoch", 0)
|
|
return secs + nanos / 1e9
|
|
# Fallback for other formats
|
|
return None
|
|
|
|
|
|
def load_connections(jsonl_path: Path) -> dict:
|
|
"""Load connection-to-process mappings from JSONL file.
|
|
|
|
Returns a dict mapping (proto, local, remote) -> list of connection info dicts.
|
|
Multiple connections can exist for the same tuple (port reuse over time).
|
|
"""
|
|
lookup = {}
|
|
|
|
if not jsonl_path.exists():
|
|
print(f"Warning: Sidecar file not found: {jsonl_path}", file=sys.stderr)
|
|
return lookup
|
|
|
|
with open(jsonl_path) as f:
|
|
for line_num, line in enumerate(f, 1):
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
try:
|
|
c = json.loads(line)
|
|
proto = c.get("protocol", "").upper()
|
|
local = c.get("local_addr", "")
|
|
remote = c.get("remote_addr", "")
|
|
|
|
if proto and local and remote:
|
|
info = {
|
|
"pid": c.get("pid"),
|
|
"process_name": c.get("process_name"),
|
|
"first_seen": parse_systemtime(c.get("first_seen")),
|
|
"last_seen": parse_systemtime(c.get("last_seen")),
|
|
"bytes_sent": c.get("bytes_sent", 0),
|
|
"bytes_received": c.get("bytes_received", 0),
|
|
}
|
|
|
|
# Store both directions, as a list to handle port reuse
|
|
for key in [(proto, local, remote), (proto, remote, local)]:
|
|
if key not in lookup:
|
|
lookup[key] = []
|
|
lookup[key].append(info)
|
|
|
|
except json.JSONDecodeError as e:
|
|
print(f"Warning: Invalid JSON at line {line_num}: {e}", file=sys.stderr)
|
|
|
|
return lookup
|
|
|
|
|
|
def find_matching_connection(lookup: dict, pkt_tuple: tuple, pkt_time: float, slack: float) -> dict | None:
|
|
"""Find the best matching connection for a packet based on tuple and timestamp.
|
|
|
|
Args:
|
|
lookup: Connection lookup dict
|
|
pkt_tuple: (proto, src, dst) tuple from packet
|
|
pkt_time: Packet timestamp (seconds since epoch)
|
|
slack: Allowed time slack in seconds
|
|
|
|
Returns:
|
|
Best matching connection info dict, or None if no match
|
|
"""
|
|
connections = lookup.get(pkt_tuple, [])
|
|
if not connections:
|
|
return None
|
|
|
|
best_match = None
|
|
best_score = float('inf')
|
|
|
|
for conn in connections:
|
|
first_seen = conn.get("first_seen")
|
|
last_seen = conn.get("last_seen")
|
|
|
|
# If no timestamps, fall back to simple match (first connection wins)
|
|
if first_seen is None or last_seen is None:
|
|
if best_match is None:
|
|
best_match = conn
|
|
continue
|
|
|
|
# Check if packet falls within connection time range (with slack)
|
|
if first_seen - slack <= pkt_time <= last_seen + slack:
|
|
# Score by how close the packet is to the connection's time range
|
|
# Prefer connections where the packet is well within the range
|
|
if pkt_time < first_seen:
|
|
score = first_seen - pkt_time
|
|
elif pkt_time > last_seen:
|
|
score = pkt_time - last_seen
|
|
else:
|
|
score = 0 # Perfect match (within range)
|
|
|
|
if score < best_score:
|
|
best_score = score
|
|
best_match = conn
|
|
|
|
return best_match
|
|
|
|
|
|
def get_packet_tuple(pkt) -> tuple:
|
|
"""Extract connection tuple from packet."""
|
|
if not pkt.haslayer(IP):
|
|
return None
|
|
|
|
ip = pkt[IP]
|
|
src_ip = ip.src
|
|
dst_ip = ip.dst
|
|
|
|
if pkt.haslayer(TCP):
|
|
tcp = pkt[TCP]
|
|
return ("TCP", f"{src_ip}:{tcp.sport}", f"{dst_ip}:{tcp.dport}")
|
|
elif pkt.haslayer(UDP):
|
|
udp = pkt[UDP]
|
|
return ("UDP", f"{src_ip}:{udp.sport}", f"{dst_ip}:{udp.dport}")
|
|
elif pkt.haslayer(ICMP):
|
|
return ("ICMP", src_ip, dst_ip)
|
|
|
|
return None
|
|
|
|
|
|
def enrich_packets(pcap_path: Path, lookup: dict, slack: float):
|
|
"""Yield enriched packet information."""
|
|
packets = rdpcap(str(pcap_path))
|
|
|
|
for frame_num, pkt in enumerate(packets, 1):
|
|
pkt_tuple = get_packet_tuple(pkt)
|
|
pkt_time = float(pkt.time)
|
|
|
|
if not pkt_tuple:
|
|
yield {
|
|
"frame": frame_num,
|
|
"time": pkt_time,
|
|
"proto": "OTHER",
|
|
"src": "",
|
|
"dst": "",
|
|
"pid": None,
|
|
"process": None,
|
|
}
|
|
continue
|
|
|
|
proto, src, dst = pkt_tuple
|
|
info = find_matching_connection(lookup, pkt_tuple, pkt_time, slack) or {}
|
|
|
|
yield {
|
|
"frame": frame_num,
|
|
"time": pkt_time,
|
|
"proto": proto,
|
|
"src": src,
|
|
"dst": dst,
|
|
"pid": info.get("pid"),
|
|
"process": info.get("process_name"),
|
|
"bytes_sent": info.get("bytes_sent"),
|
|
"bytes_received": info.get("bytes_received"),
|
|
}
|
|
|
|
|
|
def print_table(packets: list):
|
|
"""Print enriched packets as a formatted table."""
|
|
print(f"{'Frame':>6} {'Proto':<5} {'Source':<24} {'Destination':<24} {'PID':>7} {'Process':<20}")
|
|
print("-" * 95)
|
|
|
|
for p in packets:
|
|
pid_str = str(p["pid"]) if p["pid"] else "-"
|
|
proc_str = p["process"] or "-"
|
|
if len(proc_str) > 20:
|
|
proc_str = proc_str[:17] + "..."
|
|
print(f"{p['frame']:>6} {p['proto']:<5} {p['src']:<24} {p['dst']:<24} {pid_str:>7} {proc_str:<20}")
|
|
|
|
|
|
def print_tsv(packets: list):
|
|
"""Print enriched packets as TSV."""
|
|
print("frame\ttime\tproto\tsrc\tdst\tpid\tprocess")
|
|
for p in packets:
|
|
print(f"{p['frame']}\t{p['time']:.6f}\t{p['proto']}\t{p['src']}\t{p['dst']}\t{p['pid'] or ''}\t{p['process'] or ''}")
|
|
|
|
|
|
def print_json(packets: list):
|
|
"""Print enriched packets as JSON."""
|
|
print(json.dumps(packets, indent=2))
|
|
|
|
|
|
def create_pcapng(pcap_path: Path, packets: list, output_path: Path):
|
|
"""Create annotated PCAPNG using editcap."""
|
|
# Check if editcap is available
|
|
try:
|
|
subprocess.run(["editcap", "--version"], capture_output=True, check=True)
|
|
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
print("Error: editcap not found. Install Wireshark to get editcap.", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# First convert to pcapng
|
|
with tempfile.NamedTemporaryFile(suffix=".pcapng", delete=False) as tmp:
|
|
tmp_path = Path(tmp.name)
|
|
|
|
subprocess.run(["editcap", "-F", "pcapng", str(pcap_path), str(tmp_path)], check=True)
|
|
|
|
# Build annotation commands
|
|
# editcap -a "frame:comment" format
|
|
annotations = []
|
|
for p in packets:
|
|
if p["pid"] or p["process"]:
|
|
comment_parts = []
|
|
if p["pid"]:
|
|
comment_parts.append(f"PID:{p['pid']}")
|
|
if p["process"]:
|
|
comment_parts.append(f"Process:{p['process']}")
|
|
comment = " ".join(comment_parts)
|
|
annotations.append(f"{p['frame']}:{comment}")
|
|
|
|
if not annotations:
|
|
print("No process information found to annotate.", file=sys.stderr)
|
|
# Just copy the pcapng as-is
|
|
tmp_path.rename(output_path)
|
|
return
|
|
|
|
# Apply annotations in batches (editcap has command line limits)
|
|
current_input = tmp_path
|
|
batch_size = 100
|
|
|
|
for i in range(0, len(annotations), batch_size):
|
|
batch = annotations[i:i + batch_size]
|
|
with tempfile.NamedTemporaryFile(suffix=".pcapng", delete=False) as tmp2:
|
|
tmp2_path = Path(tmp2.name)
|
|
|
|
cmd = ["editcap"]
|
|
for ann in batch:
|
|
cmd.extend(["-a", ann])
|
|
cmd.extend([str(current_input), str(tmp2_path)])
|
|
|
|
subprocess.run(cmd, check=True)
|
|
|
|
if current_input != tmp_path:
|
|
current_input.unlink()
|
|
current_input = tmp2_path
|
|
|
|
# Move final result to output
|
|
current_input.rename(output_path)
|
|
if tmp_path.exists():
|
|
tmp_path.unlink()
|
|
|
|
print(f"Created annotated PCAPNG: {output_path}")
|
|
print(f"Annotated {len(annotations)} packets with process information.")
|
|
|
|
|
|
def count_unique_connections(lookup: dict) -> int:
|
|
"""Count unique connections (accounting for bidirectional storage)."""
|
|
seen = set()
|
|
count = 0
|
|
for key, conns in lookup.items():
|
|
for conn in conns:
|
|
# Create a unique identifier for each connection
|
|
conn_id = (key, conn.get("first_seen"), conn.get("pid"))
|
|
if conn_id not in seen:
|
|
seen.add(conn_id)
|
|
count += 1
|
|
return count // 2 # Divide by 2 because we store both directions
|
|
|
|
|
|
def print_summary(packets: list, lookup: dict):
|
|
"""Print a summary of process information found."""
|
|
total = len(packets)
|
|
with_pid = sum(1 for p in packets if p["pid"])
|
|
|
|
# Group by process
|
|
by_process = {}
|
|
for p in packets:
|
|
proc = p["process"] or "<unknown>"
|
|
if proc not in by_process:
|
|
by_process[proc] = {"count": 0, "pid": p["pid"]}
|
|
by_process[proc]["count"] += 1
|
|
|
|
print(f"\nSummary:")
|
|
print(f" Total packets: {total}")
|
|
print(f" Packets with process info: {with_pid} ({100*with_pid/total:.1f}%)")
|
|
print(f" Unique connections in sidecar: {count_unique_connections(lookup)}")
|
|
print(f"\nPackets by process:")
|
|
for proc, info in sorted(by_process.items(), key=lambda x: -x[1]["count"]):
|
|
pid_str = f" (PID {info['pid']})" if info["pid"] else ""
|
|
print(f" {proc}{pid_str}: {info['count']} packets")
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description="Enrich RustNet PCAP captures with process information.",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
%(prog)s capture.pcap # Show packets with process info
|
|
%(prog)s capture.pcap --format tsv # Output as TSV
|
|
%(prog)s capture.pcap --format json # Output as JSON
|
|
%(prog)s capture.pcap -o annotated.pcapng # Create annotated PCAPNG
|
|
%(prog)s capture.pcap --summary # Show summary only
|
|
%(prog)s capture.pcap --slack 5 # Use 5 second slack for timestamp matching
|
|
"""
|
|
)
|
|
parser.add_argument("pcap", type=Path, help="Path to PCAP file")
|
|
parser.add_argument("-j", "--jsonl", type=Path,
|
|
help="Path to sidecar JSONL file (default: <pcap>.connections.jsonl)")
|
|
parser.add_argument("-o", "--output", type=Path,
|
|
help="Output annotated PCAPNG file")
|
|
parser.add_argument("-f", "--format", choices=["table", "tsv", "json"], default="table",
|
|
help="Output format (default: table)")
|
|
parser.add_argument("-s", "--summary", action="store_true",
|
|
help="Show summary only")
|
|
parser.add_argument("-l", "--limit", type=int, default=0,
|
|
help="Limit number of packets to process (0 = no limit)")
|
|
parser.add_argument("--slack", type=float, default=2.0,
|
|
help="Timestamp matching slack in seconds (default: 2.0)")
|
|
|
|
args = parser.parse_args()
|
|
|
|
if not args.pcap.exists():
|
|
print(f"Error: PCAP file not found: {args.pcap}", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Default sidecar path
|
|
jsonl_path = args.jsonl or Path(f"{args.pcap}.connections.jsonl")
|
|
|
|
# Load connection mappings
|
|
lookup = load_connections(jsonl_path)
|
|
if lookup:
|
|
print(f"Loaded {count_unique_connections(lookup)} connections from {jsonl_path}", file=sys.stderr)
|
|
|
|
# Process packets
|
|
packets = list(enrich_packets(args.pcap, lookup, args.slack))
|
|
if args.limit > 0:
|
|
packets = packets[:args.limit]
|
|
|
|
if args.summary:
|
|
print_summary(packets, lookup)
|
|
return
|
|
|
|
if args.output:
|
|
create_pcapng(args.pcap, packets, args.output)
|
|
print_summary(packets, lookup)
|
|
else:
|
|
if args.format == "table":
|
|
print_table(packets)
|
|
print_summary(packets, lookup)
|
|
elif args.format == "tsv":
|
|
print_tsv(packets)
|
|
elif args.format == "json":
|
|
print_json(packets)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main() |