Line data Source code
1 : use anyhow::bail;
2 : use flate2::write::{GzDecoder, GzEncoder};
3 : use flate2::Compression;
4 : use itertools::Itertools as _;
5 : use once_cell::sync::Lazy;
6 : use pprof::protos::{Function, Line, Location, Message as _, Profile};
7 : use regex::Regex;
8 :
9 : use std::borrow::Cow;
10 : use std::collections::{HashMap, HashSet};
11 : use std::ffi::c_void;
12 : use std::io::Write as _;
13 :
14 : /// Decodes a gzip-compressed Protobuf-encoded pprof profile.
15 0 : pub fn decode(bytes: &[u8]) -> anyhow::Result<Profile> {
16 0 : let mut gz = GzDecoder::new(Vec::new());
17 0 : gz.write_all(bytes)?;
18 0 : Ok(Profile::parse_from_bytes(&gz.finish()?)?)
19 0 : }
20 :
21 : /// Encodes a pprof profile as gzip-compressed Protobuf.
22 0 : pub fn encode(profile: &Profile) -> anyhow::Result<Vec<u8>> {
23 0 : let mut gz = GzEncoder::new(Vec::new(), Compression::default());
24 0 : profile.write_to_writer(&mut gz)?;
25 0 : Ok(gz.finish()?)
26 0 : }
27 :
28 : /// Symbolizes a pprof profile using the current binary.
29 0 : pub fn symbolize(mut profile: Profile) -> anyhow::Result<Profile> {
30 0 : if !profile.function.is_empty() {
31 0 : return Ok(profile); // already symbolized
32 0 : }
33 0 :
34 0 : // Collect function names.
35 0 : let mut functions: HashMap<String, Function> = HashMap::new();
36 0 : let mut strings: HashMap<String, i64> = profile
37 0 : .string_table
38 0 : .into_iter()
39 0 : .enumerate()
40 0 : .map(|(i, s)| (s, i as i64))
41 0 : .collect();
42 0 :
43 0 : // Helper to look up or register a string.
44 0 : let mut string_id = |s: &str| -> i64 {
45 : // Don't use .entry() to avoid unnecessary allocations.
46 0 : if let Some(id) = strings.get(s) {
47 0 : return *id;
48 0 : }
49 0 : let id = strings.len() as i64;
50 0 : strings.insert(s.to_string(), id);
51 0 : id
52 0 : };
53 :
54 0 : for loc in &mut profile.location {
55 0 : if !loc.line.is_empty() {
56 0 : continue;
57 0 : }
58 0 :
59 0 : // Resolve the line and function for each location.
60 0 : backtrace::resolve(loc.address as *mut c_void, |symbol| {
61 0 : let Some(symname) = symbol.name() else {
62 0 : return;
63 : };
64 0 : let mut name = symname.to_string();
65 :
66 : // Strip the Rust monomorphization suffix from the symbol name.
67 : static SUFFIX_REGEX: Lazy<Regex> =
68 0 : Lazy::new(|| Regex::new("::h[0-9a-f]{16}$").expect("invalid regex"));
69 0 : if let Some(m) = SUFFIX_REGEX.find(&name) {
70 0 : name.truncate(m.start());
71 0 : }
72 :
73 0 : let function_id = match functions.get(&name) {
74 0 : Some(function) => function.id,
75 : None => {
76 0 : let id = functions.len() as u64 + 1;
77 0 : let system_name = String::from_utf8_lossy(symname.as_bytes());
78 0 : let filename = symbol
79 0 : .filename()
80 0 : .map(|path| path.to_string_lossy())
81 0 : .unwrap_or(Cow::Borrowed(""));
82 0 : let function = Function {
83 0 : id,
84 0 : name: string_id(&name),
85 0 : system_name: string_id(&system_name),
86 0 : filename: string_id(&filename),
87 0 : ..Default::default()
88 0 : };
89 0 : functions.insert(name, function);
90 0 : id
91 : }
92 : };
93 0 : loc.line.push(Line {
94 0 : function_id,
95 0 : line: symbol.lineno().unwrap_or(0) as i64,
96 0 : ..Default::default()
97 0 : });
98 0 : });
99 0 : }
100 :
101 : // Store the resolved functions, and mark the mapping as resolved.
102 0 : profile.function = functions.into_values().sorted_by_key(|f| f.id).collect();
103 0 : profile.string_table = strings
104 0 : .into_iter()
105 0 : .sorted_by_key(|(_, i)| *i)
106 0 : .map(|(s, _)| s)
107 0 : .collect();
108 :
109 0 : for mapping in &mut profile.mapping {
110 0 : mapping.has_functions = true;
111 0 : mapping.has_filenames = true;
112 0 : }
113 :
114 0 : Ok(profile)
115 0 : }
116 :
117 : /// Strips locations (stack frames) matching the given mappings (substring) or function names
118 : /// (regex). The function bool specifies whether child frames should be stripped as well.
119 : ///
120 : /// The string definitions are left behind in the profile for simplicity, to avoid rewriting all
121 : /// string references.
122 0 : pub fn strip_locations(
123 0 : mut profile: Profile,
124 0 : mappings: &[&str],
125 0 : functions: &[(Regex, bool)],
126 0 : ) -> Profile {
127 0 : // Strip mappings.
128 0 : let mut strip_mappings: HashSet<u64> = HashSet::new();
129 0 :
130 0 : profile.mapping.retain(|mapping| {
131 0 : let Some(name) = profile.string_table.get(mapping.filename as usize) else {
132 0 : return true;
133 : };
134 0 : if mappings.iter().any(|substr| name.contains(substr)) {
135 0 : strip_mappings.insert(mapping.id);
136 0 : return false;
137 0 : }
138 0 : true
139 0 : });
140 0 :
141 0 : // Strip functions.
142 0 : let mut strip_functions: HashMap<u64, bool> = HashMap::new();
143 0 :
144 0 : profile.function.retain(|function| {
145 0 : let Some(name) = profile.string_table.get(function.name as usize) else {
146 0 : return true;
147 : };
148 0 : for (regex, strip_children) in functions {
149 0 : if regex.is_match(name) {
150 0 : strip_functions.insert(function.id, *strip_children);
151 0 : return false;
152 0 : }
153 : }
154 0 : true
155 0 : });
156 0 :
157 0 : // Strip locations. The bool specifies whether child frames should be stripped too.
158 0 : let mut strip_locations: HashMap<u64, bool> = HashMap::new();
159 0 :
160 0 : profile.location.retain(|location| {
161 0 : for line in &location.line {
162 0 : if let Some(strip_children) = strip_functions.get(&line.function_id) {
163 0 : strip_locations.insert(location.id, *strip_children);
164 0 : return false;
165 0 : }
166 : }
167 0 : if strip_mappings.contains(&location.mapping_id) {
168 0 : strip_locations.insert(location.id, false);
169 0 : return false;
170 0 : }
171 0 : true
172 0 : });
173 :
174 : // Strip sample locations.
175 0 : for sample in &mut profile.sample {
176 : // First, find the uppermost function with child removal and truncate the stack.
177 0 : if let Some(truncate) = sample
178 0 : .location_id
179 0 : .iter()
180 0 : .rposition(|id| strip_locations.get(id) == Some(&true))
181 0 : {
182 0 : sample.location_id.drain(..=truncate);
183 0 : }
184 : // Next, strip any individual frames without child removal.
185 0 : sample
186 0 : .location_id
187 0 : .retain(|id| !strip_locations.contains_key(id));
188 0 : }
189 :
190 0 : profile
191 0 : }
192 :
193 : /// Generates an SVG flamegraph from a symbolized pprof profile.
194 0 : pub fn flamegraph(
195 0 : profile: Profile,
196 0 : opts: &mut inferno::flamegraph::Options,
197 0 : ) -> anyhow::Result<Vec<u8>> {
198 0 : if profile.mapping.iter().any(|m| !m.has_functions) {
199 0 : bail!("profile not symbolized");
200 0 : }
201 0 :
202 0 : // Index locations, functions, and strings.
203 0 : let locations: HashMap<u64, Location> =
204 0 : profile.location.into_iter().map(|l| (l.id, l)).collect();
205 0 : let functions: HashMap<u64, Function> =
206 0 : profile.function.into_iter().map(|f| (f.id, f)).collect();
207 0 : let strings = profile.string_table;
208 0 :
209 0 : // Resolve stacks as function names, and sum sample values per stack. Also reverse the stack,
210 0 : // since inferno expects it bottom-up.
211 0 : let mut stacks: HashMap<Vec<&str>, i64> = HashMap::new();
212 0 : for sample in profile.sample {
213 0 : let mut stack = Vec::with_capacity(sample.location_id.len());
214 0 : for location in sample.location_id.into_iter().rev() {
215 0 : let Some(location) = locations.get(&location) else {
216 0 : bail!("missing location {location}");
217 : };
218 0 : for line in location.line.iter().rev() {
219 0 : let Some(function) = functions.get(&line.function_id) else {
220 0 : bail!("missing function {}", line.function_id);
221 : };
222 0 : let Some(name) = strings.get(function.name as usize) else {
223 0 : bail!("missing string {}", function.name);
224 : };
225 0 : stack.push(name.as_str());
226 : }
227 : }
228 0 : let Some(&value) = sample.value.first() else {
229 0 : bail!("missing value");
230 : };
231 0 : *stacks.entry(stack).or_default() += value;
232 : }
233 :
234 : // Construct stack lines for inferno.
235 0 : let lines = stacks
236 0 : .into_iter()
237 0 : .map(|(stack, value)| (stack.into_iter().join(";"), value))
238 0 : .map(|(stack, value)| format!("{stack} {value}"))
239 0 : .sorted()
240 0 : .collect_vec();
241 0 :
242 0 : // Construct the flamegraph.
243 0 : let mut bytes = Vec::new();
244 0 : let lines = lines.iter().map(|line| line.as_str());
245 0 : inferno::flamegraph::from_lines(opts, lines, &mut bytes)?;
246 0 : Ok(bytes)
247 0 : }
|