Line data Source code
1 : use std::str::FromStr;
2 :
3 : use anyhow::Context;
4 : use clap::Parser;
5 : use pageserver_api::key::Key;
6 : use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
7 : use pageserver_api::shard::{ShardCount, ShardStripeSize};
8 :
9 : #[derive(Parser)]
10 : pub(super) struct DescribeKeyCommand {
11 : /// Key material in one of the forms: hex, span attributes captured from log, reltag blocknum
12 0 : input: Vec<String>,
13 :
14 : /// The number of shards to calculate what Keys placement would be.
15 : #[arg(long)]
16 : shard_count: Option<CustomShardCount>,
17 :
18 : /// The sharding stripe size.
19 : ///
20 : /// The default is hardcoded. It makes no sense to provide this without providing
21 : /// `--shard-count`.
22 : #[arg(long, requires = "shard_count")]
23 : stripe_size: Option<u32>,
24 : }
25 :
26 : /// Sharded shard count without unsharded count, which the actual ShardCount supports.
27 : #[derive(Clone, Copy)]
28 : pub(super) struct CustomShardCount(std::num::NonZeroU8);
29 :
30 : #[derive(Debug, thiserror::Error)]
31 : pub(super) enum InvalidShardCount {
32 : #[error(transparent)]
33 : ParsingFailed(#[from] std::num::ParseIntError),
34 : #[error("too few shards")]
35 : TooFewShards,
36 : }
37 :
38 : impl FromStr for CustomShardCount {
39 : type Err = InvalidShardCount;
40 :
41 0 : fn from_str(s: &str) -> Result<Self, Self::Err> {
42 0 : let inner: std::num::NonZeroU8 = s.parse()?;
43 0 : if inner.get() < 2 {
44 0 : Err(InvalidShardCount::TooFewShards)
45 : } else {
46 0 : Ok(CustomShardCount(inner))
47 : }
48 0 : }
49 : }
50 :
51 : impl From<CustomShardCount> for ShardCount {
52 0 : fn from(value: CustomShardCount) -> Self {
53 0 : ShardCount::new(value.0.get())
54 0 : }
55 : }
56 :
57 : impl DescribeKeyCommand {
58 0 : pub(super) fn execute(self) {
59 0 : let DescribeKeyCommand {
60 0 : input,
61 0 : shard_count,
62 0 : stripe_size,
63 0 : } = self;
64 0 :
65 0 : let material = KeyMaterial::try_from(input.as_slice()).unwrap();
66 0 : let kind = material.kind();
67 0 : let key = Key::from(material);
68 0 :
69 0 : println!("parsed from {kind}: {key}:");
70 0 : println!();
71 0 : println!("{key:?}");
72 :
73 : macro_rules! kind_query {
74 : ([$($name:ident),*$(,)?]) => {{[$(kind_query!($name)),*]}};
75 : ($name:ident) => {{
76 : let s: &'static str = stringify!($name);
77 : let s = s.strip_prefix("is_").unwrap_or(s);
78 : let s = s.strip_suffix("_key").unwrap_or(s);
79 :
80 : #[allow(clippy::needless_borrow)]
81 : (s, key.$name())
82 : }};
83 : }
84 :
85 : // the current characterization is a mess of these boolean queries and separate
86 : // "recognization". I think it accurately represents how strictly we model the Key
87 : // right now, but could of course be made less confusing.
88 :
89 0 : let queries = kind_query!([
90 0 : is_rel_block_key,
91 0 : is_rel_vm_block_key,
92 0 : is_rel_fsm_block_key,
93 0 : is_slru_block_key,
94 0 : is_inherited_key,
95 0 : is_rel_size_key,
96 0 : is_slru_segment_size_key,
97 0 : ]);
98 0 :
99 0 : let recognized_kind = "recognized kind";
100 0 : let metadata_key = "metadata key";
101 0 : let shard_placement = "shard placement";
102 0 :
103 0 : let longest = queries
104 0 : .iter()
105 0 : .map(|t| t.0)
106 0 : .chain([recognized_kind, metadata_key, shard_placement])
107 0 : .map(|s| s.len())
108 0 : .max()
109 0 : .unwrap();
110 0 :
111 0 : let colon = 1;
112 0 : let padding = 1;
113 :
114 0 : for (name, is) in queries {
115 0 : let width = longest - name.len() + colon + padding;
116 0 : println!("{}{:width$}{}", name, ":", is);
117 0 : }
118 :
119 0 : let width = longest - recognized_kind.len() + colon + padding;
120 0 : println!(
121 0 : "{}{:width$}{:?}",
122 0 : recognized_kind,
123 0 : ":",
124 0 : RecognizedKeyKind::new(key),
125 0 : );
126 :
127 0 : if let Some(shard_count) = shard_count {
128 0 : // seeing the sharding placement might be confusing, so leave it out unless shard
129 0 : // count was given.
130 0 :
131 0 : let stripe_size = stripe_size.map(ShardStripeSize).unwrap_or_default();
132 0 : println!(
133 0 : "# placement with shard_count: {} and stripe_size: {}:",
134 0 : shard_count.0, stripe_size.0
135 0 : );
136 0 : let width = longest - shard_placement.len() + colon + padding;
137 0 : println!(
138 0 : "{}{:width$}{:?}",
139 0 : shard_placement,
140 0 : ":",
141 0 : pageserver_api::shard::describe(&key, shard_count.into(), stripe_size)
142 0 : );
143 0 : }
144 0 : }
145 : }
146 :
147 : /// Hand-wavy "inputs we accept" for a key.
148 : #[derive(Debug)]
149 : pub(super) enum KeyMaterial {
150 : Hex(Key),
151 : String(SpanAttributesFromLogs),
152 : Split(RelTag, BlockNumber),
153 : }
154 :
155 : impl KeyMaterial {
156 0 : fn kind(&self) -> &'static str {
157 0 : match self {
158 0 : KeyMaterial::Hex(_) => "hex",
159 0 : KeyMaterial::String(_) | KeyMaterial::Split(_, _) => "split",
160 : }
161 0 : }
162 : }
163 :
164 : impl From<KeyMaterial> for Key {
165 6 : fn from(value: KeyMaterial) -> Self {
166 6 : match value {
167 0 : KeyMaterial::Hex(key) => key,
168 3 : KeyMaterial::String(SpanAttributesFromLogs(rt, blocknum))
169 3 : | KeyMaterial::Split(rt, blocknum) => {
170 6 : pageserver_api::key::rel_block_to_key(rt, blocknum)
171 : }
172 : }
173 6 : }
174 : }
175 :
176 : impl<S: AsRef<str>> TryFrom<&[S]> for KeyMaterial {
177 : type Error = anyhow::Error;
178 :
179 8 : fn try_from(value: &[S]) -> Result<Self, Self::Error> {
180 8 : match value {
181 8 : [] => anyhow::bail!(
182 0 : "need 1..N positional arguments describing the key, try hex or a log line"
183 0 : ),
184 5 : [one] => {
185 5 : let one = one.as_ref();
186 5 :
187 5 : let key = Key::from_hex(one).map(KeyMaterial::Hex);
188 5 :
189 5 : let attrs = SpanAttributesFromLogs::from_str(one).map(KeyMaterial::String);
190 5 :
191 5 : match (key, attrs) {
192 1 : (Ok(key), _) => Ok(key),
193 3 : (_, Ok(s)) => Ok(s),
194 1 : (Err(e1), Err(e2)) => anyhow::bail!(
195 1 : "failed to parse {one:?} as hex or span attributes:\n- {e1:#}\n- {e2:#}"
196 1 : ),
197 : }
198 : }
199 3 : more => {
200 : // assume going left to right one of these is a reltag and then we find a blocknum
201 : // this works, because we don't have plain numbers at least right after reltag in
202 : // logs. for some definition of "works".
203 :
204 3 : let Some((reltag_at, reltag)) = more
205 3 : .iter()
206 3 : .map(AsRef::as_ref)
207 3 : .enumerate()
208 4 : .find_map(|(i, s)| {
209 4 : s.split_once("rel=")
210 4 : .map(|(_garbage, actual)| actual)
211 4 : .unwrap_or(s)
212 4 : .parse::<RelTag>()
213 4 : .ok()
214 4 : .map(|rt| (i, rt))
215 4 : })
216 : else {
217 0 : anyhow::bail!("found no RelTag in arguments");
218 : };
219 :
220 3 : let Some(blocknum) = more
221 3 : .iter()
222 3 : .map(AsRef::as_ref)
223 3 : .skip(reltag_at)
224 6 : .find_map(|s| {
225 6 : s.split_once("blkno=")
226 6 : .map(|(_garbage, actual)| actual)
227 6 : .unwrap_or(s)
228 6 : .parse::<BlockNumber>()
229 6 : .ok()
230 6 : })
231 : else {
232 0 : anyhow::bail!("found no blocknum in arguments");
233 : };
234 :
235 3 : Ok(KeyMaterial::Split(reltag, blocknum))
236 : }
237 : }
238 8 : }
239 : }
240 :
241 : #[derive(Debug)]
242 : pub(super) struct SpanAttributesFromLogs(RelTag, BlockNumber);
243 :
244 : impl std::str::FromStr for SpanAttributesFromLogs {
245 : type Err = anyhow::Error;
246 :
247 5 : fn from_str(s: &str) -> Result<Self, Self::Err> {
248 : // accept the span separator but do not require or fail if either is missing
249 : // "whatever{rel=1663/16389/24615 blkno=1052204 req_lsn=FFFFFFFF/FFFFFFFF}"
250 5 : let (_, reltag) = s
251 5 : .split_once("rel=")
252 5 : .ok_or_else(|| anyhow::anyhow!("cannot find 'rel='"))?;
253 3 : let reltag = reltag.split_whitespace().next().unwrap();
254 :
255 3 : let (_, blocknum) = s
256 3 : .split_once("blkno=")
257 3 : .ok_or_else(|| anyhow::anyhow!("cannot find 'blkno='"))?;
258 3 : let blocknum = blocknum.split_whitespace().next().unwrap();
259 :
260 3 : let reltag = reltag
261 3 : .parse()
262 3 : .with_context(|| format!("parse reltag from {reltag:?}"))?;
263 3 : let blocknum = blocknum
264 3 : .parse()
265 3 : .with_context(|| format!("parse blocknum from {blocknum:?}"))?;
266 :
267 3 : Ok(Self(reltag, blocknum))
268 5 : }
269 : }
270 :
271 : #[derive(Debug)]
272 : #[allow(dead_code)] // debug print is used
273 : enum RecognizedKeyKind {
274 : DbDir,
275 : ControlFile,
276 : Checkpoint,
277 : AuxFilesV1,
278 : SlruDir(Result<SlruKind, u32>),
279 : RelMap(RelTagish<2>),
280 : RelDir(RelTagish<2>),
281 : AuxFileV2(Result<AuxFileV2, utils::Hex<[u8; 16]>>),
282 : }
283 :
284 : #[derive(Debug, PartialEq)]
285 : #[allow(unused)]
286 : enum AuxFileV2 {
287 : Recognized(&'static str, utils::Hex<[u8; 13]>),
288 : OtherWithPrefix(&'static str, utils::Hex<[u8; 13]>),
289 : Other(utils::Hex<[u8; 13]>),
290 : }
291 :
292 : impl RecognizedKeyKind {
293 0 : fn new(key: Key) -> Option<Self> {
294 : use RecognizedKeyKind::{
295 : AuxFilesV1, Checkpoint, ControlFile, DbDir, RelDir, RelMap, SlruDir,
296 : };
297 :
298 0 : let slru_dir_kind = pageserver_api::key::slru_dir_kind(&key);
299 :
300 0 : Some(match key {
301 0 : pageserver_api::key::DBDIR_KEY => DbDir,
302 0 : pageserver_api::key::CONTROLFILE_KEY => ControlFile,
303 0 : pageserver_api::key::CHECKPOINT_KEY => Checkpoint,
304 0 : pageserver_api::key::AUX_FILES_KEY => AuxFilesV1,
305 0 : _ if slru_dir_kind.is_some() => SlruDir(slru_dir_kind.unwrap()),
306 0 : _ if key.field1 == 0 && key.field4 == 0 && key.field5 == 0 && key.field6 == 0 => {
307 0 : RelMap([key.field2, key.field3].into())
308 : }
309 0 : _ if key.field1 == 0 && key.field4 == 0 && key.field5 == 0 && key.field6 == 1 => {
310 0 : RelDir([key.field2, key.field3].into())
311 : }
312 0 : _ if key.is_metadata_key() => RecognizedKeyKind::AuxFileV2(
313 0 : AuxFileV2::new(key).ok_or_else(|| utils::Hex(key.to_i128().to_be_bytes())),
314 0 : ),
315 0 : _ => return None,
316 : })
317 0 : }
318 : }
319 :
320 : impl AuxFileV2 {
321 7 : fn new(key: Key) -> Option<AuxFileV2> {
322 : const EMPTY_HASH: [u8; 13] = {
323 : let mut out = [0u8; 13];
324 : let hash = pageserver::aux_file::fnv_hash(b"").to_be_bytes();
325 : let mut i = 3;
326 : while i < 16 {
327 : out[i - 3] = hash[i];
328 : i += 1;
329 : }
330 : out
331 : };
332 :
333 7 : let bytes = key.to_i128().to_be_bytes();
334 7 : let hash = utils::Hex(<[u8; 13]>::try_from(&bytes[3..]).unwrap());
335 7 :
336 7 : assert_eq!(EMPTY_HASH.len(), hash.0.len());
337 :
338 : // TODO: we could probably find the preimages for the hashes
339 :
340 7 : Some(match (bytes[1], bytes[2]) {
341 1 : (1, 1) => AuxFileV2::Recognized("pg_logical/mappings/", hash),
342 1 : (1, 2) => AuxFileV2::Recognized("pg_logical/snapshots/", hash),
343 1 : (1, 3) if hash.0 == EMPTY_HASH => {
344 1 : AuxFileV2::Recognized("pg_logical/replorigin_checkpoint", hash)
345 : }
346 1 : (2, 1) => AuxFileV2::Recognized("pg_replslot/", hash),
347 0 : (3, 1) => AuxFileV2::Recognized("pg_stat/pgstat.stat", hash),
348 1 : (1, 0xff) => AuxFileV2::OtherWithPrefix("pg_logical/", hash),
349 1 : (0xff, 0xff) => AuxFileV2::Other(hash),
350 1 : _ => return None,
351 : })
352 7 : }
353 : }
354 :
/// The leading `N` numbers of a RelTag; so far only the two-number form is used.
///
/// The `Debug` output is the numbers joined with `/` and nothing else.
struct RelTagish<const N: usize>([u32; N]);

impl<const N: usize> From<[u32; N]> for RelTagish<N> {
    /// Wraps the array as-is.
    fn from(val: [u32; N]) -> Self {
        Self(val)
    }
}

impl<const N: usize> std::fmt::Debug for RelTagish<N> {
    /// Writes the numbers in decimal, separated by `/`, without any surrounding decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use std::fmt::Write as _;

        for (position, part) in self.0.iter().enumerate() {
            // the separator goes between parts, never in front of the first one
            if position > 0 {
                f.write_char('/')?;
            }
            write!(f, "{}", part)?;
        }

        Ok(())
    }
}
379 :
#[cfg(test)]
mod tests {
    use pageserver::aux_file::encode_aux_file_key;

    use super::*;

    /// A lone hex string is taken as a hex key.
    #[test]
    fn hex_is_key_material() {
        let args = ["000000067F0000400200DF927900FFFFFFFF"];
        let m = KeyMaterial::try_from(&args[..]).unwrap();
        assert!(matches!(m, KeyMaterial::Hex(_)), "{m:?}");
    }

    /// A single argument carrying `rel=` and `blkno=` is accepted, and all spellings of the
    /// same relation block produce the same key.
    #[test]
    fn single_positional_spanalike_is_key_material() {
        // why is this needed? if you are checking many, then copypaste starts to appeal
        let strings = [
            (
                line!(),
                "2024-05-15T15:33:49.873906Z ERROR page_service_conn_main{peer_addr=A:B}:process_query{tenant_id=C timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm blkno=2 req_lsn=0/238D98C8}: error reading relation or page version: Read error: could not find data for key 000000067F00032CE5000000000000000001 (shard ShardNumber(0)) at LSN 0/1D0A16C1, request LSN 0/238D98C8, ancestor 0/0",
            ),
            (line!(), "rel=1663/208101/2620_fsm blkno=2"),
            (line!(), "rel=1663/208101/2620.1 blkno=2"),
        ];

        // the key of the first example; every later example must produce the same one
        let mut expected: Option<Key> = None;

        for (line, example) in strings {
            let key = match KeyMaterial::try_from(&[example][..]) {
                Ok(material) => Key::from(material),
                Err(e) => panic!("failed to parse example from line {line}: {e:?}"),
            };
            assert_eq!(*expected.get_or_insert(key), key);
        }

        // not supporting this is rather accidental, but I think the input parsing is lenient
        // enough already
        KeyMaterial::try_from(&["1663/208101/2620_fsm 2"][..]).unwrap_err();
    }

    /// The same attributes split over several arguments all produce the same key.
    #[test]
    fn multiple_spanlike_args() {
        let strings = [
            (
                line!(),
                &[
                    "process_query{tenant_id=C",
                    "timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm",
                    "blkno=2",
                    "req_lsn=0/238D98C8}",
                ][..],
            ),
            (line!(), &["rel=1663/208101/2620_fsm", "blkno=2"][..]),
            (line!(), &["1663/208101/2620_fsm", "2"][..]),
        ];

        // the key of the first example; every later example must produce the same one
        let mut expected: Option<Key> = None;

        for (line, example) in strings {
            let key = match KeyMaterial::try_from(example) {
                Ok(material) => Key::from(material),
                Err(e) => panic!("failed to parse example from line {line}: {e:?}"),
            };
            assert_eq!(*expected.get_or_insert(key), key);
        }
    }

    /// Encoded aux file paths are classified by their prefix bytes.
    #[test]
    fn recognized_auxfiles() {
        use AuxFileV2::*;

        let empty = [
            0x2e, 0x07, 0xbb, 0x01, 0x42, 0x62, 0xb8, 0x21, 0x75, 0x62, 0x95, 0xc5, 0x8d,
        ];
        let foobar = [
            0x62, 0x79, 0x3c, 0x64, 0xbf, 0x6f, 0x0d, 0x35, 0x97, 0xba, 0x44, 0x6f, 0x18,
        ];

        #[rustfmt::skip]
        let examples = [
            (line!(), "pg_logical/mappings/foobar", Recognized("pg_logical/mappings/", utils::Hex(foobar))),
            (line!(), "pg_logical/snapshots/foobar", Recognized("pg_logical/snapshots/", utils::Hex(foobar))),
            (line!(), "pg_logical/replorigin_checkpoint", Recognized("pg_logical/replorigin_checkpoint", utils::Hex(empty))),
            (line!(), "pg_logical/foobar", OtherWithPrefix("pg_logical/", utils::Hex(foobar))),
            (line!(), "pg_replslot/foobar", Recognized("pg_replslot/", utils::Hex(foobar))),
            (line!(), "foobar", Other(utils::Hex(foobar))),
        ];

        for (line, path, expected) in examples {
            let recognized = AuxFileV2::new(encode_aux_file_key(path))
                .unwrap_or_else(|| panic!("line {line} example failed"));

            assert_eq!(recognized, expected);
        }

        let malformed = Key::from_hex("600000102000000000000000000000000000").unwrap();
        assert_eq!(
            AuxFileV2::new(malformed),
            None,
            "example key has one too few 0 after 6 before 1"
        );
    }
}
|