Line data Source code
1 : //! See `pageserver_api::shard` for description on sharding.
2 :
3 : use std::ops::RangeInclusive;
4 : use std::str::FromStr;
5 :
6 : use hex::FromHex;
7 : use serde::{Deserialize, Serialize};
8 :
9 : use crate::id::TenantId;
10 :
11 0 : #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
12 : pub struct ShardNumber(pub u8);
13 :
14 0 : #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
15 : pub struct ShardCount(pub u8);
16 :
17 : /// Combination of ShardNumber and ShardCount.
18 : ///
19 : /// For use within the context of a particular tenant, when we need to know which shard we're
20 : /// dealing with, but do not need to know the full ShardIdentity (because we won't be doing
21 : /// any page->shard mapping), and do not need to know the fully qualified TenantShardId.
22 : #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
23 : pub struct ShardIndex {
24 : pub shard_number: ShardNumber,
25 : pub shard_count: ShardCount,
26 : }
27 :
28 : /// Formatting helper, for generating the `shard_id` label in traces.
29 : pub struct ShardSlug<'a>(&'a TenantShardId);
30 :
31 : /// TenantShardId globally identifies a particular shard in a particular tenant.
32 : ///
33 : /// These are written as `<TenantId>-<ShardSlug>`, for example:
34 : /// # The second shard in a two-shard tenant
35 : /// 072f1291a5310026820b2fe4b2968934-0102
36 : ///
37 : /// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without
38 : /// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables
39 : /// an unsharded [`TenantShardId`] to be used interchangably with a [`TenantId`].
40 : ///
41 : /// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs,
42 : /// is both forward and backward compatible with TenantId: a legacy TenantId can be
43 : /// decoded as a TenantShardId, and when re-encoded it will be parseable
44 : /// as a TenantId.
45 : #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
46 : pub struct TenantShardId {
47 : pub tenant_id: TenantId,
48 : pub shard_number: ShardNumber,
49 : pub shard_count: ShardCount,
50 : }
51 :
52 : impl ShardCount {
53 : pub const MAX: Self = Self(u8::MAX);
54 : pub const MIN: Self = Self(0);
55 :
56 : /// The internal value of a ShardCount may be zero, which means "1 shard, but use
57 : /// legacy format for TenantShardId that excludes the shard suffix", also known
58 : /// as [`TenantShardId::unsharded`].
59 : ///
60 : /// This method returns the actual number of shards, i.e. if our internal value is
61 : /// zero, we return 1 (unsharded tenants have 1 shard).
62 2407220 : pub fn count(&self) -> u8 {
63 2407220 : if self.0 > 0 { self.0 } else { 1 }
64 2407220 : }
65 :
66 : /// The literal internal value: this is **not** the number of shards in the
67 : /// tenant, as we have a special zero value for legacy unsharded tenants. Use
68 : /// [`Self::count`] if you want to know the cardinality of shards.
69 2 : pub fn literal(&self) -> u8 {
70 2 : self.0
71 2 : }
72 :
73 : /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but
74 : /// uses the legacy format for `TenantShardId`. See also the documentation for
75 : /// [`Self::count`].
76 0 : pub fn is_unsharded(&self) -> bool {
77 0 : self.0 == 0
78 0 : }
79 :
80 : /// `v` may be zero, or the number of shards in the tenant. `v` is what
81 : /// [`Self::literal`] would return.
82 10496 : pub const fn new(val: u8) -> Self {
83 10496 : Self(val)
84 10496 : }
85 : }
86 :
87 : impl ShardNumber {
88 : pub const MAX: Self = Self(u8::MAX);
89 : }
90 :
91 : impl TenantShardId {
92 46 : pub fn unsharded(tenant_id: TenantId) -> Self {
93 46 : Self {
94 46 : tenant_id,
95 46 : shard_number: ShardNumber(0),
96 46 : shard_count: ShardCount(0),
97 46 : }
98 46 : }
99 :
100 : /// The range of all TenantShardId that belong to a particular TenantId. This is useful when
101 : /// you have a BTreeMap of TenantShardId, and are querying by TenantId.
102 0 : pub fn tenant_range(tenant_id: TenantId) -> RangeInclusive<Self> {
103 0 : RangeInclusive::new(
104 0 : Self {
105 0 : tenant_id,
106 0 : shard_number: ShardNumber(0),
107 0 : shard_count: ShardCount(0),
108 0 : },
109 0 : Self {
110 0 : tenant_id,
111 0 : shard_number: ShardNumber::MAX,
112 0 : shard_count: ShardCount::MAX,
113 0 : },
114 : )
115 0 : }
116 :
117 0 : pub fn range(&self) -> RangeInclusive<Self> {
118 0 : RangeInclusive::new(*self, *self)
119 0 : }
120 :
121 19006 : pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
122 19006 : ShardSlug(self)
123 19006 : }
124 :
125 : /// Convenience for code that has special behavior on the 0th shard.
126 310 : pub fn is_shard_zero(&self) -> bool {
127 310 : self.shard_number == ShardNumber(0)
128 310 : }
129 :
130 : /// The "unsharded" value is distinct from simply having a single shard: it represents
131 : /// a tenant which is not shard-aware at all, and whose storage paths will not include
132 : /// a shard suffix.
133 0 : pub fn is_unsharded(&self) -> bool {
134 0 : self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
135 0 : }
136 :
137 : /// Convenience for dropping the tenant_id and just getting the ShardIndex: this
138 : /// is useful when logging from code that is already in a span that includes tenant ID, to
139 : /// keep messages reasonably terse.
140 0 : pub fn to_index(&self) -> ShardIndex {
141 0 : ShardIndex {
142 0 : shard_number: self.shard_number,
143 0 : shard_count: self.shard_count,
144 0 : }
145 0 : }
146 :
147 : /// Calculate the children of this TenantShardId when splitting the overall tenant into
148 : /// the given number of shards.
149 5 : pub fn split(&self, new_shard_count: ShardCount) -> Vec<TenantShardId> {
150 5 : let effective_old_shard_count = std::cmp::max(self.shard_count.0, 1);
151 5 : let mut child_shards = Vec::new();
152 24 : for shard_number in 0..ShardNumber(new_shard_count.0).0 {
153 : // Key mapping is based on a round robin mapping of key hash modulo shard count,
154 : // so our child shards are the ones which the same keys would map to.
155 24 : if shard_number % effective_old_shard_count == self.shard_number.0 {
156 20 : child_shards.push(TenantShardId {
157 20 : tenant_id: self.tenant_id,
158 20 : shard_number: ShardNumber(shard_number),
159 20 : shard_count: new_shard_count,
160 20 : })
161 4 : }
162 : }
163 :
164 5 : child_shards
165 5 : }
166 : }
167 :
168 : impl std::fmt::Display for ShardNumber {
169 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170 0 : self.0.fmt(f)
171 0 : }
172 : }
173 :
174 : impl std::fmt::Display for ShardCount {
175 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176 0 : self.0.fmt(f)
177 0 : }
178 : }
179 :
180 : impl std::fmt::Display for ShardSlug<'_> {
181 6103 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
182 6103 : write!(
183 6103 : f,
184 6103 : "{:02x}{:02x}",
185 : self.0.shard_number.0, self.0.shard_count.0
186 : )
187 6103 : }
188 : }
189 :
190 : impl std::fmt::Display for TenantShardId {
191 11744 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192 11744 : if self.shard_count != ShardCount(0) {
193 118 : write!(f, "{}-{}", self.tenant_id, self.shard_slug())
194 : } else {
195 : // Legacy case (shard_count == 0) -- format as just the tenant id. Note that this
196 : // is distinct from the normal single shard case (shard count == 1).
197 11626 : self.tenant_id.fmt(f)
198 : }
199 11744 : }
200 : }
201 :
202 : impl std::fmt::Debug for TenantShardId {
203 5681 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
204 : // Debug is the same as Display: the compact hex representation
205 5681 : write!(f, "{self}")
206 5681 : }
207 : }
208 :
209 : impl std::str::FromStr for TenantShardId {
210 : type Err = hex::FromHexError;
211 :
212 16 : fn from_str(s: &str) -> Result<Self, Self::Err> {
213 : // Expect format: 16 byte TenantId, '-', 1 byte shard number, 1 byte shard count
214 16 : if s.len() == 32 {
215 : // Legacy case: no shard specified
216 : Ok(Self {
217 13 : tenant_id: TenantId::from_str(s)?,
218 13 : shard_number: ShardNumber(0),
219 13 : shard_count: ShardCount(0),
220 : })
221 3 : } else if s.len() == 37 {
222 3 : let bytes = s.as_bytes();
223 3 : let tenant_id = TenantId::from_hex(&bytes[0..32])?;
224 3 : let mut shard_parts: [u8; 2] = [0u8; 2];
225 3 : hex::decode_to_slice(&bytes[33..37], &mut shard_parts)?;
226 3 : Ok(Self {
227 3 : tenant_id,
228 3 : shard_number: ShardNumber(shard_parts[0]),
229 3 : shard_count: ShardCount(shard_parts[1]),
230 3 : })
231 : } else {
232 0 : Err(hex::FromHexError::InvalidStringLength)
233 : }
234 16 : }
235 : }
236 :
237 : impl From<[u8; 18]> for TenantShardId {
238 25 : fn from(b: [u8; 18]) -> Self {
239 25 : let tenant_id_bytes: [u8; 16] = b[0..16].try_into().unwrap();
240 :
241 25 : Self {
242 25 : tenant_id: TenantId::from(tenant_id_bytes),
243 25 : shard_number: ShardNumber(b[16]),
244 25 : shard_count: ShardCount(b[17]),
245 25 : }
246 25 : }
247 : }
248 :
249 : impl ShardIndex {
250 7 : pub fn new(number: ShardNumber, count: ShardCount) -> Self {
251 7 : Self {
252 7 : shard_number: number,
253 7 : shard_count: count,
254 7 : }
255 7 : }
256 83 : pub fn unsharded() -> Self {
257 83 : Self {
258 83 : shard_number: ShardNumber(0),
259 83 : shard_count: ShardCount(0),
260 83 : }
261 83 : }
262 :
263 : /// The "unsharded" value is distinct from simply having a single shard: it represents
264 : /// a tenant which is not shard-aware at all, and whose storage paths will not include
265 : /// a shard suffix.
266 37678 : pub fn is_unsharded(&self) -> bool {
267 37678 : self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
268 37678 : }
269 :
270 : /// For use in constructing remote storage paths: concatenate this with a TenantId
271 : /// to get a fully qualified TenantShardId.
272 : ///
273 : /// Backward compat: this function returns an empty string if Self::is_unsharded, such
274 : /// that the legacy pre-sharding remote key format is preserved.
275 1076 : pub fn get_suffix(&self) -> String {
276 1076 : if self.is_unsharded() {
277 1070 : "".to_string()
278 : } else {
279 6 : format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
280 : }
281 1076 : }
282 : }
283 :
284 : impl std::fmt::Display for ShardIndex {
285 1171 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
286 1171 : write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
287 1171 : }
288 : }
289 :
290 : impl std::fmt::Debug for ShardIndex {
291 895 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
292 : // Debug is the same as Display: the compact hex representation
293 895 : write!(f, "{self}")
294 895 : }
295 : }
296 :
297 : impl std::str::FromStr for ShardIndex {
298 : type Err = hex::FromHexError;
299 :
300 1565 : fn from_str(s: &str) -> Result<Self, Self::Err> {
301 : // Expect format: 1 byte shard number, 1 byte shard count
302 1565 : if s.len() == 4 {
303 1565 : let bytes = s.as_bytes();
304 1565 : let mut shard_parts: [u8; 2] = [0u8; 2];
305 1565 : hex::decode_to_slice(bytes, &mut shard_parts)?;
306 1565 : Ok(Self {
307 1565 : shard_number: ShardNumber(shard_parts[0]),
308 1565 : shard_count: ShardCount(shard_parts[1]),
309 1565 : })
310 : } else {
311 0 : Err(hex::FromHexError::InvalidStringLength)
312 : }
313 1565 : }
314 : }
315 :
316 : impl From<[u8; 2]> for ShardIndex {
317 1 : fn from(b: [u8; 2]) -> Self {
318 1 : Self {
319 1 : shard_number: ShardNumber(b[0]),
320 1 : shard_count: ShardCount(b[1]),
321 1 : }
322 1 : }
323 : }
324 :
325 : impl Serialize for TenantShardId {
326 35 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
327 35 : where
328 35 : S: serde::Serializer,
329 : {
330 35 : if serializer.is_human_readable() {
331 31 : serializer.collect_str(self)
332 : } else {
333 : // Note: while human encoding of [`TenantShardId`] is backward and forward
334 : // compatible, this binary encoding is not.
335 4 : let mut packed: [u8; 18] = [0; 18];
336 4 : packed[0..16].clone_from_slice(&self.tenant_id.as_arr());
337 4 : packed[16] = self.shard_number.0;
338 4 : packed[17] = self.shard_count.0;
339 :
340 4 : packed.serialize(serializer)
341 : }
342 0 : }
343 : }
344 :
345 : impl<'de> Deserialize<'de> for TenantShardId {
346 6 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
347 6 : where
348 6 : D: serde::Deserializer<'de>,
349 : {
350 : struct IdVisitor {
351 : is_human_readable_deserializer: bool,
352 : }
353 :
354 : impl<'de> serde::de::Visitor<'de> for IdVisitor {
355 : type Value = TenantShardId;
356 :
357 0 : fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
358 0 : if self.is_human_readable_deserializer {
359 0 : formatter.write_str("value in form of hex string")
360 : } else {
361 0 : formatter.write_str("value in form of integer array([u8; 18])")
362 : }
363 0 : }
364 :
365 2 : fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
366 2 : where
367 2 : A: serde::de::SeqAccess<'de>,
368 : {
369 2 : let s = serde::de::value::SeqAccessDeserializer::new(seq);
370 2 : let id: [u8; 18] = Deserialize::deserialize(s)?;
371 2 : Ok(TenantShardId::from(id))
372 0 : }
373 :
374 4 : fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
375 4 : where
376 4 : E: serde::de::Error,
377 : {
378 4 : TenantShardId::from_str(v).map_err(E::custom)
379 0 : }
380 : }
381 :
382 6 : if deserializer.is_human_readable() {
383 4 : deserializer.deserialize_str(IdVisitor {
384 4 : is_human_readable_deserializer: true,
385 4 : })
386 : } else {
387 2 : deserializer.deserialize_tuple(
388 : 18,
389 2 : IdVisitor {
390 2 : is_human_readable_deserializer: false,
391 2 : },
392 : )
393 : }
394 0 : }
395 : }
396 :
397 : impl Serialize for ShardIndex {
398 16 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
399 16 : where
400 16 : S: serde::Serializer,
401 : {
402 16 : if serializer.is_human_readable() {
403 14 : serializer.collect_str(self)
404 : } else {
405 : // Binary encoding is not used in index_part.json, but is included in anticipation of
406 : // switching various structures (e.g. inter-process communication, remote metadata) to more
407 : // compact binary encodings in future.
408 2 : let mut packed: [u8; 2] = [0; 2];
409 2 : packed[0] = self.shard_number.0;
410 2 : packed[1] = self.shard_count.0;
411 2 : packed.serialize(serializer)
412 : }
413 0 : }
414 : }
415 :
416 : impl<'de> Deserialize<'de> for ShardIndex {
417 1565 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
418 1565 : where
419 1565 : D: serde::Deserializer<'de>,
420 : {
421 : struct IdVisitor {
422 : is_human_readable_deserializer: bool,
423 : }
424 :
425 : impl<'de> serde::de::Visitor<'de> for IdVisitor {
426 : type Value = ShardIndex;
427 :
428 0 : fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
429 0 : if self.is_human_readable_deserializer {
430 0 : formatter.write_str("value in form of hex string")
431 : } else {
432 0 : formatter.write_str("value in form of integer array([u8; 2])")
433 : }
434 0 : }
435 :
436 1 : fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
437 1 : where
438 1 : A: serde::de::SeqAccess<'de>,
439 : {
440 1 : let s = serde::de::value::SeqAccessDeserializer::new(seq);
441 1 : let id: [u8; 2] = Deserialize::deserialize(s)?;
442 1 : Ok(ShardIndex::from(id))
443 0 : }
444 :
445 1564 : fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
446 1564 : where
447 1564 : E: serde::de::Error,
448 : {
449 1564 : ShardIndex::from_str(v).map_err(E::custom)
450 0 : }
451 : }
452 :
453 1565 : if deserializer.is_human_readable() {
454 1564 : deserializer.deserialize_str(IdVisitor {
455 1564 : is_human_readable_deserializer: true,
456 1564 : })
457 : } else {
458 1 : deserializer.deserialize_tuple(
459 : 2,
460 1 : IdVisitor {
461 1 : is_human_readable_deserializer: false,
462 1 : },
463 : )
464 : }
465 0 : }
466 : }
|