Line data Source code
1 : //! See `pageserver_api::shard` for description on sharding.
2 :
3 : use std::{ops::RangeInclusive, str::FromStr};
4 :
5 : use hex::FromHex;
6 : use serde::{Deserialize, Serialize};
7 :
8 : use crate::id::TenantId;
9 :
/// Zero-based position of a shard within its tenant, stored as a raw `u8`.
///
/// Shard zero is the first shard; see `TenantShardId::is_shard_zero`.
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
pub struct ShardNumber(pub u8);
12 :
/// Raw shard count of a tenant, stored as a `u8`.
///
/// The inner value may be zero, which marks a legacy unsharded tenant (one shard, no shard
/// suffix in its identifier). Use [`ShardCount::count`] for the effective number of shards
/// and [`ShardCount::literal`] for the stored value.
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
pub struct ShardCount(pub u8);
15 :
/// Combination of ShardNumber and ShardCount.
///
/// For use within the context of a particular tenant, when we need to know which shard we're
/// dealing with, but do not need to know the full ShardIdentity (because we won't be doing
/// any page->shard mapping), and do not need to know the fully qualified TenantShardId.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct ShardIndex {
    /// Which shard of the tenant this index refers to.
    pub shard_number: ShardNumber,
    /// Literal shard count of the tenant; zero marks the legacy unsharded case.
    pub shard_count: ShardCount,
}
26 :
/// Formatting helper, for generating the `shard_id` label in traces.
///
/// Its `Display` output is the shard number followed by the shard count, each written as
/// two lowercase hex digits (for example `0102`).
pub struct ShardSlug<'a>(&'a TenantShardId);
29 :
/// TenantShardId globally identifies a particular shard in a particular tenant.
///
/// These are written as `<TenantId>-<ShardSlug>`, for example:
///
/// ```text
/// # The second shard in a two-shard tenant
/// 072f1291a5310026820b2fe4b2968934-0102
/// ```
///
/// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without
/// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables
/// an unsharded [`TenantShardId`] to be used interchangeably with a [`TenantId`].
///
/// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs,
/// is both forward and backward compatible with TenantId: a legacy TenantId can be
/// decoded as a TenantShardId, and when re-encoded it will be parseable
/// as a TenantId.
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct TenantShardId {
    /// The tenant this shard belongs to.
    pub tenant_id: TenantId,
    /// Which shard of the tenant this is.
    pub shard_number: ShardNumber,
    /// Literal shard count of the tenant; zero marks the legacy unsharded case.
    pub shard_count: ShardCount,
}
50 :
51 : impl ShardCount {
52 : pub const MAX: Self = Self(u8::MAX);
53 : pub const MIN: Self = Self(0);
54 :
55 : /// The internal value of a ShardCount may be zero, which means "1 shard, but use
56 : /// legacy format for TenantShardId that excludes the shard suffix", also known
57 : /// as [`TenantShardId::unsharded`].
58 : ///
59 : /// This method returns the actual number of shards, i.e. if our internal value is
60 : /// zero, we return 1 (unsharded tenants have 1 shard).
61 14412665 : pub fn count(&self) -> u8 {
62 14412665 : if self.0 > 0 {
63 11 : self.0
64 : } else {
65 14412654 : 1
66 : }
67 14412665 : }
68 :
69 : /// The literal internal value: this is **not** the number of shards in the
70 : /// tenant, as we have a special zero value for legacy unsharded tenants. Use
71 : /// [`Self::count`] if you want to know the cardinality of shards.
72 2 : pub fn literal(&self) -> u8 {
73 2 : self.0
74 2 : }
75 :
76 : /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but
77 : /// uses the legacy format for `TenantShardId`. See also the documentation for
78 : /// [`Self::count`].
79 0 : pub fn is_unsharded(&self) -> bool {
80 0 : self.0 == 0
81 0 : }
82 :
83 : /// `v` may be zero, or the number of shards in the tenant. `v` is what
84 : /// [`Self::literal`] would return.
85 5737 : pub const fn new(val: u8) -> Self {
86 5737 : Self(val)
87 5737 : }
88 : }
89 :
90 : impl ShardNumber {
91 : pub const MAX: Self = Self(u8::MAX);
92 : }
93 :
94 : impl TenantShardId {
95 95 : pub fn unsharded(tenant_id: TenantId) -> Self {
96 95 : Self {
97 95 : tenant_id,
98 95 : shard_number: ShardNumber(0),
99 95 : shard_count: ShardCount(0),
100 95 : }
101 95 : }
102 :
103 : /// The range of all TenantShardId that belong to a particular TenantId. This is useful when
104 : /// you have a BTreeMap of TenantShardId, and are querying by TenantId.
105 0 : pub fn tenant_range(tenant_id: TenantId) -> RangeInclusive<Self> {
106 0 : RangeInclusive::new(
107 0 : Self {
108 0 : tenant_id,
109 0 : shard_number: ShardNumber(0),
110 0 : shard_count: ShardCount(0),
111 0 : },
112 0 : Self {
113 0 : tenant_id,
114 0 : shard_number: ShardNumber::MAX,
115 0 : shard_count: ShardCount::MAX,
116 0 : },
117 0 : )
118 0 : }
119 :
120 34010 : pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
121 34010 : ShardSlug(self)
122 34010 : }
123 :
124 : /// Convenience for code that has special behavior on the 0th shard.
125 18 : pub fn is_shard_zero(&self) -> bool {
126 18 : self.shard_number == ShardNumber(0)
127 18 : }
128 :
129 : /// The "unsharded" value is distinct from simply having a single shard: it represents
130 : /// a tenant which is not shard-aware at all, and whose storage paths will not include
131 : /// a shard suffix.
132 0 : pub fn is_unsharded(&self) -> bool {
133 0 : self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
134 0 : }
135 :
136 : /// Convenience for dropping the tenant_id and just getting the ShardIndex: this
137 : /// is useful when logging from code that is already in a span that includes tenant ID, to
138 : /// keep messages reasonably terse.
139 0 : pub fn to_index(&self) -> ShardIndex {
140 0 : ShardIndex {
141 0 : shard_number: self.shard_number,
142 0 : shard_count: self.shard_count,
143 0 : }
144 0 : }
145 :
146 : /// Calculate the children of this TenantShardId when splitting the overall tenant into
147 : /// the given number of shards.
148 4 : pub fn split(&self, new_shard_count: ShardCount) -> Vec<TenantShardId> {
149 4 : let effective_old_shard_count = std::cmp::max(self.shard_count.0, 1);
150 4 : let mut child_shards = Vec::new();
151 16 : for shard_number in 0..ShardNumber(new_shard_count.0).0 {
152 : // Key mapping is based on a round robin mapping of key hash modulo shard count,
153 : // so our child shards are the ones which the same keys would map to.
154 16 : if shard_number % effective_old_shard_count == self.shard_number.0 {
155 12 : child_shards.push(TenantShardId {
156 12 : tenant_id: self.tenant_id,
157 12 : shard_number: ShardNumber(shard_number),
158 12 : shard_count: new_shard_count,
159 12 : })
160 4 : }
161 : }
162 :
163 4 : child_shards
164 4 : }
165 : }
166 :
167 : impl<'a> std::fmt::Display for ShardSlug<'a> {
168 34010 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
169 34010 : write!(
170 34010 : f,
171 34010 : "{:02x}{:02x}",
172 34010 : self.0.shard_number.0, self.0.shard_count.0
173 34010 : )
174 34010 : }
175 : }
176 :
177 : impl std::fmt::Display for TenantShardId {
178 37098 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
179 37098 : if self.shard_count != ShardCount(0) {
180 355 : write!(f, "{}-{}", self.tenant_id, self.shard_slug())
181 : } else {
182 : // Legacy case (shard_count == 0) -- format as just the tenant id. Note that this
183 : // is distinct from the normal single shard case (shard count == 1).
184 36743 : self.tenant_id.fmt(f)
185 : }
186 37098 : }
187 : }
188 :
189 : impl std::fmt::Debug for TenantShardId {
190 9384 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
191 9384 : // Debug is the same as Display: the compact hex representation
192 9384 : write!(f, "{}", self)
193 9384 : }
194 : }
195 :
196 : impl std::str::FromStr for TenantShardId {
197 : type Err = hex::FromHexError;
198 :
199 13089 : fn from_str(s: &str) -> Result<Self, Self::Err> {
200 13089 : // Expect format: 16 byte TenantId, '-', 1 byte shard number, 1 byte shard count
201 13089 : if s.len() == 32 {
202 : // Legacy case: no shard specified
203 : Ok(Self {
204 12991 : tenant_id: TenantId::from_str(s)?,
205 12991 : shard_number: ShardNumber(0),
206 12991 : shard_count: ShardCount(0),
207 : })
208 98 : } else if s.len() == 37 {
209 98 : let bytes = s.as_bytes();
210 98 : let tenant_id = TenantId::from_hex(&bytes[0..32])?;
211 98 : let mut shard_parts: [u8; 2] = [0u8; 2];
212 98 : hex::decode_to_slice(&bytes[33..37], &mut shard_parts)?;
213 98 : Ok(Self {
214 98 : tenant_id,
215 98 : shard_number: ShardNumber(shard_parts[0]),
216 98 : shard_count: ShardCount(shard_parts[1]),
217 98 : })
218 : } else {
219 0 : Err(hex::FromHexError::InvalidStringLength)
220 : }
221 13089 : }
222 : }
223 :
224 : impl From<[u8; 18]> for TenantShardId {
225 2 : fn from(b: [u8; 18]) -> Self {
226 2 : let tenant_id_bytes: [u8; 16] = b[0..16].try_into().unwrap();
227 2 :
228 2 : Self {
229 2 : tenant_id: TenantId::from(tenant_id_bytes),
230 2 : shard_number: ShardNumber(b[16]),
231 2 : shard_count: ShardCount(b[17]),
232 2 : }
233 2 : }
234 : }
235 :
236 : impl ShardIndex {
237 0 : pub fn new(number: ShardNumber, count: ShardCount) -> Self {
238 0 : Self {
239 0 : shard_number: number,
240 0 : shard_count: count,
241 0 : }
242 0 : }
243 306 : pub fn unsharded() -> Self {
244 306 : Self {
245 306 : shard_number: ShardNumber(0),
246 306 : shard_count: ShardCount(0),
247 306 : }
248 306 : }
249 :
250 : /// The "unsharded" value is distinct from simply having a single shard: it represents
251 : /// a tenant which is not shard-aware at all, and whose storage paths will not include
252 : /// a shard suffix.
253 216401 : pub fn is_unsharded(&self) -> bool {
254 216401 : self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
255 216401 : }
256 :
257 : /// For use in constructing remote storage paths: concatenate this with a TenantId
258 : /// to get a fully qualified TenantShardId.
259 : ///
260 : /// Backward compat: this function returns an empty string if Self::is_unsharded, such
261 : /// that the legacy pre-sharding remote key format is preserved.
262 4159 : pub fn get_suffix(&self) -> String {
263 4159 : if self.is_unsharded() {
264 4135 : "".to_string()
265 : } else {
266 24 : format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
267 : }
268 4159 : }
269 : }
270 :
271 : impl std::fmt::Display for ShardIndex {
272 5992 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273 5992 : write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
274 5992 : }
275 : }
276 :
277 : impl std::fmt::Debug for ShardIndex {
278 4554 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
279 4554 : // Debug is the same as Display: the compact hex representation
280 4554 : write!(f, "{}", self)
281 4554 : }
282 : }
283 :
284 : impl std::str::FromStr for ShardIndex {
285 : type Err = hex::FromHexError;
286 :
287 9385 : fn from_str(s: &str) -> Result<Self, Self::Err> {
288 9385 : // Expect format: 1 byte shard number, 1 byte shard count
289 9385 : if s.len() == 4 {
290 9385 : let bytes = s.as_bytes();
291 9385 : let mut shard_parts: [u8; 2] = [0u8; 2];
292 9385 : hex::decode_to_slice(bytes, &mut shard_parts)?;
293 9385 : Ok(Self {
294 9385 : shard_number: ShardNumber(shard_parts[0]),
295 9385 : shard_count: ShardCount(shard_parts[1]),
296 9385 : })
297 : } else {
298 0 : Err(hex::FromHexError::InvalidStringLength)
299 : }
300 9385 : }
301 : }
302 :
303 : impl From<[u8; 2]> for ShardIndex {
304 1 : fn from(b: [u8; 2]) -> Self {
305 1 : Self {
306 1 : shard_number: ShardNumber(b[0]),
307 1 : shard_count: ShardCount(b[1]),
308 1 : }
309 1 : }
310 : }
311 :
312 : impl Serialize for TenantShardId {
313 126 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
314 126 : where
315 126 : S: serde::Serializer,
316 126 : {
317 126 : if serializer.is_human_readable() {
318 122 : serializer.collect_str(self)
319 : } else {
320 : // Note: while human encoding of [`TenantShardId`] is backward and forward
321 : // compatible, this binary encoding is not.
322 4 : let mut packed: [u8; 18] = [0; 18];
323 4 : packed[0..16].clone_from_slice(&self.tenant_id.as_arr());
324 4 : packed[16] = self.shard_number.0;
325 4 : packed[17] = self.shard_count.0;
326 4 :
327 4 : packed.serialize(serializer)
328 : }
329 126 : }
330 : }
331 :
332 : impl<'de> Deserialize<'de> for TenantShardId {
333 21 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
334 21 : where
335 21 : D: serde::Deserializer<'de>,
336 21 : {
337 : struct IdVisitor {
338 : is_human_readable_deserializer: bool,
339 : }
340 :
341 : impl<'de> serde::de::Visitor<'de> for IdVisitor {
342 : type Value = TenantShardId;
343 :
344 0 : fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
345 0 : if self.is_human_readable_deserializer {
346 0 : formatter.write_str("value in form of hex string")
347 : } else {
348 0 : formatter.write_str("value in form of integer array([u8; 18])")
349 : }
350 0 : }
351 :
352 2 : fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
353 2 : where
354 2 : A: serde::de::SeqAccess<'de>,
355 2 : {
356 2 : let s = serde::de::value::SeqAccessDeserializer::new(seq);
357 2 : let id: [u8; 18] = Deserialize::deserialize(s)?;
358 2 : Ok(TenantShardId::from(id))
359 2 : }
360 :
361 19 : fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
362 19 : where
363 19 : E: serde::de::Error,
364 19 : {
365 19 : TenantShardId::from_str(v).map_err(E::custom)
366 19 : }
367 : }
368 :
369 21 : if deserializer.is_human_readable() {
370 19 : deserializer.deserialize_str(IdVisitor {
371 19 : is_human_readable_deserializer: true,
372 19 : })
373 : } else {
374 2 : deserializer.deserialize_tuple(
375 2 : 18,
376 2 : IdVisitor {
377 2 : is_human_readable_deserializer: false,
378 2 : },
379 2 : )
380 : }
381 21 : }
382 : }
383 :
384 : impl Serialize for ShardIndex {
385 50 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
386 50 : where
387 50 : S: serde::Serializer,
388 50 : {
389 50 : if serializer.is_human_readable() {
390 48 : serializer.collect_str(self)
391 : } else {
392 : // Binary encoding is not used in index_part.json, but is included in anticipation of
393 : // switching various structures (e.g. inter-process communication, remote metadata) to more
394 : // compact binary encodings in future.
395 2 : let mut packed: [u8; 2] = [0; 2];
396 2 : packed[0] = self.shard_number.0;
397 2 : packed[1] = self.shard_count.0;
398 2 : packed.serialize(serializer)
399 : }
400 50 : }
401 : }
402 :
403 : impl<'de> Deserialize<'de> for ShardIndex {
404 9385 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
405 9385 : where
406 9385 : D: serde::Deserializer<'de>,
407 9385 : {
408 : struct IdVisitor {
409 : is_human_readable_deserializer: bool,
410 : }
411 :
412 : impl<'de> serde::de::Visitor<'de> for IdVisitor {
413 : type Value = ShardIndex;
414 :
415 0 : fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
416 0 : if self.is_human_readable_deserializer {
417 0 : formatter.write_str("value in form of hex string")
418 : } else {
419 0 : formatter.write_str("value in form of integer array([u8; 2])")
420 : }
421 0 : }
422 :
423 1 : fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
424 1 : where
425 1 : A: serde::de::SeqAccess<'de>,
426 1 : {
427 1 : let s = serde::de::value::SeqAccessDeserializer::new(seq);
428 1 : let id: [u8; 2] = Deserialize::deserialize(s)?;
429 1 : Ok(ShardIndex::from(id))
430 1 : }
431 :
432 9384 : fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
433 9384 : where
434 9384 : E: serde::de::Error,
435 9384 : {
436 9384 : ShardIndex::from_str(v).map_err(E::custom)
437 9384 : }
438 : }
439 :
440 9385 : if deserializer.is_human_readable() {
441 9384 : deserializer.deserialize_str(IdVisitor {
442 9384 : is_human_readable_deserializer: true,
443 9384 : })
444 : } else {
445 1 : deserializer.deserialize_tuple(
446 1 : 2,
447 1 : IdVisitor {
448 1 : is_human_readable_deserializer: false,
449 1 : },
450 1 : )
451 : }
452 9385 : }
453 : }
|