LCOV - code coverage report
Current view: top level - libs/utils/src - shard.rs (source / functions) Coverage Total Hit
Test: 07bee600374ccd486c69370d0972d9035964fe68.info Lines: 81.7 % 251 205
Test Date: 2025-02-20 13:11:02 Functions: 42.4 % 92 39

            Line data    Source code
       1              : //! See `pageserver_api::shard` for description on sharding.
       2              : 
       3              : use std::{ops::RangeInclusive, str::FromStr};
       4              : 
       5              : use hex::FromHex;
       6              : use serde::{Deserialize, Serialize};
       7              : 
       8              : use crate::id::TenantId;
       9              : 
      10            0 : #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
      11              : pub struct ShardNumber(pub u8);
      12              : 
      13            0 : #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
      14              : pub struct ShardCount(pub u8);
      15              : 
      16              : /// Combination of ShardNumber and ShardCount.
      17              : ///
      18              : /// For use within the context of a particular tenant, when we need to know which shard we're
      19              : /// dealing with, but do not need to know the full ShardIdentity (because we won't be doing
      20              : /// any page->shard mapping), and do not need to know the fully qualified TenantShardId.
      21              : #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
      22              : pub struct ShardIndex {
      23              :     pub shard_number: ShardNumber,
      24              :     pub shard_count: ShardCount,
      25              : }
      26              : 
      27              : /// Formatting helper, for generating the `shard_id` label in traces.
      28              : pub struct ShardSlug<'a>(&'a TenantShardId);
      29              : 
      30              : /// TenantShardId globally identifies a particular shard in a particular tenant.
      31              : ///
      32              : /// These are written as `<TenantId>-<ShardSlug>`, for example:
      33              : ///   # The second shard in a two-shard tenant
      34              : ///   072f1291a5310026820b2fe4b2968934-0102
      35              : ///
      36              : /// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without
      37              : /// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables
      38              : /// an unsharded [`TenantShardId`] to be used interchangeably with a [`TenantId`].
      39              : ///
      40              : /// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs,
      41              : /// is both forward and backward compatible with TenantId: a legacy TenantId can be
      42              : /// decoded as a TenantShardId, and when re-encoded it will be parseable
      43              : /// as a TenantId.
      44              : #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
      45              : pub struct TenantShardId {
      46              :     pub tenant_id: TenantId,
      47              :     pub shard_number: ShardNumber,
      48              :     pub shard_count: ShardCount,
      49              : }
      50              : 
      51              : impl ShardCount {
      52              :     pub const MAX: Self = Self(u8::MAX);
      53              :     pub const MIN: Self = Self(0);
      54              : 
      55              :     /// The internal value of a ShardCount may be zero, which means "1 shard, but use
      56              :     /// legacy format for TenantShardId that excludes the shard suffix", also known
      57              :     /// as [`TenantShardId::unsharded`].
      58              :     ///
      59              :     /// This method returns the actual number of shards, i.e. if our internal value is
      60              :     /// zero, we return 1 (unsharded tenants have 1 shard).
      61      9613426 :     pub fn count(&self) -> u8 {
      62      9613426 :         if self.0 > 0 {
      63         5021 :             self.0
      64              :         } else {
      65      9608405 :             1
      66              :         }
      67      9613426 :     }
      68              : 
      69              :     /// The literal internal value: this is **not** the number of shards in the
      70              :     /// tenant, as we have a special zero value for legacy unsharded tenants.  Use
      71              :     /// [`Self::count`] if you want to know the cardinality of shards.
      72            2 :     pub fn literal(&self) -> u8 {
      73            2 :         self.0
      74            2 :     }
      75              : 
      76              :     /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but
      77              :     /// uses the legacy format for `TenantShardId`. See also the documentation for
      78              :     /// [`Self::count`].
      79            0 :     pub fn is_unsharded(&self) -> bool {
      80            0 :         self.0 == 0
      81            0 :     }
      82              : 
      83              :     /// `val` may be zero, or the number of shards in the tenant.  `val` is what
      84              :     /// [`Self::literal`] would return.
      85         9998 :     pub const fn new(val: u8) -> Self {
      86         9998 :         Self(val)
      87         9998 :     }
      88              : }
      89              : 
      90              : impl ShardNumber {
      91              :     pub const MAX: Self = Self(u8::MAX);
      92              : }
      93              : 
      94              : impl TenantShardId {
      95           53 :     pub fn unsharded(tenant_id: TenantId) -> Self {
      96           53 :         Self {
      97           53 :             tenant_id,
      98           53 :             shard_number: ShardNumber(0),
      99           53 :             shard_count: ShardCount(0),
     100           53 :         }
     101           53 :     }
     102              : 
     103              :     /// The range of all TenantShardId that belong to a particular TenantId.  This is useful when
     104              :     /// you have a BTreeMap of TenantShardId, and are querying by TenantId.
     105            0 :     pub fn tenant_range(tenant_id: TenantId) -> RangeInclusive<Self> {
     106            0 :         RangeInclusive::new(
     107            0 :             Self {
     108            0 :                 tenant_id,
     109            0 :                 shard_number: ShardNumber(0),
     110            0 :                 shard_count: ShardCount(0),
     111            0 :             },
     112            0 :             Self {
     113            0 :                 tenant_id,
     114            0 :                 shard_number: ShardNumber::MAX,
     115            0 :                 shard_count: ShardCount::MAX,
     116            0 :             },
     117            0 :         )
     118            0 :     }
     119              : 
     120            0 :     pub fn range(&self) -> RangeInclusive<Self> {
     121            0 :         RangeInclusive::new(*self, *self)
     122            0 :     }
     123              : 
     124        38038 :     pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
     125        38038 :         ShardSlug(self)
     126        38038 :     }
     127              : 
     128              :     /// Convenience for code that has special behavior on the 0th shard.
     129         1130 :     pub fn is_shard_zero(&self) -> bool {
     130         1130 :         self.shard_number == ShardNumber(0)
     131         1130 :     }
     132              : 
     133              :     /// The "unsharded" value is distinct from simply having a single shard: it represents
     134              :     /// a tenant which is not shard-aware at all, and whose storage paths will not include
     135              :     /// a shard suffix.
     136            0 :     pub fn is_unsharded(&self) -> bool {
     137            0 :         self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
     138            0 :     }
     139              : 
     140              :     /// Convenience for dropping the tenant_id and just getting the ShardIndex: this
     141              :     /// is useful when logging from code that is already in a span that includes tenant ID, to
     142              :     /// keep messages reasonably terse.
     143            0 :     pub fn to_index(&self) -> ShardIndex {
     144            0 :         ShardIndex {
     145            0 :             shard_number: self.shard_number,
     146            0 :             shard_count: self.shard_count,
     147            0 :         }
     148            0 :     }
     149              : 
     150              :     /// Calculate the children of this TenantShardId when splitting the overall tenant into
     151              :     /// the given number of shards.
     152            8 :     pub fn split(&self, new_shard_count: ShardCount) -> Vec<TenantShardId> {
     153            8 :         let effective_old_shard_count = std::cmp::max(self.shard_count.0, 1);
     154            8 :         let mut child_shards = Vec::new();
     155           48 :         for shard_number in 0..ShardNumber(new_shard_count.0).0 {
     156              :             // Key mapping is based on a round robin mapping of key hash modulo shard count,
     157              :             // so our child shards are the ones which the same keys would map to.
     158           48 :             if shard_number % effective_old_shard_count == self.shard_number.0 {
     159           44 :                 child_shards.push(TenantShardId {
     160           44 :                     tenant_id: self.tenant_id,
     161           44 :                     shard_number: ShardNumber(shard_number),
     162           44 :                     shard_count: new_shard_count,
     163           44 :                 })
     164            4 :             }
     165              :         }
     166              : 
     167            8 :         child_shards
     168            8 :     }
     169              : }
     170              : 
     171              : impl std::fmt::Display for ShardNumber {
     172            0 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     173            0 :         self.0.fmt(f)
     174            0 :     }
     175              : }
     176              : 
     177              : impl std::fmt::Display for ShardSlug<'_> {
     178        25143 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     179        25143 :         write!(
     180        25143 :             f,
     181        25143 :             "{:02x}{:02x}",
     182        25143 :             self.0.shard_number.0, self.0.shard_count.0
     183        25143 :         )
     184        25143 :     }
     185              : }
     186              : 
     187              : impl std::fmt::Display for TenantShardId {
     188        28428 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     189        28428 :         if self.shard_count != ShardCount(0) {
     190          321 :             write!(f, "{}-{}", self.tenant_id, self.shard_slug())
     191              :         } else {
     192              :             // Legacy case (shard_count == 0) -- format as just the tenant id.  Note that this
     193              :             // is distinct from the normal single shard case (shard count == 1).
     194        28107 :             self.tenant_id.fmt(f)
     195              :         }
     196        28428 :     }
     197              : }
     198              : 
     199              : impl std::fmt::Debug for TenantShardId {
     200         6264 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     201         6264 :         // Debug is the same as Display: the compact hex representation
     202         6264 :         write!(f, "{}", self)
     203         6264 :     }
     204              : }
     205              : 
     206              : impl std::str::FromStr for TenantShardId {
     207              :     type Err = hex::FromHexError;
     208              : 
     209         9244 :     fn from_str(s: &str) -> Result<Self, Self::Err> {
     210         9244 :         // Expect format: 16 byte TenantId, '-', 1 byte shard number, 1 byte shard count
     211         9244 :         if s.len() == 32 {
     212              :             // Legacy case: no shard specified
     213              :             Ok(Self {
     214         9174 :                 tenant_id: TenantId::from_str(s)?,
     215         9174 :                 shard_number: ShardNumber(0),
     216         9174 :                 shard_count: ShardCount(0),
     217              :             })
     218           70 :         } else if s.len() == 37 {
     219           70 :             let bytes = s.as_bytes();
     220           70 :             let tenant_id = TenantId::from_hex(&bytes[0..32])?;
     221           70 :             let mut shard_parts: [u8; 2] = [0u8; 2];
     222           70 :             hex::decode_to_slice(&bytes[33..37], &mut shard_parts)?;
     223           70 :             Ok(Self {
     224           70 :                 tenant_id,
     225           70 :                 shard_number: ShardNumber(shard_parts[0]),
     226           70 :                 shard_count: ShardCount(shard_parts[1]),
     227           70 :             })
     228              :         } else {
     229            0 :             Err(hex::FromHexError::InvalidStringLength)
     230              :         }
     231         9244 :     }
     232              : }
     233              : 
     234              : impl From<[u8; 18]> for TenantShardId {
     235           94 :     fn from(b: [u8; 18]) -> Self {
     236           94 :         let tenant_id_bytes: [u8; 16] = b[0..16].try_into().unwrap();
     237           94 : 
     238           94 :         Self {
     239           94 :             tenant_id: TenantId::from(tenant_id_bytes),
     240           94 :             shard_number: ShardNumber(b[16]),
     241           94 :             shard_count: ShardCount(b[17]),
     242           94 :         }
     243           94 :     }
     244              : }
     245              : 
     246              : impl ShardIndex {
     247           28 :     pub fn new(number: ShardNumber, count: ShardCount) -> Self {
     248           28 :         Self {
     249           28 :             shard_number: number,
     250           28 :             shard_count: count,
     251           28 :         }
     252           28 :     }
     253          316 :     pub fn unsharded() -> Self {
     254          316 :         Self {
     255          316 :             shard_number: ShardNumber(0),
     256          316 :             shard_count: ShardCount(0),
     257          316 :         }
     258          316 :     }
     259              : 
     260              :     /// The "unsharded" value is distinct from simply having a single shard: it represents
     261              :     /// a tenant which is not shard-aware at all, and whose storage paths will not include
     262              :     /// a shard suffix.
     263       148677 :     pub fn is_unsharded(&self) -> bool {
     264       148677 :         self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
     265       148677 :     }
     266              : 
     267              :     /// For use in constructing remote storage paths: concatenate this with a TenantId
     268              :     /// to get a fully qualified TenantShardId.
     269              :     ///
     270              :     /// Backward compat: this function returns an empty string if Self::is_unsharded, such
     271              :     /// that the legacy pre-sharding remote key format is preserved.
     272         3973 :     pub fn get_suffix(&self) -> String {
     273         3973 :         if self.is_unsharded() {
     274         3957 :             "".to_string()
     275              :         } else {
     276           16 :             format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
     277              :         }
     278         3973 :     }
     279              : }
     280              : 
     281              : impl std::fmt::Display for ShardIndex {
     282         4504 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     283         4504 :         write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
     284         4504 :     }
     285              : }
     286              : 
     287              : impl std::fmt::Debug for ShardIndex {
     288         3468 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     289         3468 :         // Debug is the same as Display: the compact hex representation
     290         3468 :         write!(f, "{}", self)
     291         3468 :     }
     292              : }
     293              : 
     294              : impl std::str::FromStr for ShardIndex {
     295              :     type Err = hex::FromHexError;
     296              : 
     297         6257 :     fn from_str(s: &str) -> Result<Self, Self::Err> {
     298         6257 :         // Expect format: 1 byte shard number, 1 byte shard count
     299         6257 :         if s.len() == 4 {
     300         6257 :             let bytes = s.as_bytes();
     301         6257 :             let mut shard_parts: [u8; 2] = [0u8; 2];
     302         6257 :             hex::decode_to_slice(bytes, &mut shard_parts)?;
     303         6257 :             Ok(Self {
     304         6257 :                 shard_number: ShardNumber(shard_parts[0]),
     305         6257 :                 shard_count: ShardCount(shard_parts[1]),
     306         6257 :             })
     307              :         } else {
     308            0 :             Err(hex::FromHexError::InvalidStringLength)
     309              :         }
     310         6257 :     }
     311              : }
     312              : 
     313              : impl From<[u8; 2]> for ShardIndex {
     314            1 :     fn from(b: [u8; 2]) -> Self {
     315            1 :         Self {
     316            1 :             shard_number: ShardNumber(b[0]),
     317            1 :             shard_count: ShardCount(b[1]),
     318            1 :         }
     319            1 :     }
     320              : }
     321              : 
     322              : impl Serialize for TenantShardId {
     323           86 :     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     324           86 :     where
     325           86 :         S: serde::Serializer,
     326           86 :     {
     327           86 :         if serializer.is_human_readable() {
     328           82 :             serializer.collect_str(self)
     329              :         } else {
     330              :             // Note: while human encoding of [`TenantShardId`] is backward and forward
     331              :             // compatible, this binary encoding is not.
     332            4 :             let mut packed: [u8; 18] = [0; 18];
     333            4 :             packed[0..16].clone_from_slice(&self.tenant_id.as_arr());
     334            4 :             packed[16] = self.shard_number.0;
     335            4 :             packed[17] = self.shard_count.0;
     336            4 : 
     337            4 :             packed.serialize(serializer)
     338              :         }
     339           86 :     }
     340              : }
     341              : 
     342              : impl<'de> Deserialize<'de> for TenantShardId {
     343           15 :     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     344           15 :     where
     345           15 :         D: serde::Deserializer<'de>,
     346           15 :     {
     347              :         struct IdVisitor {
     348              :             is_human_readable_deserializer: bool,
     349              :         }
     350              : 
     351              :         impl<'de> serde::de::Visitor<'de> for IdVisitor {
     352              :             type Value = TenantShardId;
     353              : 
     354            0 :             fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
     355            0 :                 if self.is_human_readable_deserializer {
     356            0 :                     formatter.write_str("value in form of hex string")
     357              :                 } else {
     358            0 :                     formatter.write_str("value in form of integer array([u8; 18])")
     359              :                 }
     360            0 :             }
     361              : 
     362            2 :             fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
     363            2 :             where
     364            2 :                 A: serde::de::SeqAccess<'de>,
     365            2 :             {
     366            2 :                 let s = serde::de::value::SeqAccessDeserializer::new(seq);
     367            2 :                 let id: [u8; 18] = Deserialize::deserialize(s)?;
     368            2 :                 Ok(TenantShardId::from(id))
     369            2 :             }
     370              : 
     371           13 :             fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
     372           13 :             where
     373           13 :                 E: serde::de::Error,
     374           13 :             {
     375           13 :                 TenantShardId::from_str(v).map_err(E::custom)
     376           13 :             }
     377              :         }
     378              : 
     379           15 :         if deserializer.is_human_readable() {
     380           13 :             deserializer.deserialize_str(IdVisitor {
     381           13 :                 is_human_readable_deserializer: true,
     382           13 :             })
     383              :         } else {
     384            2 :             deserializer.deserialize_tuple(
     385            2 :                 18,
     386            2 :                 IdVisitor {
     387            2 :                     is_human_readable_deserializer: false,
     388            2 :                 },
     389            2 :             )
     390              :         }
     391           15 :     }
     392              : }
     393              : 
     394              : impl Serialize for ShardIndex {
     395           34 :     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     396           34 :     where
     397           34 :         S: serde::Serializer,
     398           34 :     {
     399           34 :         if serializer.is_human_readable() {
     400           32 :             serializer.collect_str(self)
     401              :         } else {
     402              :             // Binary encoding is not used in index_part.json, but is included in anticipation of
     403              :             // switching various structures (e.g. inter-process communication, remote metadata) to more
     404              :             // compact binary encodings in future.
     405            2 :             let mut packed: [u8; 2] = [0; 2];
     406            2 :             packed[0] = self.shard_number.0;
     407            2 :             packed[1] = self.shard_count.0;
     408            2 :             packed.serialize(serializer)
     409              :         }
     410           34 :     }
     411              : }
     412              : 
     413              : impl<'de> Deserialize<'de> for ShardIndex {
     414         6257 :     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     415         6257 :     where
     416         6257 :         D: serde::Deserializer<'de>,
     417         6257 :     {
     418              :         struct IdVisitor {
     419              :             is_human_readable_deserializer: bool,
     420              :         }
     421              : 
     422              :         impl<'de> serde::de::Visitor<'de> for IdVisitor {
     423              :             type Value = ShardIndex;
     424              : 
     425            0 :             fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
     426            0 :                 if self.is_human_readable_deserializer {
     427            0 :                     formatter.write_str("value in form of hex string")
     428              :                 } else {
     429            0 :                     formatter.write_str("value in form of integer array([u8; 2])")
     430              :                 }
     431            0 :             }
     432              : 
     433            1 :             fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
     434            1 :             where
     435            1 :                 A: serde::de::SeqAccess<'de>,
     436            1 :             {
     437            1 :                 let s = serde::de::value::SeqAccessDeserializer::new(seq);
     438            1 :                 let id: [u8; 2] = Deserialize::deserialize(s)?;
     439            1 :                 Ok(ShardIndex::from(id))
     440            1 :             }
     441              : 
     442         6256 :             fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
     443         6256 :             where
     444         6256 :                 E: serde::de::Error,
     445         6256 :             {
     446         6256 :                 ShardIndex::from_str(v).map_err(E::custom)
     447         6256 :             }
     448              :         }
     449              : 
     450         6257 :         if deserializer.is_human_readable() {
     451         6256 :             deserializer.deserialize_str(IdVisitor {
     452         6256 :                 is_human_readable_deserializer: true,
     453         6256 :             })
     454              :         } else {
     455            1 :             deserializer.deserialize_tuple(
     456            1 :                 2,
     457            1 :                 IdVisitor {
     458            1 :                     is_human_readable_deserializer: false,
     459            1 :                 },
     460            1 :             )
     461              :         }
     462         6257 :     }
     463              : }
        

Generated by: LCOV version 2.1-beta