Line data Source code
1 : use std::time::SystemTime;
2 : use utils::{serde_percent::Percent, serde_system_time};
3 :
/// Pageserver current utilization and scoring for how good a candidate the pageserver would be
/// for the next tenant.
///
/// See and maintain pageserver openapi spec for `/v1/utilization_score` as the truth.
///
/// `format: int64` fields must use `ser_saturating_u63` because openapi generated clients might
/// not handle full u64 values properly.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
pub struct PageserverUtilization {
    /// Used disk space (physical, ground truth from statfs())
    #[serde(serialize_with = "ser_saturating_u63")]
    pub disk_usage_bytes: u64,
    /// Free disk space
    #[serde(serialize_with = "ser_saturating_u63")]
    pub free_space_bytes: u64,

    /// Wanted disk space, based on the tenant shards currently present on this pageserver: this
    /// is like disk_usage_bytes, but it is stable and does not change with the cache state of
    /// tenants, whereas disk_usage_bytes may reach the disk eviction `max_usage_pct` and stay
    /// there, or may be unrealistically low if the pageserver has attached tenants which haven't
    /// downloaded layers yet.
    ///
    /// Defaults to 0 when the field is absent on deserialization.
    #[serde(serialize_with = "ser_saturating_u63", default)]
    pub disk_wanted_bytes: u64,

    /// What proportion of total disk space will this pageserver use before it starts evicting
    /// data?
    ///
    /// When the field is absent on deserialization, the value comes from `unity_percent`.
    #[serde(default = "unity_percent")]
    pub disk_usable_pct: Percent,

    /// How many shards are currently on this node?
    ///
    /// Defaults to 0 when the field is absent on deserialization.
    #[serde(default)]
    pub shard_count: u32,

    /// How many shards should this node be able to handle at most?
    ///
    /// Defaults to 0 when the field is absent on deserialization.
    #[serde(default)]
    pub max_shard_count: u32,

    /// Cached result of [`Self::score`]
    pub utilization_score: u64,

    /// When was this snapshot captured, pageserver local time.
    ///
    /// Use millis to give confidence that the value is regenerated often enough.
    pub captured_at: serde_system_time::SystemTime,
}
48 :
49 0 : fn unity_percent() -> Percent {
50 0 : Percent::new(0).unwrap()
51 0 : }
52 :
53 : impl PageserverUtilization {
54 : const UTILIZATION_FULL: u64 = 1000000;
55 :
56 : /// Calculate a utilization score. The result is to be inrepreted as a fraction of
57 : /// Self::UTILIZATION_FULL.
58 : ///
59 : /// Lower values are more affine to scheduling more work on this node.
60 : /// - UTILIZATION_FULL represents an ideal node which is fully utilized but should not receive any more work.
61 : /// - 0.0 represents an empty node.
62 : /// - Negative values are forbidden
63 : /// - Values over UTILIZATION_FULL indicate an overloaded node, which may show degraded performance due to
64 : /// layer eviction.
65 0 : pub fn score(&self) -> u64 {
66 0 : let disk_usable_capacity = ((self.disk_usage_bytes + self.free_space_bytes)
67 0 : * self.disk_usable_pct.get() as u64)
68 0 : / 100;
69 0 : let disk_utilization_score =
70 0 : self.disk_wanted_bytes * Self::UTILIZATION_FULL / disk_usable_capacity;
71 0 :
72 0 : let shard_utilization_score =
73 0 : self.shard_count as u64 * Self::UTILIZATION_FULL / self.max_shard_count as u64;
74 0 : std::cmp::max(disk_utilization_score, shard_utilization_score)
75 0 : }
76 :
77 0 : pub fn refresh_score(&mut self) {
78 0 : self.utilization_score = self.score();
79 0 : }
80 :
81 : /// A utilization structure that has a full utilization score: use this as a placeholder when
82 : /// you need a utilization but don't have real values yet.
83 0 : pub fn full() -> Self {
84 0 : Self {
85 0 : disk_usage_bytes: 1,
86 0 : free_space_bytes: 0,
87 0 : disk_wanted_bytes: 1,
88 0 : disk_usable_pct: Percent::new(100).unwrap(),
89 0 : shard_count: 1,
90 0 : max_shard_count: 1,
91 0 : utilization_score: Self::UTILIZATION_FULL,
92 0 : captured_at: serde_system_time::SystemTime(SystemTime::now()),
93 0 : }
94 0 : }
95 : }
96 :
97 : /// openapi knows only `format: int64`, so avoid outputting a non-parseable value by generated clients.
98 : ///
99 : /// Instead of newtype, use this because a newtype would get require handling deserializing values
100 : /// with the highest bit set which is properly parsed by serde formats, but would create a
101 : /// conundrum on how to handle and again serialize such values at type level. It will be a few
102 : /// years until we can use more than `i64::MAX` bytes on a disk.
103 6 : fn ser_saturating_u63<S: serde::Serializer>(value: &u64, serializer: S) -> Result<S::Ok, S::Error> {
104 6 : const MAX_FORMAT_INT64: u64 = i64::MAX as u64;
105 6 :
106 6 : let value = (*value).min(MAX_FORMAT_INT64);
107 6 :
108 6 : serializer.serialize_u64(value)
109 6 : }
110 :
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;

    /// Byte counts of `u64::MAX` must appear in the JSON output clamped to `i64::MAX`, and all
    /// fields must be emitted in declaration order.
    #[test]
    fn u64_max_is_serialized_as_u63_max() {
        // Fixed timestamp so the serialized form is fully deterministic.
        let captured_at = serde_system_time::SystemTime(
            std::time::SystemTime::UNIX_EPOCH + Duration::from_secs(1708509779),
        );

        let utilization = PageserverUtilization {
            disk_usage_bytes: u64::MAX,
            free_space_bytes: 0,
            disk_wanted_bytes: u64::MAX,
            disk_usable_pct: Percent::new(90).unwrap(),
            shard_count: 100,
            max_shard_count: 200,
            utilization_score: 13,
            captured_at,
        };

        let expected = "{\"disk_usage_bytes\":9223372036854775807,\"free_space_bytes\":0,\"disk_wanted_bytes\":9223372036854775807,\"disk_usable_pct\":90,\"shard_count\":100,\"max_shard_count\":200,\"utilization_score\":13,\"captured_at\":\"2024-02-21T10:02:59.000Z\"}";

        let serialized = serde_json::to_string(&utilization).unwrap();

        assert_eq!(serialized, expected);
    }
}
|