Line data Source code
1 : //! Defines [`RequestContext`].
2 : //!
3 : //! It is a structure that we use throughout the pageserver to propagate
4 : //! high-level context from places that _originate_ activity down to the
5 : //! shared code paths at the heart of the pageserver. It's inspired by
6 : //! Golang's `context.Context`.
7 : //!
8 : //! For example, in `Timeline::get(page_nr, lsn)` we need to answer the following questions:
9 : //! 1. What high-level activity ([`TaskKind`]) needs this page?
10 : //! We need that information as a categorical dimension for page access
11 : //! statistics, which we, in turn, need to guide layer eviction policy design.
12 : //! 2. How should we behave if, to produce the page image, we need to
13 : //! on-demand download a layer file ([`DownloadBehavior`]).
14 : //!
15 : //! [`RequestContext`] satisfies those needs.
16 : //! The current implementation is a small `struct` that is passed through
17 : //! the call chain by reference.
18 : //!
19 : //! ### Future Work
20 : //!
21 : //! However, we do not intend to stop here, since there are other needs that
22 : //! require carrying information from high to low levels of the app.
23 : //!
24 : //! Most importantly, **cancellation signaling** in response to
25 : //! 1. timeouts (page_service max response time) and
26 : //! 2. lifecycle requests (detach tenant, delete timeline).
27 : //!
28 : //! Related to that, there is sometimes a need to ensure that all tokio tasks spawned
29 : //! by the transitive callees of a request have finished. The keyword here
30 : //! is **Structured Concurrency**, and right now, we use `task_mgr` in most places,
31 : //! `TaskHandle` in some places, and careful code review around `FuturesUnordered`
32 : //! or `JoinSet` in other places.
33 : //!
34 : //! We do not yet have a systematic cancellation story in pageserver, and it is
35 : //! pretty clear that [`RequestContext`] will be responsible for that.
36 : //! So, the API already prepares for this role through the
37 : //! [`RequestContext::detached_child`] and [`RequestContext::attached_child`] methods.
38 : //! See their doc comments for details on how we will use them in the future.
39 : //!
40 : //! It is not clear whether or how we will enforce Structured Concurrency, and
41 : //! what role [`RequestContext`] will play there.
42 : //! So, the API doesn't prepare us for this topic.
43 : //!
44 : //! Other future uses of `RequestContext`:
45 : //! - Communicate compute & IO priorities (user-initiated request vs. background-loop)
46 : //! - Request IDs for distributed tracing
47 : //! - Request/Timeline/Tenant-scoped log levels
48 : //!
49 : //! RequestContext might look quite different once it supports those features.
50 : //! Likely, it will have a shape similar to Golang's `context.Context`.
51 : //!
52 : //! ### Why A Struct Instead Of Method Parameters
53 : //!
54 : //! What's typical about such information is that it needs to be passed down
55 : //! along the call chain from high level to low level, but few of the functions
56 : //! in the middle need to understand it.
57 : //! Further, it is to be expected that we will need to propagate more data
58 : //! in the future (see the earlier section on future work).
59 : //! Hence, for functions in the middle of the call chain, we have the following
60 : //! requirements:
61 : //! 1. It should be easy to forward the context to callees.
62 : //! 2. To propagate more data from high-level to low-level code, the functions in
63 : //! the middle should not need to be modified.
64 : //!
65 : //! The solution is to have a container structure ([`RequestContext`]) that
66 : //! carries the information. Functions that don't care about what's in it
67 : //! pass it along to callees.
68 : //!
69 : //! ### Why Not Task-Local Variables
70 : //!
71 : //! One could use task-local variables (the equivalent of thread-local variables)
72 : //! to address the immediate needs outlined above.
73 : //! However, we reject task-local variables because:
74 : //! 1. they are implicit, thereby making it harder to trace the data flow in code
75 : //! reviews and during debugging,
76 : //! 2. they can be mutable, which enables implicit return data flow,
77 : //! 3. they are restrictive in that code which fans out into multiple tasks,
78 : //! or even threads, needs to carefully propagate the state.
79 : //!
80 : //! In contrast, information flow with [`RequestContext`] is
81 : //! 1. always explicit,
82 : //! 2. strictly uni-directional because RequestContext is immutable,
83 : //! 3. tangible because a [`RequestContext`] is just a value.
84 : //! When creating child activities, regardless of whether it's a task,
85 : //! thread, or even an RPC to another service, the value can
86 : //! be used like any other argument.
87 : //!
88 : //! The solution is that all code paths are infected with precisely one
89 : //! [`RequestContext`] argument. Functions in the middle of the call chain
90 : //! only need to pass it on.
91 :
92 : use std::sync::Arc;
93 :
94 : use once_cell::sync::Lazy;
95 : use tracing::warn;
96 : use utils::{id::TimelineId, shard::TenantShardId};
97 :
98 : use crate::{
99 : metrics::{StorageIoSizeMetrics, TimelineMetrics},
100 : task_mgr::TaskKind,
101 : tenant::Timeline,
102 : };
103 :
104 : // The main structure of this module, see module-level comment.
105 : pub struct RequestContext {
106 : task_kind: TaskKind,
107 : download_behavior: DownloadBehavior,
108 : access_stats_behavior: AccessStatsBehavior,
109 : page_content_kind: PageContentKind,
110 : read_path_debug: bool,
111 : scope: Scope,
112 : }
113 :
114 : #[derive(Clone)]
115 : pub(crate) enum Scope {
116 : Global {
117 : io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
118 : },
119 : SecondaryTenant {
120 : io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
121 : },
122 : SecondaryTimeline {
123 : io_size_metrics: crate::metrics::StorageIoSizeMetrics,
124 : },
125 : Timeline {
126 : // We wrap the `Arc<TimelineMetrics>`s inside another Arc to avoid child
127 : // context creation contending for the ref counters of the Arc<TimelineMetrics>,
128 : // which are shared among all tasks that operate on the timeline, especially
129 : // concurrent page_service connections.
130 : #[allow(clippy::redundant_allocation)]
131 : arc_arc: Arc<Arc<TimelineMetrics>>,
132 : },
133 : #[cfg(test)]
134 : UnitTest {
135 : io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
136 : },
137 : DebugTools {
138 : io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
139 : },
140 : }
141 :
142 : static GLOBAL_IO_SIZE_METRICS: Lazy<crate::metrics::StorageIoSizeMetrics> =
143 548 : Lazy::new(|| crate::metrics::StorageIoSizeMetrics::new("*", "*", "*"));
144 :
145 : impl Scope {
146 2112 : pub(crate) fn new_global() -> Self {
147 2112 : Scope::Global {
148 2112 : io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
149 2112 : }
150 2112 : }
151 : /// NB: this allocates, so, use only at relatively long-lived roots, e.g., at start
152 : /// of a compaction iteration.
153 1836 : pub(crate) fn new_timeline(timeline: &Timeline) -> Self {
154 1836 : Scope::Timeline {
155 1836 : arc_arc: Arc::new(Arc::clone(&timeline.metrics)),
156 1836 : }
157 1836 : }
158 0 : pub(crate) fn new_page_service_pagestream(
159 0 : timeline_handle: &crate::tenant::timeline::handle::Handle<
160 0 : crate::page_service::TenantManagerTypes,
161 0 : >,
162 0 : ) -> Self {
163 0 : Scope::Timeline {
164 0 : arc_arc: Arc::clone(&timeline_handle.metrics),
165 0 : }
166 0 : }
167 0 : pub(crate) fn new_secondary_timeline(
168 0 : tenant_shard_id: &TenantShardId,
169 0 : timeline_id: &TimelineId,
170 0 : ) -> Self {
171 0 : // TODO(https://github.com/neondatabase/neon/issues/11156): secondary timelines have no infrastructure for metrics lifecycle.
172 0 :
173 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
174 0 : let shard_id = tenant_shard_id.shard_slug().to_string();
175 0 : let timeline_id = timeline_id.to_string();
176 0 :
177 0 : let io_size_metrics =
178 0 : crate::metrics::StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
179 0 : Scope::SecondaryTimeline { io_size_metrics }
180 0 : }
181 0 : pub(crate) fn new_secondary_tenant(_tenant_shard_id: &TenantShardId) -> Self {
182 0 : // Before propagating metrics via RequestContext, the labels were inferred from file path.
183 0 : // The only user of VirtualFile at tenant scope is the heatmap download & read.
184 0 : // The inferred labels for the path of the heatmap file on local disk were that of the global metric (*,*,*).
185 0 : // Thus, we do the same here, and extend that for anything secondary-tenant scoped.
186 0 : //
187 0 : // If we want to have (tenant_id, shard_id, '*') labels for secondary tenants in the future,
188 0 : // we will need to think about the metric lifecycle, i.e., remove them during secondary tenant shutdown,
189 0 : // like we do for attached timelines. (We don't have attached-tenant-scoped usage of VirtualFile
190 0 : // at this point, so, we were able to completely side-step tenant-scoped stuff there).
191 0 : Scope::SecondaryTenant {
192 0 : io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
193 0 : }
194 0 : }
195 : #[cfg(test)]
196 580 : pub(crate) fn new_unit_test() -> Self {
197 580 : Scope::UnitTest {
198 580 : io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
199 580 : }
200 580 : }
201 :
202 0 : pub(crate) fn new_debug_tools() -> Self {
203 0 : Scope::DebugTools {
204 0 : io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
205 0 : }
206 0 : }
207 : }
208 :
209 : /// The kind of access to the page cache.
210 : #[derive(Clone, Copy, PartialEq, Eq, Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
211 : pub enum PageContentKind {
212 : Unknown,
213 : DeltaLayerSummary,
214 : DeltaLayerBtreeNode,
215 : DeltaLayerValue,
216 : ImageLayerSummary,
217 : ImageLayerBtreeNode,
218 : ImageLayerValue,
219 : InMemoryLayer,
220 : }
221 :
/// How an operation should react when it cannot proceed without an
/// on-demand download of a layer file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DownloadBehavior {
    /// Perform the download; this may take a while.
    Download,

    /// Perform the download, but log a warning. Intended for code paths
    /// that expect the layer file to be present locally already.
    Warn,

    /// Do not download; fail with a PageReconstructError::NeedsDownload error.
    Error,
}
236 :
/// Controls whether a request updates the access times that LRU eviction
/// relies on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum AccessStatsBehavior {
    /// Record the access: treat it as a hint that the accessed layer is
    /// likely to be needed again.
    Update,

    /// Leave access times untouched: the request reads the layer but does
    /// not want to signal that it should stay in cache, e.g. because the
    /// requestor is a compaction routine that will soon cover this layer
    /// with another.
    Skip,
}
250 :
251 : pub struct RequestContextBuilder {
252 : inner: RequestContext,
253 : }
254 :
255 : impl RequestContextBuilder {
256 : /// A new builder with default settings
257 2112 : pub fn new(task_kind: TaskKind) -> Self {
258 2112 : Self {
259 2112 : inner: RequestContext {
260 2112 : task_kind,
261 2112 : download_behavior: DownloadBehavior::Download,
262 2112 : access_stats_behavior: AccessStatsBehavior::Update,
263 2112 : page_content_kind: PageContentKind::Unknown,
264 2112 : read_path_debug: false,
265 2112 : scope: Scope::new_global(),
266 2112 : },
267 2112 : }
268 2112 : }
269 :
270 3229880 : pub fn extend(original: &RequestContext) -> Self {
271 3229880 : Self {
272 3229880 : // This is like a Copy, but avoid implementing Copy because ordinary users of
273 3229880 : // RequestContext should always move or ref it.
274 3229880 : inner: RequestContext {
275 3229880 : task_kind: original.task_kind,
276 3229880 : download_behavior: original.download_behavior,
277 3229880 : access_stats_behavior: original.access_stats_behavior,
278 3229880 : page_content_kind: original.page_content_kind,
279 3229880 : read_path_debug: original.read_path_debug,
280 3229880 : scope: original.scope.clone(),
281 3229880 : },
282 3229880 : }
283 3229880 : }
284 :
285 1531679 : pub fn task_kind(mut self, k: TaskKind) -> Self {
286 1531679 : self.inner.task_kind = k;
287 1531679 : self
288 1531679 : }
289 :
290 : /// Configure the DownloadBehavior of the context: whether to
291 : /// download missing layers, and/or warn on the download.
292 1533235 : pub fn download_behavior(mut self, b: DownloadBehavior) -> Self {
293 1533235 : self.inner.download_behavior = b;
294 1533235 : self
295 1533235 : }
296 :
297 : /// Configure the AccessStatsBehavior of the context: whether layer
298 : /// accesses should update the access time of the layer.
299 725 : pub(crate) fn access_stats_behavior(mut self, b: AccessStatsBehavior) -> Self {
300 725 : self.inner.access_stats_behavior = b;
301 725 : self
302 725 : }
303 :
304 1695616 : pub(crate) fn page_content_kind(mut self, k: PageContentKind) -> Self {
305 1695616 : self.inner.page_content_kind = k;
306 1695616 : self
307 1695616 : }
308 :
309 0 : pub(crate) fn read_path_debug(mut self, b: bool) -> Self {
310 0 : self.inner.read_path_debug = b;
311 0 : self
312 0 : }
313 :
314 2416 : pub(crate) fn scope(mut self, s: Scope) -> Self {
315 2416 : self.inner.scope = s;
316 2416 : self
317 2416 : }
318 :
319 3231992 : pub fn build(self) -> RequestContext {
320 3231992 : self.inner
321 3231992 : }
322 : }
323 :
324 : impl RequestContext {
325 : /// Create a new RequestContext that has no parent.
326 : ///
327 : /// The function is called `new` because, once we add children
328 : /// to it using `detached_child` or `attached_child`, the context
329 : /// form a tree (not implemented yet since cancellation will be
330 : /// the first feature that requires a tree).
331 : ///
332 : /// # Future: Cancellation
333 : ///
334 : /// The only reason why a context like this one can be canceled is
335 : /// because someone explicitly canceled it.
336 : /// It has no parent, so it cannot inherit cancellation from there.
337 1532 : pub fn new(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
338 1532 : RequestContextBuilder::new(task_kind)
339 1532 : .download_behavior(download_behavior)
340 1532 : .build()
341 1532 : }
342 :
343 : /// Create a detached child context for a task that may outlive `self`.
344 : ///
345 : /// Use this when spawning new background activity that should complete
346 : /// even if the current request is canceled.
347 : ///
348 : /// # Future: Cancellation
349 : ///
350 : /// Cancellation of `self` will not propagate to the child context returned
351 : /// by this method.
352 : ///
353 : /// # Future: Structured Concurrency
354 : ///
355 : /// We could add the Future as a parameter to this function, spawn it as a task,
356 : /// and pass to the new task the child context as an argument.
357 : /// That would be an ergonomic improvement.
358 : ///
359 : /// We could make new calls to this function fail if `self` is already canceled.
360 456 : pub fn detached_child(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
361 456 : self.child_impl(task_kind, download_behavior)
362 456 : }
363 :
364 : /// Create a child of context `self` for a task that shall not outlive `self`.
365 : ///
366 : /// Use this when fanning-out work to other async tasks.
367 : ///
368 : /// # Future: Cancellation
369 : ///
370 : /// Cancelling a context will propagate to its attached children.
371 : ///
372 : /// # Future: Structured Concurrency
373 : ///
374 : /// We could add the Future as a parameter to this function, spawn it as a task,
375 : /// and track its `JoinHandle` inside the `RequestContext`.
376 : ///
377 : /// We could then provide another method to allow waiting for all child tasks
378 : /// to finish.
379 : ///
380 : /// We could make new calls to this function fail if `self` is already canceled.
381 : /// Alternatively, we could allow the creation but not spawn the task.
382 : /// The method to wait for child tasks would return an error, indicating
383 : /// that the child task was not started because the context was canceled.
384 1531223 : pub fn attached_child(&self) -> Self {
385 1531223 : self.child_impl(self.task_kind(), self.download_behavior())
386 1531223 : }
387 :
388 : /// Use this function when you should be creating a child context using
389 : /// [`attached_child`] or [`detached_child`], but your caller doesn't provide
390 : /// a context and you are unwilling to change all callers to provide one.
391 : ///
392 : /// Before we add cancellation, we should get rid of this method.
393 : ///
394 : /// [`attached_child`]: Self::attached_child
395 : /// [`detached_child`]: Self::detached_child
396 892 : pub fn todo_child(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
397 892 : Self::new(task_kind, download_behavior)
398 892 : }
399 :
400 1531679 : fn child_impl(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
401 1531679 : RequestContextBuilder::extend(self)
402 1531679 : .task_kind(task_kind)
403 1531679 : .download_behavior(download_behavior)
404 1531679 : .build()
405 1531679 : }
406 :
407 932 : pub fn with_scope_timeline(&self, timeline: &Arc<Timeline>) -> Self {
408 932 : RequestContextBuilder::extend(self)
409 932 : .scope(Scope::new_timeline(timeline))
410 932 : .build()
411 932 : }
412 :
413 0 : pub(crate) fn with_scope_page_service_pagestream(
414 0 : &self,
415 0 : timeline_handle: &crate::tenant::timeline::handle::Handle<
416 0 : crate::page_service::TenantManagerTypes,
417 0 : >,
418 0 : ) -> Self {
419 0 : RequestContextBuilder::extend(self)
420 0 : .scope(Scope::new_page_service_pagestream(timeline_handle))
421 0 : .build()
422 0 : }
423 :
424 0 : pub fn with_scope_secondary_timeline(
425 0 : &self,
426 0 : tenant_shard_id: &TenantShardId,
427 0 : timeline_id: &TimelineId,
428 0 : ) -> Self {
429 0 : RequestContextBuilder::extend(self)
430 0 : .scope(Scope::new_secondary_timeline(tenant_shard_id, timeline_id))
431 0 : .build()
432 0 : }
433 :
434 0 : pub fn with_scope_secondary_tenant(&self, tenant_shard_id: &TenantShardId) -> Self {
435 0 : RequestContextBuilder::extend(self)
436 0 : .scope(Scope::new_secondary_tenant(tenant_shard_id))
437 0 : .build()
438 0 : }
439 :
440 : #[cfg(test)]
441 580 : pub fn with_scope_unit_test(&self) -> Self {
442 580 : RequestContextBuilder::new(TaskKind::UnitTest)
443 580 : .scope(Scope::new_unit_test())
444 580 : .build()
445 580 : }
446 :
447 0 : pub fn with_scope_debug_tools(&self) -> Self {
448 0 : RequestContextBuilder::new(TaskKind::DebugTool)
449 0 : .scope(Scope::new_debug_tools())
450 0 : .build()
451 0 : }
452 :
453 3962492 : pub fn task_kind(&self) -> TaskKind {
454 3962492 : self.task_kind
455 3962492 : }
456 :
457 1531283 : pub fn download_behavior(&self) -> DownloadBehavior {
458 1531283 : self.download_behavior
459 1531283 : }
460 :
461 478498 : pub(crate) fn access_stats_behavior(&self) -> AccessStatsBehavior {
462 478498 : self.access_stats_behavior
463 478498 : }
464 :
465 1941016 : pub(crate) fn page_content_kind(&self) -> PageContentKind {
466 1941016 : self.page_content_kind
467 1941016 : }
468 :
469 0 : pub(crate) fn read_path_debug(&self) -> bool {
470 0 : self.read_path_debug
471 0 : }
472 :
473 3241894 : pub(crate) fn io_size_metrics(&self) -> &StorageIoSizeMetrics {
474 3241894 : match &self.scope {
475 0 : Scope::Global { io_size_metrics } => {
476 0 : let is_unit_test = cfg!(test);
477 0 : let is_regress_test_build = cfg!(feature = "testing");
478 0 : if is_unit_test || is_regress_test_build {
479 0 : panic!("all VirtualFile instances are timeline-scoped");
480 : } else {
481 3241894 : use once_cell::sync::Lazy;
482 3241894 : use std::sync::Mutex;
483 3241894 : use std::time::Duration;
484 3241894 : use utils::rate_limit::RateLimit;
485 3241894 : static LIMIT: Lazy<Mutex<RateLimit>> =
486 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(1))));
487 0 : let mut guard = LIMIT.lock().unwrap();
488 0 : guard.call2(|rate_limit_stats| {
489 0 : warn!(
490 : %rate_limit_stats,
491 0 : backtrace=%std::backtrace::Backtrace::force_capture(),
492 0 : "all VirtualFile instances are timeline-scoped",
493 : );
494 0 : });
495 0 :
496 0 : io_size_metrics
497 : }
498 : }
499 42828 : Scope::Timeline { arc_arc } => &arc_arc.storage_io_size,
500 0 : Scope::SecondaryTimeline { io_size_metrics } => io_size_metrics,
501 0 : Scope::SecondaryTenant { io_size_metrics } => io_size_metrics,
502 : #[cfg(test)]
503 3199066 : Scope::UnitTest { io_size_metrics } => io_size_metrics,
504 0 : Scope::DebugTools { io_size_metrics } => io_size_metrics,
505 : }
506 3241894 : }
507 : }
|