Line data Source code
1 : mod auth;
2 : pub mod basebackup;
3 : pub mod config;
4 : pub mod consumption_metrics;
5 : pub mod context;
6 : pub mod disk_usage_eviction_task;
7 : pub mod http;
8 : pub mod import_datadir;
9 : pub mod keyspace;
10 : pub mod metrics;
11 : pub mod page_cache;
12 : pub mod page_service;
13 : pub mod pgdatadir_mapping;
14 : pub mod repository;
15 : pub(crate) mod statvfs;
16 : pub mod task_mgr;
17 : pub mod tenant;
18 : pub mod trace;
19 : pub mod virtual_file;
20 : pub mod walingest;
21 : pub mod walrecord;
22 : pub mod walredo;
23 :
24 : pub mod failpoint_support;
25 :
26 : use std::path::Path;
27 :
28 : use crate::task_mgr::TaskKind;
29 : use tracing::info;
30 :
31 : /// Current storage format version
32 : ///
33 : /// This is embedded in the header of all the layer files.
34 : /// If you make any backwards-incompatible changes to the storage
35 : /// format, bump this!
36 : /// Note that TimelineMetadata uses its own version number to track
37 : /// backwards-compatible changes to the metadata format.
38 : pub const STORAGE_FORMAT_VERSION: u16 = 3;
39 :
40 : pub const DEFAULT_PG_VERSION: u32 = 15;
41 :
42 : // Magic constants used to identify different kinds of files
43 : pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
44 : pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
45 :
46 : static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);
47 :
48 : pub use crate::metrics::preinitialize_metrics;
49 :
50 444 : #[tracing::instrument]
51 : pub async fn shutdown_pageserver(exit_code: i32) {
52 : use std::time::Duration;
53 : // Shut down the libpq endpoint task. This prevents new connections from
54 : // being accepted.
55 : timed(
56 : task_mgr::shutdown_tasks(Some(TaskKind::LibpqEndpointListener), None, None),
57 : "shutdown LibpqEndpointListener",
58 : Duration::from_secs(1),
59 : )
60 : .await;
61 :
62 : // Shut down any page service tasks.
63 : timed(
64 : task_mgr::shutdown_tasks(Some(TaskKind::PageRequestHandler), None, None),
65 : "shutdown PageRequestHandlers",
66 : Duration::from_secs(1),
67 : )
68 : .await;
69 :
70 : // Shut down all the tenants. This flushes everything to disk and kills
71 : // the checkpoint and GC tasks.
72 : timed(
73 : tenant::mgr::shutdown_all_tenants(),
74 : "shutdown all tenants",
75 : Duration::from_secs(5),
76 : )
77 : .await;
78 :
79 : // Shut down the HTTP endpoint last, so that you can still check the server's
80 : // status while it's shutting down.
81 : // FIXME: We should probably stop accepting commands like attach/detach earlier.
82 : timed(
83 : task_mgr::shutdown_tasks(Some(TaskKind::HttpEndpointListener), None, None),
84 : "shutdown http",
85 : Duration::from_secs(1),
86 : )
87 : .await;
88 :
89 : // There should be nothing left, but let's be sure
90 : timed(
91 : task_mgr::shutdown_tasks(None, None, None),
92 : "shutdown leftovers",
93 : Duration::from_secs(1),
94 : )
95 : .await;
96 148 : info!("Shut down successfully completed");
97 : std::process::exit(exit_code);
98 : }
99 :
100 : /// The name of the metadata file pageserver creates per timeline.
101 : /// Full path: `tenants/<tenant_id>/timelines/<timeline_id>/metadata`.
102 : pub const METADATA_FILE_NAME: &str = "metadata";
103 :
104 : /// Per-tenant configuration file.
105 : /// Full path: `tenants/<tenant_id>/config`.
106 : pub const TENANT_CONFIG_NAME: &str = "config";
107 :
108 : /// A suffix used for various temporary files. Any temporary files found in the
109 : /// data directory at pageserver startup can be automatically removed.
110 : pub const TEMP_FILE_SUFFIX: &str = "___temp";
111 :
112 : /// A marker file to mark that a timeline directory was not fully initialized.
113 : /// If a timeline directory with this marker is encountered at pageserver startup,
114 : /// the timeline directory and the marker file are both removed.
115 : /// Full path: `tenants/<tenant_id>/timelines/<timeline_id>___uninit`.
116 : pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
117 :
/// Suffix that [`is_delete_mark`] looks for at the end of a path's final component.
/// NOTE(review): presumably names a marker file for a timeline whose deletion is in
/// progress, analogous to [`TIMELINE_UNINIT_MARK_SUFFIX`] — confirm the exact path and
/// lifecycle against the code that creates it.
118 : pub const TIMELINE_DELETE_MARK_SUFFIX: &str = "___delete";
119 :
120 : /// A marker file to prevent pageserver from loading a certain tenant on restart.
121 : /// Different from [`TIMELINE_UNINIT_MARK_SUFFIX`] due to semantics of the corresponding
122 : /// `ignore` management API command, that expects the ignored tenant to be properly loaded
123 : /// into pageserver's memory before being ignored.
124 : /// Full path: `tenants/<tenant_id>/___ignored_tenant`.
125 : pub const IGNORED_TENANT_FILE_NAME: &str = "___ignored_tenant";
126 :
127 1302 : pub fn is_temporary(path: &Path) -> bool {
128 1302 : match path.file_name() {
129 1302 : Some(name) => name.to_string_lossy().ends_with(TEMP_FILE_SUFFIX),
130 0 : None => false,
131 : }
132 1302 : }
133 :
/// Reports whether the final component of `path` ends with `suffix`.
///
/// Returns `false` when `path` has no final component. The file name is
/// converted lossily to UTF-8 before the comparison.
fn ends_with_suffix(path: &Path, suffix: &str) -> bool {
    path.file_name()
        .map_or(false, |file_name| file_name.to_string_lossy().ends_with(suffix))
}
140 :
141 338 : pub fn is_uninit_mark(path: &Path) -> bool {
142 338 : ends_with_suffix(path, TIMELINE_UNINIT_MARK_SUFFIX)
143 338 : }
144 :
145 337 : pub fn is_delete_mark(path: &Path) -> bool {
146 337 : ends_with_suffix(path, TIMELINE_DELETE_MARK_SUFFIX)
147 337 : }
148 :
149 : fn is_walkdir_io_not_found(e: &walkdir::Error) -> bool {
150 0 : if let Some(e) = e.io_error() {
151 0 : if e.kind() == std::io::ErrorKind::NotFound {
152 0 : return true;
153 0 : }
154 0 : }
155 0 : false
156 0 : }
157 :
158 : /// During pageserver startup, we need to order operations not to exhaust tokio worker threads by
159 : /// blocking.
160 : ///
161 : /// The instances of this value exist only during startup, otherwise `None` is provided, meaning no
162 : /// delaying is needed.
163 215 : #[derive(Clone)]
164 : pub struct InitializationOrder {
165 : /// Each initial tenant load task carries this until completion.
166 : pub initial_tenant_load: Option<utils::completion::Completion>,
167 :
168 : /// Barrier for when we can start initial logical size calculations.
169 : pub initial_logical_size_can_start: utils::completion::Barrier,
170 :
171 : /// Each timeline owns a clone of this to be consumed on the initial logical size calculation
172 : /// attempt. It is important to drop this once the attempt has completed.
173 : pub initial_logical_size_attempt: Option<utils::completion::Completion>,
174 :
175 : /// Barrier for when we can start any background jobs.
176 : ///
177 : /// This can be broken up later on, but right now there is just one class of a background job.
178 : pub background_jobs_can_start: utils::completion::Barrier,
179 : }
180 :
181 : /// Time the future with a warning when it exceeds a threshold.
182 742 : async fn timed<Fut: std::future::Future>(
183 742 : fut: Fut,
184 742 : name: &str,
185 742 : warn_at: std::time::Duration,
186 742 : ) -> <Fut as std::future::Future>::Output {
187 742 : let started = std::time::Instant::now();
188 742 :
189 742 : let mut fut = std::pin::pin!(fut);
190 742 :
191 742 : match tokio::time::timeout(warn_at, &mut fut).await {
192 741 : Ok(ret) => {
193 741 : tracing::info!(
194 741 : task = name,
195 741 : elapsed_ms = started.elapsed().as_millis(),
196 741 : "completed"
197 741 : );
198 741 : ret
199 : }
200 : Err(_) => {
201 1 : tracing::info!(
202 1 : task = name,
203 1 : elapsed_ms = started.elapsed().as_millis(),
204 1 : "still waiting, taking longer than expected..."
205 1 : );
206 :
207 1 : let ret = fut.await;
208 :
209 : // this has a global allowed_errors
210 1 : tracing::warn!(
211 1 : task = name,
212 1 : elapsed_ms = started.elapsed().as_millis(),
213 1 : "completed, took longer than expected"
214 1 : );
215 :
216 1 : ret
217 : }
218 : }
219 742 : }
220 :
#[cfg(test)]
mod timed_tests {
    use super::timed;
    use std::time::Duration;

    /// `timed` must return the inner future's output both when it finishes
    /// before the warning threshold and when it finishes after it.
    #[tokio::test]
    async fn timed_completes_when_inner_future_completes() {
        // A future that completes on time should have its result returned
        let on_time = timed(
            async move {
                tokio::time::sleep(Duration::from_millis(10)).await;
                123
            },
            "test 1",
            Duration::from_millis(50),
        )
        .await;
        assert_eq!(on_time, 123);

        // A future that completes too slowly should also have its result returned.
        // It gets its own label so the two scenarios can be told apart in the logs.
        let late = timed(
            async move {
                tokio::time::sleep(Duration::from_millis(50)).await;
                456
            },
            "test 2",
            Duration::from_millis(10),
        )
        .await;
        assert_eq!(late, 456);
    }
}
|