Line data Source code
1 : #![deny(clippy::undocumented_unsafe_blocks)]
2 :
3 : extern crate hyper0 as hyper;
4 :
5 : use std::time::Duration;
6 :
7 : use camino::Utf8PathBuf;
8 : use once_cell::sync::Lazy;
9 : use pem::Pem;
10 : use remote_storage::RemoteStorageConfig;
11 : use storage_broker::Uri;
12 : use tokio::runtime::Runtime;
13 : use utils::auth::SwappableJwtAuth;
14 : use utils::id::NodeId;
15 : use utils::logging::SecretString;
16 :
17 : mod auth;
18 : pub mod broker;
19 : pub mod control_file;
20 : pub mod control_file_upgrade;
21 : pub mod copy_timeline;
22 : pub mod debug_dump;
23 : pub mod handler;
24 : pub mod http;
25 : pub mod metrics;
26 : pub mod patch_control_file;
27 : pub mod pull_timeline;
28 : pub mod rate_limit;
29 : pub mod receive_wal;
30 : pub mod recovery;
31 : pub mod remove_wal;
32 : pub mod safekeeper;
33 : pub mod send_interpreted_wal;
34 : pub mod send_wal;
35 : pub mod state;
36 : pub mod timeline;
37 : pub mod timeline_eviction;
38 : pub mod timeline_guard;
39 : pub mod timeline_manager;
40 : pub mod timelines_set;
41 : pub mod wal_backup;
42 : pub mod wal_backup_partial;
43 : pub mod wal_reader_stream;
44 : pub mod wal_service;
45 : pub mod wal_storage;
46 :
47 : #[cfg(any(test, feature = "benchmarking"))]
48 : pub mod test_utils;
49 :
50 : mod timelines_global_map;
51 : use std::sync::Arc;
52 :
53 : pub use timelines_global_map::GlobalTimelines;
54 : use utils::auth::JwtAuth;
55 :
56 : pub mod defaults {
57 : pub use safekeeper_api::{
58 : DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_HTTP_LISTEN_PORT, DEFAULT_PG_LISTEN_ADDR,
59 : DEFAULT_PG_LISTEN_PORT,
60 : };
61 :
62 : pub const DEFAULT_HEARTBEAT_TIMEOUT: &str = "5000ms";
63 : pub const DEFAULT_MAX_OFFLOADER_LAG_BYTES: u64 = 128 * (1 << 20);
64 : /* BEGIN_HADRON */
65 : // Default leader re-elect is 0(disabled). SK will re-elect leader if the current leader is lagging this many bytes.
66 : pub const DEFAULT_MAX_REELECT_OFFLOADER_LAG_BYTES: u64 = 0;
67 : // Default disk usage limit is 0 (disabled). It means each timeline by default can use up to this many WAL
68 : // disk space on this SK until SK begins to reject WALs.
69 : pub const DEFAULT_MAX_TIMELINE_DISK_USAGE_BYTES: u64 = 0;
70 : /* END_HADRON */
71 : pub const DEFAULT_PARTIAL_BACKUP_TIMEOUT: &str = "15m";
72 : pub const DEFAULT_CONTROL_FILE_SAVE_INTERVAL: &str = "300s";
73 : pub const DEFAULT_PARTIAL_BACKUP_CONCURRENCY: &str = "5";
74 : pub const DEFAULT_EVICTION_CONCURRENCY: usize = 2;
75 :
76 : // By default, our required residency before eviction is the same as the period that passes
77 : // before uploading a partial segment, so that in normal operation the eviction can happen
78 : // as soon as we have done the partial segment upload.
79 : pub const DEFAULT_EVICTION_MIN_RESIDENT: &str = DEFAULT_PARTIAL_BACKUP_TIMEOUT;
80 :
81 : pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
82 : pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
83 : pub const DEFAULT_SSL_CERT_RELOAD_PERIOD: &str = "60s";
84 : }
85 :
86 : #[derive(Debug, Clone)]
87 : pub struct SafeKeeperConf {
88 : // Repository directory, relative to current working directory.
89 : // Normally, the safekeeper changes the current working directory
90 : // to the repository, and 'workdir' is always '.'. But we don't do
91 : // that during unit testing, because the current directory is global
92 : // to the process but different unit tests work on different
93 : // data directories to avoid clashing with each other.
94 : pub workdir: Utf8PathBuf,
95 : pub my_id: NodeId,
96 : pub listen_pg_addr: String,
97 : pub listen_pg_addr_tenant_only: Option<String>,
98 : pub listen_http_addr: String,
99 : pub listen_https_addr: Option<String>,
100 : pub advertise_pg_addr: Option<String>,
101 : pub availability_zone: Option<String>,
102 : pub no_sync: bool,
103 : pub broker_endpoint: Uri,
104 : pub broker_keepalive_interval: Duration,
105 : pub heartbeat_timeout: Duration,
106 : pub peer_recovery_enabled: bool,
107 : pub remote_storage: Option<RemoteStorageConfig>,
108 : pub max_offloader_lag_bytes: u64,
109 : /* BEGIN_HADRON */
110 : pub max_reelect_offloader_lag_bytes: u64,
111 : pub max_timeline_disk_usage_bytes: u64,
112 : /* END_HADRON */
113 : pub backup_parallel_jobs: usize,
114 : pub wal_backup_enabled: bool,
115 : pub pg_auth: Option<Arc<JwtAuth>>,
116 : pub pg_tenant_only_auth: Option<Arc<JwtAuth>>,
117 : pub http_auth: Option<Arc<SwappableJwtAuth>>,
118 : /// JWT token to connect to other safekeepers with.
119 : pub sk_auth_token: Option<SecretString>,
120 : pub current_thread_runtime: bool,
121 : pub walsenders_keep_horizon: bool,
122 : pub partial_backup_timeout: Duration,
123 : pub disable_periodic_broker_push: bool,
124 : pub enable_offload: bool,
125 : pub delete_offloaded_wal: bool,
126 : pub control_file_save_interval: Duration,
127 : pub partial_backup_concurrency: usize,
128 : pub eviction_min_resident: Duration,
129 : pub wal_reader_fanout: bool,
130 : pub max_delta_for_fanout: Option<u64>,
131 : pub ssl_key_file: Utf8PathBuf,
132 : pub ssl_cert_file: Utf8PathBuf,
133 : pub ssl_cert_reload_period: Duration,
134 : pub ssl_ca_certs: Vec<Pem>,
135 : pub use_https_safekeeper_api: bool,
136 : pub enable_tls_wal_service_api: bool,
137 : pub force_metric_collection_on_scrape: bool,
138 : }
139 :
140 : impl SafeKeeperConf {
141 10 : pub fn dummy() -> Self {
142 10 : SafeKeeperConf {
143 10 : workdir: Utf8PathBuf::from("./"),
144 10 : no_sync: false,
145 10 : listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
146 10 : listen_pg_addr_tenant_only: None,
147 10 : listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
148 10 : listen_https_addr: None,
149 10 : advertise_pg_addr: None,
150 10 : availability_zone: None,
151 10 : remote_storage: None,
152 10 : my_id: NodeId(0),
153 10 : broker_endpoint: storage_broker::DEFAULT_ENDPOINT
154 10 : .parse()
155 10 : .expect("failed to parse default broker endpoint"),
156 10 : broker_keepalive_interval: Duration::from_secs(5),
157 10 : peer_recovery_enabled: true,
158 10 : wal_backup_enabled: true,
159 10 : backup_parallel_jobs: 1,
160 10 : pg_auth: None,
161 10 : pg_tenant_only_auth: None,
162 10 : http_auth: None,
163 10 : sk_auth_token: None,
164 10 : heartbeat_timeout: Duration::new(5, 0),
165 10 : max_offloader_lag_bytes: defaults::DEFAULT_MAX_OFFLOADER_LAG_BYTES,
166 10 : /* BEGIN_HADRON */
167 10 : max_reelect_offloader_lag_bytes: defaults::DEFAULT_MAX_REELECT_OFFLOADER_LAG_BYTES,
168 10 : max_timeline_disk_usage_bytes: defaults::DEFAULT_MAX_TIMELINE_DISK_USAGE_BYTES,
169 10 : /* END_HADRON */
170 10 : current_thread_runtime: false,
171 10 : walsenders_keep_horizon: false,
172 10 : partial_backup_timeout: Duration::from_secs(0),
173 10 : disable_periodic_broker_push: false,
174 10 : enable_offload: false,
175 10 : delete_offloaded_wal: false,
176 10 : control_file_save_interval: Duration::from_secs(1),
177 10 : partial_backup_concurrency: 1,
178 10 : eviction_min_resident: Duration::ZERO,
179 10 : wal_reader_fanout: false,
180 10 : max_delta_for_fanout: None,
181 10 : ssl_key_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_KEY_FILE),
182 10 : ssl_cert_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_CERT_FILE),
183 10 : ssl_cert_reload_period: Duration::from_secs(60),
184 10 : ssl_ca_certs: Vec::new(),
185 10 : use_https_safekeeper_api: false,
186 10 : enable_tls_wal_service_api: false,
187 10 : force_metric_collection_on_scrape: true,
188 10 : }
189 10 : }
190 : }
191 :
192 : // Tokio runtimes.
193 0 : pub static WAL_SERVICE_RUNTIME: Lazy<Runtime> = Lazy::new(|| {
194 0 : tokio::runtime::Builder::new_multi_thread()
195 0 : .thread_name("WAL service worker")
196 0 : .enable_all()
197 0 : .build()
198 0 : .expect("Failed to create WAL service runtime")
199 0 : });
200 :
201 0 : pub static HTTP_RUNTIME: Lazy<Runtime> = Lazy::new(|| {
202 0 : tokio::runtime::Builder::new_multi_thread()
203 0 : .thread_name("HTTP worker")
204 0 : .enable_all()
205 0 : .build()
206 0 : .expect("Failed to create HTTP runtime")
207 0 : });
208 :
209 0 : pub static BROKER_RUNTIME: Lazy<Runtime> = Lazy::new(|| {
210 0 : tokio::runtime::Builder::new_multi_thread()
211 0 : .thread_name("broker worker")
212 0 : .worker_threads(2) // there are only 2 tasks, having more threads doesn't make sense
213 0 : .enable_all()
214 0 : .build()
215 0 : .expect("Failed to create broker runtime")
216 0 : });
217 :
218 0 : pub static WAL_BACKUP_RUNTIME: Lazy<Runtime> = Lazy::new(|| {
219 0 : tokio::runtime::Builder::new_multi_thread()
220 0 : .thread_name("WAL backup worker")
221 0 : .enable_all()
222 0 : .build()
223 0 : .expect("Failed to create WAL backup runtime")
224 0 : });
225 :
226 0 : pub static BACKGROUND_RUNTIME: Lazy<Runtime> = Lazy::new(|| {
227 0 : tokio::runtime::Builder::new_multi_thread()
228 0 : .thread_name("background worker")
229 0 : .worker_threads(1) // there is only one task now (ssl certificate reloading), having more threads doesn't make sense
230 0 : .enable_all()
231 0 : .build()
232 0 : .expect("Failed to create background runtime")
233 0 : });
|