Line data Source code
1 : use crate::config::RetryConfig;
2 : use crate::console::messages::{ConsoleError, Reason};
3 : use crate::console::{errors::WakeComputeError, provider::CachedNodeInfo};
4 : use crate::context::RequestMonitoring;
5 : use crate::metrics::{
6 : ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType,
7 : WakeupFailureKind,
8 : };
9 : use crate::proxy::retry::{retry_after, should_retry};
10 : use hyper1::StatusCode;
11 : use tracing::{error, info, warn};
12 :
13 : use super::connect_compute::ComputeConnectBackend;
14 :
15 22 : pub async fn wake_compute<B: ComputeConnectBackend>(
16 22 : num_retries: &mut u32,
17 22 : ctx: &mut RequestMonitoring,
18 22 : api: &B,
19 22 : config: RetryConfig,
20 22 : ) -> Result<CachedNodeInfo, WakeComputeError> {
21 22 : let retry_type = RetryType::WakeCompute;
22 : loop {
23 26 : match api.wake_compute(ctx).await {
24 6 : Err(e) if !should_retry(&e, *num_retries, config) => {
25 2 : error!(error = ?e, num_retries, retriable = false, "couldn't wake compute node");
26 2 : report_error(&e, false);
27 2 : Metrics::get().proxy.retries_metric.observe(
28 2 : RetriesMetricGroup {
29 2 : outcome: ConnectOutcome::Failed,
30 2 : retry_type,
31 2 : },
32 2 : (*num_retries).into(),
33 2 : );
34 2 : return Err(e);
35 : }
36 4 : Err(e) => {
37 4 : warn!(error = ?e, num_retries, retriable = true, "couldn't wake compute node");
38 4 : report_error(&e, true);
39 : }
40 20 : Ok(n) => {
41 20 : Metrics::get().proxy.retries_metric.observe(
42 20 : RetriesMetricGroup {
43 20 : outcome: ConnectOutcome::Success,
44 20 : retry_type,
45 20 : },
46 20 : (*num_retries).into(),
47 20 : );
48 20 : info!(?num_retries, "compute node woken up after");
49 20 : return Ok(n);
50 : }
51 : }
52 :
53 4 : let wait_duration = retry_after(*num_retries, config);
54 4 : *num_retries += 1;
55 4 : let pause = ctx
56 4 : .latency_timer
57 4 : .pause(crate::metrics::Waiting::RetryTimeout);
58 4 : tokio::time::sleep(wait_duration).await;
59 4 : drop(pause);
60 : }
61 22 : }
62 :
63 6 : fn report_error(e: &WakeComputeError, retry: bool) {
64 : use crate::console::errors::ApiError;
65 6 : let kind = match e {
66 0 : WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress,
67 0 : WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError,
68 6 : WakeComputeError::ApiError(ApiError::Console(e)) => match e.get_reason() {
69 0 : Reason::RoleProtected => WakeupFailureKind::ApiConsoleBadRequest,
70 0 : Reason::ResourceNotFound => WakeupFailureKind::ApiConsoleBadRequest,
71 0 : Reason::ProjectNotFound => WakeupFailureKind::ApiConsoleBadRequest,
72 0 : Reason::EndpointNotFound => WakeupFailureKind::ApiConsoleBadRequest,
73 0 : Reason::BranchNotFound => WakeupFailureKind::ApiConsoleBadRequest,
74 0 : Reason::RateLimitExceeded => WakeupFailureKind::ApiConsoleLocked,
75 0 : Reason::NonDefaultBranchComputeTimeExceeded => WakeupFailureKind::QuotaExceeded,
76 0 : Reason::ActiveTimeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
77 0 : Reason::ComputeTimeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
78 0 : Reason::WrittenDataQuotaExceeded => WakeupFailureKind::QuotaExceeded,
79 0 : Reason::DataTransferQuotaExceeded => WakeupFailureKind::QuotaExceeded,
80 0 : Reason::LogicalSizeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
81 0 : Reason::ConcurrencyLimitReached => WakeupFailureKind::ApiConsoleLocked,
82 0 : Reason::LockAlreadyTaken => WakeupFailureKind::ApiConsoleLocked,
83 0 : Reason::RunningOperations => WakeupFailureKind::ApiConsoleLocked,
84 0 : Reason::Unknown => match e {
85 0 : ConsoleError {
86 0 : http_status_code: StatusCode::LOCKED,
87 0 : ref error,
88 0 : ..
89 0 : } if error.contains("written data quota exceeded")
90 0 : || error.contains("the limit for current plan reached") =>
91 0 : {
92 0 : WakeupFailureKind::QuotaExceeded
93 : }
94 : ConsoleError {
95 : http_status_code: StatusCode::UNPROCESSABLE_ENTITY,
96 0 : ref error,
97 0 : ..
98 0 : } if error.contains("compute time quota of non-primary branches is exceeded") => {
99 0 : WakeupFailureKind::QuotaExceeded
100 : }
101 : ConsoleError {
102 : http_status_code: StatusCode::LOCKED,
103 : ..
104 0 : } => WakeupFailureKind::ApiConsoleLocked,
105 : ConsoleError {
106 : http_status_code: StatusCode::BAD_REQUEST,
107 : ..
108 6 : } => WakeupFailureKind::ApiConsoleBadRequest,
109 : ConsoleError {
110 0 : http_status_code, ..
111 0 : } if http_status_code.is_server_error() => {
112 0 : WakeupFailureKind::ApiConsoleOtherServerError
113 : }
114 0 : ConsoleError { .. } => WakeupFailureKind::ApiConsoleOtherError,
115 : },
116 : },
117 0 : WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked,
118 0 : WakeComputeError::TooManyConnectionAttempts(_) => WakeupFailureKind::TimeoutError,
119 : };
120 6 : Metrics::get()
121 6 : .proxy
122 6 : .connection_failures_breakdown
123 6 : .inc(ConnectionFailuresBreakdownGroup {
124 6 : kind,
125 6 : retry: retry.into(),
126 6 : });
127 6 : }
|