From 1fc59674f54eb90f73d0de2b5d68c71c65d49e27 Mon Sep 17 00:00:00 2001 From: Silas Brack Date: Sun, 8 Mar 2026 13:08:49 +0100 Subject: [PATCH] Allow for reads if one volume is down --- Cargo.lock | 39 +++++++++++++++++++++++++++++++++++---- Cargo.toml | 1 + src/error.rs | 3 +++ src/server.rs | 41 +++++++++++++++++++++++++++++++---------- 4 files changed, 70 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 73e1f42..d697dbc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -774,6 +774,7 @@ version = "0.1.0" dependencies = [ "axum", "clap", + "rand 0.8.5", "reqwest", "rusqlite", "serde", @@ -908,7 +909,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand", + "rand 0.9.2", "ring", "rustc-hash", "rustls", @@ -949,14 +950,35 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", ] [[package]] @@ -966,7 +988,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] 
+name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b074b32..4498eb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ clap = { version = "4", features = ["derive", "env"] } tracing = "0.1" tracing-subscriber = "0.3" sha2 = "0.10" +rand = "0.8" [profile.release] opt-level = 3 diff --git a/src/error.rs b/src/error.rs index 32e25a4..7536cec 100644 --- a/src/error.rs +++ b/src/error.rs @@ -35,6 +35,7 @@ pub enum AppError { Db(rusqlite::Error), InsufficientVolumes { need: usize, have: usize }, PartialWrite, + AllVolumesUnreachable, } impl From for AppError { @@ -58,6 +59,7 @@ impl std::fmt::Display for AppError { write!(f, "need {need} volumes but only {have} available") } AppError::PartialWrite => write!(f, "not all volume writes succeeded"), + AppError::AllVolumesUnreachable => write!(f, "all volume replicas are unreachable"), } } } @@ -70,6 +72,7 @@ impl IntoResponse for AppError { AppError::Db(_) => StatusCode::INTERNAL_SERVER_ERROR, AppError::InsufficientVolumes { .. 
} => StatusCode::SERVICE_UNAVAILABLE,
             AppError::PartialWrite => StatusCode::BAD_GATEWAY,
+            AppError::AllVolumesUnreachable => StatusCode::BAD_GATEWAY,
         };
         (status, self.to_string()).into_response()
     }
diff --git a/src/server.rs b/src/server.rs
index a4b24d4..ea27108 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -19,17 +19,38 @@ pub async fn get_key(
     State(state): State,
     Path(key): Path,
 ) -> Result {
+    use rand::seq::SliceRandom;
+
     let record = state.db.get(&key).await?;
-    let vol = record
-        .volumes
-        .first()
-        .ok_or_else(|| AppError::CorruptRecord { key: key.clone() })?;
-    let location = format!("{vol}/{key}");
-    Ok((
-        StatusCode::FOUND,
-        [(axum::http::header::LOCATION, location)],
-    )
-        .into_response())
+    if record.volumes.is_empty() {
+        return Err(AppError::CorruptRecord { key });
+    }
+
+    // Shuffle borrowed entries so reads spread across replicas without cloning the list.
+    let mut volumes: Vec<_> = record.volumes.iter().collect();
+    volumes.shuffle(&mut rand::thread_rng());
+
+    // Redirect to the first replica that answers a HEAD probe with a 2xx status.
+    for vol in volumes {
+        let url = format!("{vol}/{key}");
+        match state.http.head(&url).send().await {
+            Ok(resp) if resp.status().is_success() => {
+                return Ok((
+                    StatusCode::FOUND,
+                    [(axum::http::header::LOCATION, url)],
+                )
+                    .into_response());
+            }
+            Ok(resp) => {
+                tracing::warn!("volume {vol} returned {} for {key}", resp.status());
+            }
+            Err(e) => {
+                tracing::warn!("volume {vol} unreachable for {key}: {e}");
+            }
+        }
+    }
+    // NOTE(review): probes run one at a time; confirm state.http has a request timeout.
+    Err(AppError::AllVolumesUnreachable)
 }
 
 pub async fn put_key(