From 0c7e217135a68b5e827e96c91b2d55e43ff252d4 Mon Sep 17 00:00:00 2001 From: Silas Brack Date: Sat, 7 Mar 2026 16:12:29 +0100 Subject: [PATCH] Make error paths explicit --- README.md | 20 ++++++++++++++++++++ src/error.rs | 5 +++++ src/rebalance.rs | 16 ++++++++++++---- src/server.rs | 31 +++++++++++++++++++++++++------ 4 files changed, 62 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 956bf74..ac77fe3 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,26 @@ Tested on the same machine with shared nginx volumes: Both are bottlenecked by nginx volume I/O. The index layer (SQLite) can sustain 378,000 writes/sec in isolation. +## Error responses + +Every error returns a plain-text body with a human-readable message. + +| Status | Error | When | +|--------|-------|------| +| `404 Not Found` | `not found` | GET, HEAD, DELETE for a key that doesn't exist | +| `500 Internal Server Error` | `corrupt record for key {key}: no volumes` | Key exists in index but has no volume locations (data integrity issue) | +| `500 Internal Server Error` | `database error: {detail}` | SQLite failure (disk full, corruption, locked) | +| `502 Bad Gateway` | `not all volume writes succeeded` | PUT where one or more volume writes failed; all volumes are rolled back | +| `503 Service Unavailable` | `need {n} volumes but only {m} available` | PUT when fewer volumes are configured than the replication factor requires | + +### Failure modes + +**PUT** writes to all target volumes concurrently, then updates the index. If any volume write fails, all volumes are rolled back (best-effort) and the client gets 502. If volume writes succeed but the index update fails, volumes are rolled back and the client gets 500. + +**DELETE** removes the key from the index and issues best-effort deletes to all volumes. Volume delete failures are logged but do not fail the request — the client always gets 204 if the key existed. This can leave orphaned blobs on volumes; use `rebuild` to reconcile. + +**GET** looks up the key in the index and returns a 302 redirect to the first volume. If the volume is unreachable, the client sees the failure directly from nginx (the index server does not proxy the blob). + ## Security mkv assumes a **trusted network**. There is no built-in authentication, authorization, or encryption. This is the same security model as minikeyvalue — neither system is designed for direct exposure to the public internet. diff --git a/src/error.rs b/src/error.rs index 61eacc2..d81de8d 100644 --- a/src/error.rs +++ b/src/error.rs @@ -25,6 +25,7 @@ impl std::fmt::Display for VolumeError { #[derive(Debug)] pub enum AppError { NotFound, + CorruptRecord { key: String }, Db(rusqlite::Error), InsufficientVolumes { need: usize, have: usize }, PartialWrite, @@ -43,6 +44,9 @@ impl std::fmt::Display for AppError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { AppError::NotFound => write!(f, "not found"), + AppError::CorruptRecord { key } => { + write!(f, "corrupt record for key {key}: no volumes") + } AppError::Db(e) => write!(f, "database error: {e}"), AppError::InsufficientVolumes { need, have } => { write!(f, "need {need} volumes but only {have} available") @@ -56,6 +60,7 @@ impl IntoResponse for AppError { fn into_response(self) -> Response { let status = match &self { AppError::NotFound => StatusCode::NOT_FOUND, + AppError::CorruptRecord { .. } => StatusCode::INTERNAL_SERVER_ERROR, AppError::Db(_) => StatusCode::INTERNAL_SERVER_ERROR, AppError::InsufficientVolumes { .. } => StatusCode::SERVICE_UNAVAILABLE, AppError::PartialWrite => StatusCode::BAD_GATEWAY, diff --git a/src/rebalance.rs b/src/rebalance.rs index a44d07c..e63ec12 100644 --- a/src/rebalance.rs +++ b/src/rebalance.rs @@ -77,10 +77,18 @@ pub async fn run(args: &Args, dry_run: bool) { } }; let dst_url = format!("{dst}/{}", m.key); - if let Err(e) = client.put(&dst_url).body(data).send().await { - eprintln!(" ERROR copy {} to {}: {}", m.key, dst, e); - copy_ok = false; - errors += 1; + match client.put(&dst_url).body(data).send().await { + Ok(resp) if !resp.status().is_success() => { + eprintln!(" ERROR copy {} to {}: status {}", m.key, dst, resp.status()); + copy_ok = false; + errors += 1; + } + Err(e) => { + eprintln!(" ERROR copy {} to {}: {}", m.key, dst, e); + copy_ok = false; + errors += 1; + } + Ok(_) => {} } } Ok(resp) => { diff --git a/src/server.rs b/src/server.rs index cff3ccf..d2e1845 100644 --- a/src/server.rs +++ b/src/server.rs @@ -20,7 +20,10 @@ pub async fn get_key( Path(key): Path, ) -> Result { let record = state.db.get(&key).await?; - let vol = record.volumes.first().ok_or(AppError::NotFound)?; + let vol = record + .volumes + .first() + .ok_or_else(|| AppError::CorruptRecord { key: key.clone() })?; let location = format!("{vol}/{key}"); Ok((StatusCode::FOUND, [(axum::http::header::LOCATION, location)]).into_response()) } @@ -82,7 +85,12 @@ pub async fn put_key( } let size = Some(body.len() as i64); - state.db.put(key, target_volumes, size).await?; + if let Err(e) = state.db.put(key.clone(), target_volumes.clone(), size).await { + for vol in &target_volumes { + let _ = state.http.delete(format!("{vol}/{key}")).send().await; + } + return Err(e); + } Ok(StatusCode::CREATED.into_response()) } @@ -95,14 +103,25 @@ pub async fn delete_key( let mut handles = Vec::new(); for vol in &record.volumes { let url = format!("{vol}/{key}"); - let client = state.http.clone(); - handles.push(tokio::spawn(async move { client.delete(&url).send().await })); + let handle = tokio::spawn({ + let client = state.http.clone(); + async move { + let resp = client.delete(&url).send().await.map_err(|e| { + VolumeError::Request { url: url.clone(), source: e } + })?; + if !resp.status().is_success() { + return Err(VolumeError::BadStatus { url, status: resp.status() }); + } + Ok(()) + } + }); + handles.push(handle); } for handle in handles { match handle.await { - Ok(Err(e)) => tracing::error!("DELETE from volume failed: {e}"), + Ok(Err(e)) => tracing::error!("{e}"), Err(e) => tracing::error!("volume delete task failed: {e}"), - Ok(Ok(_)) => {} + Ok(Ok(())) => {} } }