Make error paths explicit

This commit is contained in:
Silas Brack 2026-03-07 16:12:29 +01:00
parent a862400f64
commit 0c7e217135
4 changed files with 62 additions and 10 deletions

View file

@ -105,6 +105,26 @@ Tested on the same machine with shared nginx volumes:
Both are bottlenecked by nginx volume I/O. The index layer (SQLite) can sustain 378,000 writes/sec in isolation.
## Error responses
Every error returns a plain-text body with a human-readable message.
| Status | Error | When |
|--------|-------|------|
| `404 Not Found` | `not found` | GET, HEAD, DELETE for a key that doesn't exist |
| `500 Internal Server Error` | `corrupt record for key {key}: no volumes` | Key exists in index but has no volume locations (data integrity issue) |
| `500 Internal Server Error` | `database error: {detail}` | SQLite failure (disk full, corruption, locked) |
| `502 Bad Gateway` | `not all volume writes succeeded` | PUT where one or more volume writes failed; all volumes are rolled back |
| `503 Service Unavailable` | `need {n} volumes but only {m} available` | PUT when fewer volumes are configured than the replication factor requires |
### Failure modes
**PUT** writes to all target volumes concurrently, then updates the index. If any volume write fails, all volumes are rolled back (best-effort) and the client gets 502. If volume writes succeed but the index update fails, volumes are rolled back and the client gets 500.
**DELETE** removes the key from the index and issues best-effort deletes to all volumes. Volume delete failures are logged but do not fail the request — the client always gets 204 if the key existed. This can leave orphaned blobs on volumes; use `rebuild` to reconcile.
**GET** looks up the key in the index and returns a 302 redirect to the first volume. If the volume is unreachable, the client sees the failure directly from nginx (the index server does not proxy the blob).
## Security
mkv assumes a **trusted network**. There is no built-in authentication, authorization, or encryption. This is the same security model as minikeyvalue — neither system is designed for direct exposure to the public internet.

View file

@ -25,6 +25,7 @@ impl std::fmt::Display for VolumeError {
#[derive(Debug)]
pub enum AppError {
NotFound,
CorruptRecord { key: String },
Db(rusqlite::Error),
InsufficientVolumes { need: usize, have: usize },
PartialWrite,
@ -43,6 +44,9 @@ impl std::fmt::Display for AppError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
AppError::NotFound => write!(f, "not found"),
AppError::CorruptRecord { key } => {
write!(f, "corrupt record for key {key}: no volumes")
}
AppError::Db(e) => write!(f, "database error: {e}"),
AppError::InsufficientVolumes { need, have } => {
write!(f, "need {need} volumes but only {have} available")
@ -56,6 +60,7 @@ impl IntoResponse for AppError {
fn into_response(self) -> Response {
let status = match &self {
AppError::NotFound => StatusCode::NOT_FOUND,
AppError::CorruptRecord { .. } => StatusCode::INTERNAL_SERVER_ERROR,
AppError::Db(_) => StatusCode::INTERNAL_SERVER_ERROR,
AppError::InsufficientVolumes { .. } => StatusCode::SERVICE_UNAVAILABLE,
AppError::PartialWrite => StatusCode::BAD_GATEWAY,

View file

@ -77,10 +77,18 @@ pub async fn run(args: &Args, dry_run: bool) {
}
};
let dst_url = format!("{dst}/{}", m.key);
if let Err(e) = client.put(&dst_url).body(data).send().await {
eprintln!(" ERROR copy {} to {}: {}", m.key, dst, e);
copy_ok = false;
errors += 1;
match client.put(&dst_url).body(data).send().await {
Ok(resp) if !resp.status().is_success() => {
eprintln!(" ERROR copy {} to {}: status {}", m.key, dst, resp.status());
copy_ok = false;
errors += 1;
}
Err(e) => {
eprintln!(" ERROR copy {} to {}: {}", m.key, dst, e);
copy_ok = false;
errors += 1;
}
Ok(_) => {}
}
}
Ok(resp) => {

View file

@ -20,7 +20,10 @@ pub async fn get_key(
Path(key): Path<String>,
) -> Result<Response, AppError> {
let record = state.db.get(&key).await?;
let vol = record.volumes.first().ok_or(AppError::NotFound)?;
let vol = record
.volumes
.first()
.ok_or_else(|| AppError::CorruptRecord { key: key.clone() })?;
let location = format!("{vol}/{key}");
Ok((StatusCode::FOUND, [(axum::http::header::LOCATION, location)]).into_response())
}
@ -82,7 +85,12 @@ pub async fn put_key(
}
let size = Some(body.len() as i64);
state.db.put(key, target_volumes, size).await?;
if let Err(e) = state.db.put(key.clone(), target_volumes.clone(), size).await {
for vol in &target_volumes {
let _ = state.http.delete(format!("{vol}/{key}")).send().await;
}
return Err(e);
}
Ok(StatusCode::CREATED.into_response())
}
@ -95,14 +103,25 @@ pub async fn delete_key(
let mut handles = Vec::new();
for vol in &record.volumes {
let url = format!("{vol}/{key}");
let client = state.http.clone();
handles.push(tokio::spawn(async move { client.delete(&url).send().await }));
let handle = tokio::spawn({
let client = state.http.clone();
async move {
let resp = client.delete(&url).send().await.map_err(|e| {
VolumeError::Request { url: url.clone(), source: e }
})?;
if !resp.status().is_success() {
return Err(VolumeError::BadStatus { url, status: resp.status() });
}
Ok(())
}
});
handles.push(handle);
}
for handle in handles {
match handle.await {
Ok(Err(e)) => tracing::error!("DELETE from volume failed: {e}"),
Ok(Err(e)) => tracing::error!("{e}"),
Err(e) => tracing::error!("volume delete task failed: {e}"),
Ok(Ok(_)) => {}
Ok(Ok(())) => {}
}
}