Idk broq
This commit is contained in:
parent
dc1f4bd19d
commit
2c66fa50d8
9 changed files with 1125 additions and 960 deletions
382
src/db.rs
382
src/db.rs
|
|
@ -1,182 +1,200 @@
|
|||
use rusqlite::{params, Connection, OpenFlags};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::error::AppError;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Record {
|
||||
pub key: String,
|
||||
pub volumes: Vec<String>,
|
||||
pub size: Option<i64>,
|
||||
}
|
||||
|
||||
fn apply_pragmas(conn: &Connection) {
|
||||
conn.execute_batch(
|
||||
"PRAGMA journal_mode = WAL;
|
||||
PRAGMA synchronous = NORMAL;
|
||||
PRAGMA busy_timeout = 5000;
|
||||
PRAGMA temp_store = memory;
|
||||
PRAGMA cache_size = -64000;
|
||||
PRAGMA mmap_size = 268435456;",
|
||||
)
|
||||
.expect("failed to set pragmas");
|
||||
}
|
||||
|
||||
fn parse_volumes(s: &str) -> Vec<String> {
|
||||
serde_json::from_str(s).unwrap_or_default()
|
||||
}
|
||||
|
||||
fn encode_volumes(v: &[String]) -> String {
|
||||
serde_json::to_string(v).unwrap()
|
||||
}
|
||||
|
||||
/// A single SQLite connection behind a mutex, used for both reads and writes.
|
||||
#[derive(Clone)]
|
||||
pub struct Db {
|
||||
conn: Arc<Mutex<Connection>>,
|
||||
}
|
||||
|
||||
impl Db {
|
||||
pub fn new(path: &str) -> Self {
|
||||
let conn = Connection::open_with_flags(
|
||||
path,
|
||||
OpenFlags::SQLITE_OPEN_READ_WRITE
|
||||
| OpenFlags::SQLITE_OPEN_CREATE
|
||||
| OpenFlags::SQLITE_OPEN_NO_MUTEX
|
||||
| OpenFlags::SQLITE_OPEN_URI,
|
||||
)
|
||||
.expect("failed to open database");
|
||||
apply_pragmas(&conn);
|
||||
conn.execute_batch(
|
||||
"CREATE TABLE IF NOT EXISTS kv (
|
||||
key TEXT PRIMARY KEY,
|
||||
volumes TEXT NOT NULL,
|
||||
size INTEGER,
|
||||
created_at INTEGER DEFAULT (unixepoch())
|
||||
);",
|
||||
)
|
||||
.expect("failed to create tables");
|
||||
Self { conn: Arc::new(Mutex::new(conn)) }
|
||||
}
|
||||
|
||||
pub async fn get(&self, key: &str) -> Result<Record, AppError> {
|
||||
let conn = self.conn.clone();
|
||||
let key = key.to_string();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
let mut stmt = conn.prepare_cached("SELECT key, volumes, size FROM kv WHERE key = ?1")?;
|
||||
Ok(stmt.query_row(params![key], |row| {
|
||||
let vj: String = row.get(1)?;
|
||||
Ok(Record { key: row.get(0)?, volumes: parse_volumes(&vj), size: row.get(2)? })
|
||||
})?)
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn list_keys(&self, prefix: &str) -> Result<Vec<String>, AppError> {
|
||||
let conn = self.conn.clone();
|
||||
let prefix = prefix.to_string();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
if prefix.is_empty() {
|
||||
let mut stmt = conn.prepare_cached("SELECT key FROM kv ORDER BY key")?;
|
||||
let keys = stmt
|
||||
.query_map([], |row| row.get(0))?
|
||||
.collect::<Result<Vec<String>, _>>()?;
|
||||
return Ok(keys);
|
||||
}
|
||||
// Compute exclusive upper bound: increment last non-0xFF byte
|
||||
let upper = {
|
||||
let mut bytes = prefix.as_bytes().to_vec();
|
||||
let mut result = None;
|
||||
while let Some(last) = bytes.pop() {
|
||||
if last < 0xFF {
|
||||
bytes.push(last + 1);
|
||||
result = Some(String::from_utf8_lossy(&bytes).into_owned());
|
||||
break;
|
||||
}
|
||||
}
|
||||
result
|
||||
};
|
||||
let keys = match &upper {
|
||||
Some(end) => {
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT key FROM kv WHERE key >= ?1 AND key < ?2 ORDER BY key",
|
||||
)?;
|
||||
stmt.query_map(params![prefix, end], |row| row.get(0))?
|
||||
.collect::<Result<Vec<String>, _>>()?
|
||||
}
|
||||
None => {
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT key FROM kv WHERE key >= ?1 ORDER BY key",
|
||||
)?;
|
||||
stmt.query_map(params![prefix], |row| row.get(0))?
|
||||
.collect::<Result<Vec<String>, _>>()?
|
||||
}
|
||||
};
|
||||
Ok(keys)
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn put(&self, key: String, volumes: Vec<String>, size: Option<i64>) -> Result<(), AppError> {
|
||||
let conn = self.conn.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
conn.prepare_cached(
|
||||
"INSERT INTO kv (key, volumes, size) VALUES (?1, ?2, ?3)
|
||||
ON CONFLICT(key) DO UPDATE SET volumes = ?2, size = ?3",
|
||||
)?
|
||||
.execute(params![key, encode_volumes(&volumes), size])?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn delete(&self, key: String) -> Result<(), AppError> {
|
||||
let conn = self.conn.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
conn.prepare_cached("DELETE FROM kv WHERE key = ?1")?
|
||||
.execute(params![key])?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn bulk_put(&self, records: Vec<(String, Vec<String>, Option<i64>)>) -> Result<(), AppError> {
|
||||
let conn = self.conn.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
conn.execute_batch("BEGIN")?;
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"INSERT INTO kv (key, volumes, size) VALUES (?1, ?2, ?3)
|
||||
ON CONFLICT(key) DO UPDATE SET volumes = ?2, size = ?3",
|
||||
)?;
|
||||
for (key, volumes, size) in &records {
|
||||
stmt.execute(params![key, encode_volumes(volumes), size])?;
|
||||
}
|
||||
drop(stmt);
|
||||
conn.execute_batch("COMMIT")?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn all_records_sync(&self) -> Result<Vec<Record>, AppError> {
|
||||
let conn = self.conn.lock().unwrap();
|
||||
let mut stmt = conn.prepare_cached("SELECT key, volumes, size FROM kv")?;
|
||||
let records = stmt
|
||||
.query_map([], |row| {
|
||||
let vj: String = row.get(1)?;
|
||||
Ok(Record { key: row.get(0)?, volumes: parse_volumes(&vj), size: row.get(2)? })
|
||||
})?
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Ok(records)
|
||||
}
|
||||
}
|
||||
use rusqlite::{Connection, OpenFlags, params};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::error::AppError;
|
||||
|
||||
/// One row of the `kv` index table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Record {
    /// Value of the `key` column (the table's primary key).
    pub key: String,
    /// Volume names decoded from the JSON text held in the `volumes` column.
    pub volumes: Vec<String>,
    /// Value of the nullable `size` column.
    /// NOTE(review): presumably the blob size in bytes — confirm with the writer.
    pub size: Option<i64>,
}
|
||||
|
||||
fn apply_pragmas(conn: &Connection) {
|
||||
conn.execute_batch(
|
||||
"PRAGMA journal_mode = WAL;
|
||||
PRAGMA synchronous = NORMAL;
|
||||
PRAGMA busy_timeout = 5000;
|
||||
PRAGMA temp_store = memory;
|
||||
PRAGMA cache_size = -64000;
|
||||
PRAGMA mmap_size = 268435456;",
|
||||
)
|
||||
.expect("failed to set pragmas");
|
||||
}
|
||||
|
||||
fn parse_volumes(s: &str) -> Vec<String> {
|
||||
serde_json::from_str(s).unwrap_or_default()
|
||||
}
|
||||
|
||||
fn encode_volumes(v: &[String]) -> String {
|
||||
serde_json::to_string(v).unwrap()
|
||||
}
|
||||
|
||||
/// A single SQLite connection behind a mutex, used for both reads and writes.
|
||||
#[derive(Clone)]
|
||||
pub struct Db {
|
||||
conn: Arc<Mutex<Connection>>,
|
||||
}
|
||||
|
||||
impl Db {
|
||||
pub fn new(path: &str) -> Self {
|
||||
let conn = Connection::open_with_flags(
|
||||
path,
|
||||
OpenFlags::SQLITE_OPEN_READ_WRITE
|
||||
| OpenFlags::SQLITE_OPEN_CREATE
|
||||
| OpenFlags::SQLITE_OPEN_NO_MUTEX
|
||||
| OpenFlags::SQLITE_OPEN_URI,
|
||||
)
|
||||
.expect("failed to open database");
|
||||
apply_pragmas(&conn);
|
||||
conn.execute_batch(
|
||||
"CREATE TABLE IF NOT EXISTS kv (
|
||||
key TEXT PRIMARY KEY,
|
||||
volumes TEXT NOT NULL,
|
||||
size INTEGER,
|
||||
created_at INTEGER DEFAULT (unixepoch())
|
||||
);",
|
||||
)
|
||||
.expect("failed to create tables");
|
||||
Self {
|
||||
conn: Arc::new(Mutex::new(conn)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get(&self, key: &str) -> Result<Record, AppError> {
|
||||
let conn = self.conn.clone();
|
||||
let key = key.to_string();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
let mut stmt =
|
||||
conn.prepare_cached("SELECT key, volumes, size FROM kv WHERE key = ?1")?;
|
||||
Ok(stmt.query_row(params![key], |row| {
|
||||
let vj: String = row.get(1)?;
|
||||
Ok(Record {
|
||||
key: row.get(0)?,
|
||||
volumes: parse_volumes(&vj),
|
||||
size: row.get(2)?,
|
||||
})
|
||||
})?)
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn list_keys(&self, prefix: &str) -> Result<Vec<String>, AppError> {
|
||||
let conn = self.conn.clone();
|
||||
let prefix = prefix.to_string();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
if prefix.is_empty() {
|
||||
let mut stmt = conn.prepare_cached("SELECT key FROM kv ORDER BY key")?;
|
||||
let keys = stmt
|
||||
.query_map([], |row| row.get(0))?
|
||||
.collect::<Result<Vec<String>, _>>()?;
|
||||
return Ok(keys);
|
||||
}
|
||||
// Compute exclusive upper bound: increment last non-0xFF byte
|
||||
let upper = {
|
||||
let mut bytes = prefix.as_bytes().to_vec();
|
||||
let mut result = None;
|
||||
while let Some(last) = bytes.pop() {
|
||||
if last < 0xFF {
|
||||
bytes.push(last + 1);
|
||||
result = Some(String::from_utf8_lossy(&bytes).into_owned());
|
||||
break;
|
||||
}
|
||||
}
|
||||
result
|
||||
};
|
||||
let keys = match &upper {
|
||||
Some(end) => {
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT key FROM kv WHERE key >= ?1 AND key < ?2 ORDER BY key",
|
||||
)?;
|
||||
stmt.query_map(params![prefix, end], |row| row.get(0))?
|
||||
.collect::<Result<Vec<String>, _>>()?
|
||||
}
|
||||
None => {
|
||||
let mut stmt =
|
||||
conn.prepare_cached("SELECT key FROM kv WHERE key >= ?1 ORDER BY key")?;
|
||||
stmt.query_map(params![prefix], |row| row.get(0))?
|
||||
.collect::<Result<Vec<String>, _>>()?
|
||||
}
|
||||
};
|
||||
Ok(keys)
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn put(
|
||||
&self,
|
||||
key: String,
|
||||
volumes: Vec<String>,
|
||||
size: Option<i64>,
|
||||
) -> Result<(), AppError> {
|
||||
let conn = self.conn.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
conn.prepare_cached(
|
||||
"INSERT INTO kv (key, volumes, size) VALUES (?1, ?2, ?3)
|
||||
ON CONFLICT(key) DO UPDATE SET volumes = ?2, size = ?3",
|
||||
)?
|
||||
.execute(params![key, encode_volumes(&volumes), size])?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn delete(&self, key: String) -> Result<(), AppError> {
|
||||
let conn = self.conn.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
conn.prepare_cached("DELETE FROM kv WHERE key = ?1")?
|
||||
.execute(params![key])?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn bulk_put(
|
||||
&self,
|
||||
records: Vec<(String, Vec<String>, Option<i64>)>,
|
||||
) -> Result<(), AppError> {
|
||||
let conn = self.conn.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
conn.execute_batch("BEGIN")?;
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"INSERT INTO kv (key, volumes, size) VALUES (?1, ?2, ?3)
|
||||
ON CONFLICT(key) DO UPDATE SET volumes = ?2, size = ?3",
|
||||
)?;
|
||||
for (key, volumes, size) in &records {
|
||||
stmt.execute(params![key, encode_volumes(volumes), size])?;
|
||||
}
|
||||
drop(stmt);
|
||||
conn.execute_batch("COMMIT")?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn all_records_sync(&self) -> Result<Vec<Record>, AppError> {
|
||||
let conn = self.conn.lock().unwrap();
|
||||
let mut stmt = conn.prepare_cached("SELECT key, volumes, size FROM kv")?;
|
||||
let records = stmt
|
||||
.query_map([], |row| {
|
||||
let vj: String = row.get(1)?;
|
||||
Ok(Record {
|
||||
key: row.get(0)?,
|
||||
volumes: parse_volumes(&vj),
|
||||
size: row.get(2)?,
|
||||
})
|
||||
})?
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Ok(records)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
146
src/error.rs
146
src/error.rs
|
|
@ -1,70 +1,76 @@
|
|||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
|
||||
/// Errors from individual volume HTTP requests — used for logging, not HTTP responses.
|
||||
#[derive(Debug)]
|
||||
pub enum VolumeError {
|
||||
Request { url: String, source: reqwest::Error },
|
||||
BadStatus { url: String, status: reqwest::StatusCode },
|
||||
}
|
||||
|
||||
impl std::fmt::Display for VolumeError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
VolumeError::Request { url, source } => {
|
||||
write!(f, "volume request to {url} failed: {source}")
|
||||
}
|
||||
VolumeError::BadStatus { url, status } => {
|
||||
write!(f, "volume {url} returned status {status}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Application-level errors that map to HTTP responses.
|
||||
#[derive(Debug)]
|
||||
pub enum AppError {
|
||||
NotFound,
|
||||
CorruptRecord { key: String },
|
||||
Db(rusqlite::Error),
|
||||
InsufficientVolumes { need: usize, have: usize },
|
||||
PartialWrite,
|
||||
}
|
||||
|
||||
impl From<rusqlite::Error> for AppError {
|
||||
fn from(e: rusqlite::Error) -> Self {
|
||||
match e {
|
||||
rusqlite::Error::QueryReturnedNoRows => AppError::NotFound,
|
||||
other => AppError::Db(other),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for AppError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
AppError::NotFound => write!(f, "not found"),
|
||||
AppError::CorruptRecord { key } => {
|
||||
write!(f, "corrupt record for key {key}: no volumes")
|
||||
}
|
||||
AppError::Db(e) => write!(f, "database error: {e}"),
|
||||
AppError::InsufficientVolumes { need, have } => {
|
||||
write!(f, "need {need} volumes but only {have} available")
|
||||
}
|
||||
AppError::PartialWrite => write!(f, "not all volume writes succeeded"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoResponse for AppError {
|
||||
fn into_response(self) -> Response {
|
||||
let status = match &self {
|
||||
AppError::NotFound => StatusCode::NOT_FOUND,
|
||||
AppError::CorruptRecord { .. } => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
AppError::Db(_) => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
AppError::InsufficientVolumes { .. } => StatusCode::SERVICE_UNAVAILABLE,
|
||||
AppError::PartialWrite => StatusCode::BAD_GATEWAY,
|
||||
};
|
||||
(status, self.to_string()).into_response()
|
||||
}
|
||||
}
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
|
||||
/// Errors from individual volume HTTP requests — used for logging, not HTTP responses.
|
||||
#[derive(Debug)]
|
||||
pub enum VolumeError {
|
||||
Request {
|
||||
url: String,
|
||||
source: reqwest::Error,
|
||||
},
|
||||
BadStatus {
|
||||
url: String,
|
||||
status: reqwest::StatusCode,
|
||||
},
|
||||
}
|
||||
|
||||
impl std::fmt::Display for VolumeError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
VolumeError::Request { url, source } => {
|
||||
write!(f, "volume request to {url} failed: {source}")
|
||||
}
|
||||
VolumeError::BadStatus { url, status } => {
|
||||
write!(f, "volume {url} returned status {status}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Application-level errors that map to HTTP responses.
|
||||
#[derive(Debug)]
|
||||
pub enum AppError {
|
||||
NotFound,
|
||||
CorruptRecord { key: String },
|
||||
Db(rusqlite::Error),
|
||||
InsufficientVolumes { need: usize, have: usize },
|
||||
PartialWrite,
|
||||
}
|
||||
|
||||
impl From<rusqlite::Error> for AppError {
|
||||
fn from(e: rusqlite::Error) -> Self {
|
||||
match e {
|
||||
rusqlite::Error::QueryReturnedNoRows => AppError::NotFound,
|
||||
other => AppError::Db(other),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for AppError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
AppError::NotFound => write!(f, "not found"),
|
||||
AppError::CorruptRecord { key } => {
|
||||
write!(f, "corrupt record for key {key}: no volumes")
|
||||
}
|
||||
AppError::Db(e) => write!(f, "database error: {e}"),
|
||||
AppError::InsufficientVolumes { need, have } => {
|
||||
write!(f, "need {need} volumes but only {have} available")
|
||||
}
|
||||
AppError::PartialWrite => write!(f, "not all volume writes succeeded"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoResponse for AppError {
|
||||
fn into_response(self) -> Response {
|
||||
let status = match &self {
|
||||
AppError::NotFound => StatusCode::NOT_FOUND,
|
||||
AppError::CorruptRecord { .. } => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
AppError::Db(_) => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
AppError::InsufficientVolumes { .. } => StatusCode::SERVICE_UNAVAILABLE,
|
||||
AppError::PartialWrite => StatusCode::BAD_GATEWAY,
|
||||
};
|
||||
(status, self.to_string()).into_response()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
166
src/hasher.rs
166
src/hasher.rs
|
|
@ -1,81 +1,85 @@
|
|||
use sha2::{Digest, Sha256};
|
||||
|
||||
/// Pick `count` volumes for a key by hashing key+volume, sorting by score.
|
||||
/// Same idea as minikeyvalue's key2volume — stable in volume name, not position.
|
||||
pub fn volumes_for_key(key: &str, volumes: &[String], count: usize) -> Vec<String> {
|
||||
let mut scored: Vec<(u64, &String)> = volumes
|
||||
.iter()
|
||||
.map(|v| {
|
||||
let hash = Sha256::digest(format!("{key}:{v}").as_bytes());
|
||||
let score = u64::from_be_bytes(hash[..8].try_into().unwrap());
|
||||
(score, v)
|
||||
})
|
||||
.collect();
|
||||
scored.sort_by_key(|(score, _)| *score);
|
||||
scored.into_iter().take(count).map(|(_, v)| v.clone()).collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_deterministic() {
|
||||
let volumes: Vec<String> = (1..=3).map(|i| format!("http://vol{i}")).collect();
|
||||
let a = volumes_for_key("my-key", &volumes, 2);
|
||||
let b = volumes_for_key("my-key", &volumes, 2);
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_count_capped() {
|
||||
let volumes: Vec<String> = (1..=2).map(|i| format!("http://vol{i}")).collect();
|
||||
let selected = volumes_for_key("key", &volumes, 5);
|
||||
assert_eq!(selected.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_even_distribution() {
|
||||
let volumes: Vec<String> = (1..=3).map(|i| format!("http://vol{i}")).collect();
|
||||
let mut counts = std::collections::HashMap::new();
|
||||
for i in 0..3000 {
|
||||
let key = format!("key-{i}");
|
||||
let primary = &volumes_for_key(&key, &volumes, 1)[0];
|
||||
*counts.entry(primary.clone()).or_insert(0u32) += 1;
|
||||
}
|
||||
for (vol, count) in &counts {
|
||||
assert!(
|
||||
*count > 700 && *count < 1300,
|
||||
"volume {vol} got {count} keys, expected ~1000"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stability_on_add() {
|
||||
let volumes: Vec<String> = (1..=3).map(|i| format!("http://vol{i}")).collect();
|
||||
let mut volumes4 = volumes.clone();
|
||||
volumes4.push("http://vol4".into());
|
||||
|
||||
let total = 10000;
|
||||
let mut moved = 0;
|
||||
for i in 0..total {
|
||||
let key = format!("key-{i}");
|
||||
let before = &volumes_for_key(&key, &volumes, 1)[0];
|
||||
let after = &volumes_for_key(&key, &volumes4, 1)[0];
|
||||
if before != after {
|
||||
moved += 1;
|
||||
}
|
||||
}
|
||||
let pct = moved as f64 / total as f64 * 100.0;
|
||||
assert!(
|
||||
pct > 15.0 && pct < 40.0,
|
||||
"expected ~25% of keys to move, got {pct:.1}%"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty() {
|
||||
assert_eq!(volumes_for_key("key", &[], 1), Vec::<String>::new());
|
||||
}
|
||||
}
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
/// Pick `count` volumes for a key by hashing key+volume, sorting by score.
|
||||
/// Same idea as minikeyvalue's key2volume — stable in volume name, not position.
|
||||
pub fn volumes_for_key(key: &str, volumes: &[String], count: usize) -> Vec<String> {
|
||||
let mut scored: Vec<(u64, &String)> = volumes
|
||||
.iter()
|
||||
.map(|v| {
|
||||
let hash = Sha256::digest(format!("{key}:{v}").as_bytes());
|
||||
let score = u64::from_be_bytes(hash[..8].try_into().unwrap());
|
||||
(score, v)
|
||||
})
|
||||
.collect();
|
||||
scored.sort_by_key(|(score, _)| *score);
|
||||
scored
|
||||
.into_iter()
|
||||
.take(count)
|
||||
.map(|(_, v)| v.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `http://vol1` through `http://vol{n}`.
    fn numbered_volumes(n: usize) -> Vec<String> {
        (1..=n).map(|i| format!("http://vol{i}")).collect()
    }

    /// The same inputs always give the same placement.
    #[test]
    fn test_deterministic() {
        let volumes = numbered_volumes(3);
        let a = volumes_for_key("my-key", &volumes, 2);
        let b = volumes_for_key("my-key", &volumes, 2);
        assert_eq!(a, b);
    }

    /// Asking for more volumes than exist returns all of them.
    #[test]
    fn test_count_capped() {
        let volumes = numbered_volumes(2);
        let selected = volumes_for_key("key", &volumes, 5);
        assert_eq!(selected.len(), 2);
    }

    /// Primary placement is spread roughly evenly over the volumes.
    #[test]
    fn test_even_distribution() {
        let volumes = numbered_volumes(3);
        let mut counts = std::collections::HashMap::new();
        for i in 0..3000 {
            let key = format!("key-{i}");
            let primary = volumes_for_key(&key, &volumes, 1)
                .into_iter()
                .next()
                .expect("one volume requested from a non-empty list");
            *counts.entry(primary).or_insert(0u32) += 1;
        }
        // Walk the configured volumes rather than the map, so a volume that
        // never got picked is reported instead of silently skipped.
        for vol in &volumes {
            let count = counts.get(vol).copied().unwrap_or(0);
            assert!(
                count > 700 && count < 1300,
                "volume {vol} got {count} keys, expected ~1000"
            );
        }
    }

    /// Adding a fourth volume moves roughly a quarter of the keys.
    #[test]
    fn test_stability_on_add() {
        let volumes = numbered_volumes(3);
        let mut volumes4 = volumes.clone();
        volumes4.push("http://vol4".into());

        let total = 10000;
        let mut moved = 0;
        for i in 0..total {
            let key = format!("key-{i}");
            let before = &volumes_for_key(&key, &volumes, 1)[0];
            let after = &volumes_for_key(&key, &volumes4, 1)[0];
            if before != after {
                moved += 1;
            }
        }
        let pct = moved as f64 / total as f64 * 100.0;
        assert!(
            pct > 15.0 && pct < 40.0,
            "expected ~25% of keys to move, got {pct:.1}%"
        );
    }

    /// Placement depends on volume names only, not on their order in the list.
    #[test]
    fn test_order_independent() {
        let volumes = numbered_volumes(4);
        let mut reversed = volumes.clone();
        reversed.reverse();
        for i in 0..100 {
            let key = format!("key-{i}");
            assert_eq!(
                volumes_for_key(&key, &volumes, 2),
                volumes_for_key(&key, &reversed, 2)
            );
        }
    }

    /// No volumes in, no volumes out.
    #[test]
    fn test_empty() {
        assert_eq!(volumes_for_key("key", &[], 1), Vec::<String>::new());
    }
}
|
||||
|
|
|
|||
106
src/lib.rs
106
src/lib.rs
|
|
@ -1,53 +1,53 @@
|
|||
pub mod db;
|
||||
pub mod error;
|
||||
pub mod hasher;
|
||||
pub mod server;
|
||||
pub mod rebalance;
|
||||
pub mod rebuild;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
const DEFAULT_BODY_LIMIT: usize = 256 * 1024 * 1024; // 256 MB
|
||||
|
||||
pub struct Args {
|
||||
pub db_path: String,
|
||||
pub volumes: Vec<String>,
|
||||
pub replicas: usize,
|
||||
}
|
||||
|
||||
pub fn build_app(args: &Args) -> axum::Router {
|
||||
if args.replicas > args.volumes.len() {
|
||||
eprintln!(
|
||||
"Error: replication factor ({}) exceeds number of volumes ({})",
|
||||
args.replicas,
|
||||
args.volumes.len()
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
if let Some(parent) = std::path::Path::new(&args.db_path).parent() {
|
||||
std::fs::create_dir_all(parent).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to create database directory: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
let state = server::AppState {
|
||||
db: db::Db::new(&args.db_path),
|
||||
volumes: Arc::new(args.volumes.clone()),
|
||||
replicas: args.replicas,
|
||||
http: reqwest::Client::new(),
|
||||
};
|
||||
|
||||
axum::Router::new()
|
||||
.route("/", axum::routing::get(server::list_keys))
|
||||
.route(
|
||||
"/{*key}",
|
||||
axum::routing::get(server::get_key)
|
||||
.put(server::put_key)
|
||||
.delete(server::delete_key)
|
||||
.head(server::head_key),
|
||||
)
|
||||
.layer(axum::extract::DefaultBodyLimit::max(DEFAULT_BODY_LIMIT))
|
||||
.with_state(state)
|
||||
}
|
||||
pub mod db;
|
||||
pub mod error;
|
||||
pub mod hasher;
|
||||
pub mod rebalance;
|
||||
pub mod rebuild;
|
||||
pub mod server;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
const DEFAULT_BODY_LIMIT: usize = 256 * 1024 * 1024; // 256 MB
|
||||
|
||||
/// Settings needed to build the application.
#[derive(Debug, Clone)]
pub struct Args {
    /// Location of the SQLite index database; its parent directory is created
    /// on startup if missing.
    pub db_path: String,
    /// The volumes that keys are placed on.
    pub volumes: Vec<String>,
    /// Replication factor; must not exceed the number of volumes.
    pub replicas: usize,
}
|
||||
|
||||
pub fn build_app(args: &Args) -> axum::Router {
|
||||
if args.replicas > args.volumes.len() {
|
||||
eprintln!(
|
||||
"Error: replication factor ({}) exceeds number of volumes ({})",
|
||||
args.replicas,
|
||||
args.volumes.len()
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
if let Some(parent) = std::path::Path::new(&args.db_path).parent() {
|
||||
std::fs::create_dir_all(parent).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to create database directory: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
let state = server::AppState {
|
||||
db: db::Db::new(&args.db_path),
|
||||
volumes: Arc::new(args.volumes.clone()),
|
||||
replicas: args.replicas,
|
||||
http: reqwest::Client::new(),
|
||||
};
|
||||
|
||||
axum::Router::new()
|
||||
.route("/", axum::routing::get(server::list_keys))
|
||||
.route(
|
||||
"/{*key}",
|
||||
axum::routing::get(server::get_key)
|
||||
.put(server::put_key)
|
||||
.delete(server::delete_key)
|
||||
.head(server::head_key),
|
||||
)
|
||||
.layer(axum::extract::DefaultBodyLimit::max(DEFAULT_BODY_LIMIT))
|
||||
.with_state(state)
|
||||
}
|
||||
|
|
|
|||
13
src/main.rs
13
src/main.rs
|
|
@ -6,7 +6,13 @@ struct Cli {
|
|||
#[arg(short, long, env = "MKV_DB", default_value = "/tmp/mkv/index.db")]
|
||||
db: String,
|
||||
|
||||
#[arg(short, long, env = "MKV_VOLUMES", required = true, value_delimiter = ',')]
|
||||
#[arg(
|
||||
short,
|
||||
long,
|
||||
env = "MKV_VOLUMES",
|
||||
required = true,
|
||||
value_delimiter = ','
|
||||
)]
|
||||
volumes: Vec<String>,
|
||||
|
||||
#[arg(short, long, env = "MKV_REPLICAS", default_value_t = 2)]
|
||||
|
|
@ -36,9 +42,8 @@ async fn shutdown_signal() {
|
|||
let ctrl_c = tokio::signal::ctrl_c();
|
||||
#[cfg(unix)]
|
||||
{
|
||||
let mut sigterm =
|
||||
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
|
||||
.expect("failed to install SIGTERM handler");
|
||||
let mut sigterm = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
|
||||
.expect("failed to install SIGTERM handler");
|
||||
tokio::select! {
|
||||
_ = ctrl_c => tracing::info!("Received SIGINT, shutting down..."),
|
||||
_ = sigterm.recv() => tracing::info!("Received SIGTERM, shutting down..."),
|
||||
|
|
|
|||
353
src/rebalance.rs
353
src/rebalance.rs
|
|
@ -1,159 +1,194 @@
|
|||
use crate::db;
|
||||
use crate::Args;
|
||||
|
||||
pub struct KeyMove {
|
||||
pub key: String,
|
||||
pub size: Option<i64>,
|
||||
pub current_volumes: Vec<String>,
|
||||
pub desired_volumes: Vec<String>,
|
||||
pub to_add: Vec<String>,
|
||||
pub to_remove: Vec<String>,
|
||||
}
|
||||
|
||||
pub fn plan_rebalance(records: &[db::Record], volumes: &[String], replication: usize) -> Vec<KeyMove> {
|
||||
let mut moves = Vec::new();
|
||||
for record in records {
|
||||
let desired = crate::hasher::volumes_for_key(&record.key, volumes, replication);
|
||||
let to_add: Vec<String> = desired.iter().filter(|v| !record.volumes.contains(v)).cloned().collect();
|
||||
let to_remove: Vec<String> = record.volumes.iter().filter(|v| !desired.contains(v)).cloned().collect();
|
||||
|
||||
if !to_add.is_empty() || !to_remove.is_empty() {
|
||||
moves.push(KeyMove {
|
||||
key: record.key.clone(),
|
||||
size: record.size,
|
||||
current_volumes: record.volumes.clone(),
|
||||
desired_volumes: desired,
|
||||
to_add,
|
||||
to_remove,
|
||||
});
|
||||
}
|
||||
}
|
||||
moves
|
||||
}
|
||||
|
||||
pub async fn run(args: &Args, dry_run: bool) {
|
||||
let db = db::Db::new(&args.db_path);
|
||||
let records = db.all_records_sync().expect("failed to read records");
|
||||
let moves = plan_rebalance(&records, &args.volumes, args.replicas);
|
||||
|
||||
if moves.is_empty() {
|
||||
eprintln!("Nothing to rebalance — all keys are already correctly placed.");
|
||||
return;
|
||||
}
|
||||
|
||||
let total_bytes: i64 = moves.iter().filter_map(|m| m.size).sum();
|
||||
eprintln!("{} keys to move ({} bytes)", moves.len(), total_bytes);
|
||||
|
||||
if dry_run {
|
||||
for m in &moves {
|
||||
eprintln!(" {} : add {:?}, remove {:?}", m.key, m.to_add, m.to_remove);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let mut moved = 0;
|
||||
let mut errors = 0;
|
||||
|
||||
for m in &moves {
|
||||
let Some(src) = m.current_volumes.first() else {
|
||||
eprintln!(" SKIP {} : no source volume", m.key);
|
||||
errors += 1;
|
||||
continue;
|
||||
};
|
||||
let mut copy_ok = true;
|
||||
|
||||
for dst in &m.to_add {
|
||||
let src_url = format!("{src}/{}", m.key);
|
||||
match client.get(&src_url).send().await {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let data = match resp.bytes().await {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR read body {} from {}: {}", m.key, src, e);
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
break;
|
||||
}
|
||||
};
|
||||
let dst_url = format!("{dst}/{}", m.key);
|
||||
match client.put(&dst_url).body(data).send().await {
|
||||
Ok(resp) if !resp.status().is_success() => {
|
||||
eprintln!(" ERROR copy {} to {}: status {}", m.key, dst, resp.status());
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR copy {} to {}: {}", m.key, dst, e);
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
}
|
||||
Ok(_) => {}
|
||||
}
|
||||
}
|
||||
Ok(resp) => {
|
||||
eprintln!(" ERROR read {} from {}: status {}", m.key, src, resp.status());
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR read {} from {}: {}", m.key, src, e);
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !copy_ok { continue; }
|
||||
|
||||
db.put(m.key.clone(), m.desired_volumes.clone(), m.size).await.expect("failed to update index");
|
||||
|
||||
for old in &m.to_remove {
|
||||
let url = format!("{old}/{}", m.key);
|
||||
if let Err(e) = client.delete(&url).send().await {
|
||||
eprintln!(" WARN delete {} from {}: {}", m.key, old, e);
|
||||
}
|
||||
}
|
||||
moved += 1;
|
||||
}
|
||||
|
||||
eprintln!("Rebalanced {moved}/{} keys ({errors} errors)", moves.len());
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `http://vol1` through `http://vol{n}`.
    fn numbered_volumes(n: usize) -> Vec<String> {
        (1..=n).map(|i| format!("http://vol{i}")).collect()
    }

    /// Builds `total` records named `key-{i}`, each placed where the hasher
    /// puts it for `volumes` with a replication factor of 2.
    fn placed_records(total: usize, volumes: &[String]) -> Vec<db::Record> {
        let mut records = Vec::with_capacity(total);
        for i in 0..total {
            let key = format!("key-{i}");
            let vols = crate::hasher::volumes_for_key(&key, volumes, 2);
            records.push(db::Record {
                key,
                volumes: vols,
                size: Some(100),
            });
        }
        records
    }

    /// Records that already sit on their desired volumes produce no moves.
    #[test]
    fn test_plan_rebalance_no_change() {
        let volumes = numbered_volumes(3);
        let records = placed_records(100, &volumes);

        let moves = plan_rebalance(&records, &volumes, 2);
        assert!(moves.is_empty());
    }

    /// Adding a volume moves some keys, but fewer than 800 of 1000.
    #[test]
    fn test_plan_rebalance_new_volume() {
        let volumes3 = numbered_volumes(3);
        let records = placed_records(1000, &volumes3);

        let volumes4 = numbered_volumes(4);
        let moves = plan_rebalance(&records, &volumes4, 2);

        assert!(!moves.is_empty());
        assert!(moves.len() < 800, "too many moves: {}", moves.len());
    }
}
|
||||
use crate::Args;
|
||||
use crate::db;
|
||||
|
||||
/// Placement change planned for a single key by [`plan_rebalance`].
#[derive(Debug, Clone)]
pub struct KeyMove {
    /// Key the move applies to.
    pub key: String,
    /// Size copied from the index record, if one was recorded.
    pub size: Option<i64>,
    /// Volumes the index currently lists for the key.
    pub current_volumes: Vec<String>,
    /// Volumes the hasher selects for the key under the target configuration.
    pub desired_volumes: Vec<String>,
    /// Desired volumes the key is not stored on yet.
    pub to_add: Vec<String>,
    /// Current volumes that are no longer desired.
    pub to_remove: Vec<String>,
}
|
||||
|
||||
pub fn plan_rebalance(
|
||||
records: &[db::Record],
|
||||
volumes: &[String],
|
||||
replication: usize,
|
||||
) -> Vec<KeyMove> {
|
||||
let mut moves = Vec::new();
|
||||
for record in records {
|
||||
let desired = crate::hasher::volumes_for_key(&record.key, volumes, replication);
|
||||
let to_add: Vec<String> = desired
|
||||
.iter()
|
||||
.filter(|v| !record.volumes.contains(v))
|
||||
.cloned()
|
||||
.collect();
|
||||
let to_remove: Vec<String> = record
|
||||
.volumes
|
||||
.iter()
|
||||
.filter(|v| !desired.contains(v))
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
if !to_add.is_empty() || !to_remove.is_empty() {
|
||||
moves.push(KeyMove {
|
||||
key: record.key.clone(),
|
||||
size: record.size,
|
||||
current_volumes: record.volumes.clone(),
|
||||
desired_volumes: desired,
|
||||
to_add,
|
||||
to_remove,
|
||||
});
|
||||
}
|
||||
}
|
||||
moves
|
||||
}
|
||||
|
||||
pub async fn run(args: &Args, dry_run: bool) {
|
||||
let db = db::Db::new(&args.db_path);
|
||||
let records = db.all_records_sync().expect("failed to read records");
|
||||
let moves = plan_rebalance(&records, &args.volumes, args.replicas);
|
||||
|
||||
if moves.is_empty() {
|
||||
eprintln!("Nothing to rebalance — all keys are already correctly placed.");
|
||||
return;
|
||||
}
|
||||
|
||||
let total_bytes: i64 = moves.iter().filter_map(|m| m.size).sum();
|
||||
eprintln!("{} keys to move ({} bytes)", moves.len(), total_bytes);
|
||||
|
||||
if dry_run {
|
||||
for m in &moves {
|
||||
eprintln!(" {} : add {:?}, remove {:?}", m.key, m.to_add, m.to_remove);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let mut moved = 0;
|
||||
let mut errors = 0;
|
||||
|
||||
for m in &moves {
|
||||
let Some(src) = m.current_volumes.first() else {
|
||||
eprintln!(" SKIP {} : no source volume", m.key);
|
||||
errors += 1;
|
||||
continue;
|
||||
};
|
||||
let mut copy_ok = true;
|
||||
|
||||
for dst in &m.to_add {
|
||||
let src_url = format!("{src}/{}", m.key);
|
||||
match client.get(&src_url).send().await {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let data = match resp.bytes().await {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR read body {} from {}: {}", m.key, src, e);
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
break;
|
||||
}
|
||||
};
|
||||
let dst_url = format!("{dst}/{}", m.key);
|
||||
match client.put(&dst_url).body(data).send().await {
|
||||
Ok(resp) if !resp.status().is_success() => {
|
||||
eprintln!(
|
||||
" ERROR copy {} to {}: status {}",
|
||||
m.key,
|
||||
dst,
|
||||
resp.status()
|
||||
);
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR copy {} to {}: {}", m.key, dst, e);
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
}
|
||||
Ok(_) => {}
|
||||
}
|
||||
}
|
||||
Ok(resp) => {
|
||||
eprintln!(
|
||||
" ERROR read {} from {}: status {}",
|
||||
m.key,
|
||||
src,
|
||||
resp.status()
|
||||
);
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR read {} from {}: {}", m.key, src, e);
|
||||
copy_ok = false;
|
||||
errors += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !copy_ok {
|
||||
continue;
|
||||
}
|
||||
|
||||
db.put(m.key.clone(), m.desired_volumes.clone(), m.size)
|
||||
.await
|
||||
.expect("failed to update index");
|
||||
|
||||
for old in &m.to_remove {
|
||||
let url = format!("{old}/{}", m.key);
|
||||
if let Err(e) = client.delete(&url).send().await {
|
||||
eprintln!(" WARN delete {} from {}: {}", m.key, old, e);
|
||||
}
|
||||
}
|
||||
moved += 1;
|
||||
}
|
||||
|
||||
eprintln!("Rebalanced {moved}/{} keys ({errors} errors)", moves.len());
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `count` volume URLs: `http://vol1`, `http://vol2`, ...
    fn volume_urls(count: usize) -> Vec<String> {
        (1..=count).map(|n| format!("http://vol{n}")).collect()
    }

    /// Builds `count` records (`key-0`, `key-1`, ...) placed exactly where the
    /// hasher puts them for `ring` with a replication factor of 2.
    fn placed_records(count: usize, ring: &[String]) -> Vec<db::Record> {
        (0..count)
            .map(|n| {
                let key = format!("key-{n}");
                let volumes = crate::hasher::volumes_for_key(&key, ring, 2);
                db::Record {
                    key,
                    volumes,
                    size: Some(100),
                }
            })
            .collect()
    }

    /// Records that already sit on their hashed volumes need no moves.
    #[test]
    fn test_plan_rebalance_no_change() {
        let ring = volume_urls(3);
        let records = placed_records(100, &ring);

        assert!(plan_rebalance(&records, &ring, 2).is_empty());
    }

    /// Adding a fourth volume moves some keys, but fewer than 800 of 1000.
    #[test]
    fn test_plan_rebalance_new_volume() {
        let records = placed_records(1000, &volume_urls(3));
        let grown = volume_urls(4);

        let moves = plan_rebalance(&records, &grown, 2);

        assert!(!moves.is_empty());
        assert!(moves.len() < 800, "too many moves: {}", moves.len());
    }
}
|
||||
|
|
|
|||
158
src/rebuild.rs
158
src/rebuild.rs
|
|
@ -1,72 +1,86 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use crate::db;
|
||||
use crate::Args;
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct NginxEntry {
|
||||
name: String,
|
||||
#[serde(rename = "type")]
|
||||
entry_type: String,
|
||||
#[serde(default)]
|
||||
size: Option<i64>,
|
||||
}
|
||||
|
||||
async fn list_volume_keys(volume_url: &str) -> Result<Vec<(String, i64)>, String> {
|
||||
let http = reqwest::Client::new();
|
||||
let mut keys = Vec::new();
|
||||
let mut dirs = vec![String::new()];
|
||||
|
||||
while let Some(prefix) = dirs.pop() {
|
||||
let url = format!("{volume_url}/{prefix}");
|
||||
let resp = http.get(&url).send().await.map_err(|e| format!("GET {url}: {e}"))?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(format!("GET {url}: status {}", resp.status()));
|
||||
}
|
||||
let entries: Vec<NginxEntry> = resp.json().await.map_err(|e| format!("parse {url}: {e}"))?;
|
||||
for entry in entries {
|
||||
let full_path = if prefix.is_empty() { entry.name.clone() } else { format!("{prefix}{}", entry.name) };
|
||||
match entry.entry_type.as_str() {
|
||||
"directory" => dirs.push(format!("{full_path}/")),
|
||||
"file" => keys.push((full_path, entry.size.unwrap_or(0))),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
pub async fn run(args: &Args) {
|
||||
let db_path = &args.db_path;
|
||||
|
||||
if let Some(parent) = std::path::Path::new(db_path).parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
|
||||
let _ = std::fs::remove_file(db_path);
|
||||
let _ = std::fs::remove_file(format!("{db_path}-wal"));
|
||||
let _ = std::fs::remove_file(format!("{db_path}-shm"));
|
||||
|
||||
let db = db::Db::new(db_path);
|
||||
let mut index: HashMap<String, (Vec<String>, i64)> = HashMap::new();
|
||||
|
||||
for vol_url in &args.volumes {
|
||||
eprintln!("Scanning {vol_url}...");
|
||||
match list_volume_keys(vol_url).await {
|
||||
Ok(keys) => {
|
||||
eprintln!(" Found {} keys", keys.len());
|
||||
for (key, size) in keys {
|
||||
let entry = index.entry(key).or_insert_with(|| (Vec::new(), size));
|
||||
entry.0.push(vol_url.clone());
|
||||
if size > entry.1 { entry.1 = size; }
|
||||
}
|
||||
}
|
||||
Err(e) => eprintln!(" Error scanning {vol_url}: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
let records: Vec<_> = index.into_iter().map(|(k, (v, s))| (k, v, Some(s))).collect();
|
||||
let count = records.len();
|
||||
db.bulk_put(records).await.expect("bulk_put failed");
|
||||
eprintln!("Rebuilt index with {count} keys");
|
||||
}
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::Args;
|
||||
use crate::db;
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct NginxEntry {
|
||||
name: String,
|
||||
#[serde(rename = "type")]
|
||||
entry_type: String,
|
||||
#[serde(default)]
|
||||
size: Option<i64>,
|
||||
}
|
||||
|
||||
async fn list_volume_keys(volume_url: &str) -> Result<Vec<(String, i64)>, String> {
|
||||
let http = reqwest::Client::new();
|
||||
let mut keys = Vec::new();
|
||||
let mut dirs = vec![String::new()];
|
||||
|
||||
while let Some(prefix) = dirs.pop() {
|
||||
let url = format!("{volume_url}/{prefix}");
|
||||
let resp = http
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("GET {url}: {e}"))?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(format!("GET {url}: status {}", resp.status()));
|
||||
}
|
||||
let entries: Vec<NginxEntry> =
|
||||
resp.json().await.map_err(|e| format!("parse {url}: {e}"))?;
|
||||
for entry in entries {
|
||||
let full_path = if prefix.is_empty() {
|
||||
entry.name.clone()
|
||||
} else {
|
||||
format!("{prefix}{}", entry.name)
|
||||
};
|
||||
match entry.entry_type.as_str() {
|
||||
"directory" => dirs.push(format!("{full_path}/")),
|
||||
"file" => keys.push((full_path, entry.size.unwrap_or(0))),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
pub async fn run(args: &Args) {
|
||||
let db_path = &args.db_path;
|
||||
|
||||
if let Some(parent) = std::path::Path::new(db_path).parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
|
||||
let _ = std::fs::remove_file(db_path);
|
||||
let _ = std::fs::remove_file(format!("{db_path}-wal"));
|
||||
let _ = std::fs::remove_file(format!("{db_path}-shm"));
|
||||
|
||||
let db = db::Db::new(db_path);
|
||||
let mut index: HashMap<String, (Vec<String>, i64)> = HashMap::new();
|
||||
|
||||
for vol_url in &args.volumes {
|
||||
eprintln!("Scanning {vol_url}...");
|
||||
match list_volume_keys(vol_url).await {
|
||||
Ok(keys) => {
|
||||
eprintln!(" Found {} keys", keys.len());
|
||||
for (key, size) in keys {
|
||||
let entry = index.entry(key).or_insert_with(|| (Vec::new(), size));
|
||||
entry.0.push(vol_url.clone());
|
||||
if size > entry.1 {
|
||||
entry.1 = size;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => eprintln!(" Error scanning {vol_url}: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
let records: Vec<_> = index
|
||||
.into_iter()
|
||||
.map(|(k, (v, s))| (k, v, Some(s)))
|
||||
.collect();
|
||||
let count = records.len();
|
||||
db.bulk_put(records).await.expect("bulk_put failed");
|
||||
eprintln!("Rebuilt index with {count} keys");
|
||||
}
|
||||
|
|
|
|||
372
src/server.rs
372
src/server.rs
|
|
@ -1,173 +1,199 @@
|
|||
use axum::body::Bytes;
|
||||
use axum::extract::{Path, Query, State};
|
||||
use axum::http::{HeaderMap, StatusCode};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::db;
|
||||
use crate::error::{AppError, VolumeError};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AppState {
|
||||
pub db: db::Db,
|
||||
pub volumes: Arc<Vec<String>>,
|
||||
pub replicas: usize,
|
||||
pub http: reqwest::Client,
|
||||
}
|
||||
|
||||
pub async fn get_key(
|
||||
State(state): State<AppState>,
|
||||
Path(key): Path<String>,
|
||||
) -> Result<Response, AppError> {
|
||||
let record = state.db.get(&key).await?;
|
||||
let vol = record
|
||||
.volumes
|
||||
.first()
|
||||
.ok_or_else(|| AppError::CorruptRecord { key: key.clone() })?;
|
||||
let location = format!("{vol}/{key}");
|
||||
Ok((StatusCode::FOUND, [(axum::http::header::LOCATION, location)]).into_response())
|
||||
}
|
||||
|
||||
pub async fn put_key(
|
||||
State(state): State<AppState>,
|
||||
Path(key): Path<String>,
|
||||
body: Bytes,
|
||||
) -> Result<Response, AppError> {
|
||||
let target_volumes = crate::hasher::volumes_for_key(&key, &state.volumes, state.replicas);
|
||||
if target_volumes.len() < state.replicas {
|
||||
return Err(AppError::InsufficientVolumes {
|
||||
need: state.replicas,
|
||||
have: target_volumes.len(),
|
||||
});
|
||||
}
|
||||
|
||||
// Fan out PUTs to all target volumes concurrently
|
||||
let mut handles = Vec::with_capacity(target_volumes.len());
|
||||
for vol in &target_volumes {
|
||||
let url = format!("{vol}/{key}");
|
||||
let handle = tokio::spawn({
|
||||
let client = state.http.clone();
|
||||
let data = body.clone();
|
||||
async move {
|
||||
let resp = client.put(&url).body(data).send().await.map_err(|e| {
|
||||
VolumeError::Request { url: url.clone(), source: e }
|
||||
})?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(VolumeError::BadStatus { url, status: resp.status() });
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
let mut failed = false;
|
||||
for handle in handles {
|
||||
match handle.await {
|
||||
Ok(Err(e)) => {
|
||||
tracing::error!("{e}");
|
||||
failed = true;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("volume write task failed: {e}");
|
||||
failed = true;
|
||||
}
|
||||
Ok(Ok(())) => {}
|
||||
}
|
||||
}
|
||||
|
||||
if failed {
|
||||
// Rollback: best-effort delete from volumes
|
||||
for vol in &target_volumes {
|
||||
let _ = state.http.delete(format!("{vol}/{key}")).send().await;
|
||||
}
|
||||
return Err(AppError::PartialWrite);
|
||||
}
|
||||
|
||||
let size = Some(body.len() as i64);
|
||||
if let Err(e) = state.db.put(key.clone(), target_volumes.clone(), size).await {
|
||||
for vol in &target_volumes {
|
||||
let _ = state.http.delete(format!("{vol}/{key}")).send().await;
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
Ok(StatusCode::CREATED.into_response())
|
||||
}
|
||||
|
||||
pub async fn delete_key(
|
||||
State(state): State<AppState>,
|
||||
Path(key): Path<String>,
|
||||
) -> Result<Response, AppError> {
|
||||
let record = state.db.get(&key).await?;
|
||||
|
||||
let mut handles = Vec::new();
|
||||
for vol in &record.volumes {
|
||||
let url = format!("{vol}/{key}");
|
||||
let handle = tokio::spawn({
|
||||
let client = state.http.clone();
|
||||
async move {
|
||||
let resp = client.delete(&url).send().await.map_err(|e| {
|
||||
VolumeError::Request { url: url.clone(), source: e }
|
||||
})?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(VolumeError::BadStatus { url, status: resp.status() });
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
for handle in handles {
|
||||
match handle.await {
|
||||
Ok(Err(e)) => tracing::error!("{e}"),
|
||||
Err(e) => tracing::error!("volume delete task failed: {e}"),
|
||||
Ok(Ok(())) => {}
|
||||
}
|
||||
}
|
||||
|
||||
state.db.delete(key).await?;
|
||||
Ok(StatusCode::NO_CONTENT.into_response())
|
||||
}
|
||||
|
||||
pub async fn head_key(
|
||||
State(state): State<AppState>,
|
||||
Path(key): Path<String>,
|
||||
) -> Result<Response, AppError> {
|
||||
let record = state.db.get(&key).await?;
|
||||
let mut headers = HeaderMap::new();
|
||||
if let Some(size) = record.size {
|
||||
headers.insert(axum::http::header::CONTENT_LENGTH, size.to_string().parse().unwrap());
|
||||
}
|
||||
Ok((StatusCode::OK, headers).into_response())
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
pub struct ListQuery {
|
||||
#[serde(default)]
|
||||
pub prefix: String,
|
||||
}
|
||||
|
||||
pub async fn list_keys(
|
||||
State(state): State<AppState>,
|
||||
Query(query): Query<ListQuery>,
|
||||
) -> Result<Response, AppError> {
|
||||
let keys = state.db.list_keys(&query.prefix).await?;
|
||||
Ok((StatusCode::OK, keys.join("\n")).into_response())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::hasher::volumes_for_key;

    /// With three volumes available, asking for two replicas yields two volumes.
    #[test]
    fn test_volumes_for_key_sufficient() {
        let volumes = vec![
            String::from("http://vol1"),
            String::from("http://vol2"),
            String::from("http://vol3"),
        ];

        assert_eq!(volumes_for_key("test-key", &volumes, 2).len(), 2);
    }

    /// With a single volume, asking for two replicas yields only that one.
    #[test]
    fn test_volumes_for_key_insufficient() {
        let volumes = vec![String::from("http://vol1")];

        assert_eq!(volumes_for_key("test-key", &volumes, 2).len(), 1);
    }
}
|
||||
use axum::body::Bytes;
|
||||
use axum::extract::{Path, Query, State};
|
||||
use axum::http::{HeaderMap, StatusCode};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::db;
|
||||
use crate::error::{AppError, VolumeError};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AppState {
|
||||
pub db: db::Db,
|
||||
pub volumes: Arc<Vec<String>>,
|
||||
pub replicas: usize,
|
||||
pub http: reqwest::Client,
|
||||
}
|
||||
|
||||
pub async fn get_key(
|
||||
State(state): State<AppState>,
|
||||
Path(key): Path<String>,
|
||||
) -> Result<Response, AppError> {
|
||||
let record = state.db.get(&key).await?;
|
||||
let vol = record
|
||||
.volumes
|
||||
.first()
|
||||
.ok_or_else(|| AppError::CorruptRecord { key: key.clone() })?;
|
||||
let location = format!("{vol}/{key}");
|
||||
Ok((
|
||||
StatusCode::FOUND,
|
||||
[(axum::http::header::LOCATION, location)],
|
||||
)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
pub async fn put_key(
|
||||
State(state): State<AppState>,
|
||||
Path(key): Path<String>,
|
||||
body: Bytes,
|
||||
) -> Result<Response, AppError> {
|
||||
let target_volumes = crate::hasher::volumes_for_key(&key, &state.volumes, state.replicas);
|
||||
if target_volumes.len() < state.replicas {
|
||||
return Err(AppError::InsufficientVolumes {
|
||||
need: state.replicas,
|
||||
have: target_volumes.len(),
|
||||
});
|
||||
}
|
||||
|
||||
// Fan out PUTs to all target volumes concurrently
|
||||
let mut handles = Vec::with_capacity(target_volumes.len());
|
||||
for vol in &target_volumes {
|
||||
let url = format!("{vol}/{key}");
|
||||
let handle =
|
||||
tokio::spawn({
|
||||
let client = state.http.clone();
|
||||
let data = body.clone();
|
||||
async move {
|
||||
let resp = client.put(&url).body(data).send().await.map_err(|e| {
|
||||
VolumeError::Request {
|
||||
url: url.clone(),
|
||||
source: e,
|
||||
}
|
||||
})?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(VolumeError::BadStatus {
|
||||
url,
|
||||
status: resp.status(),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
let mut failed = false;
|
||||
for handle in handles {
|
||||
match handle.await {
|
||||
Ok(Err(e)) => {
|
||||
tracing::error!("{e}");
|
||||
failed = true;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("volume write task failed: {e}");
|
||||
failed = true;
|
||||
}
|
||||
Ok(Ok(())) => {}
|
||||
}
|
||||
}
|
||||
|
||||
if failed {
|
||||
// Rollback: best-effort delete from volumes
|
||||
for vol in &target_volumes {
|
||||
let _ = state.http.delete(format!("{vol}/{key}")).send().await;
|
||||
}
|
||||
return Err(AppError::PartialWrite);
|
||||
}
|
||||
|
||||
let size = Some(body.len() as i64);
|
||||
if let Err(e) = state
|
||||
.db
|
||||
.put(key.clone(), target_volumes.clone(), size)
|
||||
.await
|
||||
{
|
||||
for vol in &target_volumes {
|
||||
let _ = state.http.delete(format!("{vol}/{key}")).send().await;
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
Ok(StatusCode::CREATED.into_response())
|
||||
}
|
||||
|
||||
pub async fn delete_key(
|
||||
State(state): State<AppState>,
|
||||
Path(key): Path<String>,
|
||||
) -> Result<Response, AppError> {
|
||||
let record = state.db.get(&key).await?;
|
||||
|
||||
let mut handles = Vec::new();
|
||||
for vol in &record.volumes {
|
||||
let url = format!("{vol}/{key}");
|
||||
let handle = tokio::spawn({
|
||||
let client = state.http.clone();
|
||||
async move {
|
||||
let resp = client
|
||||
.delete(&url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| VolumeError::Request {
|
||||
url: url.clone(),
|
||||
source: e,
|
||||
})?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(VolumeError::BadStatus {
|
||||
url,
|
||||
status: resp.status(),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
for handle in handles {
|
||||
match handle.await {
|
||||
Ok(Err(e)) => tracing::error!("{e}"),
|
||||
Err(e) => tracing::error!("volume delete task failed: {e}"),
|
||||
Ok(Ok(())) => {}
|
||||
}
|
||||
}
|
||||
|
||||
state.db.delete(key).await?;
|
||||
Ok(StatusCode::NO_CONTENT.into_response())
|
||||
}
|
||||
|
||||
pub async fn head_key(
|
||||
State(state): State<AppState>,
|
||||
Path(key): Path<String>,
|
||||
) -> Result<Response, AppError> {
|
||||
let record = state.db.get(&key).await?;
|
||||
let mut headers = HeaderMap::new();
|
||||
if let Some(size) = record.size {
|
||||
headers.insert(
|
||||
axum::http::header::CONTENT_LENGTH,
|
||||
size.to_string().parse().unwrap(),
|
||||
);
|
||||
}
|
||||
Ok((StatusCode::OK, headers).into_response())
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
pub struct ListQuery {
|
||||
#[serde(default)]
|
||||
pub prefix: String,
|
||||
}
|
||||
|
||||
pub async fn list_keys(
|
||||
State(state): State<AppState>,
|
||||
Query(query): Query<ListQuery>,
|
||||
) -> Result<Response, AppError> {
|
||||
let keys = state.db.list_keys(&query.prefix).await?;
|
||||
Ok((StatusCode::OK, keys.join("\n")).into_response())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::hasher::volumes_for_key;

    /// With three volumes available, asking for two replicas yields two volumes.
    #[test]
    fn test_volumes_for_key_sufficient() {
        let volumes = vec![
            String::from("http://vol1"),
            String::from("http://vol2"),
            String::from("http://vol3"),
        ];

        assert_eq!(volumes_for_key("test-key", &volumes, 2).len(), 2);
    }

    /// With a single volume, asking for two replicas yields only that one.
    #[test]
    fn test_volumes_for_key_insufficient() {
        let volumes = vec![String::from("http://vol1")];

        assert_eq!(volumes_for_key("test-key", &volumes, 2).len(), 1);
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue