Initial commit
This commit is contained in:
commit
8d32777f9f
8 changed files with 2790 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
*.db
|
||||
2228
Cargo.lock
generated
Normal file
2228
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
22
Cargo.toml
Normal file
22
Cargo.toml
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
[package]
|
||||
name = "mkv"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
axum = "0.8"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
rusqlite = { version = "0.35", features = ["bundled"] }
|
||||
reqwest = { version = "0.12", features = ["stream"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
toml = "0.8"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = "0.3"
|
||||
tokio-stream = "0.1"
|
||||
sha2 = "0.10"
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
lto = true
|
||||
16
config.toml
Normal file
16
config.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
[server]
|
||||
port = 3000
|
||||
replication_factor = 2
|
||||
virtual_nodes = 100
|
||||
|
||||
[database]
|
||||
path = "/tmp/mkv/index.db"
|
||||
|
||||
[[volumes]]
|
||||
url = "http://localhost:3001"
|
||||
|
||||
[[volumes]]
|
||||
url = "http://localhost:3002"
|
||||
|
||||
[[volumes]]
|
||||
url = "http://localhost:3003"
|
||||
47
src/config.rs
Normal file
47
src/config.rs
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
use serde::Deserialize;
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct Config {
|
||||
pub server: ServerConfig,
|
||||
pub database: DatabaseConfig,
|
||||
pub volumes: Vec<VolumeConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct ServerConfig {
|
||||
pub port: u16,
|
||||
pub replication_factor: usize,
|
||||
pub virtual_nodes: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct DatabaseConfig {
|
||||
pub path: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct VolumeConfig {
|
||||
pub url: String,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
pub fn load(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let contents = std::fs::read_to_string(path)?;
|
||||
let config: Config = toml::from_str(&contents)?;
|
||||
if config.volumes.is_empty() {
|
||||
return Err("at least one volume is required".into());
|
||||
}
|
||||
if config.server.replication_factor == 0 {
|
||||
return Err("replication_factor must be >= 1".into());
|
||||
}
|
||||
if config.server.replication_factor > config.volumes.len() {
|
||||
return Err("replication_factor exceeds number of volumes".into());
|
||||
}
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
pub fn volume_urls(&self) -> Vec<String> {
|
||||
self.volumes.iter().map(|v| v.url.clone()).collect()
|
||||
}
|
||||
}
|
||||
334
src/db.rs
Normal file
334
src/db.rs
Normal file
|
|
@ -0,0 +1,334 @@
|
|||
use rusqlite::{params, Connection, OpenFlags};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
|
||||
use crate::error::AppError;
|
||||
|
||||
// --- Record type ---
|
||||
|
||||
/// One live row of the `kv` table, with the `volumes` column already decoded.
#[derive(Clone, Debug)]
pub struct Record {
    /// Primary key of the row.
    pub key: String,
    /// Volume list decoded from the JSON text stored in the `volumes` column.
    pub volumes: Vec<String>,
    /// Value of the `path` column.
    pub path: String,
    /// Value of the nullable `size` column.
    pub size: Option<i64>,
}
|
||||
|
||||
// --- SQLite setup ---
|
||||
|
||||
fn apply_pragmas(conn: &Connection) {
|
||||
conn.execute_batch(
|
||||
"
|
||||
PRAGMA journal_mode = WAL;
|
||||
PRAGMA synchronous = NORMAL;
|
||||
PRAGMA busy_timeout = 5000;
|
||||
PRAGMA temp_store = memory;
|
||||
PRAGMA cache_size = -64000;
|
||||
PRAGMA mmap_size = 268435456;
|
||||
PRAGMA page_size = 4096;
|
||||
",
|
||||
)
|
||||
.expect("failed to set pragmas");
|
||||
}
|
||||
|
||||
fn open_readonly(path: &str) -> Connection {
|
||||
let conn = Connection::open_with_flags(
|
||||
path,
|
||||
OpenFlags::SQLITE_OPEN_READ_ONLY
|
||||
| OpenFlags::SQLITE_OPEN_NO_MUTEX
|
||||
| OpenFlags::SQLITE_OPEN_URI,
|
||||
)
|
||||
.expect("failed to open read connection");
|
||||
apply_pragmas(&conn);
|
||||
conn
|
||||
}
|
||||
|
||||
fn open_readwrite(path: &str) -> Connection {
|
||||
let conn = Connection::open_with_flags(
|
||||
path,
|
||||
OpenFlags::SQLITE_OPEN_READ_WRITE
|
||||
| OpenFlags::SQLITE_OPEN_CREATE
|
||||
| OpenFlags::SQLITE_OPEN_NO_MUTEX
|
||||
| OpenFlags::SQLITE_OPEN_URI,
|
||||
)
|
||||
.expect("failed to open write connection");
|
||||
apply_pragmas(&conn);
|
||||
conn
|
||||
}
|
||||
|
||||
fn create_tables(conn: &Connection) {
|
||||
conn.execute_batch(
|
||||
"
|
||||
CREATE TABLE IF NOT EXISTS kv (
|
||||
key TEXT PRIMARY KEY,
|
||||
volumes TEXT NOT NULL,
|
||||
path TEXT NOT NULL,
|
||||
size INTEGER,
|
||||
created_at INTEGER DEFAULT (unixepoch()),
|
||||
deleted INTEGER DEFAULT 0
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_kv_deleted ON kv(deleted);
|
||||
",
|
||||
)
|
||||
.expect("failed to create tables");
|
||||
}
|
||||
|
||||
fn parse_volumes(volumes_json: &str) -> Vec<String> {
|
||||
serde_json::from_str(volumes_json).unwrap_or_default()
|
||||
}
|
||||
|
||||
fn encode_volumes(volumes: &[String]) -> String {
|
||||
serde_json::to_string(volumes).unwrap()
|
||||
}
|
||||
|
||||
// --- ReadPool ---
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ReadPool {
|
||||
conns: Vec<Arc<Mutex<Connection>>>,
|
||||
next: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
impl ReadPool {
|
||||
pub fn new(path: &str, size: usize) -> Self {
|
||||
let conns = (0..size)
|
||||
.map(|_| Arc::new(Mutex::new(open_readonly(path))))
|
||||
.collect();
|
||||
Self {
|
||||
conns,
|
||||
next: Arc::new(AtomicUsize::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn query<T, F>(&self, f: F) -> Result<T, AppError>
|
||||
where
|
||||
T: Send + 'static,
|
||||
F: FnOnce(&Connection) -> Result<T, AppError> + Send + 'static,
|
||||
{
|
||||
let idx = self.next.fetch_add(1, Ordering::Relaxed) % self.conns.len();
|
||||
let conn = self.conns[idx].clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = conn.lock().unwrap();
|
||||
f(&conn)
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
// --- Read query functions ---
|
||||
|
||||
pub fn get(conn: &Connection, key: &str) -> Result<Record, AppError> {
|
||||
let mut stmt =
|
||||
conn.prepare_cached("SELECT key, volumes, path, size FROM kv WHERE key = ?1 AND deleted = 0")?;
|
||||
Ok(stmt.query_row(params![key], |row| {
|
||||
let volumes_json: String = row.get(1)?;
|
||||
Ok(Record {
|
||||
key: row.get(0)?,
|
||||
volumes: parse_volumes(&volumes_json),
|
||||
path: row.get(2)?,
|
||||
size: row.get(3)?,
|
||||
})
|
||||
})?)
|
||||
}
|
||||
|
||||
pub fn list_keys(conn: &Connection, prefix: &str) -> Result<Vec<String>, AppError> {
|
||||
let mut stmt =
|
||||
conn.prepare_cached("SELECT key FROM kv WHERE key LIKE ?1 AND deleted = 0 ORDER BY key")?;
|
||||
let pattern = format!("{prefix}%");
|
||||
let keys = stmt
|
||||
.query_map(params![pattern], |row| row.get(0))?
|
||||
.collect::<Result<Vec<String>, _>>()?;
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
pub fn all_records(conn: &Connection) -> Result<Vec<Record>, AppError> {
|
||||
let mut stmt =
|
||||
conn.prepare_cached("SELECT key, volumes, path, size FROM kv WHERE deleted = 0")?;
|
||||
let records = stmt
|
||||
.query_map([], |row| {
|
||||
let volumes_json: String = row.get(1)?;
|
||||
Ok(Record {
|
||||
key: row.get(0)?,
|
||||
volumes: parse_volumes(&volumes_json),
|
||||
path: row.get(2)?,
|
||||
size: row.get(3)?,
|
||||
})
|
||||
})?
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Ok(records)
|
||||
}
|
||||
|
||||
// --- Write commands ---
|
||||
|
||||
pub enum WriteCmd {
|
||||
Put {
|
||||
key: String,
|
||||
volumes: Vec<String>,
|
||||
path: String,
|
||||
size: Option<i64>,
|
||||
reply: oneshot::Sender<Result<(), AppError>>,
|
||||
},
|
||||
Delete {
|
||||
key: String,
|
||||
reply: oneshot::Sender<Result<(), AppError>>,
|
||||
},
|
||||
BulkPut {
|
||||
records: Vec<(String, Vec<String>, String, Option<i64>)>,
|
||||
reply: oneshot::Sender<Result<(), AppError>>,
|
||||
},
|
||||
}
|
||||
|
||||
fn execute_cmd(conn: &Connection, cmd: WriteCmd) -> (Result<(), AppError>, oneshot::Sender<Result<(), AppError>>) {
|
||||
match cmd {
|
||||
WriteCmd::Put {
|
||||
key,
|
||||
volumes,
|
||||
path,
|
||||
size,
|
||||
reply,
|
||||
} => {
|
||||
let volumes_json = encode_volumes(&volumes);
|
||||
let result = conn
|
||||
.prepare_cached(
|
||||
"INSERT INTO kv (key, volumes, path, size) VALUES (?1, ?2, ?3, ?4)
|
||||
ON CONFLICT(key) DO UPDATE SET volumes = ?2, path = ?3, size = ?4, deleted = 0",
|
||||
)
|
||||
.and_then(|mut s| s.execute(params![key, volumes_json, path, size]))
|
||||
.map(|_| ())
|
||||
.map_err(AppError::from);
|
||||
(result, reply)
|
||||
}
|
||||
WriteCmd::Delete { key, reply } => {
|
||||
let result = conn
|
||||
.prepare_cached("DELETE FROM kv WHERE key = ?1")
|
||||
.and_then(|mut s| s.execute(params![key]))
|
||||
.map(|_| ())
|
||||
.map_err(AppError::from);
|
||||
(result, reply)
|
||||
}
|
||||
WriteCmd::BulkPut { records, reply } => {
|
||||
let result = (|| -> Result<(), AppError> {
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"INSERT INTO kv (key, volumes, path, size) VALUES (?1, ?2, ?3, ?4)
|
||||
ON CONFLICT(key) DO UPDATE SET volumes = ?2, path = ?3, size = ?4, deleted = 0",
|
||||
)?;
|
||||
for (key, volumes, path, size) in &records {
|
||||
let volumes_json = encode_volumes(volumes);
|
||||
stmt.execute(params![key, volumes_json, path, size])?;
|
||||
}
|
||||
Ok(())
|
||||
})();
|
||||
(result, reply)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- WriterHandle ---
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct WriterHandle {
|
||||
tx: mpsc::Sender<WriteCmd>,
|
||||
}
|
||||
|
||||
impl WriterHandle {
|
||||
pub async fn put(
|
||||
&self,
|
||||
key: String,
|
||||
volumes: Vec<String>,
|
||||
path: String,
|
||||
size: Option<i64>,
|
||||
) -> Result<(), AppError> {
|
||||
let (reply_tx, reply_rx) = oneshot::channel();
|
||||
self.tx
|
||||
.send(WriteCmd::Put {
|
||||
key,
|
||||
volumes,
|
||||
path,
|
||||
size,
|
||||
reply: reply_tx,
|
||||
})
|
||||
.await
|
||||
.map_err(|_| AppError::WriterDead)?;
|
||||
reply_rx.await.map_err(|_| AppError::WriterDroppedReply)?
|
||||
}
|
||||
|
||||
pub async fn delete(&self, key: String) -> Result<(), AppError> {
|
||||
let (reply_tx, reply_rx) = oneshot::channel();
|
||||
self.tx
|
||||
.send(WriteCmd::Delete {
|
||||
key,
|
||||
reply: reply_tx,
|
||||
})
|
||||
.await
|
||||
.map_err(|_| AppError::WriterDead)?;
|
||||
reply_rx.await.map_err(|_| AppError::WriterDroppedReply)?
|
||||
}
|
||||
|
||||
pub async fn bulk_put(
|
||||
&self,
|
||||
records: Vec<(String, Vec<String>, String, Option<i64>)>,
|
||||
) -> Result<(), AppError> {
|
||||
let (reply_tx, reply_rx) = oneshot::channel();
|
||||
self.tx
|
||||
.send(WriteCmd::BulkPut {
|
||||
records,
|
||||
reply: reply_tx,
|
||||
})
|
||||
.await
|
||||
.map_err(|_| AppError::WriterDead)?;
|
||||
reply_rx.await.map_err(|_| AppError::WriterDroppedReply)?
|
||||
}
|
||||
}
|
||||
|
||||
// --- spawn_writer ---
|
||||
|
||||
pub fn spawn_writer(path: String) -> (WriterHandle, oneshot::Receiver<()>) {
|
||||
let (tx, mut rx) = mpsc::channel::<WriteCmd>(4096);
|
||||
let (ready_tx, ready_rx) = oneshot::channel();
|
||||
|
||||
std::thread::spawn(move || {
|
||||
let conn = open_readwrite(&path);
|
||||
create_tables(&conn);
|
||||
let _ = ready_tx.send(());
|
||||
|
||||
loop {
|
||||
let Some(first) = rx.blocking_recv() else {
|
||||
break;
|
||||
};
|
||||
|
||||
let mut batch = vec![first];
|
||||
while batch.len() < 512 {
|
||||
match rx.try_recv() {
|
||||
Ok(cmd) => batch.push(cmd),
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
|
||||
let _ = conn.execute_batch("BEGIN");
|
||||
let mut replies: Vec<(Result<(), AppError>, oneshot::Sender<Result<(), AppError>>)> =
|
||||
Vec::with_capacity(batch.len());
|
||||
|
||||
for (i, cmd) in batch.into_iter().enumerate() {
|
||||
let sp = format!("sp{i}");
|
||||
let _ = conn.execute(&format!("SAVEPOINT {sp}"), []);
|
||||
let (result, reply) = execute_cmd(&conn, cmd);
|
||||
if result.is_ok() {
|
||||
let _ = conn.execute(&format!("RELEASE {sp}"), []);
|
||||
} else {
|
||||
let _ = conn.execute(&format!("ROLLBACK TO {sp}"), []);
|
||||
let _ = conn.execute(&format!("RELEASE {sp}"), []);
|
||||
}
|
||||
replies.push((result, reply));
|
||||
}
|
||||
|
||||
let _ = conn.execute_batch("COMMIT");
|
||||
for (result, reply) in replies {
|
||||
let _ = reply.send(result);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
(WriterHandle { tx }, ready_rx)
|
||||
}
|
||||
45
src/error.rs
Normal file
45
src/error.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum AppError {
|
||||
NotFound,
|
||||
Db(rusqlite::Error),
|
||||
WriterDead,
|
||||
WriterDroppedReply,
|
||||
VolumeError(String),
|
||||
NoHealthyVolume,
|
||||
}
|
||||
|
||||
impl From<rusqlite::Error> for AppError {
|
||||
fn from(e: rusqlite::Error) -> Self {
|
||||
match e {
|
||||
rusqlite::Error::QueryReturnedNoRows => AppError::NotFound,
|
||||
other => AppError::Db(other),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for AppError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
AppError::NotFound => write!(f, "not found"),
|
||||
AppError::Db(e) => write!(f, "database error: {e}"),
|
||||
AppError::WriterDead => write!(f, "writer dead"),
|
||||
AppError::WriterDroppedReply => write!(f, "writer dropped reply"),
|
||||
AppError::VolumeError(msg) => write!(f, "volume error: {msg}"),
|
||||
AppError::NoHealthyVolume => write!(f, "no healthy volume available"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoResponse for AppError {
|
||||
fn into_response(self) -> Response {
|
||||
let (status, msg) = match &self {
|
||||
AppError::NotFound => (StatusCode::NOT_FOUND, self.to_string()),
|
||||
AppError::NoHealthyVolume => (StatusCode::SERVICE_UNAVAILABLE, self.to_string()),
|
||||
_ => (StatusCode::INTERNAL_SERVER_ERROR, self.to_string()),
|
||||
};
|
||||
(status, msg).into_response()
|
||||
}
|
||||
}
|
||||
96
src/main.rs
Normal file
96
src/main.rs
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
mod config;
|
||||
mod db;
|
||||
mod error;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "mkv", about = "Distributed key-value store")]
|
||||
struct Cli {
|
||||
#[arg(short, long, default_value = "config.toml")]
|
||||
config: PathBuf,
|
||||
|
||||
#[command(subcommand)]
|
||||
command: Commands,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
/// Start the index server
|
||||
Serve,
|
||||
/// Rebuild SQLite index from volume servers
|
||||
Rebuild,
|
||||
/// Rebalance data after adding/removing volumes
|
||||
Rebalance {
|
||||
#[arg(long)]
|
||||
dry_run: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
let cli = Cli::parse();
|
||||
let config = config::Config::load(&cli.config).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to load config: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
match cli.command {
|
||||
Commands::Serve => serve(config).await,
|
||||
Commands::Rebuild => {
|
||||
eprintln!("rebuild not yet implemented");
|
||||
std::process::exit(1);
|
||||
}
|
||||
Commands::Rebalance { dry_run: _ } => {
|
||||
eprintln!("rebalance not yet implemented");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn serve(config: config::Config) {
|
||||
let db_path = &config.database.path;
|
||||
|
||||
// Ensure parent directory exists
|
||||
if let Some(parent) = std::path::Path::new(db_path).parent() {
|
||||
std::fs::create_dir_all(parent).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to create database directory: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
let (writer, ready_rx) = db::spawn_writer(db_path.to_string());
|
||||
ready_rx.await.expect("writer failed to initialize");
|
||||
|
||||
let num_readers = std::thread::available_parallelism()
|
||||
.map(|n| n.get())
|
||||
.unwrap_or(4);
|
||||
let reads = db::ReadPool::new(db_path, num_readers);
|
||||
|
||||
let port = config.server.port;
|
||||
let volumes = config.volume_urls();
|
||||
|
||||
tracing::info!("Starting mkv server on port {port}");
|
||||
tracing::info!(" Readers: {num_readers}");
|
||||
tracing::info!(" Volumes: {volumes:?}");
|
||||
tracing::info!(
|
||||
" Replication factor: {}",
|
||||
config.server.replication_factor
|
||||
);
|
||||
|
||||
// TODO: wire up axum routes, volume client, hasher, health checker
|
||||
let app = axum::Router::new()
|
||||
.route("/health", axum::routing::get(|| async { "ok" }));
|
||||
|
||||
let addr = format!("0.0.0.0:{port}");
|
||||
let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
|
||||
tracing::info!("Listening on {addr}");
|
||||
axum::serve(listener, app).await.unwrap();
|
||||
|
||||
// Keep these alive (will be used in later phases)
|
||||
drop(writer);
|
||||
drop(reads);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue