fix(smarts3): replace TypeScript server with Rust-powered core and IPC bridge

2026-02-13 13:59:44 +00:00
parent 54a0c2fb65
commit 65eb266983
32 changed files with 4083 additions and 3182 deletions

rust/.cargo/config.toml (new file, +2)

@@ -0,0 +1,2 @@
[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"

rust/Cargo.lock (generated, new file, +1393)
File diff suppressed because it is too large.

rust/Cargo.toml (new file, +30)

@@ -0,0 +1,30 @@
[package]
name = "rusts3"
version = "0.1.0"
edition = "2021"
[[bin]]
name = "rusts3"
path = "src/main.rs"
[dependencies]
tokio = { version = "1", features = ["full"] }
hyper = { version = "1", features = ["http1", "server"] }
hyper-util = { version = "0.1", features = ["tokio", "http1"] }
http-body-util = "0.1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
quick-xml = { version = "0.37", features = ["serialize"] }
md-5 = "0.10"
tokio-util = { version = "0.7", features = ["io"] }
bytes = "1"
uuid = { version = "1", features = ["v4"] }
clap = { version = "4", features = ["derive"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
thiserror = "2"
anyhow = "1"
percent-encoding = "2"
url = "2"
chrono = { version = "0.4", features = ["serde"] }
futures-core = "0.3"

rust/src/config.rs (new file, +78)

@@ -0,0 +1,78 @@
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct S3Config {
pub server: ServerConfig,
pub storage: StorageConfig,
pub auth: AuthConfig,
pub cors: CorsConfig,
pub logging: LoggingConfig,
pub limits: LimitsConfig,
pub multipart: MultipartConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ServerConfig {
pub port: u16,
pub address: String,
pub silent: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct StorageConfig {
pub directory: String,
pub clean_slate: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AuthConfig {
pub enabled: bool,
pub credentials: Vec<Credential>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Credential {
#[serde(rename = "accessKeyId")]
pub access_key_id: String,
#[serde(rename = "secretAccessKey")]
pub secret_access_key: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CorsConfig {
pub enabled: bool,
pub allowed_origins: Option<Vec<String>>,
pub allowed_methods: Option<Vec<String>>,
pub allowed_headers: Option<Vec<String>>,
pub exposed_headers: Option<Vec<String>>,
pub max_age: Option<u64>,
pub allow_credentials: Option<bool>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct LoggingConfig {
pub level: Option<String>,
pub format: Option<String>,
pub enabled: Option<bool>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct LimitsConfig {
pub max_object_size: Option<u64>,
pub max_metadata_size: Option<u64>,
pub request_timeout: Option<u64>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MultipartConfig {
pub expiration_days: Option<u64>,
pub cleanup_interval_minutes: Option<u64>,
}
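For illustration, a JSON document in the shape these structs expect (all values below are made up; field names follow the camelCase / renamed serde attributes above):
{
  "server": { "port": 4568, "address": "0.0.0.0", "silent": false },
  "storage": { "directory": "./s3-data", "cleanSlate": true },
  "auth": { "enabled": false, "credentials": [{ "accessKeyId": "S3RVER", "secretAccessKey": "S3RVER" }] },
  "cors": { "enabled": true, "allowedOrigins": ["*"], "allowedMethods": ["GET", "PUT"], "allowedHeaders": null, "exposedHeaders": null, "maxAge": 3000, "allowCredentials": null },
  "logging": { "level": "info", "format": null, "enabled": true },
  "limits": { "maxObjectSize": null, "maxMetadataSize": null, "requestTimeout": null },
  "multipart": { "expirationDays": 1, "cleanupIntervalMinutes": 60 }
}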

rust/src/main.rs (new file, +43)

@@ -0,0 +1,43 @@
mod config;
mod management;
mod s3_error;
mod server;
mod storage;
mod xml_response;
use clap::Parser;
#[derive(Parser)]
#[command(name = "rusts3", about = "High-performance S3-compatible server")]
struct Cli {
/// Run in management mode (IPC via stdin/stdout)
#[arg(long)]
management: bool,
/// Log level
#[arg(long, default_value = "info")]
log_level: String,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let cli = Cli::parse();
if cli.management {
// Init tracing to stderr only (stdout reserved for IPC)
tracing_subscriber::fmt()
.with_writer(std::io::stderr)
.with_env_filter(
tracing_subscriber::EnvFilter::try_new(&cli.log_level)
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
)
.init();
management::management_loop().await?;
} else {
eprintln!("rusts3: use --management flag for IPC mode");
std::process::exit(1);
}
Ok(())
}
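In management mode the binary is meant to be spawned by a parent process, e.g. rusts3 --management --log-level debug (flag names as defined in Cli above); stdout is reserved for the IPC protocol and all logging goes to stderr.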

rust/src/management.rs (new file, +155)

@@ -0,0 +1,155 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::io::Write;
use tokio::io::{AsyncBufReadExt, BufReader};
use crate::config::S3Config;
use crate::server::S3Server;
#[derive(Deserialize)]
struct IpcRequest {
id: String,
method: String,
params: Value,
}
#[derive(Serialize)]
struct IpcResponse {
id: String,
success: bool,
#[serde(skip_serializing_if = "Option::is_none")]
result: Option<Value>,
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
}
#[derive(Serialize)]
struct IpcEvent {
event: String,
data: Value,
}
fn send_line(value: &impl Serialize) {
let mut stdout = std::io::stdout().lock();
serde_json::to_writer(&mut stdout, value).ok();
stdout.write_all(b"\n").ok();
stdout.flush().ok();
}
fn send_response(id: String, result: Value) {
send_line(&IpcResponse {
id,
success: true,
result: Some(result),
error: None,
});
}
fn send_error(id: String, message: String) {
send_line(&IpcResponse {
id,
success: false,
result: None,
error: Some(message),
});
}
pub async fn management_loop() -> Result<()> {
// Emit ready event
send_line(&IpcEvent {
event: "ready".to_string(),
data: serde_json::json!({}),
});
let mut server: Option<S3Server> = None;
let stdin = BufReader::new(tokio::io::stdin());
let mut lines = stdin.lines();
while let Ok(Some(line)) = lines.next_line().await {
let line = line.trim().to_string();
if line.is_empty() {
continue;
}
let req: IpcRequest = match serde_json::from_str(&line) {
Ok(r) => r,
Err(e) => {
tracing::warn!("Invalid IPC request: {}", e);
continue;
}
};
let id = req.id.clone();
let method = req.method.as_str();
match method {
"start" => {
#[derive(Deserialize)]
struct StartParams {
config: S3Config,
}
match serde_json::from_value::<StartParams>(req.params) {
Ok(params) => {
match S3Server::start(params.config).await {
Ok(s) => {
server = Some(s);
send_response(id, serde_json::json!({}));
}
Err(e) => {
send_error(id, format!("Failed to start server: {}", e));
}
}
}
Err(e) => {
send_error(id, format!("Invalid start params: {}", e));
}
}
}
"stop" => {
if let Some(s) = server.take() {
s.stop().await;
}
send_response(id, serde_json::json!({}));
}
"createBucket" => {
#[derive(Deserialize)]
struct CreateBucketParams {
name: String,
}
match serde_json::from_value::<CreateBucketParams>(req.params) {
Ok(params) => {
if let Some(ref s) = server {
match s.store().create_bucket(&params.name).await {
Ok(()) => {
send_response(id, serde_json::json!({}));
}
Err(e) => {
send_error(
id,
format!("Failed to create bucket: {}", e),
);
}
}
} else {
send_error(id, "Server not started".to_string());
}
}
Err(e) => {
send_error(id, format!("Invalid createBucket params: {}", e));
}
}
}
_ => {
send_error(id, format!("Unknown method: {}", method));
}
}
}
// Clean shutdown
if let Some(s) = server.take() {
s.stop().await;
}
Ok(())
}
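A sketch of the resulting stdin/stdout protocol, as implied by the structs above (newline-delimited JSON; request ids are illustrative and the config payload is elided):
child:  {"event":"ready","data":{}}
parent: {"id":"1","method":"start","params":{"config":{ ... }}}
child:  {"id":"1","success":true,"result":{}}
parent: {"id":"2","method":"createBucket","params":{"name":"my-bucket"}}
child:  {"id":"2","success":true,"result":{}}
parent: {"id":"3","method":"stop","params":{}}
child:  {"id":"3","success":true,"result":{}}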

rust/src/s3_error.rs (new file, +70)

@@ -0,0 +1,70 @@
use hyper::{Response, StatusCode};
use http_body_util::Full;
use bytes::Bytes;
#[derive(Debug, thiserror::Error)]
#[error("S3Error({code}): {message}")]
pub struct S3Error {
pub code: String,
pub message: String,
pub status: StatusCode,
}
impl S3Error {
pub fn new(code: &str, message: &str, status: StatusCode) -> Self {
Self {
code: code.to_string(),
message: message.to_string(),
status,
}
}
pub fn no_such_key() -> Self {
Self::new("NoSuchKey", "The specified key does not exist.", StatusCode::NOT_FOUND)
}
pub fn no_such_bucket() -> Self {
Self::new("NoSuchBucket", "The specified bucket does not exist", StatusCode::NOT_FOUND)
}
pub fn bucket_not_empty() -> Self {
Self::new("BucketNotEmpty", "The bucket you tried to delete is not empty", StatusCode::CONFLICT)
}
pub fn access_denied() -> Self {
Self::new("AccessDenied", "Access Denied", StatusCode::FORBIDDEN)
}
pub fn no_such_upload() -> Self {
Self::new("NoSuchUpload", "The specified upload does not exist", StatusCode::NOT_FOUND)
}
pub fn invalid_part_number() -> Self {
Self::new("InvalidPartNumber", "Part number must be between 1 and 10000", StatusCode::BAD_REQUEST)
}
pub fn internal_error(msg: &str) -> Self {
Self::new("InternalError", msg, StatusCode::INTERNAL_SERVER_ERROR)
}
pub fn invalid_request(msg: &str) -> Self {
Self::new("InvalidRequest", msg, StatusCode::BAD_REQUEST)
}
pub fn to_xml(&self) -> String {
format!(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Error><Code>{}</Code><Message>{}</Message></Error>",
self.code, self.message
)
}
pub fn to_response(&self, request_id: &str) -> Response<Full<Bytes>> {
let xml = self.to_xml();
Response::builder()
.status(self.status)
.header("content-type", "application/xml")
.header("x-amz-request-id", request_id)
.body(Full::new(Bytes::from(xml)))
.unwrap()
}
}

rust/src/server.rs (new file, +865)

@@ -0,0 +1,865 @@
use anyhow::Result;
use bytes::Bytes;
use futures_core::Stream;
use http_body_util::BodyExt;
use hyper::body::Incoming;
use hyper::server::conn::http1;
use hyper::service::service_fn;
use hyper::{Method, Request, Response, StatusCode};
use hyper_util::rt::TokioIo;
use std::collections::HashMap;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use tokio::io::AsyncReadExt;
use tokio::net::TcpListener;
use tokio::sync::watch;
use tokio_util::io::ReaderStream;
use uuid::Uuid;
use crate::config::S3Config;
use crate::s3_error::S3Error;
use crate::storage::FileStore;
use crate::xml_response;
pub struct S3Server {
store: Arc<FileStore>,
config: S3Config,
shutdown_tx: watch::Sender<bool>,
server_handle: tokio::task::JoinHandle<()>,
}
impl S3Server {
pub async fn start(config: S3Config) -> Result<Self> {
let store = Arc::new(FileStore::new(config.storage.directory.clone().into()));
// Initialize or reset storage
if config.storage.clean_slate {
store.reset().await?;
} else {
store.initialize().await?;
}
let addr: SocketAddr = format!("{}:{}", config.address(), config.server.port)
.parse()?;
let listener = TcpListener::bind(addr).await?;
let (shutdown_tx, shutdown_rx) = watch::channel(false);
let server_store = store.clone();
let server_config = config.clone();
let server_handle = tokio::spawn(async move {
loop {
let mut rx = shutdown_rx.clone();
tokio::select! {
result = listener.accept() => {
match result {
Ok((stream, _remote_addr)) => {
let io = TokioIo::new(stream);
let store = server_store.clone();
let cfg = server_config.clone();
tokio::spawn(async move {
let svc = service_fn(move |req: Request<Incoming>| {
let store = store.clone();
let cfg = cfg.clone();
async move {
handle_request(req, store, cfg).await
}
});
if let Err(e) = http1::Builder::new()
.keep_alive(true)
.serve_connection(io, svc)
.await
{
if !e.is_incomplete_message() {
tracing::error!("Connection error: {}", e);
}
}
});
}
Err(e) => {
tracing::error!("Accept error: {}", e);
}
}
}
_ = rx.changed() => {
break;
}
}
}
});
if !config.server.silent {
tracing::info!("S3 server listening on {}", addr);
}
Ok(Self {
store,
config,
shutdown_tx,
server_handle,
})
}
pub async fn stop(self) {
let _ = self.shutdown_tx.send(true);
let _ = self.server_handle.await;
}
pub fn store(&self) -> &FileStore {
&self.store
}
}
impl S3Config {
fn address(&self) -> &str {
&self.server.address
}
}
// ============================
// Request handling
// ============================
type BoxBody = http_body_util::combinators::BoxBody<Bytes, Box<dyn std::error::Error + Send + Sync>>;
fn full_body(data: impl Into<Bytes>) -> BoxBody {
http_body_util::Full::new(data.into())
.map_err(|never: std::convert::Infallible| -> Box<dyn std::error::Error + Send + Sync> { match never {} })
.boxed()
}
fn empty_body() -> BoxBody {
http_body_util::Empty::new()
.map_err(|never: std::convert::Infallible| -> Box<dyn std::error::Error + Send + Sync> { match never {} })
.boxed()
}
fn stream_body(reader: tokio::fs::File, content_length: u64) -> BoxBody {
let stream = ReaderStream::with_capacity(reader.take(content_length), 64 * 1024);
let mapped = FrameStream { inner: stream };
http_body_util::StreamBody::new(mapped).boxed()
}
/// Adapter that converts ReaderStream into a Stream of Frame<Bytes>
struct FrameStream {
inner: ReaderStream<tokio::io::Take<tokio::fs::File>>,
}
impl Stream for FrameStream {
type Item = Result<hyper::body::Frame<Bytes>, Box<dyn std::error::Error + Send + Sync>>;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let inner = unsafe { self.map_unchecked_mut(|s| &mut s.inner) };
match inner.poll_next(cx) {
Poll::Ready(Some(Ok(bytes))) => {
Poll::Ready(Some(Ok(hyper::body::Frame::data(bytes))))
}
Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(Box::new(e) as Box<dyn std::error::Error + Send + Sync>))),
Poll::Ready(None) => Poll::Ready(None),
Poll::Pending => Poll::Pending,
}
}
}
fn xml_response(status: StatusCode, xml: String, request_id: &str) -> Response<BoxBody> {
Response::builder()
.status(status)
.header("content-type", "application/xml")
.header("x-amz-request-id", request_id)
.body(full_body(xml))
.unwrap()
}
fn empty_response(status: StatusCode, request_id: &str) -> Response<BoxBody> {
Response::builder()
.status(status)
.header("x-amz-request-id", request_id)
.body(empty_body())
.unwrap()
}
fn s3_error_response(err: &S3Error, request_id: &str) -> Response<BoxBody> {
let xml = err.to_xml();
Response::builder()
.status(err.status)
.header("content-type", "application/xml")
.header("x-amz-request-id", request_id)
.body(full_body(xml))
.unwrap()
}
async fn handle_request(
req: Request<Incoming>,
store: Arc<FileStore>,
config: S3Config,
) -> Result<Response<BoxBody>, std::convert::Infallible> {
let request_id = Uuid::new_v4().to_string();
let method = req.method().clone();
let uri = req.uri().clone();
let start = std::time::Instant::now();
// Handle CORS preflight
if config.cors.enabled && method == Method::OPTIONS {
let resp = build_cors_preflight(&config, &request_id);
return Ok(resp);
}
// Auth check
if config.auth.enabled {
if let Err(e) = check_auth(&req, &config) {
tracing::warn!("Auth failed: {}", e.message);
return Ok(s3_error_response(&e, &request_id));
}
}
// Route and handle
let mut response = match route_request(req, store, &config, &request_id).await {
Ok(resp) => resp,
Err(err) => {
if let Some(s3err) = err.downcast_ref::<S3Error>() {
s3_error_response(s3err, &request_id)
} else {
tracing::error!("Internal error: {}", err);
let s3err = S3Error::internal_error(&err.to_string());
s3_error_response(&s3err, &request_id)
}
}
};
// Add CORS headers if enabled
if config.cors.enabled {
add_cors_headers(response.headers_mut(), &config);
}
let duration = start.elapsed();
tracing::info!(
method = %method,
path = %uri.path(),
status = %response.status().as_u16(),
duration_ms = %duration.as_millis(),
"request"
);
Ok(response)
}
// ============================
// Routing
// ============================
async fn route_request(
req: Request<Incoming>,
store: Arc<FileStore>,
_config: &S3Config,
request_id: &str,
) -> Result<Response<BoxBody>> {
let method = req.method().clone();
let path = req.uri().path().to_string();
let query_string = req.uri().query().unwrap_or("").to_string();
let query = parse_query(&query_string);
// Parse path: /, /{bucket}, /{bucket}/{key...}
let segments: Vec<&str> = path
.trim_start_matches('/')
.splitn(2, '/')
.filter(|s| !s.is_empty())
.collect();
match segments.len() {
0 => {
// Root: GET / -> ListBuckets
match method {
Method::GET => handle_list_buckets(store, request_id).await,
_ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)),
}
}
1 => {
// Bucket level: /{bucket}
let bucket = percent_decode(segments[0]);
match method {
Method::GET => {
if query.contains_key("uploads") {
handle_list_multipart_uploads(store, &bucket, request_id).await
} else {
handle_list_objects(store, &bucket, &query, request_id).await
}
}
Method::PUT => handle_create_bucket(store, &bucket, request_id).await,
Method::DELETE => handle_delete_bucket(store, &bucket, request_id).await,
Method::HEAD => handle_head_bucket(store, &bucket, request_id).await,
_ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)),
}
}
2 => {
// Object level: /{bucket}/{key...}
let bucket = percent_decode(segments[0]);
let key = percent_decode(segments[1]);
match method {
Method::PUT => {
if query.contains_key("partNumber") && query.contains_key("uploadId") {
handle_upload_part(req, store, &query, request_id).await
} else if req.headers().contains_key("x-amz-copy-source") {
handle_copy_object(req, store, &bucket, &key, request_id).await
} else {
handle_put_object(req, store, &bucket, &key, request_id).await
}
}
Method::GET => {
handle_get_object(req, store, &bucket, &key, request_id).await
}
Method::HEAD => {
handle_head_object(store, &bucket, &key, request_id).await
}
Method::DELETE => {
if query.contains_key("uploadId") {
let upload_id = query.get("uploadId").unwrap();
handle_abort_multipart(store, upload_id, request_id).await
} else {
handle_delete_object(store, &bucket, &key, request_id).await
}
}
Method::POST => {
if query.contains_key("uploads") {
handle_initiate_multipart(req, store, &bucket, &key, request_id).await
} else if query.contains_key("uploadId") {
let upload_id = query.get("uploadId").unwrap().clone();
handle_complete_multipart(req, store, &bucket, &key, &upload_id, request_id).await
} else {
let err = S3Error::invalid_request("Invalid POST request");
Ok(s3_error_response(&err, request_id))
}
}
_ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)),
}
}
_ => Ok(empty_response(StatusCode::BAD_REQUEST, request_id)),
}
}
// ============================
// Handlers
// ============================
async fn handle_list_buckets(
store: Arc<FileStore>,
request_id: &str,
) -> Result<Response<BoxBody>> {
let buckets = store.list_buckets().await?;
let xml = xml_response::list_buckets_xml(&buckets);
Ok(xml_response(StatusCode::OK, xml, request_id))
}
async fn handle_create_bucket(
store: Arc<FileStore>,
bucket: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
store.create_bucket(bucket).await?;
Ok(empty_response(StatusCode::OK, request_id))
}
async fn handle_delete_bucket(
store: Arc<FileStore>,
bucket: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
store.delete_bucket(bucket).await?;
Ok(empty_response(StatusCode::NO_CONTENT, request_id))
}
async fn handle_head_bucket(
store: Arc<FileStore>,
bucket: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
if store.bucket_exists(bucket).await {
Ok(empty_response(StatusCode::OK, request_id))
} else {
Err(S3Error::no_such_bucket().into())
}
}
async fn handle_list_objects(
store: Arc<FileStore>,
bucket: &str,
query: &HashMap<String, String>,
request_id: &str,
) -> Result<Response<BoxBody>> {
let prefix = query.get("prefix").map(|s| s.as_str()).unwrap_or("");
let delimiter = query.get("delimiter").map(|s| s.as_str()).unwrap_or("");
let max_keys = query
.get("max-keys")
.and_then(|s| s.parse().ok())
.unwrap_or(1000usize);
let continuation_token = query.get("continuation-token").map(|s| s.as_str());
let is_v2 = query.get("list-type").map(|s| s.as_str()) == Some("2");
let result = store
.list_objects(bucket, prefix, delimiter, max_keys, continuation_token)
.await?;
let xml = if is_v2 {
xml_response::list_objects_v2_xml(bucket, &result)
} else {
xml_response::list_objects_v1_xml(bucket, &result)
};
Ok(xml_response(StatusCode::OK, xml, request_id))
}
async fn handle_put_object(
req: Request<Incoming>,
store: Arc<FileStore>,
bucket: &str,
key: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
let metadata = extract_metadata(req.headers());
let body = req.into_body();
let result = store.put_object(bucket, key, body, metadata).await?;
let resp = Response::builder()
.status(StatusCode::OK)
.header("ETag", format!("\"{}\"", result.md5))
.header("x-amz-request-id", request_id)
.body(empty_body())
.unwrap();
Ok(resp)
}
async fn handle_get_object(
req: Request<Incoming>,
store: Arc<FileStore>,
bucket: &str,
key: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
// Parse Range header
let range = parse_range_header(req.headers());
let result = store.get_object(bucket, key, range).await?;
let content_type = result
.metadata
.get("content-type")
.cloned()
.unwrap_or_else(|| "binary/octet-stream".to_string());
let mut builder = Response::builder()
.header("ETag", format!("\"{}\"", result.md5))
.header("Last-Modified", result.last_modified.format("%a, %d %b %Y %H:%M:%S GMT").to_string())
.header("Content-Type", &content_type)
.header("Accept-Ranges", "bytes")
.header("x-amz-request-id", request_id);
// Add custom metadata headers
for (k, v) in &result.metadata {
if k.starts_with("x-amz-meta-") {
builder = builder.header(k.as_str(), v.as_str());
}
}
if let Some((start, end)) = range {
// Clamp open-ended ranges ("bytes=N-") so end - start + 1 cannot overflow
let end = end.min(result.size.saturating_sub(1));
let content_length = end - start + 1;
let resp = builder
.status(StatusCode::PARTIAL_CONTENT)
.header("Content-Length", content_length.to_string())
.header(
"Content-Range",
format!("bytes {}-{}/{}", start, end, result.size),
)
.body(stream_body(result.body, content_length))
.unwrap();
Ok(resp)
} else {
let resp = builder
.status(StatusCode::OK)
.header("Content-Length", result.size.to_string())
.body(stream_body(result.body, result.content_length))
.unwrap();
Ok(resp)
}
}
async fn handle_head_object(
store: Arc<FileStore>,
bucket: &str,
key: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
let result = store.head_object(bucket, key).await?;
let content_type = result
.metadata
.get("content-type")
.cloned()
.unwrap_or_else(|| "binary/octet-stream".to_string());
let mut builder = Response::builder()
.status(StatusCode::OK)
.header("ETag", format!("\"{}\"", result.md5))
.header("Last-Modified", result.last_modified.format("%a, %d %b %Y %H:%M:%S GMT").to_string())
.header("Content-Type", &content_type)
.header("Content-Length", result.size.to_string())
.header("Accept-Ranges", "bytes")
.header("x-amz-request-id", request_id);
for (k, v) in &result.metadata {
if k.starts_with("x-amz-meta-") {
builder = builder.header(k.as_str(), v.as_str());
}
}
Ok(builder.body(empty_body()).unwrap())
}
async fn handle_delete_object(
store: Arc<FileStore>,
bucket: &str,
key: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
store.delete_object(bucket, key).await?;
Ok(empty_response(StatusCode::NO_CONTENT, request_id))
}
async fn handle_copy_object(
req: Request<Incoming>,
store: Arc<FileStore>,
dest_bucket: &str,
dest_key: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
let copy_source = req
.headers()
.get("x-amz-copy-source")
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.to_string();
let metadata_directive = req
.headers()
.get("x-amz-metadata-directive")
.and_then(|v| v.to_str().ok())
.unwrap_or("COPY")
.to_uppercase();
// Parse source: /bucket/key or bucket/key
let source = copy_source.trim_start_matches('/');
let first_slash = source.find('/').unwrap_or(source.len());
let src_bucket = percent_decode(&source[..first_slash]);
let src_key = if first_slash < source.len() {
percent_decode(&source[first_slash + 1..])
} else {
String::new()
};
let new_metadata = if metadata_directive == "REPLACE" {
Some(extract_metadata(req.headers()))
} else {
None
};
let result = store
.copy_object(&src_bucket, &src_key, dest_bucket, dest_key, &metadata_directive, new_metadata)
.await?;
let xml = xml_response::copy_object_result_xml(&result.md5, &result.last_modified.to_rfc3339());
Ok(xml_response(StatusCode::OK, xml, request_id))
}
// ============================
// Multipart handlers
// ============================
async fn handle_initiate_multipart(
req: Request<Incoming>,
store: Arc<FileStore>,
bucket: &str,
key: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
let metadata = extract_metadata(req.headers());
let upload_id = store.initiate_multipart(bucket, key, metadata).await?;
let xml = xml_response::initiate_multipart_xml(bucket, key, &upload_id);
Ok(xml_response(StatusCode::OK, xml, request_id))
}
async fn handle_upload_part(
req: Request<Incoming>,
store: Arc<FileStore>,
query: &HashMap<String, String>,
request_id: &str,
) -> Result<Response<BoxBody>> {
let upload_id = query.get("uploadId").unwrap();
let part_number: u32 = query
.get("partNumber")
.and_then(|s| s.parse().ok())
.unwrap_or(0);
if part_number < 1 || part_number > 10000 {
return Err(S3Error::invalid_part_number().into());
}
let body = req.into_body();
let (etag, _size) = store.upload_part(upload_id, part_number, body).await?;
let resp = Response::builder()
.status(StatusCode::OK)
.header("ETag", format!("\"{}\"", etag))
.header("x-amz-request-id", request_id)
.body(empty_body())
.unwrap();
Ok(resp)
}
async fn handle_complete_multipart(
req: Request<Incoming>,
store: Arc<FileStore>,
bucket: &str,
key: &str,
upload_id: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
// Read request body (XML)
let body_bytes = req.collect().await.map_err(|e| anyhow::anyhow!("Body error: {}", e))?.to_bytes();
let body_str = String::from_utf8_lossy(&body_bytes);
// Parse parts from XML using regex-like approach
let parts = parse_complete_multipart_xml(&body_str);
let result = store.complete_multipart(upload_id, &parts).await?;
let xml = xml_response::complete_multipart_xml(bucket, key, &result.etag);
Ok(xml_response(StatusCode::OK, xml, request_id))
}
async fn handle_abort_multipart(
store: Arc<FileStore>,
upload_id: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
store.abort_multipart(upload_id).await?;
Ok(empty_response(StatusCode::NO_CONTENT, request_id))
}
async fn handle_list_multipart_uploads(
store: Arc<FileStore>,
bucket: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
let uploads = store.list_multipart_uploads(bucket).await?;
let xml = xml_response::list_multipart_uploads_xml(bucket, &uploads);
Ok(xml_response(StatusCode::OK, xml, request_id))
}
// ============================
// Helpers
// ============================
fn parse_query(query_string: &str) -> HashMap<String, String> {
let mut map = HashMap::new();
if query_string.is_empty() {
return map;
}
for pair in query_string.split('&') {
let mut parts = pair.splitn(2, '=');
let key = parts.next().unwrap_or("");
let value = parts.next().unwrap_or("");
let key = percent_decode(key);
let value = percent_decode(value);
map.insert(key, value);
}
map
}
fn percent_decode(s: &str) -> String {
percent_encoding::percent_decode_str(s)
.decode_utf8_lossy()
.to_string()
}
fn extract_metadata(headers: &hyper::HeaderMap) -> HashMap<String, String> {
let mut metadata = HashMap::new();
for (name, value) in headers {
let name_str = name.as_str().to_lowercase();
if let Ok(val) = value.to_str() {
match name_str.as_str() {
"content-type" | "cache-control" | "content-disposition"
| "content-encoding" | "content-language" | "expires" => {
metadata.insert(name_str, val.to_string());
}
_ if name_str.starts_with("x-amz-meta-") => {
metadata.insert(name_str, val.to_string());
}
_ => {}
}
}
}
// Default content-type
if !metadata.contains_key("content-type") {
metadata.insert("content-type".to_string(), "binary/octet-stream".to_string());
}
metadata
}
fn parse_range_header(headers: &hyper::HeaderMap) -> Option<(u64, u64)> {
let range_val = headers.get("range")?.to_str().ok()?;
let bytes_prefix = "bytes=";
if !range_val.starts_with(bytes_prefix) {
return None;
}
let range_spec = &range_val[bytes_prefix.len()..];
let mut parts = range_spec.splitn(2, '-');
let start: u64 = parts.next()?.parse().ok()?;
let end_str = parts.next()?;
let end: u64 = if end_str.is_empty() {
// Open-ended range ("bytes=N-"): use u64::MAX and let callers clamp to the object size
u64::MAX
} else {
end_str.parse().ok()?
};
Some((start, end))
}
fn parse_complete_multipart_xml(xml: &str) -> Vec<(u32, String)> {
let mut parts = Vec::new();
// Simple XML parsing for <Part><PartNumber>N</PartNumber><ETag>...</ETag></Part>
let mut remaining = xml;
while let Some(part_start) = remaining.find("<Part>") {
let after_part = &remaining[part_start + 6..];
if let Some(part_end) = after_part.find("</Part>") {
let part_content = &after_part[..part_end];
let part_number = extract_xml_value(part_content, "PartNumber")
.and_then(|s| s.parse::<u32>().ok());
let etag = extract_xml_value(part_content, "ETag")
.map(|s| s.replace('"', ""));
if let (Some(pn), Some(et)) = (part_number, etag) {
parts.push((pn, et));
}
remaining = &after_part[part_end + 7..];
} else {
break;
}
}
parts.sort_by_key(|(pn, _)| *pn);
parts
}
fn extract_xml_value<'a>(xml: &'a str, tag: &str) -> Option<String> {
let open = format!("<{}>", tag);
let close = format!("</{}>", tag);
let start = xml.find(&open)? + open.len();
let end = xml.find(&close)?;
Some(xml[start..end].to_string())
}
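// For reference, the XML body this hand-rolled parser expects looks like the standard
// S3 CompleteMultipartUpload request (ETag values below are illustrative):
// <CompleteMultipartUpload>
//   <Part><PartNumber>1</PartNumber><ETag>"a54349..."</ETag></Part>
//   <Part><PartNumber>2</PartNumber><ETag>"0c78ae..."</ETag></Part>
// </CompleteMultipartUpload>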
// ============================
// CORS
// ============================
fn build_cors_preflight(config: &S3Config, request_id: &str) -> Response<BoxBody> {
let mut builder = Response::builder()
.status(StatusCode::NO_CONTENT)
.header("x-amz-request-id", request_id);
if let Some(ref origins) = config.cors.allowed_origins {
builder = builder.header("Access-Control-Allow-Origin", origins.join(", "));
}
if let Some(ref methods) = config.cors.allowed_methods {
builder = builder.header("Access-Control-Allow-Methods", methods.join(", "));
}
if let Some(ref headers) = config.cors.allowed_headers {
builder = builder.header("Access-Control-Allow-Headers", headers.join(", "));
}
if let Some(max_age) = config.cors.max_age {
builder = builder.header("Access-Control-Max-Age", max_age.to_string());
}
if config.cors.allow_credentials == Some(true) {
builder = builder.header("Access-Control-Allow-Credentials", "true");
}
builder.body(empty_body()).unwrap()
}
fn add_cors_headers(headers: &mut hyper::HeaderMap, config: &S3Config) {
if let Some(ref origins) = config.cors.allowed_origins {
headers.insert(
"access-control-allow-origin",
origins.join(", ").parse().unwrap(),
);
}
if let Some(ref exposed) = config.cors.exposed_headers {
headers.insert(
"access-control-expose-headers",
exposed.join(", ").parse().unwrap(),
);
}
if config.cors.allow_credentials == Some(true) {
headers.insert(
"access-control-allow-credentials",
"true".parse().unwrap(),
);
}
}
// ============================
// Auth
// ============================
fn check_auth(req: &Request<Incoming>, config: &S3Config) -> Result<(), S3Error> {
let auth_header = req
.headers()
.get("authorization")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
if auth_header.is_empty() {
return Err(S3Error::access_denied());
}
// Extract access key from AWS v2 or v4 signature
let access_key = if auth_header.starts_with("AWS4-HMAC-SHA256") {
// v4: AWS4-HMAC-SHA256 Credential=KEY/date/region/s3/aws4_request, ...
auth_header
.split("Credential=")
.nth(1)
.and_then(|s| s.split('/').next())
} else if auth_header.starts_with("AWS ") {
// v2: AWS KEY:signature
auth_header
.strip_prefix("AWS ")
.and_then(|s| s.split(':').next())
} else {
None
};
let access_key = access_key.unwrap_or("");
// Check against configured credentials
for cred in &config.auth.credentials {
if cred.access_key_id == access_key {
return Ok(());
}
}
Err(S3Error::access_denied())
}
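Note that check_auth only matches the access key id against the configured credentials; signatures are never verified. Example Authorization headers it accepts (keys and signature values are illustrative):
AWS4-HMAC-SHA256 Credential=S3RVER/20240101/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-date, Signature=abc123
AWS S3RVER:dGVzdHNpZ25hdHVyZQ==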

rust/src/storage.rs (new file, +885)

@@ -0,0 +1,885 @@
use anyhow::Result;
use chrono::{DateTime, Utc};
use http_body_util::BodyExt;
use hyper::body::Incoming;
use md5::{Digest, Md5};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use tokio::fs;
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufWriter};
use uuid::Uuid;
use crate::s3_error::S3Error;
// ============================
// Result types
// ============================
pub struct PutResult {
pub size: u64,
pub md5: String,
}
pub struct GetResult {
pub key: String,
pub size: u64,
pub last_modified: DateTime<Utc>,
pub md5: String,
pub metadata: HashMap<String, String>,
pub body: tokio::fs::File,
pub content_length: u64,
}
pub struct HeadResult {
pub key: String,
pub size: u64,
pub last_modified: DateTime<Utc>,
pub md5: String,
pub metadata: HashMap<String, String>,
}
pub struct CopyResult {
pub size: u64,
pub md5: String,
pub last_modified: DateTime<Utc>,
}
pub struct ListObjectEntry {
pub key: String,
pub size: u64,
pub last_modified: DateTime<Utc>,
pub md5: String,
}
pub struct ListObjectsResult {
pub contents: Vec<ListObjectEntry>,
pub common_prefixes: Vec<String>,
pub is_truncated: bool,
pub next_continuation_token: Option<String>,
pub prefix: String,
pub delimiter: String,
pub max_keys: usize,
}
pub struct BucketInfo {
pub name: String,
pub creation_date: DateTime<Utc>,
}
pub struct MultipartUploadInfo {
pub upload_id: String,
pub bucket: String,
pub key: String,
pub initiated: DateTime<Utc>,
}
pub struct CompleteMultipartResult {
pub etag: String,
pub size: u64,
}
// ============================
// Multipart metadata (disk format, compatible with TS)
// ============================
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MultipartMetadata {
upload_id: String,
bucket: String,
key: String,
initiated: String,
metadata: HashMap<String, String>,
parts: Vec<PartMetadata>,
}
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct PartMetadata {
part_number: u32,
etag: String,
size: u64,
last_modified: String,
}
// ============================
// FileStore
// ============================
pub struct FileStore {
root_dir: PathBuf,
}
impl FileStore {
pub fn new(root_dir: PathBuf) -> Self {
Self { root_dir }
}
pub async fn initialize(&self) -> Result<()> {
fs::create_dir_all(&self.root_dir).await?;
Ok(())
}
pub async fn reset(&self) -> Result<()> {
if self.root_dir.exists() {
fs::remove_dir_all(&self.root_dir).await?;
}
fs::create_dir_all(&self.root_dir).await?;
Ok(())
}
// ============================
// Bucket operations
// ============================
pub async fn list_buckets(&self) -> Result<Vec<BucketInfo>> {
let mut buckets = Vec::new();
let mut entries = fs::read_dir(&self.root_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let meta = entry.metadata().await?;
if meta.is_dir() {
let name = entry.file_name().to_string_lossy().to_string();
// Skip hidden dirs like .multipart
if name.starts_with('.') {
continue;
}
let creation_date: DateTime<Utc> = meta
.created()
.unwrap_or(meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH))
.into();
buckets.push(BucketInfo {
name,
creation_date,
});
}
}
buckets.sort_by(|a, b| a.name.cmp(&b.name));
Ok(buckets)
}
pub async fn bucket_exists(&self, bucket: &str) -> bool {
self.root_dir.join(bucket).is_dir()
}
pub async fn create_bucket(&self, bucket: &str) -> Result<()> {
let bucket_path = self.root_dir.join(bucket);
fs::create_dir_all(&bucket_path).await?;
Ok(())
}
pub async fn delete_bucket(&self, bucket: &str) -> Result<()> {
let bucket_path = self.root_dir.join(bucket);
if !bucket_path.is_dir() {
return Err(S3Error::no_such_bucket().into());
}
// Refuse to delete a non-empty bucket (any entry, including sidecar files, counts)
let mut entries = fs::read_dir(&bucket_path).await?;
if entries.next_entry().await?.is_some() {
return Err(S3Error::bucket_not_empty().into());
}
fs::remove_dir_all(&bucket_path).await?;
Ok(())
}
// ============================
// Object operations
// ============================
pub async fn put_object(
&self,
bucket: &str,
key: &str,
body: Incoming,
metadata: HashMap<String, String>,
) -> Result<PutResult> {
if !self.bucket_exists(bucket).await {
return Err(S3Error::no_such_bucket().into());
}
let object_path = self.object_path(bucket, key);
if let Some(parent) = object_path.parent() {
fs::create_dir_all(parent).await?;
}
let file = fs::File::create(&object_path).await?;
let mut writer = BufWriter::new(file);
let mut hasher = Md5::new();
let mut total_size: u64 = 0;
// Stream body frames directly to file
let mut body = body;
loop {
match body.frame().await {
Some(Ok(frame)) => {
if let Ok(data) = frame.into_data() {
hasher.update(&data);
total_size += data.len() as u64;
writer.write_all(&data).await?;
}
}
Some(Err(e)) => {
return Err(anyhow::anyhow!("Body read error: {}", e));
}
None => break,
}
}
writer.flush().await?;
drop(writer);
let md5_hex = format!("{:x}", hasher.finalize());
// Write MD5 sidecar
let md5_path = format!("{}.md5", object_path.display());
fs::write(&md5_path, &md5_hex).await?;
// Write metadata sidecar
let metadata_path = format!("{}.metadata.json", object_path.display());
let metadata_json = serde_json::to_string_pretty(&metadata)?;
fs::write(&metadata_path, metadata_json).await?;
Ok(PutResult {
size: total_size,
md5: md5_hex,
})
}
pub async fn put_object_bytes(
&self,
bucket: &str,
key: &str,
data: &[u8],
metadata: HashMap<String, String>,
) -> Result<PutResult> {
if !self.bucket_exists(bucket).await {
return Err(S3Error::no_such_bucket().into());
}
let object_path = self.object_path(bucket, key);
if let Some(parent) = object_path.parent() {
fs::create_dir_all(parent).await?;
}
let mut hasher = Md5::new();
hasher.update(data);
let md5_hex = format!("{:x}", hasher.finalize());
fs::write(&object_path, data).await?;
// Write MD5 sidecar
let md5_path = format!("{}.md5", object_path.display());
fs::write(&md5_path, &md5_hex).await?;
// Write metadata sidecar
let metadata_path = format!("{}.metadata.json", object_path.display());
let metadata_json = serde_json::to_string_pretty(&metadata)?;
fs::write(&metadata_path, metadata_json).await?;
Ok(PutResult {
size: data.len() as u64,
md5: md5_hex,
})
}
pub async fn get_object(
&self,
bucket: &str,
key: &str,
range: Option<(u64, u64)>,
) -> Result<GetResult> {
let object_path = self.object_path(bucket, key);
if !object_path.exists() {
return Err(S3Error::no_such_key().into());
}
let file_meta = fs::metadata(&object_path).await?;
let size = file_meta.len();
let last_modified: DateTime<Utc> = file_meta.modified()?.into();
let md5 = self.read_md5(&object_path).await;
let metadata = self.read_metadata(&object_path).await;
let mut file = fs::File::open(&object_path).await?;
let content_length = if let Some((start, end)) = range {
// Clamp an open-ended range end (u64::MAX from "bytes=N-") to the file size
let end = end.min(size.saturating_sub(1));
file.seek(std::io::SeekFrom::Start(start)).await?;
end - start + 1
} else {
size
};
Ok(GetResult {
key: key.to_string(),
size,
last_modified,
md5,
metadata,
body: file,
content_length,
})
}
pub async fn head_object(&self, bucket: &str, key: &str) -> Result<HeadResult> {
let object_path = self.object_path(bucket, key);
if !object_path.exists() {
return Err(S3Error::no_such_key().into());
}
// Only stat the file, don't open it
let file_meta = fs::metadata(&object_path).await?;
let size = file_meta.len();
let last_modified: DateTime<Utc> = file_meta.modified()?.into();
let md5 = self.read_md5(&object_path).await;
let metadata = self.read_metadata(&object_path).await;
Ok(HeadResult {
key: key.to_string(),
size,
last_modified,
md5,
metadata,
})
}
pub async fn delete_object(&self, bucket: &str, key: &str) -> Result<()> {
let object_path = self.object_path(bucket, key);
let md5_path = format!("{}.md5", object_path.display());
let metadata_path = format!("{}.metadata.json", object_path.display());
// S3 doesn't error if object doesn't exist
let _ = fs::remove_file(&object_path).await;
let _ = fs::remove_file(&md5_path).await;
let _ = fs::remove_file(&metadata_path).await;
// Clean up empty parent directories up to bucket level
let bucket_path = self.root_dir.join(bucket);
let mut current = object_path.parent().map(|p| p.to_path_buf());
while let Some(dir) = current {
if dir == bucket_path {
break;
}
if fs::read_dir(&dir).await.is_ok() {
let mut entries = fs::read_dir(&dir).await?;
if entries.next_entry().await?.is_none() {
let _ = fs::remove_dir(&dir).await;
} else {
break;
}
}
current = dir.parent().map(|p| p.to_path_buf());
}
Ok(())
}
pub async fn copy_object(
&self,
src_bucket: &str,
src_key: &str,
dest_bucket: &str,
dest_key: &str,
metadata_directive: &str,
new_metadata: Option<HashMap<String, String>>,
) -> Result<CopyResult> {
let src_path = self.object_path(src_bucket, src_key);
let dest_path = self.object_path(dest_bucket, dest_key);
if !src_path.exists() {
return Err(S3Error::no_such_key().into());
}
if !self.bucket_exists(dest_bucket).await {
return Err(S3Error::no_such_bucket().into());
}
if let Some(parent) = dest_path.parent() {
fs::create_dir_all(parent).await?;
}
// Copy object file
fs::copy(&src_path, &dest_path).await?;
// Handle metadata
if metadata_directive == "COPY" {
let src_meta_path = format!("{}.metadata.json", src_path.display());
let dest_meta_path = format!("{}.metadata.json", dest_path.display());
let _ = fs::copy(&src_meta_path, &dest_meta_path).await;
} else if let Some(meta) = new_metadata {
let dest_meta_path = format!("{}.metadata.json", dest_path.display());
let json = serde_json::to_string_pretty(&meta)?;
fs::write(&dest_meta_path, json).await?;
}
// Copy MD5
let src_md5_path = format!("{}.md5", src_path.display());
let dest_md5_path = format!("{}.md5", dest_path.display());
let _ = fs::copy(&src_md5_path, &dest_md5_path).await;
let file_meta = fs::metadata(&dest_path).await?;
let md5 = self.read_md5(&dest_path).await;
let last_modified: DateTime<Utc> = file_meta.modified()?.into();
Ok(CopyResult {
size: file_meta.len(),
md5,
last_modified,
})
}
pub async fn list_objects(
&self,
bucket: &str,
prefix: &str,
delimiter: &str,
max_keys: usize,
continuation_token: Option<&str>,
) -> Result<ListObjectsResult> {
let bucket_path = self.root_dir.join(bucket);
if !bucket_path.is_dir() {
return Err(S3Error::no_such_bucket().into());
}
// Collect all object keys recursively
let mut keys = Vec::new();
self.collect_keys(&bucket_path, &bucket_path, &mut keys)
.await?;
// Apply prefix filter
if !prefix.is_empty() {
keys.retain(|k| k.starts_with(prefix));
}
keys.sort();
// Handle continuation token
if let Some(token) = continuation_token {
if let Some(pos) = keys.iter().position(|k| k.as_str() > token) {
keys = keys[pos..].to_vec();
} else {
keys.clear();
}
}
// Handle delimiter and pagination
let mut common_prefixes: Vec<String> = Vec::new();
let mut common_prefix_set = std::collections::HashSet::new();
let mut contents: Vec<ListObjectEntry> = Vec::new();
let mut is_truncated = false;
for key in &keys {
if !delimiter.is_empty() {
let remaining = &key[prefix.len()..];
if let Some(delim_idx) = remaining.find(delimiter) {
let cp = format!(
"{}{}",
prefix,
&remaining[..delim_idx + delimiter.len()]
);
if common_prefix_set.insert(cp.clone()) {
common_prefixes.push(cp);
}
continue;
}
}
if contents.len() >= max_keys {
is_truncated = true;
break;
}
let object_path = self.object_path(bucket, key);
if let Ok(meta) = fs::metadata(&object_path).await {
let md5 = self.read_md5(&object_path).await;
let last_modified: DateTime<Utc> = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH).into();
contents.push(ListObjectEntry {
key: key.clone(),
size: meta.len(),
last_modified,
md5,
});
}
}
let next_continuation_token = if is_truncated {
contents.last().map(|e| e.key.clone())
} else {
None
};
common_prefixes.sort();
Ok(ListObjectsResult {
contents,
common_prefixes,
is_truncated,
next_continuation_token,
prefix: prefix.to_string(),
delimiter: delimiter.to_string(),
max_keys,
})
}
// ============================
// Multipart operations
// ============================
fn multipart_dir(&self) -> PathBuf {
self.root_dir.join(".multipart")
}
pub async fn initiate_multipart(
&self,
bucket: &str,
key: &str,
metadata: HashMap<String, String>,
) -> Result<String> {
let upload_id = Uuid::new_v4().to_string().replace('-', "");
let upload_dir = self.multipart_dir().join(&upload_id);
fs::create_dir_all(&upload_dir).await?;
let meta = MultipartMetadata {
upload_id: upload_id.clone(),
bucket: bucket.to_string(),
key: key.to_string(),
initiated: Utc::now().to_rfc3339(),
metadata,
parts: Vec::new(),
};
let meta_path = upload_dir.join("metadata.json");
let json = serde_json::to_string_pretty(&meta)?;
fs::write(&meta_path, json).await?;
Ok(upload_id)
}
pub async fn upload_part(
&self,
upload_id: &str,
part_number: u32,
body: Incoming,
) -> Result<(String, u64)> {
let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into());
}
let part_path = upload_dir.join(format!("part-{}", part_number));
let file = fs::File::create(&part_path).await?;
let mut writer = BufWriter::new(file);
let mut hasher = Md5::new();
let mut size: u64 = 0;
let mut body = body;
loop {
match body.frame().await {
Some(Ok(frame)) => {
if let Ok(data) = frame.into_data() {
hasher.update(&data);
size += data.len() as u64;
writer.write_all(&data).await?;
}
}
Some(Err(e)) => {
return Err(anyhow::anyhow!("Body read error: {}", e));
}
None => break,
}
}
writer.flush().await?;
drop(writer);
let etag = format!("{:x}", hasher.finalize());
// Update metadata
self.update_multipart_metadata(upload_id, part_number, &etag, size)
.await?;
Ok((etag, size))
}
async fn update_multipart_metadata(
&self,
upload_id: &str,
part_number: u32,
etag: &str,
size: u64,
) -> Result<()> {
let meta_path = self.multipart_dir().join(upload_id).join("metadata.json");
let content = fs::read_to_string(&meta_path).await?;
let mut meta: MultipartMetadata = serde_json::from_str(&content)?;
// Remove existing part with same number
meta.parts.retain(|p| p.part_number != part_number);
meta.parts.push(PartMetadata {
part_number,
etag: etag.to_string(),
size,
last_modified: Utc::now().to_rfc3339(),
});
meta.parts.sort_by_key(|p| p.part_number);
let json = serde_json::to_string_pretty(&meta)?;
fs::write(&meta_path, json).await?;
Ok(())
}
pub async fn complete_multipart(
&self,
upload_id: &str,
parts: &[(u32, String)],
) -> Result<CompleteMultipartResult> {
let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into());
}
// Read metadata to get bucket/key
let meta_path = upload_dir.join("metadata.json");
let content = fs::read_to_string(&meta_path).await?;
let meta: MultipartMetadata = serde_json::from_str(&content)?;
let object_path = self.object_path(&meta.bucket, &meta.key);
if let Some(parent) = object_path.parent() {
fs::create_dir_all(parent).await?;
}
// Concatenate parts into final object, stream each part
let dest_file = fs::File::create(&object_path).await?;
let mut writer = BufWriter::new(dest_file);
let mut hasher = Md5::new();
let mut total_size: u64 = 0;
for (part_number, _etag) in parts {
let part_path = upload_dir.join(format!("part-{}", part_number));
if !part_path.exists() {
return Err(anyhow::anyhow!("Part {} not found", part_number));
}
let mut part_file = fs::File::open(&part_path).await?;
let mut buf = vec![0u8; 64 * 1024]; // 64KB buffer
loop {
let n = part_file.read(&mut buf).await?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
writer.write_all(&buf[..n]).await?;
total_size += n as u64;
}
}
writer.flush().await?;
drop(writer);
let etag = format!("{:x}", hasher.finalize());
// Write MD5 sidecar
let md5_path = format!("{}.md5", object_path.display());
fs::write(&md5_path, &etag).await?;
// Write metadata sidecar
let metadata_path = format!("{}.metadata.json", object_path.display());
let metadata_json = serde_json::to_string_pretty(&meta.metadata)?;
fs::write(&metadata_path, metadata_json).await?;
// Clean up multipart directory
let _ = fs::remove_dir_all(&upload_dir).await;
Ok(CompleteMultipartResult {
etag,
size: total_size,
})
}
pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> {
let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into());
}
fs::remove_dir_all(&upload_dir).await?;
Ok(())
}
pub async fn list_multipart_uploads(
&self,
bucket: &str,
) -> Result<Vec<MultipartUploadInfo>> {
let multipart_dir = self.multipart_dir();
if !multipart_dir.is_dir() {
return Ok(Vec::new());
}
let mut uploads = Vec::new();
let mut entries = fs::read_dir(&multipart_dir).await?;
while let Some(entry) = entries.next_entry().await? {
if !entry.metadata().await?.is_dir() {
continue;
}
let meta_path = entry.path().join("metadata.json");
if let Ok(content) = fs::read_to_string(&meta_path).await {
if let Ok(meta) = serde_json::from_str::<MultipartMetadata>(&content) {
if meta.bucket == bucket {
let initiated = DateTime::parse_from_rfc3339(&meta.initiated)
.map(|dt| dt.with_timezone(&Utc))
.unwrap_or_else(|_| Utc::now());
uploads.push(MultipartUploadInfo {
upload_id: meta.upload_id,
bucket: meta.bucket,
key: meta.key,
initiated,
});
}
}
}
}
Ok(uploads)
}
// ============================
// Helpers
// ============================
fn object_path(&self, bucket: &str, key: &str) -> PathBuf {
let encoded = encode_key(key);
self.root_dir
.join(bucket)
.join(format!("{}._S3_object", encoded))
}
async fn read_md5(&self, object_path: &Path) -> String {
let md5_path = format!("{}.md5", object_path.display());
match fs::read_to_string(&md5_path).await {
Ok(s) => s.trim().to_string(),
Err(_) => {
// Calculate MD5 if sidecar missing
match self.calculate_md5(object_path).await {
Ok(hash) => {
let _ = fs::write(&md5_path, &hash).await;
hash
}
Err(_) => String::new(),
}
}
}
}
async fn calculate_md5(&self, path: &Path) -> Result<String> {
let mut file = fs::File::open(path).await?;
let mut hasher = Md5::new();
let mut buf = vec![0u8; 64 * 1024];
loop {
let n = file.read(&mut buf).await?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(format!("{:x}", hasher.finalize()))
}
async fn read_metadata(&self, object_path: &Path) -> HashMap<String, String> {
let meta_path = format!("{}.metadata.json", object_path.display());
match fs::read_to_string(&meta_path).await {
Ok(s) => serde_json::from_str(&s).unwrap_or_default(),
Err(_) => HashMap::new(),
}
}
fn collect_keys<'a>(
&'a self,
bucket_path: &'a Path,
dir: &'a Path,
keys: &'a mut Vec<String>,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
Box::pin(async move {
let mut entries = match fs::read_dir(dir).await {
Ok(e) => e,
Err(_) => return Ok(()),
};
while let Some(entry) = entries.next_entry().await? {
let meta = entry.metadata().await?;
let name = entry.file_name().to_string_lossy().to_string();
if meta.is_dir() {
self.collect_keys(bucket_path, &entry.path(), keys).await?;
} else if name.ends_with("._S3_object")
&& !name.ends_with(".metadata.json")
&& !name.ends_with(".md5")
{
let relative = entry
.path()
.strip_prefix(bucket_path)
.unwrap_or(Path::new(""))
.to_string_lossy()
.to_string();
let key = decode_key(relative.trim_end_matches("._S3_object"));
keys.push(key);
}
}
Ok(())
})
}
}
// ============================
// Key encoding (identity on Linux)
// ============================
fn encode_key(key: &str) -> String {
if cfg!(windows) {
key.chars()
.map(|c| match c {
'<' | '>' | ':' | '"' | '\\' | '|' | '?' | '*' => {
format!("&{:02x}", c as u32)
}
_ => c.to_string(),
})
.collect()
} else {
key.to_string()
}
}
fn decode_key(encoded: &str) -> String {
if cfg!(windows) {
let mut result = String::new();
let mut chars = encoded.chars();
while let Some(c) = chars.next() {
if c == '&' {
let hex: String = chars.by_ref().take(2).collect();
if let Ok(byte) = u8::from_str_radix(&hex, 16) {
result.push(byte as char);
} else {
result.push('&');
result.push_str(&hex);
}
} else {
result.push(c);
}
}
result
} else {
encoded.to_string()
}
}
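The on-disk layout the store produces, for illustration (bucket, key, and upload id are made up):
s3-data/
  photos/
    vacation/beach.jpg._S3_object                  object bytes
    vacation/beach.jpg._S3_object.md5              hex MD5 sidecar
    vacation/beach.jpg._S3_object.metadata.json    metadata sidecar
  .multipart/
    0f3b9c.../
      metadata.json                                upload + part bookkeeping
      part-1, part-2, ...                          raw part data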

rust/src/xml_response.rs (new file, +220)

@@ -0,0 +1,220 @@
use crate::storage::{BucketInfo, ListObjectsResult, MultipartUploadInfo};
const XML_DECL: &str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
const S3_NS: &str = "http://s3.amazonaws.com/doc/2006-03-01/";
fn xml_escape(s: &str) -> String {
s.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
.replace('"', "&quot;")
.replace('\'', "&apos;")
}
pub fn list_buckets_xml(buckets: &[BucketInfo]) -> String {
let mut xml = format!(
"{}\n<ListAllMyBucketsResult xmlns=\"{}\">\
<Owner><ID>123456789000</ID><DisplayName>S3rver</DisplayName></Owner>\
<Buckets>",
XML_DECL, S3_NS
);
for b in buckets {
xml.push_str(&format!(
"<Bucket><Name>{}</Name><CreationDate>{}</CreationDate></Bucket>",
xml_escape(&b.name),
b.creation_date.to_rfc3339()
));
}
xml.push_str("</Buckets></ListAllMyBucketsResult>");
xml
}
pub fn list_objects_v1_xml(bucket: &str, result: &ListObjectsResult) -> String {
let mut xml = format!(
"{}\n<ListBucketResult xmlns=\"{}\">\
<Name>{}</Name>\
<Prefix>{}</Prefix>\
<MaxKeys>{}</MaxKeys>\
<IsTruncated>{}</IsTruncated>",
XML_DECL,
S3_NS,
xml_escape(bucket),
xml_escape(&result.prefix),
result.max_keys,
result.is_truncated
);
if !result.delimiter.is_empty() {
xml.push_str(&format!("<Delimiter>{}</Delimiter>", xml_escape(&result.delimiter)));
}
for entry in &result.contents {
xml.push_str(&format!(
"<Contents>\
<Key>{}</Key>\
<LastModified>{}</LastModified>\
<ETag>\"{}\"</ETag>\
<Size>{}</Size>\
<StorageClass>STANDARD</StorageClass>\
</Contents>",
xml_escape(&entry.key),
entry.last_modified.to_rfc3339(),
xml_escape(&entry.md5),
entry.size
));
}
for cp in &result.common_prefixes {
xml.push_str(&format!(
"<CommonPrefixes><Prefix>{}</Prefix></CommonPrefixes>",
xml_escape(cp)
));
}
xml.push_str("</ListBucketResult>");
xml
}
pub fn list_objects_v2_xml(bucket: &str, result: &ListObjectsResult) -> String {
let mut xml = format!(
"{}\n<ListBucketResult xmlns=\"{}\">\
<Name>{}</Name>\
<Prefix>{}</Prefix>\
<MaxKeys>{}</MaxKeys>\
<KeyCount>{}</KeyCount>\
<IsTruncated>{}</IsTruncated>",
XML_DECL,
S3_NS,
xml_escape(bucket),
xml_escape(&result.prefix),
result.max_keys,
result.contents.len(),
result.is_truncated
);
if !result.delimiter.is_empty() {
xml.push_str(&format!("<Delimiter>{}</Delimiter>", xml_escape(&result.delimiter)));
}
if let Some(ref token) = result.next_continuation_token {
xml.push_str(&format!(
"<NextContinuationToken>{}</NextContinuationToken>",
xml_escape(token)
));
}
for entry in &result.contents {
xml.push_str(&format!(
"<Contents>\
<Key>{}</Key>\
<LastModified>{}</LastModified>\
<ETag>\"{}\"</ETag>\
<Size>{}</Size>\
<StorageClass>STANDARD</StorageClass>\
</Contents>",
xml_escape(&entry.key),
entry.last_modified.to_rfc3339(),
xml_escape(&entry.md5),
entry.size
));
}
for cp in &result.common_prefixes {
xml.push_str(&format!(
"<CommonPrefixes><Prefix>{}</Prefix></CommonPrefixes>",
xml_escape(cp)
));
}
xml.push_str("</ListBucketResult>");
xml
}
pub fn error_xml(code: &str, message: &str) -> String {
format!(
"{}\n<Error><Code>{}</Code><Message>{}</Message></Error>",
XML_DECL,
xml_escape(code),
xml_escape(message)
)
}
pub fn copy_object_result_xml(etag: &str, last_modified: &str) -> String {
format!(
"{}\n<CopyObjectResult>\
<LastModified>{}</LastModified>\
<ETag>\"{}\"</ETag>\
</CopyObjectResult>",
XML_DECL,
xml_escape(last_modified),
xml_escape(etag)
)
}
pub fn initiate_multipart_xml(bucket: &str, key: &str, upload_id: &str) -> String {
format!(
"{}\n<InitiateMultipartUploadResult xmlns=\"{}\">\
<Bucket>{}</Bucket>\
<Key>{}</Key>\
<UploadId>{}</UploadId>\
</InitiateMultipartUploadResult>",
XML_DECL,
S3_NS,
xml_escape(bucket),
xml_escape(key),
xml_escape(upload_id)
)
}
pub fn complete_multipart_xml(bucket: &str, key: &str, etag: &str) -> String {
format!(
"{}\n<CompleteMultipartUploadResult xmlns=\"{}\">\
<Location>/{}/{}</Location>\
<Bucket>{}</Bucket>\
<Key>{}</Key>\
<ETag>\"{}\"</ETag>\
</CompleteMultipartUploadResult>",
XML_DECL,
S3_NS,
xml_escape(bucket),
xml_escape(key),
xml_escape(bucket),
xml_escape(key),
xml_escape(etag)
)
}
pub fn list_multipart_uploads_xml(bucket: &str, uploads: &[MultipartUploadInfo]) -> String {
let mut xml = format!(
"{}\n<ListMultipartUploadsResult xmlns=\"{}\">\
<Bucket>{}</Bucket>\
<KeyMarker></KeyMarker>\
<UploadIdMarker></UploadIdMarker>\
<MaxUploads>1000</MaxUploads>\
<IsTruncated>false</IsTruncated>",
XML_DECL,
S3_NS,
xml_escape(bucket)
);
for u in uploads {
xml.push_str(&format!(
"<Upload>\
<Key>{}</Key>\
<UploadId>{}</UploadId>\
<Initiator><ID>S3RVER</ID><DisplayName>S3RVER</DisplayName></Initiator>\
<Owner><ID>S3RVER</ID><DisplayName>S3RVER</DisplayName></Owner>\
<StorageClass>STANDARD</StorageClass>\
<Initiated>{}</Initiated>\
</Upload>",
xml_escape(&u.key),
xml_escape(&u.upload_id),
u.initiated.to_rfc3339()
));
}
xml.push_str("</ListMultipartUploadsResult>");
xml
}