Pragmastat / Rust
Copy-pastable implementation:
//! Statistical estimators for one-sample and two-sample analysis
/// Calculates the median of a sorted slice
fn median_sorted(sorted: &[f64]) -> Result<f64, &'static str> {
let n = sorted.len();
if n == 0 {
return Err("Input slice cannot be empty");
}
if n % 2 == 0 {
Ok((sorted[n / 2 - 1] + sorted[n / 2]) / 2.0)
} else {
Ok(sorted[n / 2])
}
}
/// Calculates the median of a slice
fn median(values: &[f64]) -> Result<f64, &'static str> {
if values.is_empty() {
return Err("Input slice cannot be empty");
}
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
median_sorted(&sorted)
}
/// Estimates the central value of the data (Center)
///
/// Calculates the median of all pairwise averages (x[i] + x[j])/2.
/// More robust than the mean and more efficient than the median.
pub fn center(x: &[f64]) -> Result<f64, &'static str> {
let n = x.len();
if n == 0 {
return Err("Input slice cannot be empty");
}
let mut pairwise_averages = Vec::new();
for i in 0..n {
for j in i..n {
pairwise_averages.push((x[i] + x[j]) / 2.0);
}
}
median(&pairwise_averages)
}
/// Estimates data dispersion (Spread)
///
/// Calculates the median of all pairwise absolute differences |x[i] - x[j]|.
/// More robust than standard deviation and more efficient than MAD.
pub fn spread(x: &[f64]) -> Result<f64, &'static str> {
let n = x.len();
if n == 0 {
return Err("Input slice cannot be empty");
}
if n == 1 {
return Ok(0.0);
}
let mut pairwise_diffs = Vec::new();
for i in 0..n {
for j in (i + 1)..n {
pairwise_diffs.push((x[i] - x[j]).abs());
}
}
median(&pairwise_diffs)
}
/// Measures the relative dispersion of a sample (Volatility)
///
/// Calculates the ratio of Spread to absolute Center.
/// Robust alternative to the coefficient of variation.
pub fn volatility(x: &[f64]) -> Result<f64, &'static str> {
let center_val = center(x)?;
if center_val == 0.0 {
return Err("Volatility is undefined when Center equals zero");
}
let spread_val = spread(x)?;
Ok(spread_val / center_val.abs())
}
/// Measures precision: the distance between two estimations of independent random samples (Precision)
///
/// Calculated as 2 * Spread / sqrt(n). The interval center ± precision forms a range
/// that probably contains the true center value.
pub fn precision(x: &[f64]) -> Result<f64, &'static str> {
let n = x.len();
if n == 0 {
return Err("Input slice cannot be empty");
}
let spread_val = spread(x)?;
Ok(2.0 * spread_val / (n as f64).sqrt())
}
/// Measures the typical difference between elements of x and y (MedShift)
///
/// Calculates the median of all pairwise differences (x[i] - y[j]).
/// Positive values mean x is typically larger, negative means y is typically larger.
pub fn med_shift(x: &[f64], y: &[f64]) -> Result<f64, &'static str> {
if x.is_empty() || y.is_empty() {
return Err("Input slices cannot be empty");
}
let mut pairwise_shifts = Vec::new();
for &xi in x {
for &yj in y {
pairwise_shifts.push(xi - yj);
}
}
median(&pairwise_shifts)
}
/// Measures how many times larger x is compared to y (MedRatio)
///
/// Calculates the median of all pairwise ratios (x[i] / y[j]).
/// For example, med_ratio = 1.2 means x is typically 20% larger than y.
pub fn med_ratio(x: &[f64], y: &[f64]) -> Result<f64, &'static str> {
if x.is_empty() || y.is_empty() {
return Err("Input slices cannot be empty");
}
// Check that all y values are strictly positive
if y.iter().any(|&val| val <= 0.0) {
return Err("All values in y must be strictly positive");
}
let mut pairwise_ratios = Vec::new();
for &xi in x {
for &yj in y {
pairwise_ratios.push(xi / yj);
}
}
median(&pairwise_ratios)
}
/// Measures the typical variability when considering both samples together (MedSpread)
///
/// Computes the weighted average of individual spreads: (n*Spread(x) + m*Spread(y))/(n+m).
pub fn med_spread(x: &[f64], y: &[f64]) -> Result<f64, &'static str> {
if x.is_empty() || y.is_empty() {
return Err("Input slices cannot be empty");
}
let n = x.len();
let m = y.len();
let spread_x = spread(x)?;
let spread_y = spread(y)?;
Ok((n as f64 * spread_x + m as f64 * spread_y) / (n + m) as f64)
}
/// Measures effect size: a normalized absolute difference between x and y (MedDisparity)
///
/// Calculated as MedShift / MedSpread. Robust alternative to Cohen's d.
/// Returns infinity if med_spread is zero.
pub fn med_disparity(x: &[f64], y: &[f64]) -> Result<f64, &'static str> {
let med_shift_val = med_shift(x, y)?;
let med_spread_val = med_spread(x, y)?;
if med_spread_val == 0.0 {
return Ok(f64::INFINITY);
}
Ok(med_shift_val / med_spread_val)
}