hakmem/core/hakmem_p2.c

// hakmem_p2.c - P² Algorithm Implementation
// Reference: Jain & Chlamtac (1985)
//
// License: MIT
// Date: 2025-10-21

#include "hakmem_p2.h"
#include <math.h>
#include <stdio.h>

// ============================================================================
// Helper Functions
// ============================================================================

// Parabolic prediction for marker adjustment
static double parabolic(double q_prev, double q_curr, double q_next,
                       int n_prev, int n_curr, int n_next, int d) {
    double dn = (double)d;
    double d1 = (double)(n_curr - n_prev);
    double d2 = (double)(n_next - n_curr);
    double d12 = (double)(n_next - n_prev);

    double term1 = (d1 + dn) * (q_next - q_curr) / d2;
    double term2 = (d2 - dn) * (q_curr - q_prev) / d1;

    return q_curr + (dn / d12) * (term1 + term2);
}

// Linear prediction (fallback if parabolic overshoots)
static double linear(double q_i, double q_adj, int n_i, int n_adj, int d) {
    return q_i + (double)d * (q_adj - q_i) / (double)(n_adj - n_i);
}

// Bubble sort for initial 5 samples
static void sort5(double* arr) {
    for (int i = 0; i < 4; i++) {
        for (int j = i + 1; j < 5; j++) {
            if (arr[i] > arr[j]) {
                double tmp = arr[i];
                arr[i] = arr[j];
                arr[j] = tmp;
            }
        }
    }
}

// ============================================================================
// Public API Implementation
// ============================================================================

void hak_p2_init(hak_p2_t* p2, double target_p) {
    p2->count = 0;
    p2->target_p = target_p;

    // Initialize markers (will be set after first 5 samples)
    for (int i = 0; i < 5; i++) {
        p2->q[i] = 0.0;
        p2->n[i] = i + 1;  // Positions: 1, 2, 3, 4, 5
        p2->dn[i] = 0.0;
    }
}

void hak_p2_add(hak_p2_t* p2, double value) {
    p2->count++;

    // Phase 1: Collect first 5 samples
    if (p2->count <= 5) {
        p2->q[p2->count - 1] = value;

        if (p2->count == 5) {
            // Sort initial samples
            sort5(p2->q);

            // Initialize positions
            for (int i = 0; i < 5; i++) {
                p2->n[i] = i + 1;
            }

            // Calculate desired position increments
            // Markers: 0=min(p=0), 1=p25, 2=p50, 3=p75, 4=target_p
            double p_values[5] = {0.0, 0.25, 0.50, 0.75, p2->target_p};
            for (int i = 0; i < 5; i++) {
                p2->dn[i] = p_values[i];
            }
        }
        return;
    }

    // Phase 2: Update markers (count >= 6)

    // Step 1: Find cell k such that q[k] <= value < q[k+1]
    int k;
    if (value < p2->q[0]) {
        k = 0;
        p2->q[0] = value;  // Update min
    } else if (value >= p2->q[4]) {
        k = 3;
        p2->q[4] = value;  // Update max
    } else {
        for (k = 0; k < 4; k++) {
            if (p2->q[k] <= value && value < p2->q[k + 1]) {
                break;
            }
        }
    }

    // Step 2: Increment positions for markers k+1 to 4
    for (int i = k + 1; i < 5; i++) {
        p2->n[i]++;
    }

    // Step 3: Update desired positions
    p2->dn[0] = 0.0;  // min doesn't move
    for (int i = 1; i < 5; i++) {
        double p_i = (i == 4) ? p2->target_p : (i * 0.25);
        p2->dn[i] = 1.0 + (double)(p2->count - 1) * p_i;
    }

    // Step 4: Adjust marker heights
    for (int i = 1; i < 4; i++) {  // Skip min(0) and max(4)
        double d_val = p2->dn[i] - (double)p2->n[i];
        int d = (d_val >= 1.0) ? 1 : ((d_val <= -1.0) ? -1 : 0);

        if (d != 0) {
            // Check if adjustment is within bounds
            if ((d == 1 && p2->n[i + 1] - p2->n[i] > 1) ||
                (d == -1 && p2->n[i - 1] - p2->n[i] < -1)) {

                // Try parabolic prediction
                double q_new = parabolic(
                    p2->q[i - 1], p2->q[i], p2->q[i + 1],
                    p2->n[i - 1], p2->n[i], p2->n[i + 1], d
                );

                // Check if parabolic overshoots
                if (p2->q[i - 1] < q_new && q_new < p2->q[i + 1]) {
                    p2->q[i] = q_new;
                } else {
                    // Use linear prediction
                    int adj = (d == 1) ? (i + 1) : (i - 1);
                    p2->q[i] = linear(p2->q[i], p2->q[adj], p2->n[i], p2->n[adj], d);
                }

                p2->n[i] += d;
            }
        }
    }
}

double hak_p2_get(hak_p2_t* p2) {
    if (p2->count < 5) {
        // Not enough samples, return max so far
        double max_val = p2->q[0];
        for (int i = 1; i < p2->count; i++) {
            if (p2->q[i] > max_val) max_val = p2->q[i];
        }
        return max_val;
    }

    // Return target percentile estimate (q[4] for p99)
    return p2->q[4];
}

void hak_p2_reset(hak_p2_t* p2) {
    double target_p = p2->target_p;
    hak_p2_init(p2, target_p);
}

int hak_p2_count(hak_p2_t* p2) {
    return p2->count;
}