hakmem/core/hakmem_bigcache.c

// hakmem_bigcache.c - Big-Block Cache Implementation
// Purpose: Per-site ring cache for large allocations
//
// License: MIT
// Date: 2025-10-21
// Phase 2c: Dynamic hash table implementation

#include "hakmem_bigcache.h"
#include "hakmem_internal.h"  // Phase 6.15 P0.1: For HAKMEM_LOG macro
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <pthread.h>
#include <time.h>

// ============================================================================
// Data Structures (Phase 2c: Dynamic Hash Table)
// ============================================================================

// Hash table node (chaining for collision resolution)
typedef struct BigCacheNode {
    void*     ptr;                     // Cached pointer (user pointer, not raw)
    size_t    actual_bytes;            // Actual allocated size
    size_t    class_bytes;             // Size class for indexing
    uintptr_t site;                    // Allocation site
    uint64_t  timestamp;               // Timestamp for LRU eviction
    uint64_t  access_count;            // Hit count for stats
    struct BigCacheNode* next;         // Collision chain
} BigCacheNode;

// Dynamic hash table structure
typedef struct BigCacheTable {
    BigCacheNode** buckets;            // Dynamic array of bucket heads
    size_t capacity;                   // Current number of buckets (power of 2)
    size_t count;                      // Total cached entries
    size_t max_count;                  // Resize threshold (capacity * LOAD_FACTOR)
    pthread_rwlock_t lock;             // Protect table resizing
} BigCacheTable;

static BigCacheTable g_bigcache;

// Statistics (for debugging/paper)
static struct {
    uint64_t hits;
    uint64_t misses;
    uint64_t puts;
    uint64_t evictions;
    uint64_t rejects;
} g_stats;

static int g_initialized = 0;

// ============================================================================
// Helper Functions (Phase 2c: Hash Table Operations)
// ============================================================================

// Get current timestamp in nanoseconds
static inline uint64_t get_timestamp_ns(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

// Phase 2c: Improved hash function (FNV-1a + mixing)
// Combines size and site_id for better distribution
static inline size_t bigcache_hash(size_t size, uintptr_t site_id, size_t capacity) {
    // Combine size and site_id
    uint64_t hash = size ^ site_id;

    // FNV-1a mixing
    hash ^= (hash >> 16);
    hash *= 0x85ebca6b;
    hash ^= (hash >> 13);
    hash *= 0xc2b2ae35;
    hash ^= (hash >> 16);

    // Mask to capacity (assumes power of 2)
    return (size_t)(hash & (capacity - 1));
}

// Check if size is cacheable
static inline int is_cacheable(size_t size) {
    return size >= BIGCACHE_MIN_SIZE;
}

// Callback for actual freeing (set by hakmem.c)
static void (*g_free_callback)(void* ptr, size_t size) = NULL;

// Forward declaration for resize
static void resize_bigcache(void);

// Free a cached node (when evicting)
static inline void free_node(BigCacheNode* node) {
    if (!node) return;

    // Use callback if available to actually free the memory
    if (g_free_callback) {
        g_free_callback(node->ptr, node->actual_bytes);
    }

    free(node);
    g_stats.evictions++;
}

// ============================================================================
// Public API (Phase 2c: Dynamic Hash Table)
// ============================================================================

void hak_bigcache_init(void) {
    if (g_initialized) return;

    // Initialize hash table
    g_bigcache.capacity = BIGCACHE_INITIAL_CAPACITY;
    g_bigcache.count = 0;
    g_bigcache.max_count = (size_t)(g_bigcache.capacity * BIGCACHE_LOAD_FACTOR);
    g_bigcache.buckets = (BigCacheNode**)calloc(g_bigcache.capacity, sizeof(BigCacheNode*));

    if (!g_bigcache.buckets) {
        fprintf(stderr, "[BigCache] FATAL: Failed to allocate initial buckets\n");
        return;
    }

    pthread_rwlock_init(&g_bigcache.lock, NULL);

    // Initialize stats
    memset(&g_stats, 0, sizeof(g_stats));

    g_initialized = 1;

    HAKMEM_LOG("[BigCache] Initialized (Phase 2c: Dynamic hash table)\n");
    HAKMEM_LOG("[BigCache] Initial capacity: %zu buckets, max: %d buckets\n",
               g_bigcache.capacity, BIGCACHE_MAX_CAPACITY);
    HAKMEM_LOG("[BigCache] Load factor: %.2f, min size: %d KB\n",
               BIGCACHE_LOAD_FACTOR, BIGCACHE_MIN_SIZE / 1024);
}

void hak_bigcache_shutdown(void) {
    if (!g_initialized) return;

    // Free all cached entries
    for (size_t i = 0; i < g_bigcache.capacity; i++) {
        BigCacheNode* node = g_bigcache.buckets[i];
        while (node) {
            BigCacheNode* next = node->next;
            free_node(node);
            node = next;
        }
    }

    // Free bucket array
    free(g_bigcache.buckets);
    pthread_rwlock_destroy(&g_bigcache.lock);

    hak_bigcache_print_stats();

    g_initialized = 0;
}

// Phase 2c: Hash table lookup (with collision chaining)
int hak_bigcache_try_get(size_t size, uintptr_t site, void** out_ptr) {
    if (!g_initialized) hak_bigcache_init();
    if (!is_cacheable(size)) return 0;

    pthread_rwlock_rdlock(&g_bigcache.lock);

    // Hash to bucket
    size_t bucket_idx = bigcache_hash(size, site, g_bigcache.capacity);
    BigCacheNode** bucket = &g_bigcache.buckets[bucket_idx];

    // Search collision chain
    BigCacheNode** prev = bucket;
    BigCacheNode* node = *bucket;

    while (node) {
        // Match by site and sufficient size
        if (node->site == site && node->actual_bytes >= size) {
            // Cache hit!
            *out_ptr = node->ptr;

            // Remove from chain
            *prev = node->next;
            free(node);  // Free node metadata only (not the cached memory)
            g_bigcache.count--;

            g_stats.hits++;
            pthread_rwlock_unlock(&g_bigcache.lock);
            return 1;
        }

        prev = &node->next;
        node = node->next;
    }

    // Cache miss
    g_stats.misses++;
    pthread_rwlock_unlock(&g_bigcache.lock);
    return 0;
}

// Phase 2c: Hash table insertion (with auto-resize)
int hak_bigcache_put(void* ptr, size_t actual_bytes, uintptr_t site) {
    if (!g_initialized) hak_bigcache_init();
    if (!is_cacheable(actual_bytes)) {
        g_stats.rejects++;
        return 0;
    }

    pthread_rwlock_rdlock(&g_bigcache.lock);

    // Check if resize needed (release lock and acquire write lock)
    if (g_bigcache.count >= g_bigcache.max_count) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        resize_bigcache();
        pthread_rwlock_rdlock(&g_bigcache.lock);
    }

    // Hash to bucket
    size_t bucket_idx = bigcache_hash(actual_bytes, site, g_bigcache.capacity);
    BigCacheNode** bucket = &g_bigcache.buckets[bucket_idx];

    // Create new node
    BigCacheNode* node = (BigCacheNode*)malloc(sizeof(BigCacheNode));
    if (!node) {
        g_stats.rejects++;
        pthread_rwlock_unlock(&g_bigcache.lock);
        return 0;
    }

    node->ptr = ptr;
    node->actual_bytes = actual_bytes;
    node->class_bytes = actual_bytes;  // For stats
    node->site = site;
    node->timestamp = get_timestamp_ns();
    node->access_count = 0;

    // Insert at head of chain (most recent)
    node->next = *bucket;
    *bucket = node;

    g_bigcache.count++;
    g_stats.puts++;

    pthread_rwlock_unlock(&g_bigcache.lock);
    return 1;
}

// Phase 2c: Resize hash table (2x capacity)
static void resize_bigcache(void) {
    pthread_rwlock_wrlock(&g_bigcache.lock);

    size_t old_capacity = g_bigcache.capacity;
    size_t new_capacity = old_capacity * 2;

    if (new_capacity > BIGCACHE_MAX_CAPACITY) {
        new_capacity = BIGCACHE_MAX_CAPACITY;
    }

    if (new_capacity == old_capacity) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        return;  // Already at max
    }

    // Allocate new bucket array
    BigCacheNode** new_buckets = (BigCacheNode**)calloc(new_capacity, sizeof(BigCacheNode*));
    if (!new_buckets) {
        fprintf(stderr, "[BigCache] ERROR: Failed to resize (malloc failed)\n");
        pthread_rwlock_unlock(&g_bigcache.lock);
        return;
    }

    // Rehash all entries
    for (size_t i = 0; i < old_capacity; i++) {
        BigCacheNode* node = g_bigcache.buckets[i];

        while (node) {
            BigCacheNode* next = node->next;

            // Rehash to new bucket
            size_t new_bucket_idx = bigcache_hash(node->actual_bytes, node->site, new_capacity);
            node->next = new_buckets[new_bucket_idx];
            new_buckets[new_bucket_idx] = node;

            node = next;
        }
    }

    // Replace old buckets
    free(g_bigcache.buckets);
    g_bigcache.buckets = new_buckets;
    g_bigcache.capacity = new_capacity;
    g_bigcache.max_count = (size_t)(new_capacity * BIGCACHE_LOAD_FACTOR);

    fprintf(stderr, "[BigCache] Resized: %zu → %zu buckets (%zu entries)\n",
            old_capacity, new_capacity, g_bigcache.count);

    pthread_rwlock_unlock(&g_bigcache.lock);
}

void hak_bigcache_print_stats(void) {
    if (!g_initialized) return;

    printf("\n========================================\n");
    printf("BigCache Statistics (Phase 2c: Dynamic)\n");
    printf("========================================\n");
    printf("Capacity:  %zu buckets\n", g_bigcache.capacity);
    printf("Entries:   %zu (%.1f%% load)\n",
           g_bigcache.count,
           100.0 * g_bigcache.count / g_bigcache.capacity);
    printf("Hits:      %lu\n", (unsigned long)g_stats.hits);
    printf("Misses:    %lu\n", (unsigned long)g_stats.misses);
    printf("Puts:      %lu\n", (unsigned long)g_stats.puts);
    printf("Evictions: %lu\n", (unsigned long)g_stats.evictions);
    printf("Rejects:   %lu\n", (unsigned long)g_stats.rejects);

    if (g_stats.hits + g_stats.misses > 0) {
        double hit_rate = (double)g_stats.hits / (g_stats.hits + g_stats.misses) * 100.0;
        printf("Hit Rate:  %.1f%%\n", hit_rate);
    }

    printf("========================================\n");
}

void hak_bigcache_set_free_callback(hak_bigcache_free_fn_t fn) {
    g_free_callback = fn;
}