Files
hakmem/core/hakmem_sizeclass_dist.c

121 lines
3.8 KiB
C
Raw Permalink Normal View History

// hakmem_sizeclass_dist.c - Size Class Distribution Implementation
//
// License: MIT
// Date: 2025-10-21
#include "hakmem_sizeclass_dist.h"
#include <string.h>
#include <math.h>
#include <stdio.h>
// ============================================================================
// Size Class Thresholds (matching BigCache)
// ============================================================================
#define CLASS_1MB (1 * 1024 * 1024)
#define CLASS_2MB (2 * 1024 * 1024)
#define CLASS_4MB (4 * 1024 * 1024)
#define CLASS_8MB (8 * 1024 * 1024)
// ============================================================================
// Helper Functions
// ============================================================================
// Classify size into size class index (0-3)
// Uses same logic as BigCache (branchless, __builtin_clzll)
static int classify_size(size_t size) {
if (size < CLASS_1MB) {
return -1; // Too small (not cacheable)
}
// Branchless classification using __builtin_clzll
int leading_zeros = __builtin_clzll(size | 1);
int bit_pos = 63 - leading_zeros; // Most significant bit position
// Map bit_pos to class_index
// 20 → 0 (1MB), 21 → 1 (2MB), 22 → 2 (4MB), 23+ → 3 (8MB)
int class_idx = bit_pos - 20;
// Clamp to [0, 3]
if (class_idx < 0) class_idx = 0;
if (class_idx >= SIZECLASS_NUM_CLASSES) class_idx = SIZECLASS_NUM_CLASSES - 1;
return class_idx;
}
// ============================================================================
// Public API Implementation
// ============================================================================
void hak_sizeclass_dist_init(hak_sizeclass_dist_t* dist) {
memset(dist->freq, 0, sizeof(dist->freq));
dist->total = 0;
}
void hak_sizeclass_dist_record(hak_sizeclass_dist_t* dist, size_t size) {
int class_idx = classify_size(size);
if (class_idx >= 0 && class_idx < SIZECLASS_NUM_CLASSES) {
dist->freq[class_idx]++;
dist->total++;
}
// Sizes < 1MB are ignored (not part of BigCache distribution)
}
double hak_sizeclass_dist_l1(const hak_sizeclass_dist_t* a,
const hak_sizeclass_dist_t* b) {
// Handle edge cases (no samples)
if (a->total == 0 && b->total == 0) {
return 0.0; // Both empty → identical
}
if (a->total == 0 || b->total == 0) {
return 2.0; // One empty, one non-empty → maximum distance
}
// Calculate L1 distance: ∑|fa[i] - fb[i]|
// where fa[i] = freq_a[i] / total_a (normalized frequency)
double sum = 0.0;
for (int i = 0; i < SIZECLASS_NUM_CLASSES; i++) {
double fa = (double)a->freq[i] / (double)a->total;
double fb = (double)b->freq[i] / (double)b->total;
sum += fabs(fa - fb);
}
return sum; // Range: 0.0 (identical) to 2.0 (completely different)
}
void hak_sizeclass_dist_copy(hak_sizeclass_dist_t* dst,
const hak_sizeclass_dist_t* src) {
memcpy(dst->freq, src->freq, sizeof(dst->freq));
dst->total = src->total;
}
void hak_sizeclass_dist_reset(hak_sizeclass_dist_t* dist) {
hak_sizeclass_dist_init(dist);
}
uint64_t hak_sizeclass_dist_total(const hak_sizeclass_dist_t* dist) {
return dist->total;
}
void hak_sizeclass_dist_print(const hak_sizeclass_dist_t* dist, const char* label) {
printf("[%s] Total: %lu\n", label, (unsigned long)dist->total);
if (dist->total == 0) {
printf(" (no samples)\n");
return;
}
const char* class_names[SIZECLASS_NUM_CLASSES] = {
"1MB", "2MB", "4MB", "8MB"
};
for (int i = 0; i < SIZECLASS_NUM_CLASSES; i++) {
double pct = (double)dist->freq[i] / (double)dist->total * 100.0;
printf(" %s: %u (%.1f%%)\n",
class_names[i],
dist->freq[i],
pct);
}
}