// hakmem_sizeclass_dist.c - Size Class Distribution Implementation // // License: MIT // Date: 2025-10-21 #include "hakmem_sizeclass_dist.h" #include #include #include // ============================================================================ // Size Class Thresholds (matching BigCache) // ============================================================================ #define CLASS_1MB (1 * 1024 * 1024) #define CLASS_2MB (2 * 1024 * 1024) #define CLASS_4MB (4 * 1024 * 1024) #define CLASS_8MB (8 * 1024 * 1024) // ============================================================================ // Helper Functions // ============================================================================ // Classify size into size class index (0-3) // Uses same logic as BigCache (branchless, __builtin_clzll) static int classify_size(size_t size) { if (size < CLASS_1MB) { return -1; // Too small (not cacheable) } // Branchless classification using __builtin_clzll int leading_zeros = __builtin_clzll(size | 1); int bit_pos = 63 - leading_zeros; // Most significant bit position // Map bit_pos to class_index // 20 → 0 (1MB), 21 → 1 (2MB), 22 → 2 (4MB), 23+ → 3 (8MB) int class_idx = bit_pos - 20; // Clamp to [0, 3] if (class_idx < 0) class_idx = 0; if (class_idx >= SIZECLASS_NUM_CLASSES) class_idx = SIZECLASS_NUM_CLASSES - 1; return class_idx; } // ============================================================================ // Public API Implementation // ============================================================================ void hak_sizeclass_dist_init(hak_sizeclass_dist_t* dist) { memset(dist->freq, 0, sizeof(dist->freq)); dist->total = 0; } void hak_sizeclass_dist_record(hak_sizeclass_dist_t* dist, size_t size) { int class_idx = classify_size(size); if (class_idx >= 0 && class_idx < SIZECLASS_NUM_CLASSES) { dist->freq[class_idx]++; dist->total++; } // Sizes < 1MB are ignored (not part of BigCache distribution) } double hak_sizeclass_dist_l1(const hak_sizeclass_dist_t* a, const hak_sizeclass_dist_t* b) { // Handle edge cases (no samples) if (a->total == 0 && b->total == 0) { return 0.0; // Both empty → identical } if (a->total == 0 || b->total == 0) { return 2.0; // One empty, one non-empty → maximum distance } // Calculate L1 distance: ∑|fa[i] - fb[i]| // where fa[i] = freq_a[i] / total_a (normalized frequency) double sum = 0.0; for (int i = 0; i < SIZECLASS_NUM_CLASSES; i++) { double fa = (double)a->freq[i] / (double)a->total; double fb = (double)b->freq[i] / (double)b->total; sum += fabs(fa - fb); } return sum; // Range: 0.0 (identical) to 2.0 (completely different) } void hak_sizeclass_dist_copy(hak_sizeclass_dist_t* dst, const hak_sizeclass_dist_t* src) { memcpy(dst->freq, src->freq, sizeof(dst->freq)); dst->total = src->total; } void hak_sizeclass_dist_reset(hak_sizeclass_dist_t* dist) { hak_sizeclass_dist_init(dist); } uint64_t hak_sizeclass_dist_total(const hak_sizeclass_dist_t* dist) { return dist->total; } void hak_sizeclass_dist_print(const hak_sizeclass_dist_t* dist, const char* label) { printf("[%s] Total: %lu\n", label, (unsigned long)dist->total); if (dist->total == 0) { printf(" (no samples)\n"); return; } const char* class_names[SIZECLASS_NUM_CLASSES] = { "1MB", "2MB", "4MB", "8MB" }; for (int i = 0; i < SIZECLASS_NUM_CLASSES; i++) { double pct = (double)dist->freq[i] / (double)dist->total * 100.0; printf(" %s: %u (%.1f%%)\n", class_names[i], dist->freq[i], pct); } }