Files
hakmem/core/tiny_fastcache.h
Moe Charm (CI) 52386401b3 Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00

114 lines
3.6 KiB
C

// tiny_fastcache.h - Ultra-Simple Tiny Fast Path (System tcache style)
// Phase 6-3: Bypass Magazine/SuperSlab for Tiny allocations (<=128B)
// Goal: 3-4 instruction fast path, 70-80% of System tcache performance
#pragma once
#include <stdint.h>
#include <stddef.h>
#include <string.h>
// ========== Configuration ==========
// Enable Tiny Fast Path (default: ON for Phase 6-3).
// May be pre-defined by the build to override the default. Nothing in this
// header tests the macro; presumably call sites elsewhere gate on it -- TODO confirm.
#ifndef HAKMEM_TINY_FAST_PATH
#define HAKMEM_TINY_FAST_PATH 1
#endif
// Number of per-class slots in the TLS arrays declared in this header.
// Only classes 0-10 are produced by tiny_fast_size_to_class()
// (sizes: 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128); slots 11-15 are reserved.
#define TINY_FAST_CLASS_COUNT 16
// Fast cache capacity per class (default: 64 slots; overridable at build time).
// tiny_fast_free() calls tiny_fast_drain() once a class count reaches this value.
// NOTE(review): the original comment likens this to the system tcache -- not verified here.
#ifndef TINY_FAST_CACHE_CAP
#define TINY_FAST_CACHE_CAP 64
#endif
// Tiny size threshold (<=128B goes to fast path).
// NOTE(review): tiny_fast_size_to_class() hard-codes 128 rather than using this
// macro, so the two must be kept in sync by hand.
#define TINY_FAST_THRESHOLD 128
// ========== TLS Cache (System tcache style) ==========
// Per-thread fast cache: array of freelist heads (defined in tiny_fastcache.c).
// Each head starts an intrusive singly linked list: the first pointer-sized
// word of every cached block holds the address of the next block (see the
// push in tiny_fast_free() and the pop in tiny_fast_alloc()).
extern __thread void* g_tiny_fast_cache[TINY_FAST_CLASS_COUNT];
// Per-thread cache counts (for capacity management).
// Incremented on every push in tiny_fast_free(), decremented on every pop in
// tiny_fast_alloc(), and compared against TINY_FAST_CACHE_CAP before a push.
extern __thread uint32_t g_tiny_fast_count[TINY_FAST_CLASS_COUNT];
// Initialized flag: set to 1 by tiny_fast_init(). The inline alloc/free paths
// in this header do not test it.
extern __thread int g_tiny_fast_initialized;
// ========== Size to Class Mapping ==========
// Inline size-to-class for fast path (minimal branches)
static inline int tiny_fast_size_to_class(size_t size) {
    // Maps a request size in bytes to its tiny class index, or -1 when the
    // size is above the tiny range.
    //
    //   size      0..16  -> class 0
    //   size     17..64  -> classes 1..6   (one class per 8-byte step)
    //   size    65..128  -> classes 7..10  (one class per 16-byte step)
    //   size      > 128  -> -1
    if (size > 128) {
        return -1;  // Not tiny
    }
    if (size > 64) {
        // Round up to the next multiple of 16: 80 -> 7, 96 -> 8, 112 -> 9, 128 -> 10.
        return (int)((size + 15) / 16) + 2;
    }
    if (size > 16) {
        // Round up to the next multiple of 8: 24 -> 1, 32 -> 2, ..., 64 -> 6.
        return (int)((size + 7) / 8) - 2;
    }
    return 0;  // Everything up to 16 bytes (including 0) shares class 0
}
// ========== Forward Declarations ==========
// Slow-path hooks. Per the original note they refill from / drain to the
// Magazine/SuperSlab layer and are implemented in tiny_fastcache.c; their
// bodies are not visible from this header.
//
// tiny_fast_refill(class_idx): called by tiny_fast_alloc() when the calling
// thread's list for class_idx is empty. Its return value is handed straight
// back to the allocation caller.
// NOTE(review): presumably returns NULL on failure -- confirm in the .c file.
void* tiny_fast_refill(int class_idx);
// tiny_fast_drain(class_idx): called by tiny_fast_free() when
// g_tiny_fast_count[class_idx] has reached TINY_FAST_CACHE_CAP, just before
// the freed block is pushed onto the list.
// NOTE(review): presumably lowers the count by handing blocks back -- confirm.
void tiny_fast_drain(int class_idx);
// ========== Fast Path: Alloc (3-4 instructions!) ==========
// Allocate a tiny block from the calling thread's cache.
//
// size: requested size in bytes.
// Returns: a cached block of the matching class; the result of
//          tiny_fast_refill() when that class's list is empty; or NULL when
//          size is above the tiny range.
static inline void* tiny_fast_alloc(size_t size) {
    const int class_idx = tiny_fast_size_to_class(size);
    if (__builtin_expect(class_idx < 0, 0)) {
        return NULL;  // Not a tiny size (expected to be rare)
    }

    void* const head = g_tiny_fast_cache[class_idx];
    if (__builtin_expect(head == NULL, 0)) {
        // Nothing cached for this class: take the slow path.
        return tiny_fast_refill(class_idx);
    }

    // Unlink the head block; its first word stores the next block in the list.
    g_tiny_fast_cache[class_idx] = *(void**)head;
    g_tiny_fast_count[class_idx] -= 1;
    return head;
}
// ========== Fast Path: Free (2-3 instructions!) ==========
// Return a tiny block to the calling thread's cache.
//
// ptr:  block to release. NULL is accepted and ignored, mirroring free(NULL).
//       The first pointer-sized word of the block is overwritten with the
//       list link.
// size: size in bytes used to select the class.
//       NOTE(review): presumably must map to the same class the block was
//       allocated from -- confirm against callers.
//
// If size is above the tiny range the call returns without touching ptr; the
// block is NOT released by this function in that case.
static inline void tiny_fast_free(void* ptr, size_t size) {
    // Guard: pushing NULL would write the link word through a null pointer.
    if (__builtin_expect(ptr == NULL, 0)) {
        return;
    }

    // Step 1: Size to class
    int cls = tiny_fast_size_to_class(size);
    if (__builtin_expect(cls < 0, 0)) {
        return;  // Not tiny (error)
    }

    // Step 2: Check capacity
    if (__builtin_expect(g_tiny_fast_count[cls] >= TINY_FAST_CACHE_CAP, 0)) {
        // Cache full - drain to Magazine/SuperSlab before pushing
        tiny_fast_drain(cls);
    }

    // Step 3: Push onto the head of the per-class list
    *(void**)ptr = g_tiny_fast_cache[cls];
    g_tiny_fast_cache[cls] = ptr;
    g_tiny_fast_count[cls]++;
}
// ========== Initialization ==========
// Reset the calling thread's fast cache to the empty state.
// Does the work only on the first call per thread; later calls see
// g_tiny_fast_initialized already set and do nothing.
static inline void tiny_fast_init(void) {
    if (!g_tiny_fast_initialized) {
        const size_t class_slots =
            sizeof g_tiny_fast_cache / sizeof g_tiny_fast_cache[0];

        // Empty every per-class list and zero its counter.
        for (size_t slot = 0; slot < class_slots; slot++) {
            g_tiny_fast_cache[slot] = NULL;
            g_tiny_fast_count[slot] = 0;
        }

        g_tiny_fast_initialized = 1;
    }
}