Phase FREE-LEGACY-OPT-5-1/5-2: C5 ULTRA free+alloc integration

Summary:
========
Implemented C5 ULTRA TLS cache pattern following the successful C6 ULTRA design:
- Phase 5-1: Free-side TLS cache + segment learning
- Phase 5-2: Alloc-side TLS pop for complete free+alloc cycle integration

Targets the C5 class (129-256B) as the next legacy-reduction step after C6 completion.

Key Changes:
============

1. NEW FILES:
   - core/box/tiny_c5_ultra_free_box.h: C5 ULTRA TLS cache structure
   - core/box/tiny_c5_ultra_free_box.c: C5 free path implementation (same pattern as C6)
   - core/box/tiny_c5_ultra_free_env_box.h: ENV gating (HAKMEM_TINY_C5_ULTRA_FREE_ENABLED)

2. MODIFIED FILES:
   - core/front/malloc_tiny_fast.h:
     * Added C5 ULTRA includes
     * Added C5 alloc-side TLS pop at lines 186-194 (integrated with C6)
     * Added C5 free path at lines 333-337 (integrated with C6)

   - core/box/tiny_ultra_classes_box.h:
     * Added TINY_CLASS_C5 constant
     * Added tiny_class_is_c5() macro
     * Extended tiny_class_is_ultra() to include C5

   - core/box/free_path_stats_box.h:
     * Added c5_ultra_free_fast counter
     * Added c5_ultra_alloc_hit counter

   - core/box/free_path_stats_box.c:
     * Updated stats dump to output C5 counters

   - Makefile:
     * Added core/box/tiny_c5_ultra_free_box.o to all object lists

3. Design Rationale:
   - Exact copy of C6 ULTRA pattern (proven effective)
   - TLS cache capacity: 128 blocks (same as C6 for consistency)
   - Segment learning on first C5 free via ss_fast_lookup()
   - Alloc-side pop integrated directly in malloc_tiny_fast.h hotpath
   - Legacy fallback unification via tiny_legacy_fallback_free_base()

4. Expected Impact:
   - C5 legacy calls: 68,871 → 0 (100% elimination)
   - Total legacy reduction: ~53% of remaining 129,623
   - Mixed workload: Minimal regression (C5 is smaller class, fewer allocations)

5. Stats Collection:
   Run with: HAKMEM_TINY_C5_ULTRA_FREE_ENABLED=1 HAKMEM_FREE_PATH_STATS=1 ./bench_allocators_hakmem

   Expected output:
   [FREE_PATH_STATS] ... c5_ultra_free=68871 c5_ultra_alloc=68871 ... legacy_fb=60752 ...
   [FREE_PATH_STATS_LEGACY_BY_CLASS] ... c5=0 ...

Status:
=======
- Code: COMPLETE (3 new files + 5 modified files)
- Compilation: Verified (no errors; only unused-variable warnings unrelated to C5)
- Functionality: Ready to benchmark (ENV gating: default OFF, opt-in via ENV)

Phase Progression:
==================
 Phase 4-4: C6 ULTRA free+alloc (legacy C6: 137,319 → 0)
 Phase 5-1/5-2: C5 ULTRA free+alloc (legacy C5: 68,871 → 0 expected)
 Phase 4.5: C4 ULTRA (34,727 remaining)
📋 Future: C3/C2 ULTRA if beneficial

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-11 19:26:51 +09:00
parent 7b7de53167
commit ea6ed1a6e4
9 changed files with 131 additions and 6 deletions

View File

@ -16,11 +16,13 @@ static void free_path_stats_dump(void) {
return;
}
fprintf(stderr, "[FREE_PATH_STATS] total=%lu c7_ultra=%lu c6_ultra_free=%lu c6_ultra_alloc=%lu small_v3=%lu v6=%lu tiny_v1=%lu pool_v1=%lu remote=%lu super_lookup=%lu legacy_fb=%lu\n",
fprintf(stderr, "[FREE_PATH_STATS] total=%lu c7_ultra=%lu c6_ultra_free=%lu c6_ultra_alloc=%lu c5_ultra_free=%lu c5_ultra_alloc=%lu small_v3=%lu v6=%lu tiny_v1=%lu pool_v1=%lu remote=%lu super_lookup=%lu legacy_fb=%lu\n",
g_free_path_stats.total_calls,
g_free_path_stats.c7_ultra_fast,
g_free_path_stats.c6_ultra_free_fast, // Phase 4-2
g_free_path_stats.c6_ultra_alloc_hit, // Phase 4-4
g_free_path_stats.c5_ultra_free_fast, // Phase 5-1
g_free_path_stats.c5_ultra_alloc_hit, // Phase 5-2
g_free_path_stats.smallheap_v3_fast,
g_free_path_stats.smallheap_v6_fast,
g_free_path_stats.tiny_heap_v1_fast,

View File

@ -11,6 +11,8 @@ typedef struct FreePathStats {
uint64_t c7_ultra_fast;
uint64_t c6_ultra_free_fast; // Phase 4-2: C6 ULTRA-free
uint64_t c6_ultra_alloc_hit; // Phase 4-4: C6 ULTRA-alloc (TLS pop)
uint64_t c5_ultra_free_fast; // Phase 5-1: C5 ULTRA-free
uint64_t c5_ultra_alloc_hit; // Phase 5-2: C5 ULTRA-alloc (TLS pop)
uint64_t smallheap_v3_fast;
uint64_t smallheap_v6_fast;
uint64_t tiny_heap_v1_fast;

View File

@ -0,0 +1,51 @@
#include "tiny_c5_ultra_free_box.h"
#include "tiny_legacy_fallback_box.h" // Phase REFACTOR-2: Unified legacy fallback
#include "free_path_stats_box.h"
#include "tiny_front_v3_env_box.h"
#include "../hakmem.h" // For HAK_BASE_FROM_RAW
#include "../front/tiny_unified_cache.h"
#include "tiny_front_hot_box.h"
#include "../superslab/superslab_inline.h" // For ss_fast_lookup
#include <string.h>
// TLS context: one C5 ULTRA-free cache per thread, zero-initialized
// (count == 0; seg_base == 0 is treated as "segment not learned yet").
static __thread TinyC5UltraFreeTLS g_c5_ultra_free_tls = {0};
// Returns the address of the calling thread's C5 ULTRA-free TLS context.
TinyC5UltraFreeTLS* tiny_c5_ultra_free_tls(void) {
return &g_c5_ultra_free_tls;
}
// Phase REFACTOR-2: Legacy free helper removed (now in tiny_legacy_fallback_box.h)
// Fast path: TLS cache push.
//
// Pushes `base` onto the calling thread's C5 ULTRA cache when it lies inside
// the learned segment range and the cache still has room; otherwise the block
// is handed to tiny_c5_ultra_free_slow().
//
//   base      - block pointer being freed; stored as-is in the TLS freelist
//   class_idx - size class index; only forwarded to the slow path
void tiny_c5_ultra_free_fast(void* base, uint32_t class_idx) {
    TinyC5UltraFreeTLS* ctx = &g_c5_ultra_free_tls;

    // Phase 5-1: Learn segment on first C5 free (same as C6).
    // The lookup is retried on every call until it succeeds, because
    // seg_base stays 0 while ss_fast_lookup() returns NULL.
    if (unlikely(ctx->seg_base == 0)) {
        SuperSlab* ss = ss_fast_lookup(base);
        if (ss != NULL) {
            ctx->seg_base = (uintptr_t)ss;
            // Shift in uintptr_t width: `1u << n` is a 32-bit shift, which is
            // undefined for n >= 32 and too narrow for an address range.
            ctx->seg_end = ctx->seg_base + ((uintptr_t)1 << ss->lg_size);
        }
    }

    // Check if ptr is in our segment AND cache has room
    if (likely(ctx->seg_base != 0 &&
               (uintptr_t)base >= ctx->seg_base &&
               (uintptr_t)base < ctx->seg_end &&
               ctx->count < TINY_C5_ULTRA_FREE_CAP)) {
        // Push to TLS cache
        ctx->freelist[ctx->count++] = base;
        FREE_PATH_STAT_INC(c5_ultra_free_fast);
        return;
    }

    // Slow path: fallback to legacy (cache full, segment not learned,
    // or ptr not in segment)
    tiny_c5_ultra_free_slow(base, class_idx);
}
// Slow path: fallback to legacy free.
// Forwards base and class_idx unchanged to tiny_legacy_fallback_free_base().
// tiny_c5_ultra_free_fast() calls this when it does not push the block into
// the TLS cache.
void tiny_c5_ultra_free_slow(void* base, uint32_t class_idx) {
tiny_legacy_fallback_free_base(base, class_idx);
}

View File

@ -0,0 +1,30 @@
#ifndef HAKMEM_TINY_C5_ULTRA_FREE_BOX_H
#define HAKMEM_TINY_C5_ULTRA_FREE_BOX_H
#include <stdint.h>
#include <stdbool.h>
#include "tiny_c5_ultra_free_env_box.h"
// Branch-prediction hints built on __builtin_expect.
// Both macros are defined together, and only when `likely` is not already
// defined by an earlier header (`unlikely` is not checked separately).
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// TLS cache capacity (Phase 5: same as C6 for consistency)
// NOTE: must stay <= 255 because TinyC5UltraFreeTLS.count is a uint8_t;
// a larger value would make the `count < CAP` room check always true.
#define TINY_C5_ULTRA_FREE_CAP 128
// TLS context for C5 ULTRA-free
// Array-backed cache of freed blocks plus the address range they must fall in.
typedef struct TinyC5UltraFreeTLS {
void* freelist[TINY_C5_ULTRA_FREE_CAP]; // BASE pointers; the free path writes freelist[count++]
uint8_t count; // number of pointers currently cached
uint8_t _pad[7]; // explicit padding bytes after count
uintptr_t seg_base; // C5 segment range (0 = not initialized)
uintptr_t seg_end; // exclusive upper bound of the segment range
} TinyC5UltraFreeTLS;
// API
TinyC5UltraFreeTLS* tiny_c5_ultra_free_tls(void);
void tiny_c5_ultra_free_fast(void* base, uint32_t class_idx);
void tiny_c5_ultra_free_slow(void* base, uint32_t class_idx);
#endif // HAKMEM_TINY_C5_ULTRA_FREE_BOX_H

View File

@ -0,0 +1,17 @@
#ifndef HAKMEM_TINY_C5_ULTRA_FREE_ENV_BOX_H
#define HAKMEM_TINY_C5_ULTRA_FREE_ENV_BOX_H
#include <stdlib.h>
#include <stdbool.h>
// ENV: HAKMEM_TINY_C5_ULTRA_FREE_ENABLED (default 0)
//
// Reports whether the C5 ULTRA free path is switched on. The environment
// variable is read on the first call and the answer is cached in a
// function-local static, so later changes to the environment are ignored.
// The feature counts as enabled when the variable is set, non-empty, and
// its first character is not '0'.
static inline bool tiny_c5_ultra_free_enabled(void) {
    static int cached_flag = -1;  // -1 = environment not consulted yet

    // Common case: the answer is already cached.
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }

    const char* value = getenv("HAKMEM_TINY_C5_ULTRA_FREE_ENABLED");
    if (value == NULL || value[0] == '\0' || value[0] == '0') {
        cached_flag = 0;
    } else {
        cached_flag = 1;
    }
    return cached_flag;
}
#endif // HAKMEM_TINY_C5_ULTRA_FREE_ENV_BOX_H

View File

@ -1,13 +1,15 @@
#ifndef HAKMEM_TINY_ULTRA_CLASSES_BOX_H
#define HAKMEM_TINY_ULTRA_CLASSES_BOX_H
// Purpose: Named constants for ULTRA tier classes (C6, C7)
// Purpose: Named constants for ULTRA tier classes (C5, C6, C7)
// Class indices of the ULTRA tier size classes
#define TINY_CLASS_C5 5
#define TINY_CLASS_C6 6
#define TINY_CLASS_C7 7
// Helper macros for class checking: each is non-zero when idx equals the
// corresponding TINY_CLASS_Cn constant
#define tiny_class_is_c5(idx) ((idx) == TINY_CLASS_C5)
#define tiny_class_is_c6(idx) ((idx) == TINY_CLASS_C6)
#define tiny_class_is_c7(idx) ((idx) == TINY_CLASS_C7)
#define tiny_class_is_ultra(idx) (tiny_class_is_c6(idx) || tiny_class_is_c7(idx))
#define tiny_class_is_ultra(idx) (tiny_class_is_c5(idx) || tiny_class_is_c6(idx) || tiny_class_is_c7(idx))
#endif // HAKMEM_TINY_ULTRA_CLASSES_BOX_H