Box API Phase 1-3: Implement Capacity Manager, Carve-Push, Prewarm

Implements the Priority 1-3 Box Modules and provides a safe pre-warming API.
Replaces the existing complex prewarm code with a single Box API call.

## New Box Modules

1. **Box Capacity Manager** (capacity_box.h/c)
   - Centralizes TLS SLL capacity management (see the sketch after this list)
   - Guarantees adaptive_sizing initialization
   - Prevents double-free bugs

2. **Box Carve-And-Push** (carve_push_box.h/c)
   - Atomic block carve + TLS SLL push (see the sketch after this list)
   - All-or-nothing semantics
   - Rollback guarantee (prevents partial failures)

3. **Box Prewarm** (prewarm_box.h/c)
   - Safe TLS cache pre-warming
   - Hides initialization dependencies
   - Simple API (a single function call; see Code Simplification below)
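
A minimal sketch of the kind of lookup the Capacity Box could centralize. `box_capacity_for_class()` and the lazy-init guard are assumed names, not the actual capacity_box.h interface (which this commit does not show); only `g_sll_multiplier`, `g_sll_cap_override`, and `adaptive_sizing_init()` come from elsewhere in this commit.

```c
// Hypothetical sketch only: centralizing TLS SLL capacity lookup behind one
// Box call. box_capacity_for_class() and g_capacity_box_initialized are
// assumed names, not the verbatim capacity_box.h API.
#include <stdbool.h>
#include <stdint.h>

extern void adaptive_sizing_init(void);   // existing init (see hakmem_tiny_init.inc)
extern int  g_sll_multiplier;             // SLL capacity multiplier (exported by this commit)
extern int  g_sll_cap_override[];         // per-class cap override (exported by this commit)

static bool g_capacity_box_initialized = false;   // assumed once-only guard

uint32_t box_capacity_for_class(int class_idx, uint32_t base_cap) {
    // Guarantee adaptive_sizing runs exactly once before any capacity lookup,
    // so callers no longer carry the initialization-order dependency.
    if (!g_capacity_box_initialized) {
        adaptive_sizing_init();
        g_capacity_box_initialized = true;
    }
    if (g_sll_cap_override[class_idx] > 0)
        return (uint32_t)g_sll_cap_override[class_idx];
    return base_cap * (uint32_t)g_sll_multiplier;
}
```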
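
The list above only states the Carve-And-Push contract (atomic carve + TLS SLL push, all-or-nothing, rollback). A rough sketch honoring that contract; every helper name here is a placeholder, not the real carve_push_box.h API.

```c
// Hypothetical sketch of all-or-nothing carve + TLS SLL push with rollback.
// The helper names are placeholders; only the semantics come from the commit
// description.
#include <stddef.h>

extern size_t carve_blocks_from_slab(int class_idx, void** out, size_t want); // assumed
extern void   tls_sll_push(int class_idx, void* block);                       // assumed
extern void   return_blocks_to_slab(int class_idx, void** blocks, size_t n);  // assumed

// Returns the number of blocks made visible in the TLS SLL, or 0 if the
// operation was rolled back (the caller never sees a half-completed refill).
size_t box_carve_and_push(int class_idx, size_t want) {
    void* blocks[64];                 // illustrative batch bound
    if (want > 64) want = 64;

    // Carve the whole batch up front. If the slab cannot supply all of it,
    // return every carved block and report failure instead of exposing a
    // partially refilled TLS list.
    size_t carved = carve_blocks_from_slab(class_idx, blocks, want);
    if (carved < want) {
        return_blocks_to_slab(class_idx, blocks, carved);
        return 0;
    }

    // Pushing onto the thread-local singly linked list cannot fail once the
    // blocks are owned by this thread, so from the caller's point of view the
    // refill either happens completely or not at all.
    for (size_t i = 0; i < carved; i++)
        tls_sll_push(class_idx, blocks[i]);
    return carved;
}
```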

## Code Simplification

hakmem_tiny_init.inc: 20 lines → 1 line
```c
// BEFORE: complex P0 branching and error handling
adaptive_sizing_init();
if (prewarm > 0) {
    #if HAKMEM_TINY_P0_BATCH_REFILL
        int taken = sll_refill_batch_from_ss(5, prewarm);
    #else
        int taken = sll_refill_small_from_ss(5, prewarm);
    #endif
}

// AFTER: a single Box API call
int taken = box_prewarm_tls(5, prewarm);
```
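
The `#if HAKMEM_TINY_P0_BATCH_REFILL` branch presumably moves inside the Box rather than disappearing. A sketch of the dispatch prewarm_box.c might perform, with `box_prewarm_refill()` as an assumed internal helper; the two refill functions and the flag come from the removed code above, and their `(int, int)` signatures are inferred from the old call site.

```c
// Sketch: the compile-time P0 branch from the old init code, now hidden
// behind the Box. box_prewarm_refill() is an assumed internal helper.
extern int sll_refill_batch_from_ss(int class_idx, int count); // signature inferred
extern int sll_refill_small_from_ss(int class_idx, int count); // signature inferred

static int box_prewarm_refill(int class_idx, int count) {
#if HAKMEM_TINY_P0_BATCH_REFILL
    return sll_refill_batch_from_ss(class_idx, count);
#else
    return sll_refill_small_from_ss(class_idx, count);
#endif
}
```

Call sites such as `hak_tiny_prewarm_tls_cache()` can then loop over all classes and call `box_prewarm_tls(cls, count)` once per class without knowing which refill path was compiled in.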

## Symbol Export Fixes

hakmem_tiny.c: 5 symbols changed from static to non-static (matching extern declarations are sketched after this list)
- g_tls_slabs[] (TLS slab array)
- g_sll_multiplier (SLL capacity multiplier)
- g_sll_cap_override[] (per-class capacity override)
- superslab_refill() (SuperSlab refill)
- ss_active_add() (active-block counter)
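
These exports presumably exist so the new Box modules can reference the symbols directly. A sketch of the matching extern declarations; the signatures of `superslab_refill()` and `ss_active_add()` are taken from the diff below, while the header these declarations live in is an assumption.

```c
// Sketch of the extern declarations the Box modules need now that these five
// symbols are non-static in hakmem_tiny.c (exact header placement assumed).
#include <stdint.h>
#include "hakmem_tiny.h"  // assumed to provide TinyTLSSlab, SuperSlab, TINY_NUM_CLASSES

extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES]; // unified TLS slab cache
extern int g_sll_multiplier;                               // SLL capacity multiplier
extern int g_sll_cap_override[TINY_NUM_CLASSES];           // HAKMEM_TINY_SLL_CAP_C{0..7}
SuperSlab* superslab_refill(int class_idx);                // SuperSlab refill
void ss_active_add(SuperSlab* ss, uint32_t n);             // SuperSlab active-block accounting
```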

## Build System

Makefile: added the 3 Box module objects to TINY_BENCH_OBJS_BASE
- core/box/capacity_box.o
- core/box/carve_push_box.o
- core/box/prewarm_box.o

## Verification

- Debug build succeeds
- Box Prewarm API verified working:
  `[PREWARM] class=5 requested=128 taken=32`

## Next Steps

- Box Refill Manager (Priority 4)
- Box SuperSlab Allocator (Priority 5)
- Fix the Release build (tiny_debug_ring_record)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

@@ -7,6 +7,7 @@
#include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
#include "hakmem_tiny_magazine.h"
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write
// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
#include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine
#include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG)
@@ -33,17 +34,18 @@ extern uint64_t g_bytes_allocated; // from hakmem_tiny_superslab.c
// ============================================================================
// Size class table (Box 3 dependency)
// ============================================================================
// Definition for g_tiny_class_sizes (declared in hakmem_tiny_config.h)
// Used by Box 3 (tiny_box_geometry.h) for stride calculations
// Phase E1-CORRECT: ALL classes have 1-byte header
// These sizes represent TOTAL BLOCK SIZE (stride) = [Header 1B][Data N-1B]
// Usable data = stride - 1 (implicit)
const size_t g_tiny_class_sizes[TINY_NUM_CLASSES] = {
8, // Class 0: 8 bytes
16, // Class 1: 16 bytes
32, // Class 2: 32 bytes
64, // Class 3: 64 bytes
128, // Class 4: 128 bytes
256, // Class 5: 256 bytes
512, // Class 6: 512 bytes
1024 // Class 7: 1024 bytes
8, // Class 0: 8B total = [Header 1B][Data 7B]
16, // Class 1: 16B total = [Header 1B][Data 15B]
32, // Class 2: 32B total = [Header 1B][Data 31B]
64, // Class 3: 64B total = [Header 1B][Data 63B]
128, // Class 4: 128B total = [Header 1B][Data 127B]
256, // Class 5: 256B total = [Header 1B][Data 255B]
512, // Class 6: 512B total = [Header 1B][Data 511B]
1024 // Class 7: 1024B total = [Header 1B][Data 1023B]
};
// ============================================================================
@@ -153,12 +155,9 @@ static inline void tiny_debug_track_alloc_ret(int cls, void* ptr);
#if HAKMEM_TINY_HEADER_CLASSIDX
#if HAKMEM_BUILD_RELEASE
// Phase 3: Release - Ultra-fast inline macro (3-4 instructions)
// Eliminates function call overhead, NULL check, guard check, tracking
// Phase E1-CORRECT: ALL classes have 1-byte headers (including C7)
// Ultra-fast inline macro (3-4 instructions)
#define HAK_RET_ALLOC(cls, base_ptr) do { \
if (__builtin_expect((cls) == 7, 0)) { \
return (base_ptr); \
} \
*(uint8_t*)(base_ptr) = HEADER_MAGIC | ((cls) & HEADER_CLASS_MASK); \
return (void*)((uint8_t*)(base_ptr) + 1); \
} while(0)
@@ -215,7 +214,7 @@ static void tiny_apply_mem_diet(void);
// Phase 6.23: SuperSlab allocation forward declaration
static inline void* hak_tiny_alloc_superslab(int class_idx);
static inline void* superslab_tls_bump_fast(int class_idx);
static SuperSlab* superslab_refill(int class_idx);
SuperSlab* superslab_refill(int class_idx);
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
// Forward decl: used by tiny_spec_pop_path before its definition
@@ -245,7 +244,7 @@ static void tiny_remote_drain_locked(struct TinySlab* slab);
__attribute__((always_inline))
static inline void* hak_tiny_alloc_wrapper(int class_idx);
// Helpers for SuperSlab active block accounting (atomic, saturating dec)
static inline __attribute__((always_inline)) void ss_active_add(SuperSlab* ss, uint32_t n) {
void ss_active_add(SuperSlab* ss, uint32_t n) {
atomic_fetch_add_explicit(&ss->total_active_blocks, n, memory_order_relaxed);
}
static inline __attribute__((always_inline)) void ss_active_inc(SuperSlab* ss) {
@@ -502,7 +501,7 @@ static _Atomic uint32_t g_ss_partial_epoch = 0;
// Phase 6.24: Unified TLS slab cache (Medium fix)
// Reduces TLS reads from 3 to 1 (cache-line aligned for performance)
static __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
__thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
static _Atomic uint32_t g_tls_target_cap[TINY_NUM_CLASSES];
static _Atomic uint32_t g_tls_target_refill[TINY_NUM_CLASSES];
static _Atomic uint32_t g_tls_target_spill[TINY_NUM_CLASSES];
@@ -1196,7 +1195,7 @@ typedef struct __attribute__((aligned(64))) {
static __thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES];
static int g_frontend_enable = 0; // HAKMEM_TINY_FRONTEND=1 (experimental ultra-fast frontend)
// SLL capacity multiplier for hot tiny classes (env: HAKMEM_SLL_MULTIPLIER)
static int g_sll_multiplier = 2;
int g_sll_multiplier = 2;
// Cached thread id (uint32) to avoid repeated pthread_self() in hot paths
static __thread uint32_t g_tls_tid32;
static __thread int g_tls_tid32_inited;
@@ -1236,7 +1235,7 @@ static inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
// tiny_mmap_gate.h already included at top
#include "tiny_publish.h"
static int g_sll_cap_override[TINY_NUM_CLASSES] = {0}; // HAKMEM_TINY_SLL_CAP_C{0..7}
int g_sll_cap_override[TINY_NUM_CLASSES] = {0}; // HAKMEM_TINY_SLL_CAP_C{0..7}
// Optional prefetch on SLL pop (guarded by env: HAKMEM_TINY_PREFETCH=1)
static int g_tiny_prefetch = 0;
@@ -1290,15 +1289,8 @@ static __thread TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via
void hak_tiny_prewarm_tls_cache(void) {
// Pre-warm each class with HAKMEM_TINY_PREWARM_COUNT blocks
// This reduces the first-allocation miss penalty by populating TLS cache
// Phase E1-CORRECT: ALL classes (including C7) now use TLS SLL
for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
// CRITICAL: C7 (1KB) is headerless - skip TLS SLL refill, but create SuperSlab
if (class_idx == 7) {
// Create C7 SuperSlab explicitly (refill functions skip C7)
// Note: superslab_refill is already declared in hakmem_tiny_refill.inc.h
(void)superslab_refill(class_idx);
continue;
}
int count = HAKMEM_TINY_PREWARM_COUNT; // Default: 16 blocks per class
// Trigger refill to populate TLS cache