651 lines
17 KiB
Markdown
651 lines
17 KiB
Markdown
|
|
# HAKMEM Tiny Allocator リファクタリング実装ガイド
|
||
|
|
|
||
|
|
## クイックスタート
|
||
|
|
|
||
|
|
このドキュメントは、REFACTOR_PLAN.md の実装手順を段階的に説明します。
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Priority 1: Fast Path リファクタリング (Week 1)
|
||
|
|
|
||
|
|
### Phase 1.1: tiny_atomic.h (新規作成, 80行)
|
||
|
|
|
||
|
|
**目的**: Atomic操作の統一インターフェース
|
||
|
|
|
||
|
|
**ファイル**: `core/tiny_atomic.h`
|
||
|
|
|
||
|
|
```c
|
||
|
|
#ifndef HAKMEM_TINY_ATOMIC_H
|
||
|
|
#define HAKMEM_TINY_ATOMIC_H
|
||
|
|
|
||
|
|
#include <stdatomic.h>
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// TINY_ATOMIC: 統一インターフェース for atomics with memory ordering
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
/**
 * tiny_atomic_load - atomic load with a caller-supplied memory order
 * @ptr:   pointer to the variable to read
 * @order: memory_order to use. Macros have no default arguments, so the
 *         order must always be passed explicitly. C11 only permits relaxed,
 *         consume, acquire and seq_cst on a load.
 *
 * The cast allows @ptr to point at a plain (non-_Atomic) object.
 * NOTE(review): this relies on _Atomic T and T sharing size and
 * representation on the target compiler — confirm for every supported target.
 *
 * __typeof__ is used instead of typeof because typeof is not a keyword under
 * strict -std=c11 / -std=c99.
 *
 * Returns: the loaded value
 */
#define tiny_atomic_load(ptr, order) \
    atomic_load_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), (order))

/* Load with acquire ordering (the usual choice for reading published data). */
#define tiny_atomic_load_acq(ptr) \
    atomic_load_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), memory_order_acquire)

/*
 * Kept only for source compatibility. C11 forbids memory_order_release (and
 * memory_order_acq_rel) on a load, so this uses the strongest valid load
 * ordering instead. Prefer tiny_atomic_load_acq() in new code.
 */
#define tiny_atomic_load_rel(ptr) \
    atomic_load_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), memory_order_seq_cst)

/* Load with relaxed ordering (atomicity only, no ordering guarantees). */
#define tiny_atomic_load_relax(ptr) \
    atomic_load_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), memory_order_relaxed)
|
||
|
|
|
||
|
|
/**
 * tiny_atomic_store - atomic store with a caller-supplied memory order
 * @ptr:   pointer to the variable to write
 * @val:   value to store
 * @order: memory_order to use (must always be passed). C11 only permits
 *         relaxed, release and seq_cst on a store.
 *
 * The cast allows @ptr to point at a plain (non-_Atomic) object.
 * NOTE(review): this relies on _Atomic T and T sharing size and
 * representation on the target compiler — confirm for every supported target.
 */
#define tiny_atomic_store(ptr, val, order) \
    atomic_store_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), (val), (order))

/* Store with release ordering (the usual choice for publishing data). */
#define tiny_atomic_store_rel(ptr, val) \
    atomic_store_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), (val), memory_order_release)

/*
 * Kept only for source compatibility. C11 forbids memory_order_acquire (and
 * consume / acq_rel) on a store, so this uses the strongest valid store
 * ordering instead. Prefer tiny_atomic_store_rel() in new code.
 */
#define tiny_atomic_store_acq(ptr, val) \
    atomic_store_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), (val), memory_order_seq_cst)

/* Store with relaxed ordering (atomicity only, no ordering guarantees). */
#define tiny_atomic_store_relax(ptr, val) \
    atomic_store_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), (val), memory_order_relaxed)
|
||
|
|
|
||
|
|
/**
 * tiny_atomic_cas - strong compare-and-swap
 * @ptr:      pointer to the variable to update
 * @expected: pointer to the expected value (in/out); on failure it is
 *            overwritten with the value actually found in *@ptr
 * @desired:  value to store when *@ptr equals *@expected
 *
 * Uses memory_order_seq_cst on success and memory_order_relaxed on failure.
 *
 * Returns: true if the swap happened
 */
#define tiny_atomic_cas(ptr, expected, desired) \
    atomic_compare_exchange_strong_explicit( \
        (_Atomic __typeof__(*(ptr)) *)(ptr), (expected), (desired), \
        memory_order_seq_cst, memory_order_relaxed)

/**
 * tiny_atomic_cas_weak - weak compare-and-swap for retry loops
 *
 * Same arguments and orderings as tiny_atomic_cas(), but the weak form is
 * allowed to fail spuriously even when *@ptr equals *@expected, so it must
 * only be used inside a loop that retries on failure.
 */
#define tiny_atomic_cas_weak(ptr, expected, desired) \
    atomic_compare_exchange_weak_explicit( \
        (_Atomic __typeof__(*(ptr)) *)(ptr), (expected), (desired), \
        memory_order_seq_cst, memory_order_relaxed)
|
||
|
|
|
||
|
|
/**
 * tiny_atomic_exchange - atomically replace the value (seq_cst)
 * @ptr:     pointer to the variable to update
 * @desired: new value
 *
 * Returns: the value held before the exchange
 */
#define tiny_atomic_exchange(ptr, desired) \
    atomic_exchange_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), (desired), \
                             memory_order_seq_cst)

/**
 * tiny_atomic_fetch_add - atomically add @val (seq_cst)
 *
 * Returns: the value held before the addition
 */
#define tiny_atomic_fetch_add(ptr, val) \
    atomic_fetch_add_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), (val), \
                              memory_order_seq_cst)

/**
 * tiny_atomic_increment - atomically add 1 (seq_cst)
 *
 * Returns: the new value, i.e. the previous value plus one
 */
#define tiny_atomic_increment(ptr) \
    (atomic_fetch_add_explicit((_Atomic __typeof__(*(ptr)) *)(ptr), 1, \
                               memory_order_seq_cst) + 1)
|
||
|
|
|
||
|
|
#endif // HAKMEM_TINY_ATOMIC_H
|
||
|
|
```
|
||
|
|
|
||
|
|
**テスト**:
|
||
|
|
```c
|
||
|
|
// test_tiny_atomic.c
#include <assert.h>

#include "tiny_atomic.h"

/* Round-trips a value through the explicit-order store and load macros. */
void test_tiny_atomic_load_store(void) {
    _Atomic int x = 0;
    tiny_atomic_store(&x, 42, memory_order_release);
    assert(tiny_atomic_load(&x, memory_order_acquire) == 42);
}

/* A CAS whose expected value matches must succeed and install the new value. */
void test_tiny_atomic_cas(void) {
    _Atomic int x = 1;
    int expected = 1;
    /* Assert the result directly: comparing against `true` would need <stdbool.h>. */
    assert(tiny_atomic_cas(&x, &expected, 2));
    assert(tiny_atomic_load(&x, memory_order_relaxed) == 2);
}
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
### Phase 1.2: tiny_alloc_fast.inc.h (新規作成, 250行)
|
||
|
|
|
||
|
|
**目的**: 3-4命令のfast path allocation
|
||
|
|
|
||
|
|
**ファイル**: `core/tiny_alloc_fast.inc.h`
|
||
|
|
|
||
|
|
```c
|
||
|
|
#ifndef HAKMEM_TINY_ALLOC_FAST_INC_H
|
||
|
|
#define HAKMEM_TINY_ALLOC_FAST_INC_H
|
||
|
|
|
||
|
|
#include "tiny_atomic.h"
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// TINY_ALLOC_FAST: Ultra-simple fast path (3-4 命令)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
// TLS storage (defined in hakmem_tiny.c)
|
||
|
|
extern void* g_tls_alloc_cache[TINY_NUM_CLASSES];
|
||
|
|
extern int g_tls_alloc_count[TINY_NUM_CLASSES];
|
||
|
|
extern int g_tls_alloc_cap[TINY_NUM_CLASSES];
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_alloc_fast_pop - Pop from TLS cache (3-4 命令)
|
||
|
|
*
|
||
|
|
* Fast path for allocation:
|
||
|
|
* 1. Load head from TLS cache
|
||
|
|
* 2. Check if non-NULL
|
||
|
|
* 3. Pop: head = head->next
|
||
|
|
* 4. Return ptr
|
||
|
|
*
|
||
|
|
* Returns: Pointer if cache hit, NULL if miss (go to slow path)
|
||
|
|
*/
|
||
|
|
static inline void* tiny_alloc_fast_pop(int class_idx) {
|
||
|
|
void* ptr = g_tls_alloc_cache[class_idx];
|
||
|
|
if (__builtin_expect(ptr != NULL, 1)) {
|
||
|
|
// Pop: store next pointer
|
||
|
|
g_tls_alloc_cache[class_idx] = *(void**)ptr;
|
||
|
|
// Update count (optional, can be batched)
|
||
|
|
g_tls_alloc_count[class_idx]--;
|
||
|
|
return ptr;
|
||
|
|
}
|
||
|
|
return NULL; // Cache miss → slow path
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_alloc_fast_push - Push to TLS cache
|
||
|
|
*
|
||
|
|
* Returns: 1 if success, 0 if cache full (go to spill logic)
|
||
|
|
*/
|
||
|
|
static inline int tiny_alloc_fast_push(int class_idx, void* ptr) {
|
||
|
|
int cnt = g_tls_alloc_count[class_idx];
|
||
|
|
int cap = g_tls_alloc_cap[class_idx];
|
||
|
|
|
||
|
|
if (__builtin_expect(cnt < cap, 1)) {
|
||
|
|
// Push: ptr->next = head
|
||
|
|
*(void**)ptr = g_tls_alloc_cache[class_idx];
|
||
|
|
g_tls_alloc_cache[class_idx] = ptr;
|
||
|
|
g_tls_alloc_count[class_idx]++;
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
return 0; // Cache full → slow path
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
 * tiny_alloc_fast - fast-path allocation entry point
 * @class_idx: size-class index, forwarded to tiny_alloc_fast_pop()
 *
 * Intended final shape:
 *   void* ptr = tiny_alloc_fast_pop(class_idx);
 *   if (!ptr) ptr = tiny_alloc_slow(class_idx);
 *   return ptr;
 *
 * The slow-path call is not wired in yet, so a cache miss currently yields
 * NULL.
 *
 * Returns: a cached block on a hit, NULL on a miss
 */
static inline void* tiny_alloc_fast(int class_idx) {
    void* const hit = tiny_alloc_fast_pop(class_idx);

    if (__builtin_expect(hit == NULL, 0)) {
        // Slow path call will be added in hakmem_tiny.c
        return NULL;  // Placeholder
    }
    return hit;
}
|
||
|
|
|
||
|
|
#endif // HAKMEM_TINY_ALLOC_FAST_INC_H
|
||
|
|
```
|
||
|
|
|
||
|
|
**テスト**:
|
||
|
|
```c
|
||
|
|
// test_tiny_alloc_fast.c
|
||
|
|
void test_tiny_alloc_fast_empty() {
|
||
|
|
g_tls_alloc_cache[0] = NULL;
|
||
|
|
g_tls_alloc_count[0] = 0;
|
||
|
|
assert(tiny_alloc_fast_pop(0) == NULL);
|
||
|
|
}
|
||
|
|
|
||
|
|
void test_tiny_alloc_fast_push_pop() {
|
||
|
|
void* ptr = (void*)0x12345678;
|
||
|
|
g_tls_alloc_count[0] = 0;
|
||
|
|
g_tls_alloc_cap[0] = 100;
|
||
|
|
|
||
|
|
assert(tiny_alloc_fast_push(0, ptr) == 1);
|
||
|
|
assert(g_tls_alloc_count[0] == 1);
|
||
|
|
assert(tiny_alloc_fast_pop(0) == ptr);
|
||
|
|
assert(g_tls_alloc_count[0] == 0);
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
### Phase 1.3: tiny_free_fast.inc.h (新規作成, 200行)
|
||
|
|
|
||
|
|
**目的**: Same-thread fast free path
|
||
|
|
|
||
|
|
**ファイル**: `core/tiny_free_fast.inc.h`
|
||
|
|
|
||
|
|
```c
|
||
|
|
#ifndef HAKMEM_TINY_FREE_FAST_INC_H
|
||
|
|
#define HAKMEM_TINY_FREE_FAST_INC_H
|
||
|
|
|
||
|
|
#include "tiny_atomic.h"
|
||
|
|
#include "tiny_alloc_fast.inc.h"
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// TINY_FREE_FAST: Same-thread fast free (15-20 命令)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_free_fast - Fast free for same-thread ownership
|
||
|
|
*
|
||
|
|
* Ownership check:
|
||
|
|
* 1. Get self TID (uint32_t)
|
||
|
|
* 2. Lookup slab owner_tid
|
||
|
|
* 3. Compare: if owner_tid == self_tid → same thread → push to cache
|
||
|
|
* 4. Otherwise: slow path (remote queue)
|
||
|
|
*
|
||
|
|
* Returns: 1 if successfully freed to cache, 0 if slow path needed
|
||
|
|
*/
|
||
|
|
static inline int tiny_free_fast(void* ptr, int class_idx) {
|
||
|
|
// Step 1: Get self TID
|
||
|
|
uint32_t self_tid = tiny_self_u32();
|
||
|
|
|
||
|
|
// Step 2: Owner lookup (O(1) via slab_handle.h)
|
||
|
|
TinySlab* slab = hak_tiny_owner_slab(ptr);
|
||
|
|
if (__builtin_expect(slab == NULL, 0)) {
|
||
|
|
return 0; // Not owned by Tiny → slow path
|
||
|
|
}
|
||
|
|
|
||
|
|
// Step 3: Compare owner
|
||
|
|
if (__builtin_expect(slab->owner_tid != self_tid, 0)) {
|
||
|
|
return 0; // Cross-thread → slow path (remote queue)
|
||
|
|
}
|
||
|
|
|
||
|
|
// Step 4: Same-thread → cache push
|
||
|
|
return tiny_alloc_fast_push(class_idx, ptr);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
 * tiny_free_main_entry - main free entry point (currently a stub)
 * @ptr: block to free; NULL is accepted and ignored
 *
 * Planned dispatch:
 *   - tiny_free_fast()   for same-thread frees
 *   - tiny_free_remote() for cross-thread frees
 *   - tiny_free_guard()  for validation
 *
 * None of these calls is wired in yet: after the NULL check the function
 * returns without touching @ptr.
 */
static inline void tiny_free_main_entry(void* ptr) {
    const int is_null = (ptr == NULL);

    if (__builtin_expect(is_null, 0)) {
        return;  // NULL is safe
    }

    // Fast path: look up class and owner in one step
    // (this requires pre-computing or an O(1) lookup).
    // For now the plan is to delegate to the existing tiny_free(),
    // which will be refactored to call tiny_free_fast().
}
|
||
|
|
|
||
|
|
#endif // HAKMEM_TINY_FREE_FAST_INC_H
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
### Phase 1.4: hakmem_tiny_free.inc Refactoring (削減)
|
||
|
|
|
||
|
|
**目的**: Free.inc から fast path を抽出し、500行削減
|
||
|
|
|
||
|
|
**手順**:
|
||
|
|
1. Lines 1-558 (Free パス) → tiny_free_fast.inc.h + tiny_free_remote.inc.h へ分割
|
||
|
|
2. Lines 559-998 (SuperSlab Alloc) → tiny_alloc_slow.inc.h へ移動
|
||
|
|
3. Lines 999-1369 (SuperSlab Free) → tiny_free_remote.inc.h + Box 4 へ移動
|
||
|
|
4. Lines 1371-1434 (Query, commented) → 削除
|
||
|
|
5. Lines 1435-1464 (Shutdown) → tiny_lifecycle_shutdown.inc.h へ移動
|
||
|
|
|
||
|
|
**結果**: hakmem_tiny_free.inc: 1470行 → 300行以下
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Priority 1: Implementation Checklist
|
||
|
|
|
||
|
|
### Week 1 Checklist
|
||
|
|
|
||
|
|
- [ ] Box 1: tiny_atomic.h 作成
|
||
|
|
- [ ] Unit tests
|
||
|
|
- [ ] Integration with tiny_free_fast
|
||
|
|
|
||
|
|
- [ ] Box 5.1: tiny_alloc_fast.inc.h 作成
|
||
|
|
- [ ] Pop/push functions
|
||
|
|
- [ ] Unit tests
|
||
|
|
- [ ] Benchmark (cache hit rate)
|
||
|
|
|
||
|
|
- [ ] Box 6.1: tiny_free_fast.inc.h 作成
|
||
|
|
- [ ] Same-thread check
|
||
|
|
- [ ] Cache push
|
||
|
|
- [ ] Unit tests
|
||
|
|
|
||
|
|
- [ ] Extract from hakmem_tiny_free.inc
|
||
|
|
- [ ] Remove fast path (lines 1-558)
|
||
|
|
- [ ] Remove shutdown (lines 1435-1464)
|
||
|
|
- [ ] Verify compilation
|
||
|
|
|
||
|
|
- [ ] Benchmark
|
||
|
|
- [ ] Measure fast path latency (should be <5 cycles)
|
||
|
|
- [ ] Measure cache hit rate (target: >80%)
|
||
|
|
- [ ] Measure throughput (target: >100M ops/sec for 16-64B)
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Priority 2: Remote Queue & Ownership (Week 2)
|
||
|
|
|
||
|
|
### Phase 2.1: tiny_remote_queue.inc.h (新規作成, 300行)
|
||
|
|
|
||
|
|
**出処**: hakmem_tiny_free.inc の remote queue logic を抽出
|
||
|
|
|
||
|
|
**責務**: MPSC remote queue operations
|
||
|
|
|
||
|
|
```c
|
||
|
|
// tiny_remote_queue.inc.h
|
||
|
|
#ifndef HAKMEM_TINY_REMOTE_QUEUE_INC_H
|
||
|
|
#define HAKMEM_TINY_REMOTE_QUEUE_INC_H
|
||
|
|
|
||
|
|
#include "tiny_atomic.h"
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// TINY_REMOTE_QUEUE: MPSC stack for cross-thread free
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_remote_queue_push - Push ptr to remote queue
|
||
|
|
*
|
||
|
|
* Single writer (owner) pushes to remote_heads[slab_idx]
|
||
|
|
* Multiple readers (other threads) push to same stack
|
||
|
|
*
|
||
|
|
* MPSC = Many Producers, Single Consumer
|
||
|
|
*/
|
||
|
|
static inline void tiny_remote_queue_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||
|
|
if (__builtin_expect(!ss || slab_idx < 0, 0)) {
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Link: ptr->next = head
|
||
|
|
uintptr_t cur_head = tiny_atomic_load_acq(&ss->remote_heads[slab_idx]);
|
||
|
|
while (1) {
|
||
|
|
*(uintptr_t*)ptr = cur_head;
|
||
|
|
|
||
|
|
// CAS: if head == cur_head, head = ptr
|
||
|
|
if (tiny_atomic_cas(&ss->remote_heads[slab_idx], &cur_head, (uintptr_t)ptr)) {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_remote_queue_pop_all - Pop entire chain from remote queue
|
||
|
|
*
|
||
|
|
* Owner thread pops all pending frees
|
||
|
|
* Returns: head of chain (or NULL if empty)
|
||
|
|
*/
|
||
|
|
static inline void* tiny_remote_queue_pop_all(SuperSlab* ss, int slab_idx) {
|
||
|
|
if (__builtin_expect(!ss || slab_idx < 0, 0)) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
uintptr_t head = tiny_atomic_exchange(&ss->remote_heads[slab_idx], 0);
|
||
|
|
return (void*)head;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_remote_queue_contains_guard - Guard check (security)
|
||
|
|
*
|
||
|
|
* Verify ptr is in remote queue chain (sentinel check)
|
||
|
|
*/
|
||
|
|
static inline int tiny_remote_queue_contains_guard(SuperSlab* ss, int slab_idx, void* target) {
|
||
|
|
if (!ss || slab_idx < 0) return 0;
|
||
|
|
|
||
|
|
uintptr_t cur = tiny_atomic_load_acq(&ss->remote_heads[slab_idx]);
|
||
|
|
int limit = 8192; // Prevent infinite loop
|
||
|
|
|
||
|
|
while (cur && limit-- > 0) {
|
||
|
|
if ((void*)cur == target) {
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
cur = *(uintptr_t*)cur;
|
||
|
|
}
|
||
|
|
|
||
|
|
return (limit <= 0) ? 1 : 0; // Fail-safe: treat unbounded as duplicate
|
||
|
|
}
|
||
|
|
|
||
|
|
#endif // HAKMEM_TINY_REMOTE_QUEUE_INC_H
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
### Phase 2.2: tiny_owner.inc.h (新規作成, 120行)
|
||
|
|
|
||
|
|
**責務**: Owner TID management
|
||
|
|
|
||
|
|
```c
|
||
|
|
// tiny_owner.inc.h
|
||
|
|
#ifndef HAKMEM_TINY_OWNER_INC_H
|
||
|
|
#define HAKMEM_TINY_OWNER_INC_H
|
||
|
|
|
||
|
|
#include "tiny_atomic.h"
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// TINY_OWNER: Ownership tracking (owner_tid)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_owner_acquire - Acquire ownership of slab
|
||
|
|
*
|
||
|
|
* Call when thread takes ownership of a TinySlab
|
||
|
|
*/
|
||
|
|
static inline void tiny_owner_acquire(TinySlab* slab, uint32_t tid) {
|
||
|
|
if (__builtin_expect(!slab, 0)) return;
|
||
|
|
tiny_atomic_store_rel(&slab->owner_tid, tid);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_owner_release - Release ownership of slab
|
||
|
|
*
|
||
|
|
* Call when thread releases a TinySlab (e.g., spill, shutdown)
|
||
|
|
*/
|
||
|
|
static inline void tiny_owner_release(TinySlab* slab) {
|
||
|
|
if (__builtin_expect(!slab, 0)) return;
|
||
|
|
tiny_atomic_store_rel(&slab->owner_tid, 0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* tiny_owner_check - Check if self owns slab
|
||
|
|
*
|
||
|
|
* Returns: 1 if self owns, 0 otherwise
|
||
|
|
*/
|
||
|
|
static inline int tiny_owner_check(TinySlab* slab, uint32_t self_tid) {
|
||
|
|
if (__builtin_expect(!slab, 0)) return 0;
|
||
|
|
return tiny_atomic_load_acq(&slab->owner_tid) == self_tid;
|
||
|
|
}
|
||
|
|
|
||
|
|
#endif // HAKMEM_TINY_OWNER_INC_H
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Testing Framework
|
||
|
|
|
||
|
|
### Unit Test Template
|
||
|
|
|
||
|
|
```c
|
||
|
|
// tests/test_tiny_<component>.c
|
||
|
|
|
||
|
|
#include <assert.h>
|
||
|
|
#include "hakmem.h"
|
||
|
|
#include "tiny_atomic.h"
|
||
|
|
#include "tiny_alloc_fast.inc.h"
|
||
|
|
#include "tiny_free_fast.inc.h"
|
||
|
|
|
||
|
|
static void test_<function>() {
|
||
|
|
// Setup
|
||
|
|
// Action
|
||
|
|
// Assert
|
||
|
|
printf("✅ test_<function> passed\n");
|
||
|
|
}
|
||
|
|
|
||
|
|
int main() {
|
||
|
|
test_<function>();
|
||
|
|
// ... more tests
|
||
|
|
printf("\n✨ All tests passed!\n");
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### Integration Test
|
||
|
|
|
||
|
|
```c
|
||
|
|
// tests/test_tiny_alloc_free_cycle.c
|
||
|
|
|
||
|
|
/*
 * Single-thread alloc/free cycle: 1000 rounds of 100 live 16-byte blocks,
 * i.e. 100,000 alloc/free pairs in total, as the test name promises.
 */
void test_alloc_free_single_thread_100k(void) {
    enum { BATCH = 100, ROUNDS = 1000 };  // BATCH * ROUNDS == 100k
    void* ptrs[BATCH];

    for (int round = 0; round < ROUNDS; round++) {
        for (int i = 0; i < BATCH; i++) {
            ptrs[i] = hak_tiny_alloc(16);
            assert(ptrs[i] != NULL);
        }

        for (int i = 0; i < BATCH; i++) {
            hak_tiny_free(ptrs[i]);
        }
    }

    printf("✅ test_alloc_free_single_thread_100k passed\n");
}
|
||
|
|
|
||
|
|
void test_alloc_free_cross_thread() {
|
||
|
|
void* ptrs[100];
|
||
|
|
|
||
|
|
// Thread A: allocate
|
||
|
|
pthread_t tid;
|
||
|
|
pthread_create(&tid, NULL, allocator_thread, ptrs);
|
||
|
|
|
||
|
|
// Main: free (cross-thread)
|
||
|
|
for (int i = 0; i < 100; i++) {
|
||
|
|
sleep(10); // Wait for allocs
|
||
|
|
hak_tiny_free(ptrs[i]);
|
||
|
|
}
|
||
|
|
|
||
|
|
pthread_join(tid, NULL);
|
||
|
|
printf("✅ test_alloc_free_cross_thread passed\n");
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Performance Validation
|
||
|
|
|
||
|
|
### Assembly Check (fast path)
|
||
|
|
|
||
|
|
```bash
|
||
|
|
# Compile with -S to generate assembly
|
||
|
|
gcc -S -O3 -c core/hakmem_tiny.c -o /tmp/tiny.s
|
||
|
|
|
||
|
|
# Count instructions in fast path
|
||
|
|
grep -A20 "tiny_alloc_fast_pop:" /tmp/tiny.s | wc -l
|
||
|
|
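# Expected: <= 8 instructions (3-4 ideal). Note: wc -l counts every line (labels and directives included), and a static inline function has no label of its own once it is inlined, so inspect the caller if grep finds nothing.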
|
||
|
|
|
||
|
|
# Check branch mispredicts
|
||
|
|
grep "likely\|unlikely" /tmp/tiny.s | wc -l
|
||
|
|
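# Expected: cache hits on the likely path, misses on the unlikely path. Note: __builtin_expect leaves no "likely"/"unlikely" text in the generated assembly, so this count is normally 0; verify instead that the cache-hit path is the fall-through branch.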
|
||
|
|
```
|
||
|
|
|
||
|
|
### Benchmark (larson)
|
||
|
|
|
||
|
|
```bash
|
||
|
|
# Baseline (binary built from the commit before the refactor)
|
||
|
|
./larson_hakmem 16 1 1000 1000 0
|
||
|
|
|
||
|
|
# With new fast path (rebuild after the refactor, then rerun the identical command)
|
||
|
|
./larson_hakmem 16 1 1000 1000 0
|
||
|
|
|
||
|
|
# Expected improvement: +10-15% throughput
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Compilation & Integration
|
||
|
|
|
||
|
|
### Makefile Changes
|
||
|
|
|
||
|
|
```makefile
|
||
|
|
# Add new files to dependencies
|
||
|
|
TINY_HEADERS = \
|
||
|
|
core/tiny_atomic.h \
|
||
|
|
core/tiny_alloc_fast.inc.h \
|
||
|
|
core/tiny_free_fast.inc.h \
|
||
|
|
core/tiny_owner.inc.h \
|
||
|
|
core/tiny_remote_queue.inc.h
|
||
|
|
|
||
|
|
# Rebuild if any header changes
|
||
|
|
libhakmem.so: $(TINY_HEADERS) core/hakmem_tiny.c
|
||
|
|
```
|
||
|
|
|
||
|
|
### Include Order (hakmem_tiny.c)
|
||
|
|
|
||
|
|
```c
|
||
|
|
// At the top of hakmem_tiny.c, after hakmem_tiny_config.h:
|
||
|
|
|
||
|
|
// ============================================================
|
||
|
|
// LAYER 0: Atomic + Ownership (lowest)
|
||
|
|
// ============================================================
|
||
|
|
#include "tiny_atomic.h"
|
||
|
|
#include "tiny_owner.inc.h"
|
||
|
|
#include "slab_handle.h"
|
||
|
|
|
||
|
|
// ... rest of includes
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Rollback Plan
|
||
|
|
|
||
|
|
If performance regresses or compilation fails:
|
||
|
|
|
||
|
|
1. **Keep old files**: hakmem_tiny_free.inc is not deleted, only refactored
|
||
|
|
2. **Git revert**: Can revert specific commits per Box
|
||
|
|
3. **Feature flags**: Add HAKMEM_TINY_NEW_FAST_PATH=0 to disable new code path
|
||
|
|
4. **Benchmark first**: Always run larson before and after each change
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Success Metrics
|
||
|
|
|
||
|
|
### Performance
|
||
|
|
- [ ] Fast path: 3-4 instructions (assembly review)
|
||
|
|
- [ ] Throughput: +10-15% on 16-64B allocations
|
||
|
|
- [ ] Cache hit rate: >80%
|
||
|
|
|
||
|
|
### Code Quality
|
||
|
|
- [ ] All files <= 500 lines
|
||
|
|
- [ ] Zero cyclic dependencies (verified by include analysis)
|
||
|
|
- [ ] No compilation warnings
|
||
|
|
|
||
|
|
### Testing
|
||
|
|
- [ ] Unit tests: 100% pass
|
||
|
|
- [ ] Integration tests: 100% pass
|
||
|
|
- [ ] Larson benchmark: baseline + 10-15%
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Contact & Questions
|
||
|
|
|
||
|
|
Refer to REFACTOR_PLAN.md for high-level strategy and timeline.
|
||
|
|
|
||
|
|
For specific implementation details, see the corresponding .inc.h files.
|
||
|
|
|