## Phase 2-1: Lane Classification Box (Single Source of Truth)
### New Module: hak_lane_classify.inc.h
- Centralized size-to-lane mapping with unified boundary definitions
- Lane architecture (see the sketch below):
  - LANE_TINY: [0, 1024B] → SuperSlab (unchanged)
  - LANE_POOL: [1025B, 52KB] → per-thread Pool (extended!)
  - LANE_ACE: [52KB, 2MB] → ACE learning
  - LANE_HUGE: [2MB+] → mmap direct
- Key invariant: POOL_MIN = TINY_MAX + 1 (no gaps between lanes)
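A minimal sketch of the lane box. Only `LANE_TINY_MAX`, `POOL_MIN_REQUEST_SIZE`, and the `HAK_LANE_IS_*` family are named in this change; the spellings of the 52KB/2MB boundary macros are illustrative:

```c
#include <stddef.h>

/* Boundary constants. LANE_TINY_MAX and POOL_MIN_REQUEST_SIZE appear in this
 * change; LANE_POOL_MAX / LANE_ACE_MAX are illustrative names. */
#define LANE_TINY_MAX          1024u                /* [0, 1024B]  -> SuperSlab */
#define POOL_MIN_REQUEST_SIZE  (LANE_TINY_MAX + 1)  /* key invariant: no gap    */
#define LANE_POOL_MAX          (52u * 1024)         /* up to 52KB  -> Pool      */
#define LANE_ACE_MAX           (2u * 1024 * 1024)   /* up to 2MB   -> ACE       */

/* Single Source of Truth: routers ask these predicates instead of
 * hard-coding size boundaries in each call site. */
#define HAK_LANE_IS_TINY(sz) ((sz) <= LANE_TINY_MAX)
#define HAK_LANE_IS_POOL(sz) ((sz) > LANE_TINY_MAX && (sz) <= LANE_POOL_MAX)
#define HAK_LANE_IS_ACE(sz)  ((sz) > LANE_POOL_MAX && (sz) <= LANE_ACE_MAX)
#define HAK_LANE_IS_HUGE(sz) ((sz) > LANE_ACE_MAX)
```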
### Fixed: Tiny/Pool Boundary Mismatch
- Before: TINY_MAX_SIZE=1024 vs tiny_get_max_size()=2047 (inconsistent!)
- After: Both reference LANE_TINY_MAX=1024 (authoritative)
- Impact: Eliminates the 1025-2047B "unmanaged zone" that fell through to libc and caused fragmentation
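With one authoritative boundary, the invariant can be checked at compile time. A minimal guard, assuming the macro names sketched above (`_Static_assert` is C11):

```c
#include "box/hak_lane_classify.inc.h"

_Static_assert(TINY_MAX_SIZE == LANE_TINY_MAX,
               "TINY_MAX_SIZE must come from the lane box");
_Static_assert(POOL_MIN_REQUEST_SIZE == LANE_TINY_MAX + 1,
               "no unmanaged gap between the Tiny and Pool lanes");
```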
### Updated Files
- core/hakmem_tiny.h: Use LANE_TINY_MAX, fix sizes[7]=1024 (was 2047)
- core/hakmem_pool.h: Use POOL_MIN_REQUEST_SIZE=1025 (was 2048)
- core/box/hak_alloc_api.inc.h: Lane-based routing via HAK_LANE_IS_* (sketched below)
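An illustrative routing shape for hak_alloc_api.inc.h, using the predicates sketched above. `hak_tiny_alloc()` is the real Tiny entry point (declared in the header appended below); the Pool/ACE/huge callees are stand-in names:

```c
#include <stddef.h>

extern void* hak_tiny_alloc(size_t size);  /* real API, see hakmem_tiny.h */
extern void* hak_pool_alloc(size_t size);  /* stand-in name */
extern void* hak_ace_alloc(size_t size);   /* stand-in name */
extern void* hak_huge_mmap(size_t size);   /* stand-in name */

static inline void* hak_alloc_route(size_t size) {
    if (HAK_LANE_IS_TINY(size)) return hak_tiny_alloc(size);  /* <= 1024B */
    if (HAK_LANE_IS_POOL(size)) return hak_pool_alloc(size);  /* <= 52KB  */
    if (HAK_LANE_IS_ACE(size))  return hak_ace_alloc(size);   /* <= 2MB   */
    return hak_huge_mmap(size);                               /* 2MB+     */
}
```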
## jemalloc Block Bug Fix
### Root Cause
- g_jemalloc_loaded initialized to -1 (unknown)
- Condition `if (block && g_jemalloc_loaded)` treated -1 as true
- Result: ALL allocations fell back to libc (even when jemalloc was not loaded!)
### Fix
- Change the condition to `g_jemalloc_loaded > 0` (see the sketch below)
- Fall back only when jemalloc is ACTUALLY loaded
- Applied to: malloc/free/calloc/realloc
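A hedged sketch of the guard; `g_jemalloc_loaded` and the fixed condition are from this change, while the wrapper shape and helper names are stand-ins:

```c
#include <stddef.h>

/* -1 = unknown (not yet probed), 0 = not loaded, 1 = loaded */
static int g_jemalloc_loaded = -1;

extern void* hak_internal_alloc(size_t size);   /* stand-in for the hakmem path */
extern void* libc_fallback_alloc(size_t size);  /* stand-in for the libc path   */

void* wrapped_malloc(size_t size, int block) {
    /* Bug: `if (block && g_jemalloc_loaded)` treated the -1 sentinel as true,
     * so every allocation fell back to libc even without jemalloc present. */
    if (block && g_jemalloc_loaded > 0) {  /* fixed: fall back only when loaded */
        return libc_fallback_alloc(size);
    }
    return hak_internal_alloc(size);
}
```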
### Impact
- Before: 100% libc fallback (jemalloc block false positive)
- After: only genuine cases fall back (init_wait, lockdepth, etc.)
## Fallback Diagnostics (ChatGPT contribution)
### New Feature: HAKMEM_WRAP_DIAG
- ENV flag to enable fallback logging
- Reason-specific counters (init_wait, jemalloc_block, lockdepth, etc.)
- First 4 occurrences logged per reason
- Helps identify unwanted fallback paths
### Implementation
- core/box/wrapper_env_box.{c,h}: ENV cache + DIAG flag
- core/box/hak_wrappers.inc.h: wrapper_record_fallback() calls (sketched below)
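A minimal sketch of the diagnostics path; the reason names, the ENV cache, and the first-4 logging rule are from this change, but the function and enum spellings are assumptions:

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

typedef enum { FB_INIT_WAIT, FB_JEMALLOC_BLOCK, FB_LOCKDEPTH, FB_REASON_COUNT } fb_reason_t;

static _Atomic unsigned long g_fb_count[FB_REASON_COUNT];
static const char* const g_fb_name[FB_REASON_COUNT] = {
    "init_wait", "jemalloc_block", "lockdepth"
};

/* ENV cache: HAKMEM_WRAP_DIAG is parsed once, then remembered. */
static int wrap_diag_enabled(void) {
    static int cached = -1;
    if (cached < 0) {
        const char* env = getenv("HAKMEM_WRAP_DIAG");
        cached = (env && *env && *env != '0') ? 1 : 0;
    }
    return cached;
}

/* Called from each wrapper on the fallback path: always count, but log only
 * the first 4 occurrences per reason to keep the output bounded. */
static void wrapper_record_fallback(fb_reason_t reason) {
    unsigned long n = atomic_fetch_add(&g_fb_count[reason], 1);
    if (wrap_diag_enabled() && n < 4) {
        fprintf(stderr, "[wrap] libc fallback: %s (#%lu)\n", g_fb_name[reason], n + 1);
    }
}
```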
## Verification
### Fallback Reduction
- Before fix: every allocation logged `[wrap] libc malloc: jemalloc block` (100% fallback)
- After fix: only init_wait and lockdepth remain (expected, minimal)
### Known Issue
- Tiny allocator OOM (size=8) still crashes
- This is a pre-existing bug, unrelated to Phase 2-1
- Was hidden by jemalloc block false positive
- Will be investigated separately
## Performance Impact
### sh8bench 8 threads
- Phase 1-1: 15 s
- Phase 2-1: 14 s (~7% improvement)
### Note
- True hakmem performance now measurable (no more 100% fallback)
- Tiny OOM prevents full benchmark completion
- Next: Fix Tiny allocator for complete evaluation
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
## Appendix: core/hakmem_tiny.h (421 lines, 17 KiB)
```c
#ifndef HAKMEM_TINY_H
#define HAKMEM_TINY_H

#include <stddef.h>
#include <stdint.h>
#include "hakmem_build_flags.h"
#include "hakmem_trace.h"  // Optional USDT (perf) tracepoints
#include <pthread.h>
#include <stdatomic.h>

// Include page mini-magazine module (Phase 1: Hybrid optimization)
#include "hakmem_tiny_mini_mag.h"

// Phase 2: Lane Classification Box (Single Source of Truth for boundaries)
#include "box/hak_lane_classify.inc.h"

// Forward declaration for initialization guard
int hak_is_initializing(void);

// Phase 6.12: Tiny Pool - Slab Allocator for ≤1KB allocations
// 8 size classes: 8B, 16B, 32B, 64B, 128B, 256B, 512B, 1KB

// ============================================================================
// Configuration
// ============================================================================

#define TINY_NUM_CLASSES 8
#define TINY_SLAB_SIZE (64 * 1024)  // 64KB per slab

// Phase 2 FIX: TINY_MAX_SIZE now references LANE_TINY_MAX (Single Source of Truth)
// Previously: TINY_MAX_SIZE=1024 vs tiny_get_max_size()=2047 (inconsistent!)
// Now: Both reference LANE_TINY_MAX (1024) from hak_lane_classify.inc.h
#undef TINY_MAX_SIZE                 // Remove compatibility wrapper if defined
#define TINY_MAX_SIZE LANE_TINY_MAX  // = 1024 (authoritative)

// Phase 16: Dynamic Tiny max size control (ENV: HAKMEM_TINY_MAX_CLASS)
// Strategy: Reduce Tiny coverage to ~256B, delegate 512/1024B to Pool
// ENV values:
//   HAKMEM_TINY_MAX_CLASS=5 → Tiny handles up to 255B (C0-C5)
//   HAKMEM_TINY_MAX_CLASS=7 → Tiny handles up to 1024B (C0-C7, default)
// Phase 2 FIX: sizes[7] = 1024 (was 2047, caused boundary mismatch!)
#include <stdlib.h>
#include <stdbool.h>
extern bool smallmid_is_enabled(void);

static inline size_t tiny_get_max_size(void) {
    static size_t g_cached = 0;
    if (__builtin_expect(g_cached == 0, 0)) {
        const char* env = getenv("HAKMEM_TINY_MAX_CLASS");
        int max_class = 7;
        if (env && *env) {
            int parsed = atoi(env);
            if (parsed >= 0 && parsed < TINY_NUM_CLASSES) max_class = parsed;
        }
        if (smallmid_is_enabled() && max_class > 5) max_class = 5;
        // Phase 2 FIX: sizes[7] = LANE_TINY_MAX (was 2047!)
        // This ensures tiny_get_max_size() <= LANE_TINY_MAX always
        static const size_t sizes[8] = {7, 15, 31, 63, 127, 255, 511, LANE_TINY_MAX};
        g_cached = sizes[max_class];
    }
    return g_cached;
}

// ============================================================================
// Phase 3d-B: TLS Cache Merge - Unified TLS SLL Structure
// ============================================================================
//
// Goal: Improve L1D cache hit rate by merging head+count into same struct.
//
// OLD (cache line split):
//   __thread void*    g_tls_sll_head[8];   // 64 bytes (cache line 0)
//   __thread uint32_t g_tls_sll_count[8];  // 32 bytes (cache line 1)
//   → 2 L1D loads per operation (head from line 0, count from line 1)
//
// NEW (unified):
//   __thread TinyTLSSLL g_tls_sll[8];      // 128 bytes = 2 cache lines
//   → 1 L1D load per operation (head+count in same 16B struct)
//
// Expected: +12-18% improvement from cache locality
//
#include "box/ptr_type_box.h"  // Phase 10: Type safety for SLL head

typedef struct {
    hak_base_ptr_t head;  // SLL head pointer (8 bytes)
    uint32_t count;       // Number of elements in SLL (4 bytes)
    uint32_t _pad;        // Padding to 16 bytes for cache alignment (4 bytes)
} TinyTLSSLL;

// ============================================================================
// Size Classes
// ============================================================================

// Size class table (branchless lookup)
// Note: Definition is in hakmem_tiny.c to avoid multiple definition errors
// Declaration is in hakmem_tiny_config.h as: extern const size_t g_tiny_class_sizes[TINY_NUM_CLASSES];
// Box 3 (tiny_box_geometry.h) uses this via hakmem_tiny_config.h
// (Definition removed from header - see hakmem_tiny.c)

// Full LUT (1..2048) for branchless size-to-class mapping (index by size).
// Phase C7-UPGRADE: Expanded from 1025 -> 2049 to support 2048B stride (C7).
// Memory cost ~2KB. Zero hot-path arithmetic for all Tiny sizes.
// Generate repeated values via helper macros to keep the source compact.
#define HAK_R1(x)    x
#define HAK_R2(x)    HAK_R1(x), HAK_R1(x)
#define HAK_R4(x)    HAK_R2(x), HAK_R2(x)
#define HAK_R8(x)    HAK_R4(x), HAK_R4(x)
#define HAK_R16(x)   HAK_R8(x), HAK_R8(x)
#define HAK_R32(x)   HAK_R16(x), HAK_R16(x)
#define HAK_R64(x)   HAK_R32(x), HAK_R32(x)
#define HAK_R128(x)  HAK_R64(x), HAK_R64(x)
#define HAK_R256(x)  HAK_R128(x), HAK_R128(x)
#define HAK_R512(x)  HAK_R256(x), HAK_R256(x)
#define HAK_R1024(x) HAK_R512(x), HAK_R512(x)

static const int8_t g_size_to_class_lut_2k[2049] = {
    -1,           // index 0: invalid
    HAK_R8(0),    // 1..8       -> class 0
    HAK_R8(1),    // 9..16      -> class 1
    HAK_R16(2),   // 17..32     -> class 2
    HAK_R32(3),   // 33..64     -> class 3
    HAK_R64(4),   // 65..128    -> class 4
    HAK_R128(5),  // 129..256   -> class 5
    HAK_R256(6),  // 257..512   -> class 6
    HAK_R1024(7), // 513..1536  -> class 7 (1024 entries)
    HAK_R512(7),  // 1537..2048 -> class 7 (512 entries)
};

#undef HAK_R1024
#undef HAK_R512
#undef HAK_R256
#undef HAK_R128
#undef HAK_R64
#undef HAK_R32
#undef HAK_R16
#undef HAK_R8
#undef HAK_R4
#undef HAK_R2
#undef HAK_R1

// Blocks per slab for each class
static const uint16_t g_tiny_blocks_per_slab[TINY_NUM_CLASSES] = {
    8192,  // Class 0: 64KB / 8B    = 8192 blocks
    4096,  // Class 1: 64KB / 16B   = 4096 blocks
    2048,  // Class 2: 64KB / 32B   = 2048 blocks
    1024,  // Class 3: 64KB / 64B   = 1024 blocks
    512,   // Class 4: 64KB / 128B  = 512 blocks
    256,   // Class 5: 64KB / 256B  = 256 blocks
    128,   // Class 6: 64KB / 512B  = 128 blocks
    32     // Class 7: 64KB / 2048B = 32 blocks
};

// Bitmap size (uint64_t words) for each class
static const uint8_t g_tiny_bitmap_words[TINY_NUM_CLASSES] = {
    128,  // Class 0: 8192 blocks / 64 = 128 words
    64,   // Class 1: 4096 blocks / 64 = 64 words
    32,   // Class 2: 2048 blocks / 64 = 32 words
    16,   // Class 3: 1024 blocks / 64 = 16 words
    8,    // Class 4: 512 blocks / 64  = 8 words
    4,    // Class 5: 256 blocks / 64  = 4 words
    2,    // Class 6: 128 blocks / 64  = 2 words
    1     // Class 7: 32 blocks        = 1 word (rounded up)
};

// ============================================================================
// Data Structures
// ============================================================================

// Forward declaration
typedef struct TinySlab TinySlab;

// Step 2: Slab Registry (Hash Table for O(1) lookup)
#define SLAB_REGISTRY_SIZE 1024
#define SLAB_REGISTRY_MASK (SLAB_REGISTRY_SIZE - 1)
#define SLAB_REGISTRY_MAX_PROBE 8

typedef struct {
    uintptr_t slab_base;       // 64KB aligned base address (0 = empty slot)
    _Atomic(TinySlab*) owner;  // Atomic pointer to TinySlab metadata (MT-safe)
} SlabRegistryEntry;

// Global registry (extern for access from multiple translation units)
extern SlabRegistryEntry g_slab_registry[SLAB_REGISTRY_SIZE];

// Tiny Pool initialization flag (extern for inline function access)
extern int g_tiny_initialized;

// Adaptive CAS: Active thread counter (for single-threaded optimization)
extern _Atomic uint32_t g_hakmem_active_threads;

// Adaptive CAS: Thread registration (called on first allocation)
void hakmem_thread_register(void);

// Per-class locks to protect slab lists and bitmaps (padded to avoid false sharing)
typedef struct __attribute__((aligned(64))) { pthread_mutex_t m; char _pad[64]; } PaddedLock;
extern PaddedLock g_tiny_class_locks[TINY_NUM_CLASSES];

// Slab header (one per 64KB slab)
typedef struct TinySlab {
    void* base;             // Base address (64KB aligned)
    uint64_t* bitmap;       // Free block bitmap (dynamic size)
    uint16_t free_count;    // Number of free blocks
    uint16_t total_count;   // Total blocks in slab
    uint8_t class_idx;      // Size class index (0-7)
    uint8_t _padding[3];
    struct TinySlab* next;  // Next slab in list
    // MPSC remote-free stack head (lock-free). Stores user ptrs; next is embedded in block.
    atomic_uintptr_t remote_head;
    // Approximate count of pending remote frees (for drain thresholding)
    atomic_uint remote_count;
    // Targeted remote-drain queue linkage and state (for BG drain targeting)
    struct TinySlab* remote_q_next;  // Intrusive next pointer for target stack
    atomic_uint remote_queued;       // 0=not enqueued, 1=enqueued (CAS guarded)
    // Owning thread (for remote detection). Allocations from this thread use TLS fast path.
    pthread_t owner_tid;
    // Hint for next scan start (reduces bitmap word scanning)
    uint16_t hint_word;
    // Summary bitmap (2nd level): per 64-word group, bit=1 if the group has any free block
    uint8_t summary_words;  // number of summary words (=(bitmap_words+63)/64)
    uint8_t _pad_sum[1];
    uint64_t* summary;      // length = summary_words

    // Phase 1: Page Mini-Magazine (Hybrid bitmap+free-list optimization)
    // Fast LIFO cache (16-32 items) for O(1) allocation without bitmap scan
    // Cost: 1-2 ns (vs 5-6 ns bitmap scan)
    PageMiniMag mini_mag;   // LIFO free-list cache
} TinySlab;

// Global Tiny Pool state
typedef struct {
    TinySlab* free_slabs[TINY_NUM_CLASSES];  // Slabs with free blocks
    TinySlab* full_slabs[TINY_NUM_CLASSES];  // Full slabs (no free blocks)
    uint64_t alloc_count[TINY_NUM_CLASSES];  // Allocation count per class
    uint64_t free_count[TINY_NUM_CLASSES];   // Free count per class
    uint64_t slab_count[TINY_NUM_CLASSES];   // Total slabs per class
} TinyPool;

// Global pool instance (defined in hakmem_tiny.c)
extern TinyPool g_tiny_pool;

// ============================================================================
// API Functions
// ============================================================================

// Initialize Tiny Pool
void hak_tiny_init(void);

// Allocate from Tiny Pool (returns NULL if size > 1KB)
void* hak_tiny_alloc(size_t size);

// Free to Tiny Pool (no-op if ptr is not managed by Tiny Pool)
void hak_tiny_free(void* ptr);

// Phase 6.12.1: Free with pre-calculated slab (avoids duplicate owner_slab lookup)
void hak_tiny_free_with_slab(void* ptr, TinySlab* slab);

// Check if pointer is managed by Tiny Pool
int hak_tiny_is_managed(void* ptr);
int hak_tiny_is_managed_superslab(void* ptr);

// Return the usable size for a Tiny-managed pointer (0 if unknown/not tiny).
// For SuperSlab-backed blocks, uses size class from the owning SuperSlab.
// For TinySlab-backed blocks, uses class_idx from the owning slab.
size_t hak_tiny_usable_size(void* ptr);

// Get statistics
void hak_tiny_get_stats(uint64_t* alloc_count, uint64_t* free_count, uint64_t* slab_count);

// Print statistics (debug)
void hak_tiny_print_stats(void);

// Phase 7.7: Magazine flush API (reduce memory footprint)
// Flush Magazine cache to freelists, enabling empty SuperSlab detection
void hak_tiny_magazine_flush(int class_idx);
void hak_tiny_magazine_flush_all(void);

// Trim empty Tiny slabs by releasing fully-free slabs back to the system.
// Safe to call anytime; holds per-class locks while trimming.
void hak_tiny_trim(void);

// Optional shutdown hook for Tiny subsystem.
// Stops background threads (e.g., Deferred Intelligence) and performs
// any best-effort cleanup needed during process shutdown.
void hak_tiny_shutdown(void);

// Phase 8.2: Memory profiling (toggle with HAKMEM_DEBUG_MEMORY)
// Print detailed breakdown of memory usage by component
void hak_tiny_print_memory_profile(void);

// Debug: dump Ultra Tiny counters (pop hits/refills/resets)
void hak_tiny_ultra_debug_dump(void);
void hak_tiny_path_debug_dump(void);

// ============================================================================
// ACE Learning Layer: Runtime parameter adjustment
// ============================================================================

// Exported for ACE controller access
extern int g_remote_drain_thresh_per_class[TINY_NUM_CLASSES];

// Set remote drain threshold for a specific size class
void hkm_ace_set_drain_threshold(int class_idx, uint32_t threshold);

// ============================================================================
// Internal Helpers (branchless size-to-class)
// ============================================================================

// Convert size to class index (branchless lookup)
// Phase C7-UPGRADE: ALL classes have 1-byte header
// C7 max usable: 2047B (2048B total with header)
// malloc(2048+) → routed to Mid allocator
static inline int hak_tiny_size_to_class(size_t size) {
    if (size == 0) return -1;
#if HAKMEM_TINY_HEADER_CLASSIDX
    // Phase C7-UPGRADE: ALL classes have 1-byte header
    // Box: [Header 1B][Data NB] = (N+1) bytes total
    // g_tiny_class_sizes stores TOTAL size, so we need size+1 bytes
    // User requests N bytes → need (N+1) total → look up class with stride ≥ (N+1)
    // Max usable: 2047B (C7 stride=2048B)
    if (size > 2047) return -1;  // 2048+ → Mid allocator
    // Find smallest class where stride ≥ (size + 1)
    // LUT maps total_size → class, so lookup (size + 1) to find class with that stride
    size_t needed = size + 1;    // total bytes needed (data + header)
    if (needed > 2048) return -1;
    return g_size_to_class_lut_2k[needed];
#else
    if (size > 1024) return -1;
    return g_size_to_class_lut_2k[size];  // 1..1024
#endif
}

// ============================================================================
// Phase 6.12.1: O(1) Slab Lookup (Embedded Metadata)
// ============================================================================

// Phase 6.12.1: Find slab owner by pointer
// NOTE: Reverted from O(1) embedded metadata to O(N) linear search for safety
// Embedded metadata requires dereferencing potentially unmapped memory
// This is still faster than before because Option C eliminates duplicate calls
TinySlab* hak_tiny_owner_slab(void* ptr);

// ============================================================================
// Bitmap Operations (inline for speed)
// ============================================================================

// Set block as used
static inline void hak_tiny_set_used(TinySlab* slab, int block_idx) {
    int word_idx = block_idx / 64;
    int bit_idx = block_idx % 64;
    uint64_t v = slab->bitmap[word_idx] | (1ULL << bit_idx);
    slab->bitmap[word_idx] = v;
    // update summary: set to 1 if any free bit remains, else 0
    int sum_word = word_idx / 64;
    int sum_bit = word_idx % 64;
    uint64_t has_free = ~v;  // any zero in word means free
    if (has_free != 0) {
        slab->summary[sum_word] |= (1ULL << sum_bit);
    } else {
        slab->summary[sum_word] &= ~(1ULL << sum_bit);
    }
}

// Set block as free
static inline void hak_tiny_set_free(TinySlab* slab, int block_idx) {
    int word_idx = block_idx / 64;
    int bit_idx = block_idx % 64;
    uint64_t v = slab->bitmap[word_idx] & ~(1ULL << bit_idx);
    slab->bitmap[word_idx] = v;
    // update summary: this word now certainly has a free bit
    int sum_word = word_idx / 64;
    int sum_bit = word_idx % 64;
    slab->summary[sum_word] |= (1ULL << sum_bit);
}

// Check if block is used
static inline int hak_tiny_is_used(TinySlab* slab, int block_idx) {
    int word_idx = block_idx / 64;
    int bit_idx = block_idx % 64;
    return (slab->bitmap[word_idx] & (1ULL << bit_idx)) != 0;
}

// Find first free block (returns -1 if none)
static inline int hak_tiny_find_free_block(TinySlab* slab) {
    // Trace bitmap scan attempts
    HAK_TP1(bitmap_scan, slab->class_idx);
    const int bw = g_tiny_bitmap_words[slab->class_idx];
    const int sw = slab->summary_words;
    if (bw <= 0 || sw <= 0) return -1;

    int start_word = slab->hint_word % bw;
    int start_sw = start_word / 64;
    int start_sb = start_word % 64;

    for (int k = 0; k < sw; k++) {
        int idx = start_sw + k;
        if (idx >= sw) idx -= sw;
        uint64_t bits = slab->summary[idx];
        // mask low bits on first iteration
        if (k == 0) {
            bits &= (~0ULL) << start_sb;
        }
        // mask out-of-range bits in last summary word
        if (idx == sw - 1 && (bw % 64) != 0) {
            // (bw % 64) is in 1..63 here, so the shift is well-defined
            bits &= (1ULL << (bw % 64)) - 1ULL;
        }
        if (bits == 0) continue;
        int woff = __builtin_ctzll(bits);  // word offset within this summary word
        int word_idx = idx * 64 + woff;    // bitmap word index
        if (word_idx >= bw) continue;      // safety
        uint64_t used = slab->bitmap[word_idx];
        uint64_t free_bits = ~used;
        if (free_bits == 0) continue;      // should not happen if summary correct
        int bit_idx = __builtin_ctzll(free_bits);  // first free block within word
        slab->hint_word = (uint16_t)((word_idx + 1) % bw);
        return word_idx * 64 + bit_idx;
    }
    return -1;
}

#endif // HAKMEM_TINY_H
```
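For reference, a worked example of the size-to-class mapping above under the 1-byte-header build (HAKMEM_TINY_HEADER_CLASSIDX); a hypothetical test harness, not part of the change:

```c
#include <assert.h>
#include "hakmem_tiny.h"

int main(void) {
    assert(hak_tiny_size_to_class(0)    == -1);  /* size 0 is invalid           */
    assert(hak_tiny_size_to_class(7)    ==  0);  /* 7B + 1B header = 8B  -> C0  */
    assert(hak_tiny_size_to_class(8)    ==  1);  /* 8B + 1B header = 9B  -> C1  */
    assert(hak_tiny_size_to_class(2047) ==  7);  /* 2047B + 1B = 2048B   -> C7  */
    assert(hak_tiny_size_to_class(2048) == -1);  /* 2048B+ -> Mid allocator     */
    return 0;
}
```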