Tiny: Enable P0 batch refill by default + docs and task update
Summary - Default P0 ON: Build-time HAKMEM_TINY_P0_BATCH_REFILL=1 remains; runtime gate now defaults to ON (HAKMEM_TINY_P0_ENABLE unset or not '0'). Kill switch preserved via HAKMEM_TINY_P0_DISABLE=1. - Fix critical bug: After freelist→SLL batch splice, increment TinySlabMeta::used by 'from_freelist' to mirror non-P0 behavior (prevents under-accounting and follow-on carve invariants from breaking). - Add low-overhead A/B toggles for triage: HAKMEM_TINY_P0_NO_DRAIN (skip remote drain), HAKMEM_TINY_P0_LOG (emit [P0_COUNTER_OK/MISMATCH] based on total_active_blocks delta). - Keep linear carve fail-fast guards across simple/general/TLS-bump paths. Perf (1T, 100k×256B) - P0 OFF: ~2.73M ops/s (stable) - P0 ON (no drain): ~2.45M ops/s - P0 ON (normal drain): ~2.76M ops/s (fastest) Known - Rare [P0_COUNTER_MISMATCH] warnings persist (non-fatal). Continue auditing active/used balance around batch freelist splice and remote drain splice. Docs - Add docs/TINY_P0_BATCH_REFILL.md (runtime switches, behavior, perf notes). - Update CURRENT_TASK.md with Tiny P0 status (default ON) and next steps.
This commit is contained in:
@ -24,6 +24,7 @@
|
||||
#include "hakmem_tiny_tls_list.h"
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// External declarations for TLS variables and globals
|
||||
extern int g_fast_enable;
|
||||
@ -174,16 +175,44 @@ static inline int quick_refill_from_mag(int class_idx) {
|
||||
return take;
|
||||
}
|
||||
|
||||
// P0 optimization: Batch refill (enabled by default, set HAKMEM_TINY_P0_BATCH_REFILL=0 to disable)
|
||||
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
|
||||
#define HAKMEM_TINY_P0_BATCH_REFILL 1 // Enable P0 by default (verified +5.16% improvement)
|
||||
#endif
|
||||
|
||||
#if HAKMEM_TINY_P0_BATCH_REFILL
|
||||
// P0 optimization: batch refill (runtime gate dispatches between batch and
// simple refill paths for A/B testing)
// - Default is ON; set HAKMEM_TINY_P0_ENABLE=0 to explicitly disable.
|
||||
#include "hakmem_tiny_refill_p0.inc.h"
|
||||
// Alias for compatibility
|
||||
#define sll_refill_small_from_ss sll_refill_batch_from_ss
|
||||
|
||||
// Debug helper: verify linear carve stays within slab usable bytes (Fail-Fast)
|
||||
static inline int tiny_linear_carve_guard(TinyTLSSlab* tls,
|
||||
TinySlabMeta* meta,
|
||||
size_t stride,
|
||||
uint32_t reserve,
|
||||
const char* stage) {
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
(void)tls; (void)meta; (void)stride; (void)reserve; (void)stage;
|
||||
return 1;
|
||||
#else
|
||||
if (!tls) return 0;
|
||||
size_t usable = (tls->slab_idx == 0)
|
||||
? SUPERSLAB_SLAB0_USABLE_SIZE
|
||||
: SUPERSLAB_SLAB_USABLE_SIZE;
|
||||
size_t needed = ((size_t)meta->carved + (size_t)reserve) * stride;
|
||||
if (__builtin_expect(needed > usable, 0)) {
|
||||
fprintf(stderr,
|
||||
"[LINEAR_GUARD] stage=%s cls=%d slab=%d carved=%u used=%u cap=%u "
|
||||
"stride=%zu reserve=%u needed=%zu usable=%zu\n",
|
||||
stage ? stage : "linear",
|
||||
tls->ss ? tls->ss->size_class : -1,
|
||||
tls->slab_idx,
|
||||
meta ? meta->carved : 0u,
|
||||
meta ? meta->used : 0u,
|
||||
meta ? meta->capacity : 0u,
|
||||
stride,
|
||||
reserve,
|
||||
needed,
|
||||
usable);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Refill a few nodes directly into TLS SLL from TLS-cached SuperSlab (owner-thread only)
|
||||
// Note: If HAKMEM_TINY_P0_BATCH_REFILL is enabled, sll_refill_batch_from_ss is used instead
|
||||
@ -196,6 +225,19 @@ __attribute__((noinline)) int sll_refill_small_from_ss(int class_idx, int max_ta
|
||||
static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
|
||||
#endif
|
||||
if (!g_use_superslab || max_take <= 0) return 0;
|
||||
// ランタイムA/B: P0を有効化している場合はバッチrefillへ委譲
|
||||
do {
|
||||
// 既定: ON(HAKMEM_TINY_P0_ENABLE=0 で明示的にOFF)
|
||||
static int g_p0_enable = -1;
|
||||
if (__builtin_expect(g_p0_enable == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_P0_ENABLE");
|
||||
// 環境変数が'0'のときだけ無効、それ以外(未設定含む)は有効
|
||||
g_p0_enable = (e && *e && *e == '0') ? 0 : 1;
|
||||
}
|
||||
if (__builtin_expect(g_p0_enable, 1)) {
|
||||
return sll_refill_batch_from_ss(class_idx, max_take);
|
||||
}
|
||||
} while (0);
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
if (!tls->ss) {
|
||||
// Try to obtain a SuperSlab for this class
|
||||
@ -220,9 +262,13 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
|
||||
size_t bs = g_tiny_class_sizes[class_idx] + ((class_idx != 7) ? 1 : 0);
|
||||
for (; taken < take;) {
|
||||
// Linear first (LIKELY for class7)
|
||||
if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
|
||||
if (__builtin_expect(meta->freelist == NULL && meta->carved < meta->capacity, 1)) {
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, 1, "simple"), 0)) {
|
||||
abort();
|
||||
}
|
||||
uint8_t* base = tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
void* p = (void*)(base + ((size_t)meta->used * bs));
|
||||
void* p = (void*)(base + ((size_t)meta->carved * bs));
|
||||
meta->carved++;
|
||||
meta->used++;
|
||||
*(void**)p = g_tls_sll_head[class_idx];
|
||||
g_tls_sll_head[class_idx] = p;
|
||||
@ -264,9 +310,13 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
|
||||
p = meta->freelist; meta->freelist = *(void**)p; meta->used++;
|
||||
// Track active blocks reserved into TLS SLL
|
||||
ss_active_inc(tls->ss);
|
||||
} else if (__builtin_expect(meta->used < meta->capacity, 1)) {
|
||||
} else if (__builtin_expect(meta->carved < meta->capacity, 1)) {
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, 1, "general"), 0)) {
|
||||
abort();
|
||||
}
|
||||
void* slab_start = tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
p = (char*)slab_start + ((size_t)meta->used * bs);
|
||||
p = (char*)slab_start + ((size_t)meta->carved * bs);
|
||||
meta->carved++;
|
||||
meta->used++;
|
||||
// Track active blocks reserved into TLS SLL
|
||||
ss_active_inc(tls->ss);
|
||||
@ -311,24 +361,29 @@ static inline void* superslab_tls_bump_fast(int class_idx) {
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
TinySlabMeta* meta = tls->meta;
|
||||
if (!meta || meta->freelist != NULL) return NULL; // linear mode only
|
||||
uint16_t used = meta->used;
|
||||
// Use monotonic 'carved' for window arming
|
||||
uint16_t carved = meta->carved;
|
||||
uint16_t cap = meta->capacity;
|
||||
if (used >= cap) return NULL;
|
||||
uint32_t avail = (uint32_t)cap - (uint32_t)used;
|
||||
if (carved >= cap) return NULL;
|
||||
uint32_t avail = (uint32_t)cap - (uint32_t)carved;
|
||||
uint32_t chunk = (g_bump_chunk > 0 ? (uint32_t)g_bump_chunk : 1u);
|
||||
if (chunk > avail) chunk = avail;
|
||||
size_t bs = g_tiny_class_sizes[tls->ss->size_class] + ((tls->ss->size_class != 7) ? 1 : 0);
|
||||
uint8_t* base = tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
uint8_t* start = base + ((size_t)used * bs);
|
||||
// Reserve the chunk once in header (keeps remote-free accounting valid)
|
||||
meta->used = (uint16_t)(used + (uint16_t)chunk);
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, chunk, "tls_bump"), 0)) {
|
||||
abort();
|
||||
}
|
||||
uint8_t* start = base + ((size_t)carved * bs);
|
||||
// Reserve the chunk: advance carved and used accordingly
|
||||
meta->carved = (uint16_t)(carved + (uint16_t)chunk);
|
||||
meta->used = (uint16_t)(meta->used + (uint16_t)chunk);
|
||||
// Account all reserved blocks as active in SuperSlab
|
||||
ss_active_add(tls->ss, chunk);
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
g_bump_arms[class_idx]++;
|
||||
#endif
|
||||
g_tls_bcur[class_idx] = start + bs;
|
||||
g_tls_bend[class_idx] = base + (size_t)chunk * bs;
|
||||
g_tls_bend[class_idx] = start + (size_t)chunk * bs;
|
||||
return (void*)start;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user