TLS SLL triage: add class mask gating (HAKMEM_TINY_SLL_C03_ONLY / HAKMEM_TINY_SLL_MASK), honor mask in inline POP/PUSH and tls_sll_box; SLL-off path stable. This gates SLL to C0..C3 for now to unblock shared SS triage.

Moe Charm (CI)
2025-11-14 01:05:30 +09:00
parent fcf098857a
commit 3b05d0f048
4 changed files with 39 additions and 4 deletions
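
The gate itself is a single bit test against g_tls_sll_class_mask before any SLL touch. A minimal standalone sketch of the intended semantics, assuming the eight tiny classes (0..7) used throughout this commit; sll_class_allowed and the main() driver are illustrative, not part of the change:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors g_tls_sll_class_mask semantics: bit i = 1 → TLS SLL allowed for class i. */
static bool sll_class_allowed(int mask, int class_idx) {
    return (mask & (1u << class_idx)) != 0;
}

int main(void) {
    int mask = 0x0F; /* what HAKMEM_TINY_SLL_C03_ONLY=1 selects: classes 0..3 only */
    for (int c = 0; c < 8; c++)
        printf("class %d: %s\n", c, sll_class_allowed(mask, c) ? "SLL" : "fallback");
    return 0; /* prints SLL for classes 0..3, fallback for 4..7 */
}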

View File

@@ -36,6 +36,7 @@
 // External TLS SLL state (defined in hakmem_tiny.c or equivalent)
 extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
 extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
+extern int g_tls_sll_class_mask; // bit i=1 → SLL allowed for class i
 // ========== Debug guard ==========
@@ -74,6 +75,11 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
 {
     HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");
+    // Class mask gate (narrow triage): if disallowed, reject push
+    if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
+        return false;
+    }
     // Capacity semantics:
     //  - capacity == 0 → disabled (reject)
     //  - capacity > 1<<20 → treat as "unbounded" sentinel (no limit)
@@ -160,6 +166,10 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
 static inline bool tls_sll_pop(int class_idx, void** out)
 {
     HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
+    // Class mask gate: if disallowed, behave as empty
+    if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
+        return false;
+    }
     atomic_fetch_add(&g_integrity_check_class_bounds, 1);
     void* base = g_tls_sll_head[class_idx];
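
Callers observe the gate only through the existing boolean return values, so masked-off classes simply degrade to the pre-SLL behavior. A hypothetical call site, assuming illustrative fallback names (backend_alloc_slow, backend_free_slow) and an illustrative capacity, none of which come from this commit:

extern void* backend_alloc_slow(int class_idx);         /* illustrative fallback */
extern void  backend_free_slow(int class_idx, void* p); /* illustrative fallback */

static void* tiny_alloc_via_sll(int class_idx) {
    void* p = NULL;
    if (!tls_sll_pop(class_idx, &p)) {
        /* A masked-off class behaves exactly like an empty list. */
        p = backend_alloc_slow(class_idx);
    }
    return p;
}

static void tiny_free_via_sll(int class_idx, void* p) {
    if (!tls_sll_push(class_idx, p, 1024 /* illustrative capacity */)) {
        /* Rejected: class masked off, capacity 0, or list full → hand back to backend. */
        backend_free_slow(class_idx, p);
    }
}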

View File

@@ -1219,6 +1219,8 @@ static __thread int g_tls_pt_inited;
 // Frontend FastCache hit/miss counters (Small diagnostics)
 unsigned long long g_front_fc_hit[TINY_NUM_CLASSES] = {0};
 unsigned long long g_front_fc_miss[TINY_NUM_CLASSES] = {0};
+// TLS SLL class mask: bit i = 1 allows SLL for class i. Default: all 8 classes enabled.
+int g_tls_sll_class_mask = 0xFF;
 // Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
 #ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
 inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {

View File

@@ -187,10 +187,24 @@ void hak_tiny_init(void) {
             g_tls_list_enable = (atoi(tlslist_env) != 0) ? 1 : 0;
         }
     }
-    // Phase 9.4: TLS SLL toggle (default ON)
-    char* sll_env = getenv("HAKMEM_TINY_TLS_SLL");
-    if (sll_env && atoi(sll_env) == 0) {
-        g_tls_sll_enable = 0;
+    // Phase 9.4: TLS SLL toggle (default ON) + class mask
+    {
+        char* sll_env = getenv("HAKMEM_TINY_TLS_SLL");
+        if (sll_env && atoi(sll_env) == 0) {
+            g_tls_sll_enable = 0;
+        }
+        // Restrict SLL usage to selected classes
+        extern int g_tls_sll_class_mask;
+        g_tls_sll_class_mask = 0xFF; // default: all classes (0..7)
+        char* c03 = getenv("HAKMEM_TINY_SLL_C03_ONLY");
+        if (c03 && atoi(c03) != 0) {
+            g_tls_sll_class_mask = 0x0F; // classes 0..3 only
+        }
+        char* msk = getenv("HAKMEM_TINY_SLL_MASK");
+        if (msk && *msk) {
+            int v = (int)strtol(msk, NULL, 0);
+            if (v >= 0 && v <= 0xFF) g_tls_sll_class_mask = v;
+        }
     }
     // Path debug enabled?
     {
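
Note that HAKMEM_TINY_SLL_MASK is parsed with strtol(..., NULL, 0), so decimal, hex, and octal spellings are all accepted. A standalone sketch of the same parsing rule (parse_mask is a hypothetical helper, not part of this commit):

#include <stdlib.h>

/* Mirrors the hak_tiny_init() logic above: empty or out-of-range input keeps the fallback. */
static int parse_mask(const char* s, int fallback) {
    if (!s || !*s) return fallback;
    int v = (int)strtol(s, NULL, 0); /* base 0: "15", "0x0F", "017" all parse to 15 */
    return (v >= 0 && v <= 0xFF) ? v : fallback;
}
/* e.g. HAKMEM_TINY_SLL_MASK=0x0F and HAKMEM_TINY_SLL_MASK=15 both gate SLL to C0..C3. */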

View File

@@ -44,6 +44,11 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
 //   Result: 5-10 fewer instructions, better register allocation
 //
 #define TINY_ALLOC_FAST_POP_INLINE(class_idx, ptr_out) do { \
+    extern int g_tls_sll_class_mask; \
+    if (__builtin_expect(((g_tls_sll_class_mask & (1u << (class_idx))) == 0), 0)) { \
+        (ptr_out) = NULL; \
+        break; \
+    } \
     void* _head = g_tls_sll_head[(class_idx)]; \
     if (__builtin_expect(_head != NULL, 1)) { \
         if (__builtin_expect((uintptr_t)_head == TINY_REMOTE_SENTINEL, 0)) { \
@@ -88,6 +93,10 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
 //   byte 0 for HEADER_MAGIC. Without restoration, it finds 0x00 → uses wrong offset → SEGV.
 //   COST: 1 byte write (~1-2 cycles per free, negligible).
 #define TINY_ALLOC_FAST_PUSH_INLINE(class_idx, ptr) do { \
+    extern int g_tls_sll_class_mask; \
+    if (__builtin_expect(((g_tls_sll_class_mask & (1u << (class_idx))) == 0), 0)) { \
+        break; \
+    } \
     if (!(ptr)) break; \
     /* Phase E1-CORRECT: API ptr is USER pointer (= base+1). Convert back to BASE. */ \
     uint8_t* _base = (uint8_t*)(ptr) - 1; \
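
On a masked-off class the POP macro yields NULL and the PUSH macro becomes a no-op break, so existing fast-path callers need no change. A hypothetical caller, assuming an illustrative tiny_alloc_slow fallback that is not part of this commit:

extern void* tiny_alloc_slow(int class_idx); /* illustrative fallback */

static void* tiny_alloc_fast(int class_idx) {
    void* hit = NULL;
    TINY_ALLOC_FAST_POP_INLINE(class_idx, hit); /* sets hit = NULL for masked-off classes */
    if (!hit) {
        hit = tiny_alloc_slow(class_idx); /* masked class or empty list → slow path */
    }
    return hit;
}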