Files
hakmem/core/hakmem_tiny_ace_guard_box.inc
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

102 lines
4.0 KiB
C++

// ============================================================================
// ACE Learning Layer: Runtime parameter setters
// ============================================================================
// Update the remote-free drain threshold of a single tiny size class.
//
//   class_idx : slot in g_remote_drain_thresh_per_class; accepted only when
//               0 <= class_idx < TINY_NUM_CLASSES.
//   threshold : new value; accepted only when 16 <= threshold <= 2048.
//
// A call with either argument out of range is a silent no-op: the stored
// threshold is left untouched and nothing is reported to the caller.
void hkm_ace_set_drain_threshold(int class_idx, uint32_t threshold) {
    const int class_in_range = (class_idx >= 0) && (class_idx < TINY_NUM_CLASSES);
    const int threshold_in_range = (threshold >= 16) && (threshold <= 2048);

    if (class_in_range && threshold_in_range) {
        // Per-class value consumed by the remote free drain logic.
        g_remote_drain_thresh_per_class[class_idx] = (int)threshold;
    }
}
#include <limits.h>
#include "tiny_fc_api.h"
// Number of additional pointers the fast cache of `class_idx` can take.
//
// Returns 0 when `class_idx` is outside [0, TINY_NUM_CLASSES) or when the
// cache already holds at least its effective capacity.
int tiny_fc_room(int class_idx) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return 0;
    }

    TinyFastCache* cache = &g_fast_cache[class_idx];

    // The effective cap is the per-class g_fast_cap entry (env-tunable),
    // never larger than the static storage capacity TINY_FASTCACHE_CAP.
    uint16_t limit = g_fast_cap[class_idx];
    if (limit > TINY_FASTCACHE_CAP) {
        limit = TINY_FASTCACHE_CAP;
    }

    int free_slots = (int)limit - cache->top;
    if (free_slots < 0) {
        free_slots = 0;
    }
    return free_slots;
}
// Push up to `n` pointers from `arr` onto the fast cache of `class_idx`.
//
// Pointers are appended in array order (arr[0] first). The number actually
// stored is limited by the remaining room under the effective capacity
// (g_fast_cap[class_idx], clamped to TINY_FASTCACHE_CAP).
//
// Returns the number of pointers stored; 0 when `arr` is null, `n` is not
// positive, `class_idx` is out of range, or the cache has no room.
int tiny_fc_push_bulk(int class_idx, void** arr, int n) {
    if (n <= 0 || !arr) {
        return 0;
    }
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return 0;
    }

    TinyFastCache* cache = &g_fast_cache[class_idx];

    uint16_t limit = g_fast_cap[class_idx];
    if (limit > TINY_FASTCACHE_CAP) {
        limit = TINY_FASTCACHE_CAP;
    }

    const int free_slots = (int)limit - cache->top;
    if (free_slots <= 0) {
        return 0;
    }
    const int count = (free_slots < n) ? free_slots : n;

    // Copy in groups of four first (light unrolling to cut branch overhead),
    // then finish the 0..3 leftover entries one at a time.
    const int grouped = count & ~3;
    int done = 0;
    while (done < grouped) {
        void** src = arr + done;
        cache->items[cache->top++] = src[0];
        cache->items[cache->top++] = src[1];
        cache->items[cache->top++] = src[2];
        cache->items[cache->top++] = src[3];
        done += 4;
    }
    while (done < count) {
        cache->items[cache->top++] = arr[done];
        done++;
    }
    return count;
}
// ========= Tiny Guard (targeted debug; low overhead when disabled) =========
// Cached enable flag. -1 means "not resolved yet" and makes
// tiny_guard_enabled_runtime() perform its one-time setup.
static int g_tiny_guard_enabled = -1;
// Size class whose allocations tiny_guard_on_alloc() reports. Defaults to 2;
// replaced by HAKMEM_TINY_GUARD_CLASS when that variable is set and non-empty.
static int g_tiny_guard_class = 2;
// Upper bound on guard reports, compared against g_tiny_guard_seen. Defaults
// to 8; replaced by HAKMEM_TINY_GUARD_MAX, and reset to 8 if that is <= 0.
static int g_tiny_guard_limit = 8;
// Per-thread count of guard events, shared by tiny_guard_on_alloc() and
// tiny_guard_on_invalid().
static __thread int g_tiny_guard_seen = 0;
static inline int tiny_guard_enabled_runtime(void) {
if (__builtin_expect(g_tiny_guard_enabled == -1, 0)) {
// Enabled via HAKMEM_DEBUG_LEVEL >= 4 (DEBUG level)
// Legacy: HAKMEM_TINY_GUARD=1 still works for compatibility
g_tiny_guard_enabled = hak_debug_check_level("HAKMEM_TINY_GUARD", 4);
// Special-purpose parameters (kept for targeted debugging)
const char* ec = getenv("HAKMEM_TINY_GUARD_CLASS");
if (ec && *ec) g_tiny_guard_class = atoi(ec);
const char* el = getenv("HAKMEM_TINY_GUARD_MAX");
if (el && *el) g_tiny_guard_limit = atoi(el);
if (g_tiny_guard_limit <= 0) g_tiny_guard_limit = 8;
}
return g_tiny_guard_enabled;
}
// Externally visible query: returns the value of tiny_guard_enabled_runtime()
// (non-zero when the tiny guard is active).
int tiny_guard_is_enabled(void) {
    const int active = tiny_guard_enabled_runtime();
    return active;
}
// Write one "[TGUARD] <tag>: xx xx ..." line to stderr, showing the `n` bytes
// starting at `p` as two-digit lowercase hex values.
static void tiny_guard_dump_bytes(const char* tag, const uint8_t* p, size_t n) {
    const uint8_t* cursor = p;

    fprintf(stderr, "[TGUARD] %s:", tag);
    for (size_t remaining = n; remaining != 0; --remaining) {
        fprintf(stderr, " %02x", *cursor);
        ++cursor;
    }
    fprintf(stderr, "\n");
}
// Debug hook: report an allocation of the guarded size class on stderr.
//
//   cls    : size class of the allocation; only g_tiny_guard_class is reported
//   base   : start of the block; its first byte is printed as "hdr"
//   user   : pointer handed to the caller (printed only)
//   stride : distance in bytes from `base` to the next block
//
// Does nothing when the guard is disabled, when `cls` is not the guarded
// class, or once this thread has produced g_tiny_guard_limit reports.
// NOTE(review): reads 4 bytes at base + stride; assumes that memory is
// readable — confirm for the last block of a region.
void tiny_guard_on_alloc(int cls, void* base, void* user, size_t stride) {
    if (!tiny_guard_enabled_runtime() || cls != g_tiny_guard_class) return;

    // Count only while below the limit. Incrementing unconditionally would
    // eventually overflow the signed counter on a long run (undefined
    // behaviour; in practice it wraps negative and the dumps start again).
    if (g_tiny_guard_seen >= g_tiny_guard_limit) return;
    g_tiny_guard_seen++;

    uint8_t* b = (uint8_t*)base;
    fprintf(stderr, "[TGUARD] alloc cls=%d base=%p user=%p stride=%zu hdr=%02x\n",
            cls, base, user, stride, b[0]);
    // Show the neighbouring headers: the start of this block and of the next one.
    tiny_guard_dump_bytes("around_base", b, (stride >= 8 ? 8 : stride));
    tiny_guard_dump_bytes("next_header", b + stride, 4);
}
// Debug hook: report an invalid block header on stderr.
//
//   user_ptr : user pointer whose header failed validation
//   hdr      : the header byte value that was rejected (printed only)
//
// Prints the bytes at user_ptr - 2 ("prev") and user_ptr ("next"), then the
// 8 bytes before and the 8 bytes starting at `user_ptr`. Does nothing when the
// guard is disabled or once this thread has produced g_tiny_guard_limit
// reports (the counter is shared with tiny_guard_on_alloc).
// NOTE(review): assumes user_ptr - 8 .. user_ptr + 7 is readable — confirm
// against callers.
void tiny_guard_on_invalid(void* user_ptr, uint8_t hdr) {
    if (!tiny_guard_enabled_runtime()) return;

    // Count only while below the limit. Incrementing unconditionally would
    // eventually overflow the signed counter on a long run (undefined
    // behaviour; in practice it wraps negative and the dumps start again).
    if (g_tiny_guard_seen >= g_tiny_guard_limit) return;
    g_tiny_guard_seen++;

    uint8_t* u = (uint8_t*)user_ptr;
    fprintf(stderr, "[TGUARD] invalid header at user=%p hdr=%02x prev=%02x next=%02x\n",
            user_ptr, hdr, *(u - 2), *(u));
    tiny_guard_dump_bytes("dump_before", u - 8, 8);
    tiny_guard_dump_bytes("dump_after", u, 8);
}