feat: Add ACE allocation failure tracing and debug hooks
This commit introduces a comprehensive tracing mechanism for allocation failures within the Adaptive Cache Engine (ACE) component. This feature allows precise identification of the root cause of Out-Of-Memory (OOM) issues related to ACE allocations.

Key changes include:

- **ACE Tracing Implementation**:
  - Added an environment variable to enable/disable detailed logging of allocation failures (checked in code via the `g_hakem_config.ace_trace` flag).
  - Instrumented the ACE allocation paths, including `hkm_ace_alloc`, to distinguish between "Threshold" (size class mismatch), "Exhaustion" (pool depletion), and "MapFail" (OS memory allocation failure).
- **Build System Fixes**:
  - Corrected the build configuration to ensure the required object is properly linked into the build target, resolving a build error.
- **LD_PRELOAD Wrapper Adjustments**:
  - Investigated and understood the wrapper's behavior under `LD_PRELOAD`, particularly its interaction with the wrapper's internal guard checks.
  - Enabled debugging flags for the `LD_PRELOAD` environment to prevent unintended fallbacks to the underlying allocator for non-tiny allocations, allowing comprehensive testing of the allocator.
- **Debugging & Verification**:
  - Introduced temporary verbose logging to pinpoint execution flow issues within interception and routing. These temporary logs have been removed.
  - Created a dedicated test to facilitate testing of the tracing features.

This feature will significantly aid in diagnosing and resolving allocation-related OOM issues in the allocator by providing clear insights into the failure pathways.
This commit is contained in:
@ -1,5 +1,6 @@
|
||||
#include <stdio.h>
|
||||
#include "hakmem_internal.h"
|
||||
#include "hakmem_config.h"
|
||||
#include "hakmem_ace.h"
|
||||
#include "hakmem_pool.h"
|
||||
#include "hakmem_l25_pool.h"
|
||||
@ -81,6 +82,13 @@ void* hkm_ace_alloc(size_t size, uintptr_t site_id, const FrozenPolicy* pol) {
|
||||
HKM_TIME_END(HKM_CAT_POOL_GET, t_mid_get);
|
||||
hkm_ace_stat_mid_attempt(p != NULL);
|
||||
if (p) return p;
|
||||
if (g_hakem_config.ace_trace) {
|
||||
fprintf(stderr, "[ACE-FAIL] Exhaustion: size=%zu class=%zu (MidPool)\n", size, r);
|
||||
}
|
||||
} else {
|
||||
if (g_hakem_config.ace_trace) {
|
||||
fprintf(stderr, "[ACE-FAIL] Threshold: size=%zu wmax=%.2f (MidPool)\n", size, wmax_mid);
|
||||
}
|
||||
}
|
||||
// If rounding not allowed or miss, fallthrough to large class rounding below
|
||||
}
|
||||
@ -94,6 +102,13 @@ void* hkm_ace_alloc(size_t size, uintptr_t site_id, const FrozenPolicy* pol) {
|
||||
HKM_TIME_END(HKM_CAT_L25_GET, t_l25_get);
|
||||
hkm_ace_stat_large_attempt(p != NULL);
|
||||
if (p) return p;
|
||||
if (g_hakem_config.ace_trace) {
|
||||
fprintf(stderr, "[ACE-FAIL] Exhaustion: size=%zu class=%zu (LargePool)\n", size, r);
|
||||
}
|
||||
} else {
|
||||
if (g_hakem_config.ace_trace) {
|
||||
fprintf(stderr, "[ACE-FAIL] Threshold: size=%zu wmax=%.2f (LargePool)\n", size, wmax_large);
|
||||
}
|
||||
}
|
||||
} else if (size > POOL_MAX_SIZE && size < L25_MIN_SIZE) {
|
||||
// Gap 32–64KiB: try rounding up to 64KiB if permitted
|
||||
@ -104,6 +119,13 @@ void* hkm_ace_alloc(size_t size, uintptr_t site_id, const FrozenPolicy* pol) {
|
||||
HKM_TIME_END(HKM_CAT_L25_GET, t_l25_get2);
|
||||
hkm_ace_stat_large_attempt(p != NULL);
|
||||
if (p) return p;
|
||||
if (g_hakem_config.ace_trace) {
|
||||
fprintf(stderr, "[ACE-FAIL] Exhaustion: size=%zu class=64KB (Gap)\n", size);
|
||||
}
|
||||
} else {
|
||||
if (g_hakem_config.ace_trace) {
|
||||
fprintf(stderr, "[ACE-FAIL] Threshold: size=%zu wmax=%.2f (Gap)\n", size, wmax_large);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user