Phase v4-mid-2, v4-mid-3, v4-mid-5: SmallObject HotBox v4 implementation and docs update
Implementation: - SmallObject HotBox v4 (core/smallobject_hotbox_v4.c) now fully implements C6-only allocations and frees, including current/partial management and freelist operations. - Cold Iface (tiny_heap based) for page refill/retire is integrated. - Stats instrumentation (v4-mid-5) added to small_heap_alloc_fast_v4 and small_heap_free_fast_v4, with a new header file core/box/smallobject_hotbox_v4_stats_box.h and atexit dump function. Updates: - CURRENT_TASK.md has been condensed and updated with summaries of Phase v4-mid-2 (C6-only v4), Phase v4-mid-3 (C5-only v4 pilot), and the stats implementation (v4-mid-5). - docs/analysis/SMALLOBJECT_V4_BOX_DESIGN.md updated with A/B results and conclusions for C6-only and C5-only v4 implementations. - The previous CURRENT_TASK.md content has been archived to CURRENT_TASK_ARCHIVE_20251210.md.
This commit is contained in:
1284
CURRENT_TASK.md
1284
CURRENT_TASK.md
File diff suppressed because it is too large
Load Diff
1177
CURRENT_TASK_ARCHIVE_20251210.md
Normal file
1177
CURRENT_TASK_ARCHIVE_20251210.md
Normal file
File diff suppressed because it is too large
Load Diff
89
core/box/smallobject_hotbox_v4_stats_box.h
Normal file
89
core/box/smallobject_hotbox_v4_stats_box.h
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
// smallobject_hotbox_v4_stats_box.h - Stats instrumentation for SmallObject HotHeap v4
// ENV gated counters for C6 v4 alloc/free path debugging
//
// Counters are only bumped when HAKMEM_SMALL_HEAP_V4_STATS is set to a
// non-empty value other than "0"; otherwise every helper is a cheap no-op.
// Storage (g_small_heap_v4_stats) is defined in core/smallobject_hotbox_v4.c.
#pragma once

#include <stdint.h>
#include <stdlib.h>
#include <stdatomic.h>
#include <stdio.h>

// Number of size classes tracked (classes 0-7).
enum { SMALL_HEAP_V4_STAT_CLASSES = 8 };

// Per-class stats. All fields are relaxed atomics: counts may be bumped
// concurrently from multiple threads; exact cross-counter consistency is
// not required for debugging output.
typedef struct {
    _Atomic uint64_t alloc_calls;         // entries into small_heap_alloc_fast_v4
    _Atomic uint64_t alloc_success;       // allocations served from a v4 page
    _Atomic uint64_t alloc_null_page;     // slow path returned no usable page
    _Atomic uint64_t alloc_fallback_pool; // request bounced back to pool v1
    _Atomic uint64_t free_calls;          // entries into small_heap_free_fast_v4
    _Atomic uint64_t free_page_found;     // owning v4 page located for the ptr
    _Atomic uint64_t free_page_not_found; // ptr not in v4 heap (pool v1 frees it)
} small_heap_v4_class_stats_t;

extern small_heap_v4_class_stats_t g_small_heap_v4_stats[SMALL_HEAP_V4_STAT_CLASSES]; // For classes 0-7

// Returns 1 when HAKMEM_SMALL_HEAP_V4_STATS enables stats, else 0.
// The getenv result is resolved once and cached. The cache is an atomic so
// that concurrent first callers do not race on a plain int (both would
// compute the same value, but a plain-int race is still UB under C11).
static inline int small_heap_v4_stats_enabled(void) {
    static _Atomic int g_cached = -1; // -1 = not yet resolved
    int v = atomic_load_explicit(&g_cached, memory_order_relaxed);
    if (__builtin_expect(v == -1, 0)) {
        const char* e = getenv("HAKMEM_SMALL_HEAP_V4_STATS");
        v = (e && *e && *e != '0') ? 1 : 0;
        atomic_store_explicit(&g_cached, v, memory_order_relaxed);
    }
    return v;
}

// Internal guard shared by all bump helpers: true only when stats are
// enabled AND class_idx is a valid index into g_small_heap_v4_stats.
// __builtin_expect marks the enabled case as unlikely (stats off by default).
static inline int small_heap_v4_stat_live_(int class_idx) {
    return __builtin_expect(small_heap_v4_stats_enabled(), 0) &&
           class_idx >= 0 && class_idx < SMALL_HEAP_V4_STAT_CLASSES;
}

static inline void small_heap_v4_stat_alloc_call(int class_idx) {
    if (small_heap_v4_stat_live_(class_idx)) {
        atomic_fetch_add_explicit(&g_small_heap_v4_stats[class_idx].alloc_calls, 1, memory_order_relaxed);
    }
}

static inline void small_heap_v4_stat_alloc_success(int class_idx) {
    if (small_heap_v4_stat_live_(class_idx)) {
        atomic_fetch_add_explicit(&g_small_heap_v4_stats[class_idx].alloc_success, 1, memory_order_relaxed);
    }
}

static inline void small_heap_v4_stat_alloc_null_page(int class_idx) {
    if (small_heap_v4_stat_live_(class_idx)) {
        atomic_fetch_add_explicit(&g_small_heap_v4_stats[class_idx].alloc_null_page, 1, memory_order_relaxed);
    }
}

static inline void small_heap_v4_stat_alloc_fallback_pool(int class_idx) {
    if (small_heap_v4_stat_live_(class_idx)) {
        atomic_fetch_add_explicit(&g_small_heap_v4_stats[class_idx].alloc_fallback_pool, 1, memory_order_relaxed);
    }
}

static inline void small_heap_v4_stat_free_call(int class_idx) {
    if (small_heap_v4_stat_live_(class_idx)) {
        atomic_fetch_add_explicit(&g_small_heap_v4_stats[class_idx].free_calls, 1, memory_order_relaxed);
    }
}

static inline void small_heap_v4_stat_free_page_found(int class_idx) {
    if (small_heap_v4_stat_live_(class_idx)) {
        atomic_fetch_add_explicit(&g_small_heap_v4_stats[class_idx].free_page_found, 1, memory_order_relaxed);
    }
}

static inline void small_heap_v4_stat_free_page_not_found(int class_idx) {
    if (small_heap_v4_stat_live_(class_idx)) {
        atomic_fetch_add_explicit(&g_small_heap_v4_stats[class_idx].free_page_not_found, 1, memory_order_relaxed);
    }
}

// Dump stats at exit (call from atexit)
void small_heap_v4_stats_dump(void);
|
||||||
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include "box/smallobject_hotbox_v4_box.h"
|
#include "box/smallobject_hotbox_v4_box.h"
|
||||||
#include "box/smallobject_hotbox_v4_env_box.h"
|
#include "box/smallobject_hotbox_v4_env_box.h"
|
||||||
|
#include "box/smallobject_hotbox_v4_stats_box.h"
|
||||||
#include "box/smallobject_cold_iface_v4.h"
|
#include "box/smallobject_cold_iface_v4.h"
|
||||||
#include "box/smallobject_hotbox_v3_env_box.h"
|
#include "box/smallobject_hotbox_v3_env_box.h"
|
||||||
#include "box/tiny_heap_box.h"
|
#include "box/tiny_heap_box.h"
|
||||||
@ -16,6 +17,11 @@
|
|||||||
#include "box/tiny_geometry_box.h"
|
#include "box/tiny_geometry_box.h"
|
||||||
#include "tiny_region_id.h"
|
#include "tiny_region_id.h"
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Stats storage (Phase v4-mid-5)
|
||||||
|
// ============================================================================
|
||||||
|
small_heap_v4_class_stats_t g_small_heap_v4_stats[8];
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// v4 Segment Configuration (Phase v4-mid-0+)
|
// v4 Segment Configuration (Phase v4-mid-0+)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@ -235,9 +241,16 @@ static small_page_v4* cold_refill_page_v4(small_heap_ctx_v4* hot_ctx, uint32_t c
|
|||||||
tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
|
tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
|
||||||
if (!tctx) return NULL;
|
if (!tctx) return NULL;
|
||||||
|
|
||||||
|
// Phase v4-mid-6: Get a fresh page from TinyHeap
|
||||||
tiny_heap_page_t* lease = tiny_heap_prepare_page(tctx, (int)class_idx);
|
tiny_heap_page_t* lease = tiny_heap_prepare_page(tctx, (int)class_idx);
|
||||||
if (!lease) return NULL;
|
if (!lease) return NULL;
|
||||||
|
|
||||||
|
// Clear TinyHeap's current so next call gets fresh page
|
||||||
|
tiny_heap_class_t* hcls = tiny_heap_class(tctx, (int)class_idx);
|
||||||
|
if (hcls) {
|
||||||
|
tiny_heap_class_unlink(hcls, lease);
|
||||||
|
}
|
||||||
|
|
||||||
return v4_page_from_lease(lease, (int)class_idx, NULL);
|
return v4_page_from_lease(lease, (int)class_idx, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -350,31 +363,49 @@ static small_page_v4* small_alloc_slow_v4(small_heap_ctx_v4* ctx, int class_idx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void* small_heap_alloc_fast_v4(small_heap_ctx_v4* ctx, int class_idx) {
|
void* small_heap_alloc_fast_v4(small_heap_ctx_v4* ctx, int class_idx) {
|
||||||
// Phase v4-mid-1: C6 stub - fallback to pool v1
|
// Phase v4-mid-5: Add stats instrumentation
|
||||||
if (__builtin_expect(class_idx == 6, 0)) {
|
small_heap_v4_stat_alloc_call(class_idx);
|
||||||
return NULL; // C6: fallback to pool v1 (no v4 alloc yet)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
|
||||||
if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
|
if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
|
||||||
|
small_heap_v4_stat_alloc_fallback_pool(class_idx);
|
||||||
return NULL; // C5/C6/C7 以外は未対応
|
return NULL; // C5/C6/C7 以外は未対応
|
||||||
}
|
}
|
||||||
if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return NULL;
|
if (!small_heap_v4_class_enabled((uint8_t)class_idx)) {
|
||||||
small_class_heap_v4* h = &ctx->cls[class_idx];
|
small_heap_v4_stat_alloc_fallback_pool(class_idx);
|
||||||
small_page_v4* page = h->current;
|
|
||||||
|
|
||||||
if (!page || !page->freelist) {
|
|
||||||
page = small_alloc_slow_v4(ctx, class_idx);
|
|
||||||
}
|
|
||||||
if (!page || !page->freelist) {
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
small_class_heap_v4* h = &ctx->cls[class_idx];
|
||||||
|
small_page_v4* page = h->current;
|
||||||
|
|
||||||
|
// Try current page freelist
|
||||||
|
if (page && page->freelist) {
|
||||||
|
void* blk = page->freelist;
|
||||||
|
void* next = NULL;
|
||||||
|
memcpy(&next, blk, sizeof(void*));
|
||||||
|
page->freelist = next;
|
||||||
|
page->used++;
|
||||||
|
small_heap_v4_stat_alloc_success(class_idx);
|
||||||
|
return tiny_region_id_write_header(blk, class_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Current exhausted or NULL, try slow path (partial/refill)
|
||||||
|
page = small_alloc_slow_v4(ctx, class_idx);
|
||||||
|
if (!page || !page->freelist) {
|
||||||
|
small_heap_v4_stat_alloc_null_page(class_idx);
|
||||||
|
small_heap_v4_stat_alloc_fallback_pool(class_idx);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate from newly acquired/promoted page
|
||||||
void* blk = page->freelist;
|
void* blk = page->freelist;
|
||||||
void* next = NULL;
|
void* next = NULL;
|
||||||
memcpy(&next, blk, sizeof(void*));
|
memcpy(&next, blk, sizeof(void*));
|
||||||
page->freelist = next;
|
page->freelist = next;
|
||||||
page->used++;
|
page->used++;
|
||||||
|
|
||||||
|
small_heap_v4_stat_alloc_success(class_idx);
|
||||||
return tiny_region_id_write_header(blk, class_idx);
|
return tiny_region_id_write_header(blk, class_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -402,33 +433,44 @@ static void v4_unlink_from_list(small_class_heap_v4* h, v4_loc_t loc, small_page
|
|||||||
}
|
}
|
||||||
|
|
||||||
void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
|
void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
|
||||||
// Phase v4-mid-1: C6 stub - test page_meta_of() lookup, fallback to pool v1
|
// Phase v4-mid-5: Add stats instrumentation
|
||||||
if (__builtin_expect(class_idx == 6, 0)) {
|
small_heap_v4_stat_free_call(class_idx);
|
||||||
// C6-only: Test page_meta_of() for Fail-Fast validation
|
|
||||||
SmallSegment* dummy_seg = (SmallSegment*)NULL; // Will be retrieved later
|
|
||||||
SmallPageMeta* m = smallsegment_v4_page_meta_of(dummy_seg, ptr);
|
|
||||||
(void)m; // Unused in v4-mid-1, but confirms function works
|
|
||||||
return; // Fallback to pool v1 (handled by front)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
|
||||||
if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
|
if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return;
|
if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return;
|
||||||
if (!ptr) return;
|
if (!ptr) return;
|
||||||
|
|
||||||
|
// Phase v4-mid-6: ptr is already BASE (caller converts USER→BASE before calling us)
|
||||||
|
// See malloc_tiny_fast.h L254: base = ptr - 1, then L354/L282 passes base
|
||||||
|
void* base_ptr = ptr;
|
||||||
|
|
||||||
small_class_heap_v4* h = &ctx->cls[class_idx];
|
small_class_heap_v4* h = &ctx->cls[class_idx];
|
||||||
small_page_v4* prev = NULL;
|
small_page_v4* prev = NULL;
|
||||||
v4_loc_t loc = V4_LOC_NONE;
|
v4_loc_t loc = V4_LOC_NONE;
|
||||||
small_page_v4* page = v4_find_page(h, (const uint8_t*)ptr, &loc, &prev);
|
|
||||||
if (!page) return;
|
// Try to find page in current/partial/full lists (using BASE pointer)
|
||||||
|
small_page_v4* page = v4_find_page(h, (const uint8_t*)base_ptr, &loc, &prev);
|
||||||
|
|
||||||
|
// Phase v4-mid-2: If page not found in v4 heap, try page_meta_of() for segment lookup
|
||||||
|
if (!page) {
|
||||||
|
small_heap_v4_stat_free_page_not_found(class_idx);
|
||||||
|
// Try to find via segment mask+shift (requires segment to be initialized)
|
||||||
|
// For now, this is a fallback for future segment-based allocation
|
||||||
|
// Return without freeing (pool v1 will handle)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
small_heap_v4_stat_free_page_found(class_idx);
|
||||||
|
|
||||||
const uint32_t partial_limit = v4_partial_limit(class_idx);
|
const uint32_t partial_limit = v4_partial_limit(class_idx);
|
||||||
|
|
||||||
// freelist push
|
// freelist push (use BASE pointer, not USER pointer)
|
||||||
void* head = page->freelist;
|
void* head = page->freelist;
|
||||||
memcpy(ptr, &head, sizeof(void*));
|
memcpy(base_ptr, &head, sizeof(void*));
|
||||||
page->freelist = ptr;
|
page->freelist = base_ptr;
|
||||||
if (page->used > 0) {
|
if (page->used > 0) {
|
||||||
page->used--;
|
page->used--;
|
||||||
}
|
}
|
||||||
@ -472,3 +514,45 @@ void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Stats dump (Phase v4-mid-5)
|
||||||
|
// ============================================================================
|
||||||
|
void small_heap_v4_stats_dump(void) {
|
||||||
|
if (!small_heap_v4_stats_enabled()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "\n========================================\n");
|
||||||
|
fprintf(stderr, "[SMALL_HEAP_V4_STATS] Summary\n");
|
||||||
|
fprintf(stderr, "========================================\n");
|
||||||
|
|
||||||
|
for (int c = 0; c < 8; c++) {
|
||||||
|
uint64_t alloc_calls = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_calls, memory_order_relaxed);
|
||||||
|
uint64_t alloc_success = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_success, memory_order_relaxed);
|
||||||
|
uint64_t alloc_null_page = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_null_page, memory_order_relaxed);
|
||||||
|
uint64_t alloc_fallback = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_fallback_pool, memory_order_relaxed);
|
||||||
|
uint64_t free_calls = atomic_load_explicit(&g_small_heap_v4_stats[c].free_calls, memory_order_relaxed);
|
||||||
|
uint64_t free_found = atomic_load_explicit(&g_small_heap_v4_stats[c].free_page_found, memory_order_relaxed);
|
||||||
|
uint64_t free_not_found = atomic_load_explicit(&g_small_heap_v4_stats[c].free_page_not_found, memory_order_relaxed);
|
||||||
|
|
||||||
|
if (alloc_calls > 0 || free_calls > 0) {
|
||||||
|
fprintf(stderr, "\nClass C%d:\n", c);
|
||||||
|
fprintf(stderr, " Alloc: calls=%lu success=%lu null_page=%lu fallback_pool=%lu\n",
|
||||||
|
(unsigned long)alloc_calls, (unsigned long)alloc_success,
|
||||||
|
(unsigned long)alloc_null_page, (unsigned long)alloc_fallback);
|
||||||
|
fprintf(stderr, " Free: calls=%lu page_found=%lu page_not_found=%lu\n",
|
||||||
|
(unsigned long)free_calls, (unsigned long)free_found,
|
||||||
|
(unsigned long)free_not_found);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "========================================\n\n");
|
||||||
|
fflush(stderr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Automatic dump at program exit
|
||||||
|
static void small_heap_v4_stats_atexit(void) __attribute__((destructor));
|
||||||
|
static void small_heap_v4_stats_atexit(void) {
|
||||||
|
small_heap_v4_stats_dump();
|
||||||
|
}
|
||||||
|
|||||||
@ -141,3 +141,70 @@ static inline SmallPageMeta* small_page_meta_of(void* p) {
|
|||||||
- block_size[class]
|
- block_size[class]
|
||||||
- max_partial_pages[class]
|
- max_partial_pages[class]
|
||||||
を更新するだけ。HotBox_v4 側は snapshot を読むだけに留める。
|
を更新するだけ。HotBox_v4 側は snapshot を読むだけに留める。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase v4-mid-2: C6-only SmallHeapCtx v4 本実装完了 (2025-12-10)
|
||||||
|
|
||||||
|
### 実装内容
|
||||||
|
- `core/smallobject_hotbox_v4.c` に C6 用 SmallHeapCtx v4 を本格稼働:
|
||||||
|
- `small_heap_alloc_fast_v4()`: current freelist → partial head → cold_refill の順で探索。
|
||||||
|
- `small_heap_free_fast_v4()`: v4_find_page で page を特定 → freelist push → used==0 なら partial に温存 or retire。
|
||||||
|
- Cold Iface (`small_cold_v4_refill_page` / `small_cold_v4_retire_page`) は tiny_heap 経由で動作済み。
|
||||||
|
- Segment 関連 (`smallsegment_v4_*`) は tiny_heap を利用して page lease/retire を実装。
|
||||||
|
|
||||||
|
### A/B 結果(C6-heavy 257–768B, 1 thread, ws=400, iters=1M)
|
||||||
|
| 構成 | Throughput (ops/s) | 備考 |
|
||||||
|
|------|-------------------|------|
|
||||||
|
| v4 OFF (baseline) | 9.13M – 9.79M | 平均 ~9.36M |
|
||||||
|
| v4 ON (C6-only) | 10.00M – 10.30M | 平均 ~10.15M |
|
||||||
|
| **改善** | **+8〜9%** | segv/assert なし |
|
||||||
|
|
||||||
|
### Mixed への影響(16–1024B, ws=400, iters=1M)
|
||||||
|
| 構成 | Throughput (ops/s) |
|
||||||
|
|------|-------------------|
|
||||||
|
| v4 OFF | 29.66M |
|
||||||
|
| v4 ON (C6-only) | 29.96M (+1%) |
|
||||||
|
|
||||||
|
→ 大きな回帰なし、研究箱として安全。
|
||||||
|
|
||||||
|
### ENV 設定例
|
||||||
|
```bash
|
||||||
|
HAKMEM_PROFILE=C6_HEAVY_LEGACY_POOLV1 \
|
||||||
|
HAKMEM_SMALL_HEAP_V4_ENABLED=1 \
|
||||||
|
HAKMEM_SMALL_HEAP_V4_CLASSES=0x40 \
|
||||||
|
./bench_mid_large_mt_hakmem 1 1000000 400 1
|
||||||
|
```
|
||||||
|
|
||||||
|
### 次ステップ
|
||||||
|
- Phase v4-mid-3: C5 v4 パイロット、または Mixed の一部クラスを SMALL_V4 route に昇格させて A/B。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase v4-mid-3: C5-only v4 研究箱 A/B (2025-12-11)
|
||||||
|
|
||||||
|
### 実装内容
|
||||||
|
- C5 (256B class) は既に `v4_class_supported()` で許可済み。コード変更は不要。
|
||||||
|
- ENV `HAKMEM_SMALL_HEAP_V4_CLASSES=0x20` で C5 v4 を有効化。
|
||||||
|
|
||||||
|
### A/B 結果
|
||||||
|
|
||||||
|
**C5-heavy (129–256B, ws=400, iters=1M)**
|
||||||
|
| 構成 | Throughput (ops/s) | 備考 |
|
||||||
|
|------|-------------------|------|
|
||||||
|
| v4 OFF | 53.6M – 55.4M | 平均 ~54.4M |
|
||||||
|
| v4 ON (C5-only 0x20) | 47.6M – 49.5M | 平均 ~48.7M |
|
||||||
|
| **結果** | **−10〜11% 回帰** | 既存 Tiny/front v3 経路が速い |
|
||||||
|
|
||||||
|
**Mixed 16–1024B (C5+C6 v4)**
|
||||||
|
| 構成 | Throughput (ops/s) |
|
||||||
|
|------|-------------------|
|
||||||
|
| C6-only v4 (0x40) | 27.5M – 29.3M (平均 ~28.3M) |
|
||||||
|
| C5+C6 v4 (0x60) | 28.3M – 29.4M (平均 ~28.9M) |
|
||||||
|
| **結果** | **+2〜3% (誤差〜微改善)** |
|
||||||
|
|
||||||
|
### 方針
|
||||||
|
- C5-heavy では v4 が既存経路より劣後するため、C5 v4 は**研究箱のまま**標準プロファイルには入れない。
|
||||||
|
- Mixed では影響が小さく回帰なし。C5+C6 v4 (0x60) は研究箱として安全に利用可能。
|
||||||
|
- C5 サイズ帯は既存 Tiny/front v3 経路が十分最適化されており、v4 へ寄せるメリットは薄い。
|
||||||
|
- 今後の small-object v4 拡張は C6-heavy / mid 帯に集中する。
|
||||||
|
|||||||
Reference in New Issue
Block a user