Files
hakmem/core/tiny_c7_ultra_segment.c
Moe Charm (CI) fc1c47043c Phase PERF-ULTRA-REFILL-OPT-1a/1b: C7 ULTRA refill パス最適化
実装内容:
- Phase 1a: Page size macro化
  - TINY_C7_ULTRA_PAGE_SHIFT (16) を定義
  - tiny_c7_ultra_page_of で division → bit shift に変更
  - refill/free での seg_end 計算を multiplication → bit shift に最適化

- Phase 1b: Segment learning を移動
  - segment learning を free初回 → alloc refill時に移動
  - free側での unlikely segment_from_ptr call を削除
  - normal pattern (alloc → free) での segment既学習を前提

ベンチマーク結果(Mixed 16-1024B, 1M iter, ws=400):
  - Baseline: 39.5M ops/s
  - Phase 1a: 39.5M ops/s (誤差範囲)
  - Phase 1b: 42.3M ops/s
  - 最終平均: 43.9M ops/s (+11.1% = +4.4M ops/s)

tiny_c7_ultra_page_of は計測では同じ値だが、実際には以下が改善:
- division コスト削減(数cycle/call)
- free時のsegment learning削除(per-thread 1回削減)
- refill での計算簡素化

これにより全体の refill パス最適化が達成できました。
2025-12-11 22:16:07 +09:00

109 lines
3.2 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_c7_ultra_segment.c - Segment management dedicated to C7 ULTRA (UF-3)
#include "box/tiny_c7_ultra_segment_box.h"
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
// Geometry of a C7 ULTRA segment: one 2 MiB segment split into 64 KiB pages
// (C7 only). Both sizes are powers of two, so a byte offset inside the segment
// can be turned into a page index with a shift instead of a division.
#define TINY_C7_ULTRA_SEG_SIZE ((size_t)(2 * 1024 * 1024))
// log2 of the page size (64 KiB = 2^16). This is the single source of truth
// for the page geometry.
#define TINY_C7_ULTRA_PAGE_SHIFT 16
// Page size in bytes, derived from the shift so that code indexing by shift
// and code dividing by the page size can never disagree.
#define TINY_C7_ULTRA_PAGE_SIZE (((size_t)1) << TINY_C7_ULTRA_PAGE_SHIFT)
// Per-thread cached segment pointer (declared __thread, so every thread has
// its own copy). tiny_c7_ultra_segment_acquire() stores the segment it creates
// here, and tiny_c7_ultra_segment_release() resets it to NULL when the
// released segment is the cached one.
static __thread tiny_c7_ultra_segment_t* g_ultra_seg;
// Reset the base, seg_size, page_size, num_pages and pages fields of *seg to
// NULL / 0. Only the fields are overwritten: nothing is unmapped or freed
// here. A NULL seg is ignored.
static inline void tiny_c7_ultra_segment_clear(tiny_c7_ultra_segment_t* seg) {
    if (seg != NULL) {
        seg->pages = NULL;
        seg->num_pages = 0;
        seg->page_size = 0;
        seg->seg_size = 0;
        seg->base = NULL;
    }
}
// Return the calling thread's C7 ULTRA segment, creating it on first use.
//
// If this thread already has a segment cached in g_ultra_seg, that segment is
// returned unchanged. Otherwise a descriptor and its per-page metadata array
// are calloc'ed (so both start zeroed), an anonymous private read/write
// mapping of TINY_C7_ULTRA_SEG_SIZE bytes is created as backing memory, and
// the new segment is cached in g_ultra_seg.
//
// Returns NULL on failure. In that case nothing is leaked, g_ultra_seg is left
// untouched, and errno still holds the value set by the failing
// calloc()/mmap() call (it is restored after cleanup, because free() is not
// guaranteed to preserve it on every libc).
tiny_c7_ultra_segment_t* tiny_c7_ultra_segment_acquire(void) {
    if (g_ultra_seg) {
        return g_ultra_seg;
    }

    tiny_c7_ultra_segment_t* seg = calloc(1, sizeof *seg);
    if (!seg) {
        return NULL;
    }
    seg->seg_size = TINY_C7_ULTRA_SEG_SIZE;
    seg->page_size = TINY_C7_ULTRA_PAGE_SIZE;
    seg->num_pages = (uint32_t)(seg->seg_size / seg->page_size);

    // One metadata slot per page; calloc zero-fills them.
    seg->pages = calloc(seg->num_pages, sizeof *seg->pages);
    if (!seg->pages) {
        int saved = errno;
        free(seg);
        errno = saved;
        return NULL;
    }

    void* base = mmap(NULL, seg->seg_size, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (base == MAP_FAILED) {
        // Keep mmap's error code visible to the caller across the cleanup.
        int saved = errno;
        free(seg->pages);
        free(seg);
        errno = saved;
        return NULL;
    }

    seg->base = base;
    g_ultra_seg = seg;
    return seg;
}
// Destroy a segment: unmap its backing memory (when base and seg_size are
// both set), free the page metadata array and the descriptor itself.
// If seg is the calling thread's cached segment, the cache entry is reset to
// NULL as well. A NULL seg is ignored.
void tiny_c7_ultra_segment_release(tiny_c7_ultra_segment_t* seg) {
    if (seg == NULL) {
        return;
    }

    // Drop the thread-local reference so it cannot dangle after the free.
    if (g_ultra_seg == seg) {
        g_ultra_seg = NULL;
    }

    const int has_mapping = (seg->base != NULL) && (seg->seg_size != 0);
    if (has_mapping) {
        munmap(seg->base, seg->seg_size);
    }

    free(seg->pages);
    free(seg);
}
// Return the calling thread's cached segment if p lies inside its mapped
// range [base, base + seg_size); otherwise return NULL.
// Only g_ultra_seg (this thread's segment) is consulted. NULL is also
// returned when no segment is cached or the cached one has no base/size.
tiny_c7_ultra_segment_t* tiny_c7_ultra_segment_from_ptr(void* p) {
    tiny_c7_ultra_segment_t* const cur = g_ultra_seg;
    if (cur == NULL || cur->base == NULL || cur->seg_size == 0) {
        return NULL;
    }

    const uintptr_t lo = (uintptr_t)cur->base;
    const uintptr_t at = (uintptr_t)p;

    // Half-open range check: the first byte past the segment is outside.
    if (at >= lo && at < lo + cur->seg_size) {
        return cur;
    }
    return NULL;
}
// Look up the page metadata entry for the page that contains p.
//
// p            - address to look up; resolved through
//                tiny_c7_ultra_segment_from_ptr().
// out_seg      - optional; receives the owning segment on success.
// out_page_idx - optional; receives the page index on success.
//
// Returns a pointer into the segment's pages array, or NULL when p is not
// inside the segment or the computed index is out of range. The out
// parameters are written only on success.
tiny_c7_ultra_page_meta_t* tiny_c7_ultra_page_of(void* p,
                                                 tiny_c7_ultra_segment_t** out_seg,
                                                 uint32_t* out_page_idx) {
    tiny_c7_ultra_segment_t* owner = tiny_c7_ultra_segment_from_ptr(p);
    if (owner == NULL) {
        return NULL;
    }

    // Byte offset of p inside the segment; the page index is that offset
    // shifted by the page-size exponent (a shift instead of a division).
    const size_t byte_off = (size_t)((uintptr_t)p - (uintptr_t)owner->base);
    const uint32_t page_no = (uint32_t)(byte_off >> TINY_C7_ULTRA_PAGE_SHIFT);
    if (page_no >= owner->num_pages) {
        return NULL;
    }

    if (out_seg != NULL) {
        *out_seg = owner;
    }
    if (out_page_idx != NULL) {
        *out_page_idx = page_no;
    }
    return owner->pages + page_no;
}