144 lines
5.0 KiB
C
144 lines
5.0 KiB
C
|
|
#include "super_reg_box.h"
|
|||
|
|
|
|||
|
|
#include <stdatomic.h>
|
|||
|
|
#include <stdio.h>
|
|||
|
|
#include <stdlib.h>
|
|||
|
|
#include <string.h>
|
|||
|
|
|
|||
|
|
#include "hakmem_super_registry.h"
|
|||
|
|
|
|||
|
|
// プロファイル別の実容量・論理上限
|
|||
|
|
static _Atomic int g_super_reg_effective_size = SUPER_REG_SIZE;
|
|||
|
|
static _Atomic int g_super_reg_effective_mask = SUPER_REG_MASK;
|
|||
|
|
static _Atomic int g_super_reg_effective_per_class = SUPER_REG_PER_CLASS;
|
|||
|
|
static _Atomic int g_super_reg_profile_inited = 0;
|
|||
|
|
|
|||
|
|
// 動的に確保する実配列
|
|||
|
|
static SuperRegEntry* g_super_reg_entries = NULL;
|
|||
|
|
static SuperSlab** g_super_reg_by_class_slots = NULL;
|
|||
|
|
static int g_super_reg_by_class_stride = SUPER_REG_PER_CLASS;
|
|||
|
|
static _Atomic int g_super_reg_allocated = 0;
|
|||
|
|
|
|||
|
|
static inline int super_reg_clamp_power_of_two(int requested, int fallback) {
|
|||
|
|
// SUPER_REG_SIZE は 2 のべき乗なので、requested もそれ未満のべき乗に丸める。
|
|||
|
|
if (requested <= 0 || requested > SUPER_REG_SIZE) {
|
|||
|
|
return fallback;
|
|||
|
|
}
|
|||
|
|
// 丸め: 最上位ビットだけを残す(2 のべき乗に丸め下げ)
|
|||
|
|
int v = requested;
|
|||
|
|
v |= v >> 1;
|
|||
|
|
v |= v >> 2;
|
|||
|
|
v |= v >> 4;
|
|||
|
|
v |= v >> 8;
|
|||
|
|
v |= v >> 16;
|
|||
|
|
v = v - (v >> 1);
|
|||
|
|
// 有効値は最低でも 1024 にしておく
|
|||
|
|
if (v < 1024) {
|
|||
|
|
v = 1024;
|
|||
|
|
}
|
|||
|
|
return v;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
static void super_reg_apply_profile(const char* profile) {
|
|||
|
|
if (g_super_reg_profile_inited) {
|
|||
|
|
return;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
const char* env_profile = profile ? profile : getenv("HAKMEM_PROFILE");
|
|||
|
|
const int is_bench = (env_profile && strcmp(env_profile, "bench") == 0);
|
|||
|
|
|
|||
|
|
int eff_size = SUPER_REG_SIZE;
|
|||
|
|
int eff_per_class = SUPER_REG_PER_CLASS;
|
|||
|
|
|
|||
|
|
if (is_bench) {
|
|||
|
|
// 論理上の利用範囲だけ縮める(配列は従来サイズのまま)
|
|||
|
|
eff_size = SUPER_REG_SIZE >> 3; // 1/8 に論理制限
|
|||
|
|
eff_per_class = SUPER_REG_PER_CLASS >> 4; // 1/16
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
eff_size = super_reg_clamp_power_of_two(eff_size, SUPER_REG_SIZE);
|
|||
|
|
eff_per_class = eff_per_class > 0 ? eff_per_class : SUPER_REG_PER_CLASS;
|
|||
|
|
|
|||
|
|
atomic_store_explicit(&g_super_reg_effective_size, eff_size, memory_order_relaxed);
|
|||
|
|
atomic_store_explicit(&g_super_reg_effective_mask, eff_size - 1, memory_order_relaxed);
|
|||
|
|
atomic_store_explicit(&g_super_reg_effective_per_class,
|
|||
|
|
eff_per_class,
|
|||
|
|
memory_order_relaxed);
|
|||
|
|
atomic_store_explicit(&g_super_reg_profile_inited, 1, memory_order_release);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Apply the profile and allocate the registry tables on first use.
//
// box:     currently unused.
// profile: forwarded to super_reg_apply_profile(); NULL selects the
//          HAKMEM_PROFILE environment variable.
//
// Allocates a zeroed entry table of super_reg_effective_size() elements and
// a zeroed per-class slot table of TINY_NUM_CLASSES * per-class-limit
// pointers. Prints a message to stderr and aborts if either allocation
// fails. Returns immediately if the tables were already published.
//
// NOTE(review): nothing serializes the "already allocated?" check against
// the allocation below, so two first-time callers running concurrently could
// both allocate — confirm that callers serialize initialization.
void super_reg_init(SuperRegBox* box, const char* profile) {
    (void)box;
    super_reg_apply_profile(profile);

    if (atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire) != 0) {
        return;
    }

    const int entry_count = super_reg_effective_size();
    const int class_stride = super_reg_effective_per_class();

    // Registry table.
    g_super_reg_entries = calloc((size_t)entry_count, sizeof(SuperRegEntry));
    if (g_super_reg_entries == NULL) {
        fprintf(stderr, "[SUPER_REG] failed to allocate %zu bytes for registry\n",
                (size_t)entry_count * sizeof(SuperRegEntry));
        abort();
    }

    // Per-class table: one contiguous block, class_stride slots per class.
    const size_t slot_count = TINY_NUM_CLASSES * (size_t)class_stride;
    g_super_reg_by_class_slots = calloc(slot_count, sizeof(SuperSlab*));
    if (g_super_reg_by_class_slots == NULL) {
        fprintf(stderr, "[SUPER_REG] failed to allocate %zu bytes for per-class registry\n",
                slot_count * sizeof(SuperSlab*));
        abort();
    }
    g_super_reg_by_class_stride = class_stride;

    // Publish last so acquire readers of the flag see both tables and the stride.
    atomic_store_explicit(&g_super_reg_allocated, 1, memory_order_release);
}
|
|||
|
|
|
|||
|
|
int super_reg_effective_size(void) {
|
|||
|
|
if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
|
|||
|
|
super_reg_apply_profile(NULL);
|
|||
|
|
}
|
|||
|
|
return atomic_load_explicit(&g_super_reg_effective_size, memory_order_relaxed);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
int super_reg_effective_mask(void) {
|
|||
|
|
if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
|
|||
|
|
super_reg_apply_profile(NULL);
|
|||
|
|
}
|
|||
|
|
return atomic_load_explicit(&g_super_reg_effective_mask, memory_order_relaxed);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
int super_reg_effective_per_class(void) {
|
|||
|
|
if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
|
|||
|
|
super_reg_apply_profile(NULL);
|
|||
|
|
}
|
|||
|
|
return atomic_load_explicit(&g_super_reg_effective_per_class, memory_order_relaxed);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
SuperRegEntry* super_reg_entries(void) {
|
|||
|
|
if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
|
|||
|
|
super_reg_init(NULL, NULL);
|
|||
|
|
}
|
|||
|
|
return g_super_reg_entries;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
SuperSlab** super_reg_by_class_slots(void) {
|
|||
|
|
if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
|
|||
|
|
super_reg_init(NULL, NULL);
|
|||
|
|
}
|
|||
|
|
return g_super_reg_by_class_slots;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
int super_reg_by_class_stride(void) {
|
|||
|
|
if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
|
|||
|
|
super_reg_init(NULL, NULL);
|
|||
|
|
}
|
|||
|
|
return g_super_reg_by_class_stride;
|
|||
|
|
}
|