Fix C0 (8B) next pointer overflow and optimize with bitmask lookup
Problem: Class 0 (8B stride) was using offset 1 for next pointer storage, but 8B stride cannot fit [1B header][8B next pointer] - it overflows by 1 byte into the adjacent block. Fix: Use offset 0 for C0 (same as C7), allowing the header to be overwritten. This is safe because: 1. class_map provides out-of-band class_idx lookup (header not needed for free) 2. P3 skips header write by default (header byte is unused anyway) Optimization: Replace branching with bitmask lookup for zero-cost abstraction. - Old: (class_idx == 0 || class_idx == 7) ? 0u : 1u (branch) - New: (0x7Eu >> class_idx) & 1u (branchless) Bit pattern: C0=0, C1-C6=1, C7=0 → 0b01111110 = 0x7E Performance results: - 8B: 85.19M → 85.61M (+0.5%) - 16B: 137.43M → 147.31M (+7.2%) - 64B: 84.21M → 84.90M (+0.8%) Thanks to ChatGPT for spotting the g_tiny_class_sizes vs tiny_nextptr.h mismatch! 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -1,17 +1,18 @@
|
||||
// tiny_nextptr.h - Authoritative next-pointer offset/load/store for tiny boxes
|
||||
//
|
||||
// Finalized Phase E1-CORRECT spec (including physical layout constraints):
|
||||
// P0.1: C7 uses offset 0 (overwrites header), C0-C6 use offset 1 (header preserved)
|
||||
// P0.1 updated: C0 and C7 use offset 0, C1-C6 use offset 1 (header preserved)
|
||||
//
|
||||
// When HAKMEM_TINY_HEADER_CLASSIDX != 0:
|
||||
//
|
||||
// Class 0:
|
||||
// [1B header][15B payload] (total 16B)
|
||||
// → header is preserved; next is stored at base+1, immediately after the header
|
||||
// → next_off = 1
|
||||
// [1B header][7B payload] (total 8B stride)
|
||||
// → an 8B stride cannot hold a 1B header + 8B next pointer (overflows by 1B)
|
||||
// → next is stored at base+0 (overwrites the header)
|
||||
// → next_off = 0
|
||||
//
|
||||
// Class 1〜6:
|
||||
// [1B header][payload >= 8B]
|
||||
// [1B header][payload >= 15B] (stride >= 16B)
|
||||
// → header is preserved; next is stored at base+1, immediately after the header
|
||||
// → next_off = 1
|
||||
//
|
||||
@ -45,12 +46,16 @@
|
||||
#include <execinfo.h> // backtrace for rare misalign diagnostics
|
||||
|
||||
// Compute freelist next-pointer offset within a block for the given class.
|
||||
// P0.1: C7 uses offset 0 (overwrites header), C0-C6 use offset 1 (header preserved)
|
||||
// P0.1 updated: C0 and C7 use offset 0, C1-C6 use offset 1 (header preserved)
|
||||
// Rationale for C0: 8B stride cannot fit [1B header][8B next pointer] without overflow
|
||||
static inline __attribute__((always_inline)) size_t tiny_next_off(int class_idx) {
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
// C0 (8B): offset 0 (8B stride too small for header + 8B pointer - would overflow)
|
||||
// C7 (2048B): offset 0 (overwrites header in freelist - largest class can tolerate)
|
||||
// C0-C6: offset 1 (header preserved - user data is not disturbed)
|
||||
return (class_idx == 7) ? 0u : 1u;
|
||||
// C1-C6: offset 1 (header preserved - user data is not disturbed)
|
||||
// Optimized: Use bitmask lookup instead of branching
|
||||
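// Bit pattern: C0=0, C1-C6=1, C7=0 → 0b01111110 = 0x7E
// NOTE: valid only for 0 <= class_idx <= 7. A negative shift count (or one >= the width of
// unsigned int) is undefined behavior in C, and indices 8 and above yield 0 rather than 1.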
|
||||
return (0x7Eu >> class_idx) & 1u;
|
||||
#else
|
||||
(void)class_idx;
|
||||
return 0u;
|
||||
|
||||
Reference in New Issue
Block a user