/*
 * hakmem/archive/tools/find_24_bytes.c
 *
 * Repository-viewer listing metadata: 134 lines, 6.1 KiB, C.
 */

#include <stdio.h>
/*
 * Prints a step-by-step, back-of-the-envelope investigation into the
 * "24.4 bytes/allocation" figure named in the banner, for a workload of
 * 1,000,000 allocations in the 16B size class.
 *
 * Everything is derived from constants hard-coded below: the program reads no
 * input and never calls an allocator. The structure sizes (88B TinySlab, 16B
 * registry entry, 128KB TLS magazine) and the measured 39.60 MB total are
 * taken as given.
 * NOTE(review): confirm those figures against the allocator sources and the
 * benchmark run they were copied from.
 *
 * Returns 0 unconditionally.
 */
int main(void) {
    /* Workload and geometry used throughout the report. */
    enum {
        ALLOC_COUNT     = 1000000,    /* allocations in the analysed workload */
        BLOCK_BYTES     = 16,         /* payload size of the class under study */
        BLOCKS_PER_SLAB = 4096,
        SLAB_BYTES      = 64 * 1024,
        MAG_CAPACITY    = 2048,       /* TLS magazine items per class */
        MAG_CLASSES     = 8,
        PTR_BYTES       = 8,          /* one pointer per magazine slot / array entry */
        PREALLOC_SLABS  = 4           /* slabs the report says are created at init */
    };

    printf("=== WHERE DOES 24.4 BYTES/ALLOCATION COME FROM? ===\n\n");

    printf("Slab configuration (16B class):\n");
    printf(" Blocks per slab: %d\n", BLOCKS_PER_SLAB);
    printf(" Slab size: %d KB\n\n", SLAB_BYTES / 1024);

    /* ---- Per-block metadata, amortized over the blocks of one slab ---- */
    printf("Per-block overhead breakdown:\n\n");

    /* 1. Primary bitmap: one bit per block, i.e. 1/8 of a byte. */
    const double primary_bitmap_bytes = 1.0 / 8.0;
    printf("1. Primary bitmap: 1 bit/block = %.3f bytes\n", primary_bitmap_bytes);

    /* 2. Summary bitmap: a single 64-bit word per slab (4096 blocks -> 64
     *    bitmap words -> 1 summary word), so 64 bits / 8 / blocks. */
    const double summary_bitmap_bytes = 64.0 / (BLOCKS_PER_SLAB * 8.0);
    printf("2. Summary bitmap: %.3f bytes\n", summary_bitmap_bytes);

    /* 3. TinySlab descriptor: 88 bytes per slab. */
    const double slab_struct_bytes = 88.0 / BLOCKS_PER_SLAB;
    printf("3. TinySlab struct: 88B / %d = %.3f bytes\n", BLOCKS_PER_SLAB, slab_struct_bytes);

    /* 4. Registry entry: 16 bytes, assuming one entry per slab. */
    const double registry_bytes = 16.0 / BLOCKS_PER_SLAB;
    printf("4. Registry entry: 16B / %d = %.3f bytes\n", BLOCKS_PER_SLAB, registry_bytes);

    /* 5. TLS magazine: a per-thread cost, not a per-block one; for a single
     *    thread it is spread over the whole workload. */
    const double tls_magazine_bytes = (128.0 * 1024) / ALLOC_COUNT;
    printf("5. TLS Magazine: 128KB / 1M blocks = %.3f bytes (amortized)\n", tls_magazine_bytes);

    /* ---- 6. Slab-granularity rounding: the final slab is only partly used ---- */
    printf("\n=== THE REAL CULPRIT ===\n\n");

    const int slab_count  = (ALLOC_COUNT + BLOCKS_PER_SLAB - 1) / BLOCKS_PER_SLAB; /* ceil -> 245 */
    const int block_slots = slab_count * BLOCKS_PER_SLAB;                          /* 1,003,520 */
    const int idle_slots  = block_slots - ALLOC_COUNT;                             /* 3,520 */

    printf("Slab allocation analysis:\n");
    printf(" Blocks needed: 1,000,000\n");
    printf(" Slabs allocated: %d × %d blocks = %d total blocks\n",
           slab_count, BLOCKS_PER_SLAB, block_slots);
    printf(" Wasted blocks: %d (%.1f%% waste)\n", idle_slots,
           idle_slots * 100.0 / block_slots);
    printf(" Wasted space: %d blocks × 16B = %.2f KB\n\n",
           idle_slots, idle_slots * (double)BLOCK_BYTES / 1024);

    /* First hypothesis: slabs are larger than the data they carry. */
    const double slab_payload_kb = BLOCKS_PER_SLAB * (double)BLOCK_BYTES / 1024;

    printf("ROOT CAUSE: Oversized slab allocation\n");
    printf(" Each slab: 64 KB (data + metadata + waste)\n");
    printf(" Each slab actually uses: %d blocks × 16B = %.1f KB of data\n",
           BLOCKS_PER_SLAB, slab_payload_kb);
    printf(" Per-slab overhead: 64 KB - %.1f KB = %.1f KB\n\n",
           slab_payload_kb, SLAB_BYTES / 1024 - slab_payload_kb);

    /* ...which the numbers refute: the payload fills the slab exactly. */
    printf("Wait... 4096 × 16B = %d bytes = 64 KB exactly!\n", BLOCKS_PER_SLAB * BLOCK_BYTES);
    printf("So there's NO wasted space in the slab data region.\n\n");

    printf("=== RETHINKING THE PROBLEM ===\n\n");

    /* Second hypothesis: the TLS magazine's own pointer storage. */
    const int magazine_bytes = MAG_CAPACITY * PTR_BYTES * MAG_CLASSES;

    printf("TLS Magazine deep dive:\n");
    printf(" Capacity: 2048 items per class\n");
    printf(" Classes: 8\n");
    printf(" Size per item: 8 bytes (pointer)\n");
    printf(" Total per thread: 2048 × 8B × 8 = %.0f KB\n", magazine_bytes / 1024.0);
    printf(" For 1 thread: %.0f KB = %.2f MB\n\n",
           magazine_bytes / 1024.0, magazine_bytes / (1024.0 * 1024));

    /* 128 KB per thread matches item 5, but that is only ~0.13 bytes per
     * allocation once spread over the workload. */
    printf("=== MYSTERY: Where are the other 24 bytes? ===\n\n");

    /* Third hypothesis: blocks parked inside the magazine inflate RSS. */
    const double magazine_held_kb = MAG_CAPACITY * (double)BLOCK_BYTES / 1024;

    printf("Hypothesis: TLS Magazine is HOLDING allocations\n");
    printf(" If TLS Magazine holds 2048 × 16B = %.1f KB per class\n", magazine_held_kb);
    printf(" For class 1 (16B): 2048 items = %.1f KB of DATA\n", magazine_held_kb);
    printf(" But we measured TOTAL RSS, which includes magazine contents!\n\n");

    printf("Testing theory:\n");
    printf(" At 1M allocations:\n");
    printf(" - Active in program: 1M × 16B = 15.26 MB\n");
    printf(" - Held in TLS mag: ~2048 × 16B × 8 classes = %.2f MB\n",
           MAG_CAPACITY * BLOCK_BYTES * MAG_CLASSES / (1024.0 * 1024));
    printf(" - But wait, TLS mag only holds FREED items, not allocated!\n\n");

    /* Fourth hypothesis: slabs pre-allocated at init (a fixed cost). */
    printf("Let me check the init code...\n");
    printf("From hakmem_tiny.c line 568-574:\n");
    printf(" Pre-allocate slabs for classes 0-3 (8B, 16B, 32B, 64B)\n");
    printf(" That's 4 × 64KB = 256 KB upfront!\n\n");

    printf("Pre-allocation cost:\n");
    printf(" 4 slabs × 64 KB = %.2f MB\n", PREALLOC_SLABS * (SLAB_BYTES / 1024) / 1024.0);
    printf(" But this is FIXED, not per-allocation.\n\n");

    /* Conclusion: part of the cost is the benchmark's own pointer array. */
    printf("=== THE ANSWER ===\n");
    printf("The 24.4 bytes/allocation must be in the PROGRAM's working set,\n");
    printf("not HAKMEM's metadata. Let me check if it's the POINTER ARRAY!\n\n");

    printf("Pointer array overhead:\n");
    printf(" void** ptrs = malloc(1M × 8 bytes) = %.2f MB\n",
           ALLOC_COUNT * PTR_BYTES / (1024.0 * 1024));
    printf(" This is 8 bytes per allocation!\n\n");

    /* 16.71 MB is the residual quoted in the text; convert MiB to bytes and
     * divide by the allocation count. */
    printf("Revised calculation:\n");
    printf(" Data: 1M × 16B = 15.26 MB\n");
    printf(" Pointer array: 1M × 8B = 7.63 MB\n");
    printf(" Expected total (data + ptrs): 22.89 MB\n");
    printf(" Actual measured: 39.60 MB\n");
    printf(" Real overhead: 39.60 - 22.89 = 16.71 MB\n");
    printf(" Per-allocation: 16.71 MB / 1M = %.1f bytes\n\n", 16.71 * 1024 * 1024 / 1000000.0);

    return 0;
}