#include <stdio.h>

/*
 * Back-of-the-envelope walk-through of where the measured per-allocation
 * overhead of the 16-byte size class could come from.
 *
 * Everything here is computed from the hard-coded figures below; nothing is
 * measured at run time. The program only prints its reasoning to stdout.
 */

/* Figures the analysis is built on. */
enum {
    NUM_ALLOCS      = 1000000,   /* allocations in the scenario being analysed */
    BLOCK_SIZE      = 16,        /* bytes per block in the 16B class */
    BLOCKS_PER_SLAB = 4096,
    SLAB_SIZE       = 64 * 1024, /* bytes per slab */
    MAG_CAPACITY    = 2048,      /* TLS magazine items per class */
    NUM_CLASSES     = 8,
    PTR_SIZE        = 8          /* bytes per pointer */
};

/*
 * Prints the overhead analysis step by step.
 *
 * Returns 0 unconditionally.
 */
int main(void)
{
    printf("=== WHERE DOES 24.4 BYTES/ALLOCATION COME FROM? ===\n\n");

    /* For 16B allocations (class 1) */
    const int blocks_per_slab = BLOCKS_PER_SLAB;
    const int slab_size = SLAB_SIZE;

    printf("Slab configuration (16B class):\n");
    printf(" Blocks per slab: %d\n", blocks_per_slab);
    printf(" Slab size: %d KB\n\n", slab_size / 1024);

    /* ---- Per-block metadata overhead ---- */
    printf("Per-block overhead breakdown:\n\n");

    /* 1. Primary bitmap: 1 bit per block = 0.125 bytes */
    const double bitmap_per_block = 1.0 / 8.0;
    printf("1. Primary bitmap: 1 bit/block = %.3f bytes\n", bitmap_per_block);

    /*
     * 2. Summary bitmap
     * 4096 blocks -> 64 bitmap words -> 1 summary word (64 bits),
     * i.e. 64 bits spread over (blocks * 8 bits-per-byte).
     */
    const double summary_per_block = 64.0 / (blocks_per_slab * 8.0);
    printf("2. Summary bitmap: %.3f bytes\n", summary_per_block);

    /* 3. TinySlab metadata: 88 bytes per slab, amortized over its blocks */
    const double slab_meta_per_block = 88.0 / blocks_per_slab;
    printf("3. TinySlab struct: 88B / %d = %.3f bytes\n",
           blocks_per_slab, slab_meta_per_block);

    /* 4. Registry entry (amortized), assuming 1 registry entry per slab */
    const double registry_per_block = 16.0 / blocks_per_slab;
    printf("4. Registry entry: 16B / %d = %.3f bytes\n",
           blocks_per_slab, registry_per_block);

    /*
     * 5. TLS Magazine
     * This is per-thread, not per-block; in the single-threaded case it is
     * 128 KB amortized over 1M blocks.
     */
    const double tls_mag_per_block = (128.0 * 1024) / (double)NUM_ALLOCS;
    printf("5. TLS Magazine: 128KB / 1M blocks = %.3f bytes (amortized)\n",
           tls_mag_per_block);

    /* 6. HIDDEN COST: slab fragmentation (the last slab is partially filled) */
    printf("\n=== THE REAL CULPRIT ===\n\n");

    /* Round up to whole slabs: 245 slabs -> 1,003,520 blocks -> 3,520 spare */
    const int slabs_needed = (NUM_ALLOCS + blocks_per_slab - 1) / blocks_per_slab;
    const int total_blocks_allocated = slabs_needed * blocks_per_slab;
    const int wasted_blocks = total_blocks_allocated - NUM_ALLOCS;

    printf("Slab allocation analysis:\n");
    printf(" Blocks needed: 1,000,000\n");
    printf(" Slabs allocated: %d × %d blocks = %d total blocks\n",
           slabs_needed, blocks_per_slab, total_blocks_allocated);
    printf(" Wasted blocks: %d (%.1f%% waste)\n",
           wasted_blocks, wasted_blocks * 100.0 / total_blocks_allocated);
    printf(" Wasted space: %d blocks × 16B = %.2f KB\n\n",
           wasted_blocks, wasted_blocks * 16.0 / 1024);

    /* Check whether the slab itself is oversized relative to its data */
    const double slab_data_kb = blocks_per_slab * 16.0 / 1024;

    printf("ROOT CAUSE: Oversized slab allocation\n");
    printf(" Each slab: 64 KB (data + metadata + waste)\n");
    printf(" Each slab actually uses: %d blocks × 16B = %.1f KB of data\n",
           blocks_per_slab, slab_data_kb);
    printf(" Per-slab overhead: 64 KB - %.1f KB = %.1f KB\n\n",
           slab_data_kb, 64 - slab_data_kb);

    /* That hypothesis fails for the 16B class: 4096 * 16 = 65536 = 64 KB */
    printf("Wait... 4096 × 16B = %d bytes = 64 KB exactly!\n",
           blocks_per_slab * BLOCK_SIZE);
    printf("So there's NO wasted space in the slab data region.\n\n");

    printf("=== RETHINKING THE PROBLEM ===\n\n");

    /* Check whether the TLS magazine is the issue */
    const int mag_bytes_per_thread = MAG_CAPACITY * PTR_SIZE * NUM_CLASSES;

    printf("TLS Magazine deep dive:\n");
    printf(" Capacity: 2048 items per class\n");
    printf(" Classes: 8\n");
    printf(" Size per item: 8 bytes (pointer)\n");
    printf(" Total per thread: 2048 × 8B × 8 = %.0f KB\n",
           mag_bytes_per_thread / 1024.0);
    printf(" For 1 thread: %.0f KB = %.2f MB\n\n",
           mag_bytes_per_thread / 1024.0,
           mag_bytes_per_thread / (1024.0 * 1024));

    /*
     * 128 KB per thread matches the figure used in step 5, but spread over
     * 1M allocations that is only about 0.13 bytes per allocation.
     */
    printf("=== MYSTERY: Where are the other 24 bytes? ===\n\n");

    /* Check ACTIVE allocations vs TOTAL allocations */
    const double mag_data_kb_per_class = MAG_CAPACITY * 16.0 / 1024;

    printf("Hypothesis: TLS Magazine is HOLDING allocations\n");
    printf(" If TLS Magazine holds 2048 × 16B = %.1f KB per class\n",
           mag_data_kb_per_class);
    printf(" For class 1 (16B): 2048 items = %.1f KB of DATA\n",
           mag_data_kb_per_class);
    printf(" But we measured TOTAL RSS, which includes magazine contents!\n\n");

    printf("Testing theory:\n");
    printf(" At 1M allocations:\n");
    printf(" - Active in program: 1M × 16B = 15.26 MB\n");
    printf(" - Held in TLS mag: ~2048 × 16B × 8 classes = %.2f MB\n",
           MAG_CAPACITY * BLOCK_SIZE * NUM_CLASSES / (1024.0 * 1024));
    printf(" - But wait, TLS mag only holds FREED items, not allocated!\n\n");

    /* The real issue must be something else */
    printf("Let me check the init code...\n");
    printf("From hakmem_tiny.c line 568-574:\n");
    printf(" Pre-allocate slabs for classes 0-3 (8B, 16B, 32B, 64B)\n");
    printf(" That's 4 × 64KB = 256 KB upfront!\n\n");

    printf("Pre-allocation cost:\n");
    printf(" 4 slabs × 64 KB = %.2f MB\n", 4 * 64 / 1024.0);
    printf(" But this is FIXED, not per-allocation.\n\n");

    printf("=== THE ANSWER ===\n");
    printf("The 24.4 bytes/allocation must be in the PROGRAM's working set,\n");
    printf("not HAKMEM's metadata. Let me check if it's the POINTER ARRAY!\n\n");

    printf("Pointer array overhead:\n");
    printf(" void** ptrs = malloc(1M × 8 bytes) = %.2f MB\n",
           NUM_ALLOCS * PTR_SIZE / (1024.0 * 1024));
    printf(" This is 8 bytes per allocation!\n\n");

    /* The MB figures below are fixed text, not computed here */
    printf("Revised calculation:\n");
    printf(" Data: 1M × 16B = 15.26 MB\n");
    printf(" Pointer array: 1M × 8B = 7.63 MB\n");
    printf(" Expected total (data + ptrs): 22.89 MB\n");
    printf(" Actual measured: 39.60 MB\n");
    printf(" Real overhead: 39.60 - 22.89 = 16.71 MB\n");
    printf(" Per-allocation: 16.71 MB / 1M = %.1f bytes\n\n",
           16.71 * 1024 * 1024 / (double)NUM_ALLOCS);

    return 0;
}