#!/bin/bash
# verify_race_condition.sh
# Purpose: Verify the freelist race condition hypothesis
# Usage: ./verify_race_condition.sh
#
# The script refers to ./out/release/... binaries and core/... sources by
# relative path, so it has to be started from the directory holding them.

# Stop at the first command that fails outside of a condition.
set -e

echo "=========================================="
echo "Larson Race Condition Verification Script"
echo "=========================================="
echo ""

# Colors (ANSI escape sequences, interpreted later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Step 1: Verify C7 single-threaded works
echo "Step 1: Verify C7 single-threaded tests..."
echo "--------------------------------------------"

# Runs one single-threaded benchmark under a 10-second timeout.
#   $1    - label shown after "Testing "
#   $2    - log file that receives the benchmark's stdout and stderr
#   $3... - benchmark command line
# Success: prints PASS together with the third field of the log's
# "Throughput" line(s), which is also left in the global THROUGHPUT.
# Failure: prints FAIL, dumps the log and ends the script with status 1.
run_single_threaded_bench() {
  local label=$1 logfile=$2
  shift 2

  echo -n "Testing ${label}... "
  if ! timeout 10 "$@" > "$logfile" 2>&1; then
    echo -e "${RED}❌ FAIL${NC}"
    cat "$logfile"
    exit 1
  fi

  THROUGHPUT=$(grep "Throughput" "$logfile" | awk '{print $3}')
  echo -e "${GREEN}✅ PASS${NC} ($THROUGHPUT ops/s)"
}

run_single_threaded_bench "bench_random_mixed 1024B" /tmp/bench_1024.log \
  ./out/release/bench_random_mixed_hakmem 10000 1024 42

run_single_threaded_bench "bench_fixed_size 1024B" /tmp/bench_fixed_1024.log \
  ./out/release/bench_fixed_size_hakmem 10000 1024 128

echo ""

# Step 2: Test Larson with increasing thread counts
echo "Step 2: Test Larson with increasing thread counts..."
echo "------------------------------------------------------"

# Runs larson_hakmem once per thread count, each run limited to 30 seconds and
# logged to /tmp/larson_<N>t.log. The sweep stops at the first SIGSEGV, which
# is the symptom this script is looking for; any other failure ends the script
# with status 1. `threads` and `EXIT_CODE` keep their last values afterwards.
for threads in 2 3 4 6 8 10; do
  echo -n "Testing Larson with $threads threads... "
  larson_log="/tmp/larson_${threads}t.log"

  if timeout 30 ./out/release/larson_hakmem "$threads" "$threads" 500 10000 1000 12345 1 > "$larson_log" 2>&1; then
    THROUGHPUT=$(grep "Throughput" "$larson_log" | awk '{print $3}')
    echo -e "${GREEN}✅ PASS${NC} ($THROUGHPUT ops/s)"
  else
    # Inside `else`, $? still holds the status of the `if` condition command.
    EXIT_CODE=$?
    case "$EXIT_CODE" in
      139)
        # 139 = 128 + 11: the process was terminated by SIGSEGV.
        echo -e "${RED}❌ SEGV${NC} (exit code 139)"
        echo " → Race condition threshold found: >= $threads threads"

        # Check if coredump exists
        if [ -f core ]; then
          echo " → Coredump found, analyzing..."
          if command -v gdb > /dev/null 2>&1; then
            gdb -batch \
              -ex "bt 5" \
              -ex "info registers" \
              ./out/release/larson_hakmem core 2>&1 | head -30
          else
            echo " → gdb not installed, skipping coredump analysis"
          fi
        fi

        # This is expected behavior (confirms race)
        echo ""
        echo -e "${YELLOW}Race condition confirmed at $threads threads${NC}"
        break
        ;;
      124)
        # GNU timeout reports 124 when the time limit expired, i.e. the run
        # hung or was too slow rather than crashing.
        echo -e "${RED}❌ TIMEOUT${NC} (exit code 124: no result within 30s)"
        tail -20 "$larson_log"
        exit 1
        ;;
      *)
        echo -e "${RED}❌ FAIL${NC} (exit code $EXIT_CODE)"
        tail -20 "$larson_log"
        exit 1
        ;;
    esac
  fi
done

echo ""

# Step 3: Analyze architecture
echo "Step 3: Architecture Analysis..."
echo "----------------------------------"

# Succeeds when some line of file $1 contains "_Atomic" followed, later on the
# same line, by the name $2. This is a textual check over the whole file; it
# does not parse the TinySlabMeta struct.
field_looks_atomic() {
  grep -q -- "_Atomic.*$2" "$1"
}

# Prints the "is atomic" / "is NOT atomic" verdict for field $2 of header $1.
report_field_atomicity() {
  if field_looks_atomic "$1" "$2"; then
    echo -e "${GREEN}✅ $2 is atomic${NC}"
  else
    echo -e "${RED}❌ $2 is NOT atomic (race possible)${NC}"
  fi
}

SLAB_TYPES_HEADER=core/superslab/superslab_types.h

echo "Checking TinySlabMeta definition..."
if [ -r "$SLAB_TYPES_HEADER" ]; then
  # The fallback `echo` matters: a pipeline whose last grep matches nothing
  # returns non-zero, and with `set -e` active that would silently end the
  # script in the middle of this step.
  grep -A8 "typedef struct TinySlabMeta" "$SLAB_TYPES_HEADER" \
    | grep -E "freelist|used" \
    || echo " (no freelist/used field found near the TinySlabMeta typedef)"

  report_field_atomicity "$SLAB_TYPES_HEADER" freelist
  report_field_atomicity "$SLAB_TYPES_HEADER" used
else
  # Without the header nothing can be concluded; do not report "NOT atomic".
  echo -e "${YELLOW}⚠️ $SLAB_TYPES_HEADER not found, skipping atomicity checks${NC}"
fi

echo ""

# Step 4: Check for locking in unified_cache_refill
echo "Step 4: Checking for synchronization in unified_cache_refill..."
echo "----------------------------------------------------------------"

# Succeeds when file $1 mentions pthread_mutex_lock, atomic_compare_exchange
# or atomic_load anywhere.
# NOTE(review): the search covers the whole file, not only the body of
# unified_cache_refill, so a match elsewhere in the file also counts.
has_sync_primitives() {
  grep -qE -- 'pthread_mutex_lock|atomic_compare_exchange|atomic_load' "$1"
}

UNIFIED_CACHE_SRC=core/front/tiny_unified_cache.c

if [ ! -r "$UNIFIED_CACHE_SRC" ]; then
  # A missing file is not evidence of missing locking; say so explicitly.
  echo -e "${YELLOW}⚠️ $UNIFIED_CACHE_SRC not found, cannot check synchronization${NC}"
elif has_sync_primitives "$UNIFIED_CACHE_SRC"; then
  echo -e "${GREEN}✅ Synchronization found${NC}"
else
  echo -e "${RED}❌ No synchronization found (race possible)${NC}"
fi

echo ""

# Step 5: Summary
echo "=========================================="
echo "SUMMARY"
echo "=========================================="
echo ""

# Prints evidence lines [2] and [3] for the Larson sweep.
#   $1 - exit status recorded for a failed Larson run (0 when none failed)
#   $2 - thread count of the last Larson run that was started
summary_larson_evidence() {
  local status=$1 last_threads=$2

  if [ "$status" -ne 139 ]; then
    echo " [2] Larson 2 threads: ✅ Works"
    echo " [3] Larson 3+ threads: ✅ No crash reproduced"
  elif [ "$last_threads" -le 2 ]; then
    echo " [2] Larson 2 threads: ❌ Crashes"
    echo " [3] Larson 3+ threads: ⚠️ Not tested (stopped at 2 threads)"
  else
    echo " [2] Larson 2 threads: ✅ Usually works (low contention)"
    echo " [3] Larson 3+ threads: ❌ Crashes (high contention)"
  fi
}

# Prints the verdict of a grep-based source check.
#   $1 - file to inspect
#   $2 - extended regular expression to look for
#   $3 - text printed when the pattern occurs in the file
#   $4 - text printed when it does not
# An unreadable file yields an "Unknown" verdict instead of $4.
summary_source_verdict() {
  if [ ! -r "$1" ]; then
    echo "⚠️ Unknown ($1 not readable)"
  elif grep -qE -- "$2" "$1"; then
    echo "$3"
  else
    echo "$4"
  fi
}

# The Larson loop stores a failing run's status in EXIT_CODE and only lets the
# script get this far after a failure when that status is 139 (SIGSEGV);
# EXIT_CODE stays unset when every thread count passed.
larson_status=${EXIT_CODE:-0}

echo "Evidence:"
# Step 1 ends the script on any failure, so reaching this line means both
# single-threaded benchmarks passed.
echo " [1] C7 single-threaded: ✅ Works perfectly"
summary_larson_evidence "$larson_status" "${threads:-0}"
echo " [4] TinySlabMeta.freelist: $(summary_source_verdict core/superslab/superslab_types.h '_Atomic.*freelist' '✅ Atomic' '❌ Not atomic')"
echo " [5] TinySlabMeta.used: $(summary_source_verdict core/superslab/superslab_types.h '_Atomic.*used' '✅ Atomic' '❌ Not atomic')"
echo " [6] unified_cache_refill: $(summary_source_verdict core/front/tiny_unified_cache.c 'pthread_mutex_lock|atomic_compare_exchange|atomic_load' '✅ Synchronization found' '❌ No locking')"
echo ""

if [ "$larson_status" -eq 139 ]; then
  echo -e "${YELLOW}Conclusion: Race condition in freelist management${NC}"
  echo ""
  echo "Root cause location:"
  echo " File: core/front/tiny_unified_cache.c"
  echo " Line: 172 (m->freelist = tiny_next_read(class_idx, p))"
  echo " Issue: Non-atomic concurrent access to shared freelist"
  echo ""

  echo "Recommended fix:"
  echo " Option 1: Make TinySlabMeta.freelist atomic (lock-free)"
  echo " Option 2: Add per-slab mutex (simple)"
  echo " Option 3: Enforce thread affinity (workaround)"
  echo ""
else
  # No crash was observed, so the hypothesis is not confirmed by this run.
  echo -e "${GREEN}Conclusion: Race condition NOT reproduced in this run${NC}"
  echo ""
fi

echo "For detailed analysis, see:"
echo " - LARSON_CRASH_ROOT_CAUSE_REPORT.md"
echo " - LARSON_DIAGNOSTIC_PATCH.md"
echo " - LARSON_INVESTIGATION_SUMMARY.md"
echo ""

# Step 6: Offer to apply diagnostic patch
echo "=========================================="
echo "Next Steps"
echo "=========================================="
echo ""
echo "Would you like to:"
echo " A) Apply diagnostic logging patch (confirms race with thread IDs)"
echo " B) Apply thread affinity workaround (quick fix)"
echo " C) Exit and review reports"
echo ""

# Prints the follow-up instructions for menu selection $1 (A, B or C in either
# case); any other value prints "Invalid choice". No patch is applied here:
# options A and B only point at the section of LARSON_DIAGNOSTIC_PATCH.md to
# apply by hand.
handle_next_step_choice() {
  case "$1" in
    A | a)
      echo ""
      echo "Applying diagnostic patch..."
      # This would apply the patch from LARSON_DIAGNOSTIC_PATCH.md
      echo "Please manually apply the patch from LARSON_DIAGNOSTIC_PATCH.md"
      echo "Section: 'Quick Diagnostic (5 minutes)'"
      ;;
    B | b)
      echo ""
      echo "Applying thread affinity workaround..."
      echo "Please manually apply the patch from LARSON_DIAGNOSTIC_PATCH.md"
      echo "Section: 'Quick Workaround (30 minutes)'"
      ;;
    C | c)
      echo ""
      echo "Review the following files:"
      echo " - LARSON_CRASH_ROOT_CAUSE_REPORT.md (detailed analysis)"
      echo " - LARSON_DIAGNOSTIC_PATCH.md (implementation guide)"
      echo " - LARSON_INVESTIGATION_SUMMARY.md (executive summary)"
      ;;
    *)
      echo "Invalid choice"
      ;;
  esac
}

# `read` returns non-zero at end-of-input (stdin closed or redirected from an
# empty source). With `set -e` active an unguarded failure would end the script
# before the closing message, so an empty answer at end-of-input falls back to
# option C. A final line without a trailing newline is still honored.
choice=""
if ! read -r -p "Choice [A/B/C]: " choice && [ -z "$choice" ]; then
  echo ""
  choice=C
fi

handle_next_step_choice "$choice"

echo ""
echo "Verification complete."