#!/usr/bin/env bash
set -euo pipefail

# Generate a compact "free-path review packet" for sharing with ChatGPT Pro.
# Output: Markdown to stdout (copy/paste).
#
# Usage:
#   scripts/make_chatgpt_pro_packet_free_path.sh > /tmp/free_path_packet.md
#
# Environment:
#   CLIP_LINES  Default maximum number of lines kept per excerpt (default: 160).
#
# Notes:
# - Extracts key functions with a simple brace counter.
# - Clips each snippet to keep it shareable.

# Work from the parent of the directory that contains this script, so the
# relative source paths used further down resolve no matter where the script
# is invoked from.
root_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "${root_dir}"

# Default clip is intentionally small; you can override via CLIP_LINES=...
clip="${CLIP_LINES:-160}"
|
|||
|
|
|
|||
|
|
# Abort the script unless the given command is available on PATH.
# Arguments: $1 - command name to look up
# Outputs:   "[packet] missing <name>" on stderr when the lookup fails
# Returns:   0 when found; otherwise exits the shell with status 1
need() {
  local tool="$1"
  if ! command -v "${tool}" >/dev/null 2>&1; then
    echo "[packet] missing ${tool}" >&2
    exit 1
  fi
}
|
|||
|
|
# Fail fast if the external tools used by the extraction pipeline are missing.
need awk
need sed
|
|||
|
|
|
|||
|
|
#######################################
# Print one brace-delimited definition from a source file, clipped to a
# maximum number of lines.
#
# Printing starts at the nth line that matches the regex and stops after the
# first line on which the running "{" / "}" balance drops to zero or below,
# once at least one "{" has been seen. The counter is deliberately naive:
# braces inside comments or string literals are counted too.
#
# Arguments:
#   $1 - path of the file to scan
#   $2 - awk regex selecting the start line. It is passed with `awk -v`,
#        which processes backslash escapes once, so a literal "(" has to be
#        written as "\\(" by the caller.
#   $3 - which match to extract (1 = first)
#   $4 - last line of the extracted text to keep (used as "1,<n>p" in sed)
# Outputs:
#   The clipped excerpt on stdout. If the requested match is never found,
#   stdout stays empty and a "[packet] no match ..." warning goes to stderr.
#######################################
extract_func_n_clip() {
  local file="$1"
  local re="$2"
  local nth="$3"
  local clip_lines="$4"

  awk -v re="${re}" -v nth="${nth}" '
    # Count how many times the single character c occurs in s.
    function count_char(s, c,    i, n) {
      n = 0
      for (i = 1; i <= length(s); i++) {
        if (substr(s, i, 1) == c) n++
      }
      return n
    }

    BEGIN { hit = 0; started = 0; depth = 0; seen_open = 0 }

    {
      # Look for the nth matching line; once found, stop matching.
      if (!started && $0 ~ re) {
        hit++
        if (hit == nth) started = 1
      }

      if (started) {
        print $0
        opens = count_char($0, "{")
        depth += opens
        if (opens > 0) seen_open = 1
        depth -= count_char($0, "}")
        # Balanced again after at least one opening brace: definition is done.
        if (seen_open && depth <= 0) exit 0
      }
    }

    END {
      # Make an empty excerpt diagnosable without changing stdout or status.
      if (!started) {
        printf("[packet] no match #%s for /%s/ in %s\n", nth, re, FILENAME) > "/dev/stderr"
      }
    }
  ' "${file}" | sed -n "1,${clip_lines}p"
}
|
|||
|
|
|
|||
|
|
# Extract the first definition matching a regex, clipped to the script-wide
# default line limit held in ${clip}.
# Arguments: $1 - path of the file to scan
#            $2 - awk regex selecting the start line
# Outputs:   the clipped excerpt on stdout
extract_func() {
  extract_func_n_clip "$1" "$2" 1 "${clip}"
}
|
|||
|
|
|
|||
|
|
# Wrap stdin in a Markdown section: a blank line, a "### `<file>`" heading,
# then the input inside a fenced code block tagged with the given language.
# Arguments: $1 - language tag for the opening fence
#            $2 - label shown in the heading (a file path in this script)
# Inputs:    code to embed, read from stdin
# Outputs:   the Markdown section on stdout
md_code() {
  local lang="$1"
  local file="$2"

  printf '\n### `%s`\n```%s\n' "${file}" "${lang}"
  cat
  printf '```\n'
}
|
|||
|
|
|
|||
|
|
# Packet header: goal, benchmark conditions and the review request.
# The delimiter is quoted so the text is emitted literally (no expansion).
cat <<'MD'
# Hakmem free-path review packet (compact)

Goal: understand remaining fixed costs vs mimalloc/tcmalloc, with Box Theory (single boundary, reversible ENV gates).

SSOT bench conditions (current practice):
- `HAKMEM_PROFILE=MIXED_TINYV3_C7_SAFE`
- `ITERS=20000000 WS=400 RUNS=10`
- run via `scripts/run_mixed_10_cleanenv.sh`

Request:
1) Where is the dominant fixed cost on free path now?
2) What structural change would give +5–10% without breaking Box Theory?
3) What NOT to do (layout tax pitfalls)?
MD
|
|||
|
|
|
|||
|
|
echo ""
echo "## Code excerpts (clipped)"

# We focus on the hot tiny-free pipeline (the most actionable for instruction/branch work).
# If the reviewer needs wrapper/registry code too, we can provide a larger packet.
#
# Each regex is passed through `awk -v`, which processes backslash escapes
# once; that is why literal "(", ")" and "*" are written with doubled backslashes.
# Excerpts B and D use explicit, larger clip limits instead of the default.

# A) tiny_free_gate_try_fast(): user_ptr -> class_idx/base -> tiny_hot_free_fast()/fallback
extract_func core/box/tiny_free_gate_box.h '^static inline int tiny_free_gate_try_fast\\(void\\* user_ptr\\)' | md_code c core/box/tiny_free_gate_box.h

# B) free_tiny_fast(): main Tiny free dispatcher (hot/cold + env snapshot)
extract_func_n_clip core/front/malloc_tiny_fast.h '^static inline int free_tiny_fast\\(void\\* ptr\\)' 1 220 | md_code c core/front/malloc_tiny_fast.h

# C) tiny_hot_free_fast(): TLS unified cache push
extract_func core/box/tiny_front_hot_box.h '^static inline int tiny_hot_free_fast\\(int class_idx, void\\* base\\)' | md_code c core/box/tiny_front_hot_box.h

# D) tiny_legacy_fallback_free_base_with_env(): inline-slots cascade + unified_cache_push(_fast)
extract_func_n_clip core/box/tiny_legacy_fallback_box.h '^static inline void tiny_legacy_fallback_free_base_with_env\\(void\\* base, uint32_t class_idx, const HakmemEnvSnapshot\\* env\\)' 1 260 | md_code c core/box/tiny_legacy_fallback_box.h
|
|||
|
|
|
|||
|
|
# Closing section: the concrete questions for the reviewer.
# The delimiter is quoted so the text is emitted literally (no expansion).
cat <<'MD'

## Questions to answer (please be concrete)

1) In these snippets, which checks/branches are still "per-op fixed taxes" on the hot free path?
- Please point to specific lines/conditions and estimate cost (branches/instructions or dependency chain).

2) Is `tiny_hot_free_fast()` already close to optimal, and the real bottleneck is upstream (user->base/classify/route)?
- If yes, what’s the smallest structural refactor that removes that upstream fixed tax?

3) Should we introduce a "commit once" plan (freeze the chosen free path) — or is branch prediction already making lazy-init checks ~free here?
- If "commit once", where should it live to avoid runtime gate overhead (bench_profile refresh boundary vs per-op)?

4) We have had many layout-tax regressions from code removal/reordering.
- What patterns here are most likely to trigger layout tax if changed?
- How would you stage a safe A/B (same binary, ENV toggle) for your proposal?

5) If you could change just ONE of:
- pointer classification to base/class_idx,
- route determination,
- unified cache push/pop structure,
which is highest ROI for +5–10% on WS=400?

MD

echo ""
# Status goes to stderr: stdout is the Markdown packet and is normally
# redirected to a file, so a progress line there would end up in the packet.
echo "[packet] done" >&2
|