v1.12 track B.1: codecontext sidecar container + HTTP shim

New /opt/boocode/codecontext/ directory holding the codecontext sidecar
that BooCode's tool wrappers (track B.2) will talk to. No BooCode-side
changes yet — this commit lands the sidecar standalone.

- Dockerfile: multi-stage golang:1.24-alpine → alpine:3.20. Clones
  codecontext at v3.2.1 from github.com/nmakod/codecontext (cgo build for
  tree-sitter bindings), builds the shim alongside (CGO_ENABLED=0).
- shim.go: stdlib-only Go HTTP server wrapping codecontext's stdio MCP
  child. Newline-delimited JSON framing per the MCP transport spec
  (NOT LSP-style Content-Length). 8 POST /v1/* endpoints, one per MCP
  tool, plus GET /health. Child supervised via child.Wait() goroutine
  that os.Exit's on death so the container's restart: unless-stopped
  policy fires (Signal(0) on a zombie returns nil and is not a liveness
  check — discovered during kill-restart testing).
- go.mod: no third-party deps; future Go security advisories don't apply.

docker-compose service: joins boocode_net (no host port), mounts
/opt:/opt:ro (BooCode projects live at /opt/<slug>, not exclusively
under /opt/projects), healthcheck on /health.

Verified: build clean, healthcheck reports healthy ~15s after up,
multi-project queries return valid markdown, target_dir swap works on
subtree paths. Kill-restart cycle completes in ~200ms with one failed
health poll observed (no misleading "ok" during the gap). Memory: 24.6
MiB after 5 search_symbols calls, 5.6 MiB after 30 min idle — codecontext
releases the per-call graph between target_dir swaps, so the shim doesn't
hold the indexed state.
This commit is contained in:
2026-05-21 12:30:48 +00:00
parent 5cd3f63df5
commit 4fae77e526
4 changed files with 512 additions and 0 deletions

40
codecontext/Dockerfile Normal file
View File

@@ -0,0 +1,40 @@
# v1.12 Track B — codecontext sidecar container.
#
# Multi-stage build: golang:1.24-alpine builder produces two binaries
# (codecontext from source + our HTTP shim), then a minimal alpine:3.20
# runtime holds both.
#
# No upstream Docker image exists for codecontext. We clone the repo
# directly because the module path declared in go.mod
# (github.com/nuthan-ms/codecontext) differs from the GitHub repo URL
# (github.com/nmakod/codecontext) — `go install` against the GitHub path
# wouldn't resolve. The tagged v3.2.1 source tree is the same either way.
FROM golang:1.24-alpine AS builder
WORKDIR /build
RUN apk add --no-cache git ca-certificates build-base
# Build codecontext from the v3.2.1 tag.
# CGO is required: codecontext binds tree-sitter via cgo.
RUN git clone --depth=1 --branch v3.2.1 https://github.com/nmakod/codecontext.git /build/codecontext
WORKDIR /build/codecontext
RUN CGO_ENABLED=1 GOOS=linux go build -o /build/codecontext-bin ./cmd/codecontext
# Build the shim. Stdlib-only — no go.sum needed.
WORKDIR /build/shim
COPY go.mod ./
COPY shim.go ./
RUN CGO_ENABLED=0 GOOS=linux go build -o /build/shim-bin ./
# Runtime: alpine matches the build target so codecontext's cgo bindings
# resolve against the same musl libc.
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
COPY --from=builder /build/codecontext-bin /usr/local/bin/codecontext
COPY --from=builder /build/shim-bin /usr/local/bin/shim
EXPOSE 8080
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s \
CMD wget -qO- http://localhost:8080/health || exit 1
ENTRYPOINT ["/usr/local/bin/shim"]

3
codecontext/go.mod Normal file
View File

@@ -0,0 +1,3 @@
module github.com/indifferentketchup/boocode-codecontext-shim
go 1.24

442
codecontext/shim.go Normal file
View File

@@ -0,0 +1,442 @@
// boocode-codecontext-shim — wraps codecontext's stdio MCP server with an
// HTTP/JSON facade so the BooCode Node server can call codecontext over the
// container network instead of speaking MCP directly. One process per
// container, holds a single codecontext child via os/exec; concurrent HTTP
// requests are serialized onto the child because codecontext's internal
// CodeContextMCPServer.graph swaps per target_dir (see recon report
// 2026-05-21).
//
// MCP framing is newline-delimited JSON (NDJSON), not LSP-style
// Content-Length — per the MCP stdio transport spec:
// https://spec.modelcontextprotocol.io/specification/server/transports
//
// No third-party deps. Stdlib only.
package main
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
"os/signal"
"sync"
"sync/atomic"
"syscall"
"time"
)
// ---- JSON-RPC types ----
// rpcMessage is shared by request, response, and notification. Notifications
// omit ID; requests omit Result/Error; responses omit Method/Params. omitempty
// + the zero int 0 sentinel works for ID because we never SEND id=0
// (nextID starts at 0 and atomic.AddInt32 returns 1 on the first call).
type rpcMessage struct {
JSONRPC string `json:"jsonrpc"`
ID int `json:"id,omitempty"`
Method string `json:"method,omitempty"`
Params json.RawMessage `json:"params,omitempty"`
Result json.RawMessage `json:"result,omitempty"`
Error *rpcError `json:"error,omitempty"`
}
type rpcError struct {
Code int `json:"code"`
Message string `json:"message"`
}
// callToolResult is the MCP tools/call response shape. codecontext returns
// markdown wrapped in a TextContent entry.
type callToolResult struct {
Content []struct {
Type string `json:"type"`
Text string `json:"text"`
} `json:"content"`
IsError bool `json:"isError,omitempty"`
}
// ---- Globals ----
var (
child *exec.Cmd
childStdin io.WriteCloser
childStdout *bufio.Reader
// Serialize tools/call so codecontext's per-call graph rebuild doesn't
// race itself when concurrent HTTP requests target different projects.
// Initialize/notifications/initialized run before HTTP starts so they
// don't need this lock.
callMu sync.Mutex
pendingMu sync.Mutex
pending = make(map[int]chan *rpcMessage)
nextID int32
)
// ---- MCP framing (NDJSON) ----
func writeMessage(w io.Writer, msg *rpcMessage) error {
body, err := json.Marshal(msg)
if err != nil {
return err
}
// Single write keeps the message atomic across concurrent writers.
// (We don't actually have concurrent writers here — callMu serializes —
// but the +'\n' append needs to be in one syscall regardless.)
_, err = w.Write(append(body, '\n'))
return err
}
func readerLoop(r *bufio.Reader) {
for {
line, err := r.ReadBytes('\n')
if err != nil {
if errors.Is(err, io.EOF) {
log.Printf("reader: EOF (child closed stdout)")
} else {
log.Printf("reader: %v", err)
}
return
}
var msg rpcMessage
if err := json.Unmarshal(line, &msg); err != nil {
log.Printf("reader: malformed JSON: %v (line=%q)", err, line)
continue
}
if msg.ID == 0 {
// Server-initiated notification or progress update; nothing to
// dispatch. codecontext doesn't currently send these but the
// MCP spec allows them.
continue
}
pendingMu.Lock()
ch, ok := pending[msg.ID]
if ok {
delete(pending, msg.ID)
}
pendingMu.Unlock()
if ok {
ch <- &msg
}
}
}
func call(ctx context.Context, method string, params any) (*rpcMessage, error) {
id := int(atomic.AddInt32(&nextID, 1))
ch := make(chan *rpcMessage, 1)
pendingMu.Lock()
pending[id] = ch
pendingMu.Unlock()
paramsJSON, err := json.Marshal(params)
if err != nil {
pendingMu.Lock()
delete(pending, id)
pendingMu.Unlock()
return nil, err
}
msg := &rpcMessage{
JSONRPC: "2.0",
ID: id,
Method: method,
Params: paramsJSON,
}
if err := writeMessage(childStdin, msg); err != nil {
pendingMu.Lock()
delete(pending, id)
pendingMu.Unlock()
return nil, fmt.Errorf("write: %w", err)
}
select {
case resp := <-ch:
return resp, nil
case <-ctx.Done():
pendingMu.Lock()
delete(pending, id)
pendingMu.Unlock()
return nil, ctx.Err()
}
}
func notify(method string, params any) error {
paramsJSON, err := json.Marshal(params)
if err != nil {
return err
}
msg := &rpcMessage{
JSONRPC: "2.0",
Method: method,
Params: paramsJSON,
}
return writeMessage(childStdin, msg)
}
// ---- Child lifecycle ----
func startChild() error {
// `codecontext mcp` with --watch=true (the default) keeps fsnotify
// running on the indexed directory; the per-call target_dir swap
// invalidates and re-indexes on demand. `--target=/opt/projects` is the
// initial scan target — codecontext rebuilds the graph against whatever
// target_dir each call carries, so this is just a valid bootstrap path
// (the default "." is the alpine root and trips on transient /proc fds).
child = exec.Command("codecontext", "mcp", "--target=/opt/projects", "--watch=true")
var err error
childStdin, err = child.StdinPipe()
if err != nil {
return fmt.Errorf("stdin pipe: %w", err)
}
stdout, err := child.StdoutPipe()
if err != nil {
return fmt.Errorf("stdout pipe: %w", err)
}
childStdout = bufio.NewReader(stdout)
// codecontext's own log.SetOutput(os.Stderr) keeps its diagnostic noise
// off the JSON-RPC channel; we just pass-through to our own stderr.
child.Stderr = os.Stderr
if err := child.Start(); err != nil {
return fmt.Errorf("start: %w", err)
}
log.Printf("started codecontext pid=%d", child.Process.Pid)
go readerLoop(childStdout)
// Supervise the child. When codecontext exits (crash, OOM, externally
// pkill'd), child.Wait() returns and we tear the shim down so the
// container's `restart: unless-stopped` policy recreates us with a
// fresh child. Without this goroutine the dead child becomes a zombie
// (Signal(0) on a zombie returns nil, so the health endpoint would lie)
// and HTTP requests would queue forever waiting on responses that will
// never come. Discovered during B.1 kill-restart testing.
go func() {
err := child.Wait()
log.Printf("codecontext exited: %v — shim shutting down", err)
os.Exit(1)
}()
return nil
}
func killChild() {
if child == nil || child.Process == nil {
return
}
log.Printf("killing codecontext pid=%d", child.Process.Pid)
_ = child.Process.Signal(syscall.SIGTERM)
done := make(chan error, 1)
go func() { done <- child.Wait() }()
select {
case <-done:
log.Printf("codecontext exited")
case <-time.After(5 * time.Second):
log.Printf("codecontext did not exit on SIGTERM; sending SIGKILL")
_ = child.Process.Kill()
<-done
}
}
// MCP handshake: client sends initialize, server replies, client follows
// with the notifications/initialized notification. After that, tools/call
// is accepted.
func initializeMCP(ctx context.Context) error {
initParams := map[string]any{
"protocolVersion": "2024-11-05",
"capabilities": map[string]any{},
"clientInfo": map[string]any{
"name": "boocode-codecontext-shim",
"version": "0.1.0",
},
}
resp, err := call(ctx, "initialize", initParams)
if err != nil {
return fmt.Errorf("initialize: %w", err)
}
if resp.Error != nil {
return fmt.Errorf("initialize error %d: %s", resp.Error.Code, resp.Error.Message)
}
if err := notify("notifications/initialized", map[string]any{}); err != nil {
return fmt.Errorf("notifications/initialized: %w", err)
}
log.Printf("MCP handshake complete (server result=%s)", string(resp.Result))
return nil
}
// ---- HTTP ----
func writeJSON(w http.ResponseWriter, status int, body any) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(body)
}
func handleHealth(w http.ResponseWriter, r *http.Request) {
if child == nil || child.Process == nil {
http.Error(w, "no child", http.StatusServiceUnavailable)
return
}
// Signal 0 doesn't actually deliver — it just returns an error if the
// process is gone. Cheaper than parsing /proc.
if err := child.Process.Signal(syscall.Signal(0)); err != nil {
http.Error(w, "child dead: "+err.Error(), http.StatusServiceUnavailable)
return
}
_, _ = io.WriteString(w, "ok")
}
func makeToolHandler(toolName string) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
start := time.Now()
targetDir := "-"
status := "ok"
defer func() {
log.Printf("%s target_dir=%q duration_ms=%d status=%s",
toolName, targetDir, time.Since(start).Milliseconds(), status)
}()
var args json.RawMessage
if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
status = "bad_request"
writeJSON(w, http.StatusBadRequest, map[string]any{
"result": nil,
"error": "invalid JSON body: " + err.Error(),
})
return
}
// Sniff target_dir purely for the access log; pass args through opaque.
var argsMap map[string]any
if json.Unmarshal(args, &argsMap) == nil {
if td, ok := argsMap["target_dir"].(string); ok {
targetDir = td
}
}
ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
defer cancel()
callMu.Lock()
resp, err := call(ctx, "tools/call", map[string]any{
"name": toolName,
"arguments": args,
})
callMu.Unlock()
if err != nil {
status = "rpc_error"
writeJSON(w, http.StatusBadGateway, map[string]any{
"result": nil,
"error": err.Error(),
})
return
}
if resp.Error != nil {
status = "mcp_error"
writeJSON(w, http.StatusOK, map[string]any{
"result": nil,
"error": resp.Error.Message,
})
return
}
var ctr callToolResult
if err := json.Unmarshal(resp.Result, &ctr); err != nil {
status = "parse_error"
writeJSON(w, http.StatusOK, map[string]any{
"result": nil,
"error": "parse result: " + err.Error(),
})
return
}
// codecontext only emits text content. Concatenate (single-entry in
// practice, but the schema allows multiple).
var buf []byte
for _, c := range ctr.Content {
if c.Type == "text" {
buf = append(buf, c.Text...)
}
}
text := string(buf)
if ctr.IsError {
status = "tool_error"
writeJSON(w, http.StatusOK, map[string]any{
"result": nil,
"error": text,
})
return
}
writeJSON(w, http.StatusOK, map[string]any{
"result": text,
"error": nil,
})
}
}
// ---- main ----
func main() {
log.SetOutput(os.Stderr)
log.SetFlags(log.LstdFlags | log.Lmicroseconds)
log.Println("boocode-codecontext-shim starting")
if err := startChild(); err != nil {
log.Fatalf("startChild: %v", err)
}
initCtx, initCancel := context.WithTimeout(context.Background(), 30*time.Second)
if err := initializeMCP(initCtx); err != nil {
initCancel()
killChild()
log.Fatalf("initializeMCP: %v", err)
}
initCancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT)
mux := http.NewServeMux()
// Go 1.22+ method-prefix routing. Any non-listed method → 405 automatically.
mux.HandleFunc("GET /health", handleHealth)
mux.HandleFunc("POST /v1/get_codebase_overview", makeToolHandler("get_codebase_overview"))
mux.HandleFunc("POST /v1/get_file_analysis", makeToolHandler("get_file_analysis"))
mux.HandleFunc("POST /v1/get_symbol_info", makeToolHandler("get_symbol_info"))
mux.HandleFunc("POST /v1/search_symbols", makeToolHandler("search_symbols"))
mux.HandleFunc("POST /v1/get_dependencies", makeToolHandler("get_dependencies"))
mux.HandleFunc("POST /v1/watch_changes", makeToolHandler("watch_changes"))
mux.HandleFunc("POST /v1/get_semantic_neighborhoods", makeToolHandler("get_semantic_neighborhoods"))
mux.HandleFunc("POST /v1/get_framework_analysis", makeToolHandler("get_framework_analysis"))
server := &http.Server{
Addr: ":8080",
Handler: mux,
ReadHeaderTimeout: 5 * time.Second,
}
go func() {
log.Println("listening on :8080")
if err := server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
log.Fatalf("ListenAndServe: %v", err)
}
}()
<-sigChan
log.Println("shutdown signal received")
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second)
_ = server.Shutdown(shutdownCtx)
shutdownCancel()
killChild()
log.Println("exit")
}

View File

@@ -55,6 +55,33 @@ services:
networks: networks:
- boocode_net - boocode_net
# v1.12 Track B: codecontext sidecar. Stdio MCP server wrapped by a small
# HTTP shim (see ./codecontext/). No host port — reached from boocode at
# http://codecontext:8080 over the boocode_net bridge.
#
# Mounts /opt:/opt:ro (not just /opt/projects:ro): BooCode projects live
# at /opt/<slug> on the host, not exclusively under /opt/projects. The
# mount must cover anywhere a project.path could resolve to. Read-only
# because codecontext only analyzes — never writes. The model can't
# arbitrarily set target_dir to a sensitive subtree because the B.2
# wrappers validate target_dir against project.path before calling the
# shim, and the shim isn't reachable from outside boocode_net.
codecontext:
build:
context: ./codecontext
container_name: boocode_codecontext
restart: unless-stopped
networks:
- boocode_net
volumes:
- /opt:/opt:ro
healthcheck:
test: ["CMD-SHELL", "wget -qO- http://localhost:8080/health || exit 1"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
volumes: volumes:
boocode_pgdata: boocode_pgdata: