diff --git a/codecontext/Dockerfile b/codecontext/Dockerfile
new file mode 100644
index 0000000..b61704b
--- /dev/null
+++ b/codecontext/Dockerfile
@@ -0,0 +1,40 @@
+# v1.12 Track B — codecontext sidecar container.
+#
+# Multi-stage build: golang:1.24-alpine builder produces two binaries
+# (codecontext from source + our HTTP shim), then a minimal alpine:3.20
+# runtime holds both.
+#
+# No upstream Docker image exists for codecontext. We clone the repo
+# directly because the module path declared in go.mod
+# (github.com/nuthan-ms/codecontext) differs from the GitHub repo URL
+# (github.com/nmakod/codecontext) — `go install` against the GitHub path
+# wouldn't resolve. The tagged v3.2.1 source tree is the same either way.
+
+FROM golang:1.24-alpine AS builder
+WORKDIR /build
+
+RUN apk add --no-cache git ca-certificates build-base
+
+# Build codecontext from the v3.2.1 tag.
+# CGO is required: codecontext binds tree-sitter via cgo.
+RUN git clone --depth=1 --branch v3.2.1 https://github.com/nmakod/codecontext.git /build/codecontext
+WORKDIR /build/codecontext
+RUN CGO_ENABLED=1 GOOS=linux go build -o /build/codecontext-bin ./cmd/codecontext
+
+# Build the shim. Stdlib-only — no go.sum needed.
+WORKDIR /build/shim
+COPY go.mod ./
+COPY shim.go ./
+RUN CGO_ENABLED=0 GOOS=linux go build -o /build/shim-bin ./
+
+# Runtime: alpine matches the build target so codecontext's cgo bindings
+# resolve against the same musl libc.
+FROM alpine:3.20
+RUN apk add --no-cache ca-certificates
+COPY --from=builder /build/codecontext-bin /usr/local/bin/codecontext
+COPY --from=builder /build/shim-bin /usr/local/bin/shim
+
+EXPOSE 8080
+HEALTHCHECK --interval=30s --timeout=5s --start-period=30s \
+    CMD wget -qO- http://localhost:8080/health || exit 1
+ENTRYPOINT ["/usr/local/bin/shim"]
diff --git a/codecontext/go.mod b/codecontext/go.mod
new file mode 100644
index 0000000..9a38632
--- /dev/null
+++ b/codecontext/go.mod
@@ -0,0 +1,3 @@
+module github.com/indifferentketchup/boocode-codecontext-shim
+
+go 1.24
diff --git a/codecontext/shim.go b/codecontext/shim.go
new file mode 100644
index 0000000..891c364
--- /dev/null
+++ b/codecontext/shim.go
@@ -0,0 +1,552 @@
+// boocode-codecontext-shim — wraps codecontext's stdio MCP server with an
+// HTTP/JSON facade so the BooCode Node server can call codecontext over the
+// container network instead of speaking MCP directly. One process per
+// container, holds a single codecontext child via os/exec; concurrent HTTP
+// requests are serialized onto the child because codecontext's internal
+// CodeContextMCPServer.graph swaps per target_dir (see recon report
+// 2026-05-21).
+//
+// MCP framing is newline-delimited JSON (NDJSON), not LSP-style
+// Content-Length — per the MCP stdio transport spec:
+// https://spec.modelcontextprotocol.io/specification/server/transports
+//
+// No third-party deps. Stdlib only.
+
+package main
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"os/exec"
+	"os/signal"
+	"sync"
+	"sync/atomic"
+	"syscall"
+	"time"
+)
+
+// ---- Tunables ----
+
+const (
+	listenAddr = ":8080"
+
+	// initTimeout bounds the MCP initialize handshake at startup.
+	initTimeout = 30 * time.Second
+
+	// toolCallTimeout bounds one tool request: queueing for the child plus
+	// the tools/call round trip.
+	toolCallTimeout = 60 * time.Second
+
+	// shutdownTimeout is how long in-flight HTTP requests get to drain.
+	shutdownTimeout = 10 * time.Second
+
+	// killGrace is how long the child gets to honor SIGTERM before SIGKILL.
+	killGrace = 5 * time.Second
+
+	// maxRequestBody caps the JSON arguments accepted per request so a
+	// misbehaving caller cannot make the shim buffer an unbounded body.
+	maxRequestBody = 1 << 20 // 1 MiB
+)
+
+// toolNames lists the codecontext MCP tools exposed as POST /v1/<name>.
+var toolNames = []string{
+	"get_codebase_overview",
+	"get_file_analysis",
+	"get_symbol_info",
+	"search_symbols",
+	"get_dependencies",
+	"watch_changes",
+	"get_semantic_neighborhoods",
+	"get_framework_analysis",
+}
+
+// ---- JSON-RPC types ----
+
+// rpcMessage is shared by request, response, and notification. Notifications
+// omit ID; requests omit Result/Error; responses omit Method/Params. omitempty
+// + the zero int 0 sentinel works for ID because we never SEND id=0
+// (nextID starts at 0 and atomic.AddInt32 returns 1 on the first call).
+type rpcMessage struct {
+	JSONRPC string          `json:"jsonrpc"`
+	ID      int             `json:"id,omitempty"`
+	Method  string          `json:"method,omitempty"`
+	Params  json.RawMessage `json:"params,omitempty"`
+	Result  json.RawMessage `json:"result,omitempty"`
+	Error   *rpcError       `json:"error,omitempty"`
+}
+
+// rpcError is the JSON-RPC error object carried by a failed response.
+type rpcError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+}
+
+// callToolResult is the MCP tools/call response shape. codecontext returns
+// markdown wrapped in a TextContent entry.
+type callToolResult struct {
+	Content []struct {
+		Type string `json:"type"`
+		Text string `json:"text"`
+	} `json:"content"`
+	IsError bool `json:"isError,omitempty"`
+}
+
+// ---- Globals ----
+
+var (
+	child       *exec.Cmd
+	childStdin  io.WriteCloser
+	childStdout *bufio.Reader
+
+	// childDone is closed by superviseChild once child.Wait() has returned.
+	// The supervisor is the only caller of Wait; killChild blocks on this
+	// channel instead of calling Wait a second time.
+	childDone chan struct{}
+
+	// shuttingDown is set by killChild before it signals the child, so the
+	// supervisor can tell an intentional stop from a crash.
+	shuttingDown atomic.Bool
+
+	// callSlot is a one-token semaphore that serializes tools/call so
+	// codecontext's per-call graph rebuild doesn't race itself when
+	// concurrent HTTP requests target different projects. It is a channel
+	// rather than a sync.Mutex so a queued request can stop waiting when its
+	// context ends. Initialize/notifications/initialized run before HTTP
+	// starts so they don't need it.
+	callSlot = make(chan struct{}, 1)
+
+	pendingMu sync.Mutex
+	pending   = make(map[int]chan *rpcMessage)
+
+	nextID int32
+)
+
+// ---- MCP framing (NDJSON) ----
+
+// writeMessage marshals msg and writes it to w as one newline-terminated
+// JSON line.
+func writeMessage(w io.Writer, msg *rpcMessage) error {
+	body, err := json.Marshal(msg)
+	if err != nil {
+		return err
+	}
+	// Single write keeps the message atomic across concurrent writers.
+	// (We don't actually have concurrent writers here — callSlot serializes —
+	// but the +'\n' append needs to be in one syscall regardless.)
+	_, err = w.Write(append(body, '\n'))
+	return err
+}
+
+// readerLoop reads NDJSON lines from r until a read fails and hands each
+// message that carries a non-zero id to the channel registered for that id
+// in pending. Malformed lines are logged and skipped.
+func readerLoop(r *bufio.Reader) {
+	for {
+		line, err := r.ReadBytes('\n')
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				log.Printf("reader: EOF (child closed stdout)")
+			} else {
+				log.Printf("reader: %v", err)
+			}
+			return
+		}
+		var msg rpcMessage
+		if err := json.Unmarshal(line, &msg); err != nil {
+			log.Printf("reader: malformed JSON: %v (line=%q)", err, line)
+			continue
+		}
+		if msg.ID == 0 {
+			// Server-initiated notification or progress update; nothing to
+			// dispatch. codecontext doesn't currently send these but the
+			// MCP spec allows them.
+			continue
+		}
+		pendingMu.Lock()
+		ch, ok := pending[msg.ID]
+		if ok {
+			delete(pending, msg.ID)
+		}
+		pendingMu.Unlock()
+		if ok {
+			// ch is buffered (cap 1) and used once, so this never blocks even
+			// when the caller has already given up.
+			ch <- &msg
+		}
+	}
+}
+
+// call sends a JSON-RPC request to the child and blocks until readerLoop
+// delivers the matching response or ctx ends. A returned message may still
+// carry a JSON-RPC Error; callers must check it.
+func call(ctx context.Context, method string, params any) (*rpcMessage, error) {
+	// A request whose context already ended (for example while it was
+	// queued) must not reach the child: nobody is waiting for the answer.
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+
+	paramsJSON, err := json.Marshal(params)
+	if err != nil {
+		return nil, err
+	}
+
+	id := int(atomic.AddInt32(&nextID, 1))
+	ch := make(chan *rpcMessage, 1)
+	pendingMu.Lock()
+	pending[id] = ch
+	pendingMu.Unlock()
+	// Covers every exit path; delete is a no-op when readerLoop already
+	// removed the entry while delivering the response.
+	defer func() {
+		pendingMu.Lock()
+		delete(pending, id)
+		pendingMu.Unlock()
+	}()
+
+	msg := &rpcMessage{
+		JSONRPC: "2.0",
+		ID:      id,
+		Method:  method,
+		Params:  paramsJSON,
+	}
+	if err := writeMessage(childStdin, msg); err != nil {
+		return nil, fmt.Errorf("write: %w", err)
+	}
+
+	select {
+	case resp := <-ch:
+		return resp, nil
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	}
+}
+
+// notify sends a JSON-RPC notification: no id, so no response is awaited.
+func notify(method string, params any) error {
+	paramsJSON, err := json.Marshal(params)
+	if err != nil {
+		return err
+	}
+	msg := &rpcMessage{
+		JSONRPC: "2.0",
+		Method:  method,
+		Params:  paramsJSON,
+	}
+	return writeMessage(childStdin, msg)
+}
+
+// ---- Child lifecycle ----
+
+// startChild launches `codecontext mcp`, wires its stdin/stdout to the
+// package-level pipes, and starts the reader and supervisor goroutines.
+func startChild() error {
+	// `codecontext mcp` with --watch=true (the default) keeps fsnotify
+	// running on the indexed directory; the per-call target_dir swap
+	// invalidates and re-indexes on demand. `--target=/opt/projects` is the
+	// initial scan target — codecontext rebuilds the graph against whatever
+	// target_dir each call carries, so this is just a valid bootstrap path
+	// (the default "." is the alpine root and trips on transient /proc fds).
+	child = exec.Command("codecontext", "mcp", "--target=/opt/projects", "--watch=true")
+	var err error
+	childStdin, err = child.StdinPipe()
+	if err != nil {
+		return fmt.Errorf("stdin pipe: %w", err)
+	}
+	stdout, err := child.StdoutPipe()
+	if err != nil {
+		return fmt.Errorf("stdout pipe: %w", err)
+	}
+	childStdout = bufio.NewReader(stdout)
+	// codecontext's own log.SetOutput(os.Stderr) keeps its diagnostic noise
+	// off the JSON-RPC channel; we just pass-through to our own stderr.
+	child.Stderr = os.Stderr
+
+	if err := child.Start(); err != nil {
+		return fmt.Errorf("start: %w", err)
+	}
+	log.Printf("started codecontext pid=%d", child.Process.Pid)
+
+	go readerLoop(childStdout)
+
+	childDone = make(chan struct{})
+	go superviseChild(child, childDone)
+	return nil
+}
+
+// superviseChild is the single owner of cmd.Wait(). When codecontext exits
+// on its own (crash, OOM, externally pkill'd) it tears the shim down so the
+// container's `restart: unless-stopped` policy recreates us with a fresh
+// child. Without it the dead child becomes a zombie (Signal(0) on a zombie
+// returns nil, so the health endpoint would lie) and HTTP requests would
+// queue forever waiting on responses that will never come. Discovered
+// during B.1 kill-restart testing.
+//
+// When killChild requested the exit it only closes done, so the caller can
+// finish an orderly shutdown instead of racing os.Exit(1).
+func superviseChild(cmd *exec.Cmd, done chan<- struct{}) {
+	err := cmd.Wait()
+	close(done)
+	if shuttingDown.Load() {
+		return
+	}
+	log.Printf("codecontext exited: %v — shim shutting down", err)
+	os.Exit(1)
+}
+
+// killChild stops the child: SIGTERM first, SIGKILL after killGrace. It
+// returns once the supervisor has reaped the process, and is a no-op when
+// no child was started.
+func killChild() {
+	if child == nil || child.Process == nil || childDone == nil {
+		return
+	}
+	// Set before signalling; otherwise the supervisor would treat the exit
+	// we are about to cause as a crash and call os.Exit(1).
+	shuttingDown.Store(true)
+	log.Printf("killing codecontext pid=%d", child.Process.Pid)
+	// A failure here means the process is already gone; childDone settles it.
+	_ = child.Process.Signal(syscall.SIGTERM)
+	select {
+	case <-childDone:
+		log.Printf("codecontext exited")
+	case <-time.After(killGrace):
+		log.Printf("codecontext did not exit on SIGTERM; sending SIGKILL")
+		_ = child.Process.Kill()
+		<-childDone
+	}
+}
+
+// MCP handshake: client sends initialize, server replies, client follows
+// with the notifications/initialized notification. After that, tools/call
+// is accepted.
+func initializeMCP(ctx context.Context) error {
+	initParams := map[string]any{
+		"protocolVersion": "2024-11-05",
+		"capabilities":    map[string]any{},
+		"clientInfo": map[string]any{
+			"name":    "boocode-codecontext-shim",
+			"version": "0.1.0",
+		},
+	}
+	resp, err := call(ctx, "initialize", initParams)
+	if err != nil {
+		return fmt.Errorf("initialize: %w", err)
+	}
+	if resp.Error != nil {
+		return fmt.Errorf("initialize error %d: %s", resp.Error.Code, resp.Error.Message)
+	}
+	if err := notify("notifications/initialized", map[string]any{}); err != nil {
+		return fmt.Errorf("notifications/initialized: %w", err)
+	}
+	log.Printf("MCP handshake complete (server result=%s)", string(resp.Result))
+	return nil
+}
+
+// ---- HTTP ----
+
+// writeJSON sends body as a JSON response with the given status code.
+func writeJSON(w http.ResponseWriter, status int, body any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	if err := json.NewEncoder(w).Encode(body); err != nil {
+		// The status line is already sent, so the failure can only be logged.
+		log.Printf("writeJSON: %v", err)
+	}
+}
+
+// writeToolError sends the tool-endpoint error envelope: "result" is null
+// and "error" carries msg.
+func writeToolError(w http.ResponseWriter, status int, msg string) {
+	writeJSON(w, status, map[string]any{
+		"result": nil,
+		"error":  msg,
+	})
+}
+
+// handleHealth answers 200 "ok" while the child process can be signalled
+// and 503 otherwise.
+func handleHealth(w http.ResponseWriter, r *http.Request) {
+	if child == nil || child.Process == nil {
+		http.Error(w, "no child", http.StatusServiceUnavailable)
+		return
+	}
+	// Signal 0 doesn't actually deliver — it just returns an error if the
+	// process is gone. Cheaper than parsing /proc.
+	if err := child.Process.Signal(syscall.Signal(0)); err != nil {
+		http.Error(w, "child dead: "+err.Error(), http.StatusServiceUnavailable)
+		return
+	}
+	_, _ = io.WriteString(w, "ok")
+}
+
+// makeToolHandler returns the POST handler for one codecontext tool. The
+// request body is forwarded verbatim as the tool's arguments. Responses use
+// the envelope {"result": <text>, "error": null} on success and
+// {"result": null, "error": <message>} otherwise. Transport failures answer
+// 502; errors reported by codecontext itself answer 200 with "error" set.
+func makeToolHandler(toolName string) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		start := time.Now()
+		targetDir := "-"
+		status := "ok"
+		defer func() {
+			log.Printf("%s target_dir=%q duration_ms=%d status=%s",
+				toolName, targetDir, time.Since(start).Milliseconds(), status)
+		}()
+
+		// Arguments are small JSON objects; refuse anything larger outright.
+		r.Body = http.MaxBytesReader(w, r.Body, maxRequestBody)
+		var args json.RawMessage
+		if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
+			status = "bad_request"
+			code := http.StatusBadRequest
+			var tooBig *http.MaxBytesError
+			if errors.As(err, &tooBig) {
+				code = http.StatusRequestEntityTooLarge
+			}
+			writeToolError(w, code, "invalid JSON body: "+err.Error())
+			return
+		}
+
+		// Sniff target_dir purely for the access log; pass args through opaque.
+		var argsMap map[string]any
+		if json.Unmarshal(args, &argsMap) == nil {
+			if td, ok := argsMap["target_dir"].(string); ok {
+				targetDir = td
+			}
+		}
+
+		ctx, cancel := context.WithTimeout(r.Context(), toolCallTimeout)
+		defer cancel()
+
+		// Queue for the child, but stop waiting once the deadline passes or
+		// the client goes away.
+		select {
+		case callSlot <- struct{}{}:
+		case <-ctx.Done():
+			status = "rpc_error"
+			writeToolError(w, http.StatusBadGateway, ctx.Err().Error())
+			return
+		}
+		resp, err := call(ctx, "tools/call", map[string]any{
+			"name":      toolName,
+			"arguments": args,
+		})
+		<-callSlot
+
+		if err != nil {
+			status = "rpc_error"
+			writeToolError(w, http.StatusBadGateway, err.Error())
+			return
+		}
+		if resp.Error != nil {
+			status = "mcp_error"
+			writeToolError(w, http.StatusOK, resp.Error.Message)
+			return
+		}
+
+		var ctr callToolResult
+		if err := json.Unmarshal(resp.Result, &ctr); err != nil {
+			status = "parse_error"
+			writeToolError(w, http.StatusOK, "parse result: "+err.Error())
+			return
+		}
+
+		// codecontext only emits text content. Concatenate (single-entry in
+		// practice, but the schema allows multiple).
+		var buf []byte
+		for _, c := range ctr.Content {
+			if c.Type == "text" {
+				buf = append(buf, c.Text...)
+			}
+		}
+		text := string(buf)
+
+		if ctr.IsError {
+			status = "tool_error"
+			writeToolError(w, http.StatusOK, text)
+			return
+		}
+		writeJSON(w, http.StatusOK, map[string]any{
+			"result": text,
+			"error":  nil,
+		})
+	}
+}
+
+// ---- main ----
+
+// main starts codecontext, completes the MCP handshake, serves HTTP until
+// SIGTERM/SIGINT or a listener failure, then drains requests and stops the
+// child.
+func main() {
+	log.SetOutput(os.Stderr)
+	log.SetFlags(log.LstdFlags | log.Lmicroseconds)
+	log.Println("boocode-codecontext-shim starting")
+
+	if err := startChild(); err != nil {
+		log.Fatalf("startChild: %v", err)
+	}
+
+	initCtx, initCancel := context.WithTimeout(context.Background(), initTimeout)
+	err := initializeMCP(initCtx)
+	initCancel()
+	if err != nil {
+		killChild()
+		log.Fatalf("initializeMCP: %v", err)
+	}
+
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT)
+
+	mux := http.NewServeMux()
+	// Go 1.22+ method-prefix routing. Any non-listed method → 405 automatically.
+	mux.HandleFunc("GET /health", handleHealth)
+	for _, name := range toolNames {
+		mux.HandleFunc("POST /v1/"+name, makeToolHandler(name))
+	}
+
+	// WriteTimeout has to outlast toolCallTimeout, otherwise a slow tool call
+	// would have its connection closed before the response is written.
+	server := &http.Server{
+		Addr:              listenAddr,
+		Handler:           mux,
+		ReadHeaderTimeout: 5 * time.Second,
+		ReadTimeout:       15 * time.Second,
+		WriteTimeout:      toolCallTimeout + 30*time.Second,
+	}
+
+	// ListenAndServe reports through a channel rather than log.Fatalf so a
+	// listener failure still goes through killChild below.
+	serveErr := make(chan error, 1)
+	go func() {
+		log.Printf("listening on %s", listenAddr)
+		serveErr <- server.ListenAndServe()
+	}()
+
+	exitCode := 0
+	select {
+	case <-sigChan:
+		log.Println("shutdown signal received")
+	case err := <-serveErr:
+		log.Printf("ListenAndServe: %v", err)
+		exitCode = 1
+	}
+
+	shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), shutdownTimeout)
+	if err := server.Shutdown(shutdownCtx); err != nil {
+		log.Printf("http shutdown: %v", err)
+	}
+	shutdownCancel()
+	killChild()
+	log.Println("exit")
+	os.Exit(exitCode)
+}
diff --git a/docker-compose.yml b/docker-compose.yml
index a5253f6..073ccac 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -55,6 +55,33 @@ services:
     networks:
       - boocode_net
 
+  # v1.12 Track B: codecontext sidecar. Stdio MCP server wrapped by a small
+  # HTTP shim (see ./codecontext/). No host port — reached from boocode at
+  # http://codecontext:8080 over the boocode_net bridge.
+  #
+  # Mounts /opt:/opt:ro (not just /opt/projects:ro): BooCode projects can live
+  # anywhere under /opt on the host, not exclusively under /opt/projects. The
+  # mount must cover anywhere a project.path could resolve to. Read-only
+  # because codecontext only analyzes — never writes. The model can't
+  # arbitrarily set target_dir to a sensitive subtree because the B.2
+  # wrappers validate target_dir against project.path before calling the
+  # shim, and the shim isn't reachable from outside boocode_net.
+  codecontext:
+    build:
+      context: ./codecontext
+    container_name: boocode_codecontext
+    restart: unless-stopped
+    networks:
+      - boocode_net
+    volumes:
+      - /opt:/opt:ro
+    healthcheck:
+      test: ["CMD-SHELL", "wget -qO- http://localhost:8080/health || exit 1"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 30s
+
 volumes:
   boocode_pgdata: