From a0dc392ef48063e1fb0b7d2e02a2009799454f91 Mon Sep 17 00:00:00 2001 From: Ameno Osman Date: Sun, 7 Jun 2026 14:40:44 -0700 Subject: [PATCH 1/4] fix(conncontroller): skip connserver dev-log lines when reading wsh version In --dev mode connserver logs to stderr, which is merged into the same pipe the conn controller parses for the 'wsh version' line. Those [PID:...]-prefixed log lines can race ahead of the version line over SSH (stdout and stderr are delivered on independent channels), causing 'unexpected version format' and a failed connection. Skip dev-log lines until the real version line is read. Co-Authored-By: Claude Opus 4.8 --- pkg/remote/conncontroller/conncontroller.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/pkg/remote/conncontroller/conncontroller.go b/pkg/remote/conncontroller/conncontroller.go index a24a789009..1ca030efef 100644 --- a/pkg/remote/conncontroller/conncontroller.go +++ b/pkg/remote/conncontroller/conncontroller.go @@ -487,10 +487,23 @@ func (conn *SSHConn) StartConnServer(ctx context.Context, afterUpdate bool, useR return false, "", "", fmt.Errorf("unable to start conn controller command: %w", err) } linesChan := utilfn.StreamToLinesChan(pipeRead) - versionLine, err := utilfn.ReadLineWithTimeout(linesChan, utilfn.TimeoutFromContext(ctx, 30*time.Second)) - if err != nil { - sshSession.Close() - return false, "", "", fmt.Errorf("error reading wsh version: %w", err) + // In --dev mode connserver logs to stderr, which is merged into this same + // pipe. Those lines (prefixed "[PID:...]") can race ahead of the "wsh + // version" stdout line, so skip them until we reach the real version line. 
+ var versionLine string + versionDeadline := time.Now().Add(utilfn.TimeoutFromContext(ctx, 30*time.Second)) + for { + line, err := utilfn.ReadLineWithTimeout(linesChan, time.Until(versionDeadline)) + if err != nil { + sshSession.Close() + return false, "", "", fmt.Errorf("error reading wsh version: %w", err) + } + if strings.HasPrefix(strings.TrimSpace(line), "[PID:") { + conn.Infof(ctx, "skipping connserver dev-log line before version: %s\n", strings.TrimSpace(line)) + continue + } + versionLine = line + break } conn.Infof(ctx, "actual connnserverversion: %q\n", versionLine) conn.Infof(ctx, "got connserver version: %s\n", strings.TrimSpace(versionLine)) From 4d02c7cadd7c2069964e6408b834ded4d81bb1f7 Mon Sep 17 00:00:00 2001 From: Ameno Osman Date: Sun, 7 Jun 2026 14:42:01 -0700 Subject: [PATCH 2/4] feat(conncontroller): stdio (--router) transport for SSH connserver Adds a stdio upstream transport for the SSH conn controller, mirroring the mechanism the WSL path already uses (wshutil.HandleStdIOClient): the connserver RPC link is carried over the SSH session's stdin/stdout instead of a forwarded socket, so no listening socket/port is opened on the remote and the SSH channel itself is the auth boundary (no JWT exchange). A relay goroutine feeds the ConnMonitor on each line so health does not flip to 'degraded'. Selected via the RouterTransport setting (default stdio); the domain-socket path remains for the non-stdio case. 
Co-Authored-By: Claude Opus 4.8 --- pkg/remote/conncontroller/conncontroller.go | 115 ++++++++++++++++---- 1 file changed, 95 insertions(+), 20 deletions(-) diff --git a/pkg/remote/conncontroller/conncontroller.go b/pkg/remote/conncontroller/conncontroller.go index 1ca030efef..280bac0958 100644 --- a/pkg/remote/conncontroller/conncontroller.go +++ b/pkg/remote/conncontroller/conncontroller.go @@ -100,6 +100,32 @@ var ConnServerCmdTemplate = strings.TrimSpace( "exec %s connserver --conn %s %s %s", }, "\n")) +// RouterTransportMode selects how connserver's upstream RPC link is carried. +type RouterTransportMode string + +const ( + // RouterTransportStdio carries the upstream RPC over the SSH session's + // stdin/stdout (the "--router" flag). No listening socket or port is + // opened on the remote for the upstream link, so the SSH channel itself + // is the authentication boundary. This matches the WSL implementation and + // avoids the sshd Unix-socket-forward permission bug entirely. + RouterTransportStdio RouterTransportMode = "stdio" + // RouterTransportTCP carries the upstream RPC over a reverse-forwarded + // loopback TCP port (the "--router-tcp" flag). Kept as a fallback. + RouterTransportTCP RouterTransportMode = "tcp" +) + +// RouterTransport selects the upstream transport for SSH connserver router +// mode. Defaults to stdio (most secure); set to RouterTransportTCP to fall +// back to the reverse-forwarded loopback port. Override at runtime with +// WAVETERM_ROUTER_TRANSPORT=tcp (or =stdio) for A/B comparison. 
+var RouterTransport = func() RouterTransportMode { + if strings.ToLower(os.Getenv("WAVETERM_ROUTER_TRANSPORT")) == "tcp" { + return RouterTransportTCP + } + return RouterTransportStdio +}() + func IsLocalConnName(connName string) bool { return strings.HasPrefix(connName, "local:") || connName == "local" || connName == "" } @@ -437,6 +463,10 @@ func (conn *SSHConn) StartConnServer(ctx context.Context, afterUpdate bool, useR if !allowed { return false, "", "", fmt.Errorf("cannot start conn server for %q when status is %q", conn.GetName(), conn.GetStatus()) } + // stdio router mode carries the upstream RPC over the SSH session's + // stdin/stdout. The SSH channel is the auth boundary, so connserver does + // not ask for (and we do not send) a JWT in this mode. + isStdio := useRouterMode && RouterTransport == RouterTransportStdio client := conn.GetClient() wshPath := conn.getWshPath() sockName := conn.GetDomainSocketName() @@ -454,9 +484,13 @@ func (conn *SSHConn) StartConnServer(ctx context.Context, afterUpdate bool, useR Conn: conn.GetName(), } } - jwtToken, err := wshutil.MakeClientJWTToken(rpcCtx) - if err != nil { - return false, "", "", fmt.Errorf("unable to create jwt token for conn controller: %w", err) + var jwtToken string + if !isStdio { + var err error + jwtToken, err = wshutil.MakeClientJWTToken(rpcCtx) + if err != nil { + return false, "", "", fmt.Errorf("unable to create jwt token for conn controller: %w", err) + } } conn.Infof(ctx, "SSH-NEWSESSION (StartConnServer)\n") sshSession, err := client.NewSession() @@ -476,7 +510,11 @@ func (conn *SSHConn) StartConnServer(ctx context.Context, afterUpdate bool, useR } routerFlag := "" if useRouterMode { - routerFlag = "--router-domainsocket" + if isStdio { + routerFlag = "--router" + } else { + routerFlag = "--router-tcp" + } } cmdStr := fmt.Sprintf(ConnServerCmdTemplate, wshPath, wshPath, shellutil.HardQuote(conn.GetName()), devFlag, routerFlag) log.Printf("starting conn controller: %q\n", cmdStr) @@ -521,19 
+559,24 @@ func (conn *SSHConn) StartConnServer(ctx context.Context, afterUpdate bool, useR sshSession.Close() return true, clientVersion, osArchStr, nil } - jwtLine, err := utilfn.ReadLineWithTimeout(linesChan, 3*time.Second) - if err != nil { - sshSession.Close() - return false, clientVersion, "", fmt.Errorf("error reading jwt status line: %w", err) - } - conn.Infof(ctx, "got jwt status line: %s\n", jwtLine) - if strings.TrimSpace(jwtLine) == wavebase.NeedJwtConst { - // write the jwt - conn.Infof(ctx, "writing jwt token to connserver\n") - _, err = fmt.Fprintf(stdinPipe, "%s\n", jwtToken) + // stdio router mode does not use a JWT exchange: serverRunRouter() switches + // straight to the packet protocol on stdin/stdout after the version line. + // Sending a JWT line here would corrupt that stream, so skip it. + if !isStdio { + jwtLine, err := utilfn.ReadLineWithTimeout(linesChan, 3*time.Second) if err != nil { sshSession.Close() - return false, clientVersion, "", fmt.Errorf("failed to write JWT token: %w", err) + return false, clientVersion, "", fmt.Errorf("error reading jwt status line: %w", err) + } + conn.Infof(ctx, "got jwt status line: %s\n", jwtLine) + if strings.TrimSpace(jwtLine) == wavebase.NeedJwtConst { + // write the jwt + conn.Infof(ctx, "writing jwt token to connserver\n") + _, err = fmt.Fprintf(stdinPipe, "%s\n", jwtToken) + if err != nil { + sshSession.Close() + return false, clientVersion, "", fmt.Errorf("failed to write JWT token: %w", err) + } } } conn.WithLock(func() { @@ -563,6 +606,33 @@ func (conn *SSHConn) StartConnServer(ctx context.Context, afterUpdate bool, useR defer func() { panichandler.PanicHandler("conncontroller:sshSession-output", recover()) }() + if isStdio { + // stdio router mode: the remaining stdout is the upstream RPC + // packet stream. Register it (and stdin) with the default router, + // exactly like the WSL connserver path. 
+ logName := fmt.Sprintf("conncontroller:%s", conn.GetName()) + // HandleStdIOClient has no readCallback hook, so relay the lines + // through a goroutine that feeds the health monitor on each one -- + // mirroring RunWshRpcOverListener's callback in the socket path. + // Without this the monitor never arms and flips to "degraded". + stdioCh := linesChan + if monitor := conn.GetMonitor(); monitor != nil { + relayCh := make(chan utilfn.LineOutput, wshutil.DefaultInputChSize) + go func() { + defer func() { + panichandler.PanicHandler("conncontroller:stdioActivityRelay", recover()) + }() + defer close(relayCh) + for lo := range linesChan { + monitor.UpdateLastActivityTime() + relayCh <- lo + } + }() + stdioCh = relayCh + } + wshutil.HandleStdIOClient(logName, stdioCh, stdinPipe) + return + } for output := range linesChan { if output.Error != nil { log.Printf("[conncontroller:%s:output] error: %v\n", conn.GetName(), output.Error) @@ -895,11 +965,16 @@ func (conn *SSHConn) tryEnableWsh(ctx context.Context, clientDisplayName string) return WshCheckResult{NoWshReason: "user selected not to install wsh extensions", NoWshCode: NoWshCode_UserDeclined} } } - err := conn.OpenDomainSocketListener(ctx) - if err != nil { - conn.Infof(ctx, "ERROR opening domain socket listener: %v\n", err) - err = fmt.Errorf("error opening domain socket listener: %w", err) - return WshCheckResult{NoWshReason: "error opening domain socket", NoWshCode: NoWshCode_DomainSocketError, WshError: err} + // stdio router mode carries the upstream RPC over the SSH session's + // stdin/stdout, so no reverse-forwarded listener is needed. Only the TCP + // fallback requires the forwarded loopback port. 
+ if RouterTransport == RouterTransportTCP { + err := conn.OpenDomainSocketListener(ctx) + if err != nil { + conn.Infof(ctx, "ERROR opening domain socket listener: %v\n", err) + err = fmt.Errorf("error opening domain socket listener: %w", err) + return WshCheckResult{NoWshReason: "error opening domain socket", NoWshCode: NoWshCode_DomainSocketError, WshError: err} + } } needsInstall, clientVersion, osArchStr, err := conn.StartConnServer(ctx, false, true) if err != nil { From be533dbba2d22a586ad1d94b2940c1db9978d245 Mon Sep 17 00:00:00 2001 From: Ameno Osman Date: Sun, 7 Jun 2026 14:42:22 -0700 Subject: [PATCH 3/4] feat(connserver): --router-tcp loopback fallback transport (local) Local-only escape hatch: carry the connserver upstream RPC over a reverse-forwarded loopback TCP port instead of a Unix domain socket, selected with WAVETERM_ROUTER_TRANSPORT=tcp. Works around SSH servers that create the forwarded Unix socket with the wrong owner (e.g. Tailscale SSH creates it root:root 0600). stdio is the default and preferred transport; this is kept only as a fallback and is not intended for upstream. 
Co-Authored-By: Claude Opus 4.8
---
 cmd/wsh/cmd/wshcmd-connserver.go            | 129 ++++++++++++++++++++
 pkg/remote/conncontroller/conncontroller.go |  17 ++-
 2 files changed, 137 insertions(+), 9 deletions(-)

diff --git a/cmd/wsh/cmd/wshcmd-connserver.go b/cmd/wsh/cmd/wshcmd-connserver.go
index 1f892a24ce..76d7bf0493 100644
--- a/cmd/wsh/cmd/wshcmd-connserver.go
+++ b/cmd/wsh/cmd/wshcmd-connserver.go
@@ -46,6 +46,7 @@ const (
 
 var connServerRouter bool
 var connServerRouterDomainSocket bool
+var connServerRouterTCP bool
 var connServerConnName string
 var connServerDev bool
 var ConnServerWshRouter *wshutil.WshRouter
@@ -54,6 +55,7 @@ var connServerInitialEnv map[string]string
 func init() {
 	serverCmd.Flags().BoolVar(&connServerRouter, "router", false, "run in local router mode (stdio upstream)")
 	serverCmd.Flags().BoolVar(&connServerRouterDomainSocket, "router-domainsocket", false, "run in local router mode (domain socket upstream)")
+	serverCmd.Flags().BoolVar(&connServerRouterTCP, "router-tcp", false, "run in local router mode (tcp upstream)")
 	serverCmd.Flags().StringVar(&connServerConnName, "conn", "", "connection name")
 	serverCmd.Flags().BoolVar(&connServerDev, "dev", false, "enable dev mode with file logging and PID in logs")
 	rootCmd.AddCommand(serverCmd)
@@ -394,6 +396,119 @@ func serverRunRouterDomainSocket(jwtToken string) error {
 	select {}
 }
 
+// serverRunRouterTCP runs connserver in router mode with the upstream RPC
+// link carried over a TCP connection. The address to dial is read from the
+// (unverified) JWT; the same JWT is then used to authenticate this router
+// with the upstream router. On success the function blocks forever; it
+// returns only when a setup step fails, wrapping the cause with %w.
+func serverRunRouterTCP(jwtToken string) error {
+	log.Printf("starting connserver router (tcp upstream)")
+
+	// extract tcp address from JWT token (unverified - we're on the client side)
+	tcpAddr, err := wshutil.ExtractUnverifiedSocketName(jwtToken)
+	if err != nil {
+		return fmt.Errorf("error extracting tcp address from JWT: %w", err)
+	}
+
+	// connect to the forwarded tcp port
+	conn, err := net.Dial("tcp", tcpAddr)
+	if err != nil {
+		return fmt.Errorf("error connecting to tcp upstream %s: %w", tcpAddr, err)
+	}
+
+	// create router
+	router := wshutil.NewWshRouter()
+	ConnServerWshRouter = router
+
+	// create proxy for the tcp connection
+	upstreamProxy := wshutil.MakeRpcProxy("connserver-upstream")
+
+	// goroutine to write to the tcp connection
+	go func() {
+		defer func() {
+			panichandler.PanicHandler("serverRunRouterTCP:WriteLoop", recover())
+		}()
+		writeErr := wshutil.AdaptOutputChToStream(upstreamProxy.ToRemoteCh, conn)
+		if writeErr != nil {
+			log.Printf("error writing to upstream tcp connection: %v\n", writeErr)
+		}
+	}()
+
+	// goroutine to read from the tcp connection
+	go func() {
+		defer func() {
+			panichandler.PanicHandler("serverRunRouterTCP:ReadLoop", recover())
+		}()
+		defer func() {
+			log.Printf("upstream tcp connection closed, shutting down")
+			wshutil.DoShutdown("", 0, true)
+		}()
+		wshutil.AdaptStreamToMsgCh(conn, upstreamProxy.FromRemoteCh, nil)
+	}()
+
+	// register the tcp connection as upstream
+	router.RegisterUpstream(upstreamProxy)
+
+	// use the router's control RPC to authenticate with upstream
+	controlRpc := router.GetControlRpc()
+
+	// authenticate with the upstream router using the JWT
+	_, err = wshclient.AuthenticateCommand(controlRpc, jwtToken, &wshrpc.RpcOpts{Route: wshutil.ControlRootRoute})
+	if err != nil {
+		return fmt.Errorf("error authenticating with upstream: %w", err)
+	}
+	log.Printf("authenticated with upstream router")
+
+	// fetch and set JWT public key
+	log.Printf("trying to get JWT public key")
+	jwtPublicKeyB64, err := wshclient.GetJwtPublicKeyCommand(controlRpc, nil)
+	if err != nil {
+		return fmt.Errorf("error getting jwt public key: %w", err)
+	}
+	jwtPublicKeyBytes, err := base64.StdEncoding.DecodeString(jwtPublicKeyB64)
+	if err != nil {
+		return fmt.Errorf("error decoding jwt public key: %w", err)
+	}
+	err = wavejwt.SetPublicKey(jwtPublicKeyBytes)
+	if err != nil {
+		return fmt.Errorf("error setting jwt public key: %w", err)
+	}
+	log.Printf("got JWT public key")
+
+	// now setup the connserver rpc client
+	client, bareRouteId, err := setupConnServerRpcClientWithRouter(router, tcpAddr)
+	if err != nil {
+		return fmt.Errorf("error setting up connserver rpc client: %w", err)
+	}
+	wshfs.RpcClient = client
+	wshfs.RpcClientRouteId = bareRouteId
+
+	// set up the local domain socket listener for local wsh commands
+	unixListener, err := MakeRemoteUnixListener()
+	if err != nil {
+		return fmt.Errorf("cannot create unix listener: %w", err)
+	}
+	log.Printf("unix listener started")
+	go func() {
+		defer func() {
+			panichandler.PanicHandler("serverRunRouterTCP:runListener", recover())
+		}()
+		runListener(unixListener, router)
+	}()
+
+	// run the sysinfo loop
+	go func() {
+		defer func() {
+			panichandler.PanicHandler("serverRunRouterTCP:RunSysInfoLoop", recover())
+		}()
+		wshremote.RunSysInfoLoop(client, connServerConnName)
+	}()
+	startJobLogCleanup()
+
+	log.Printf("running server (router-tcp mode), successfully started")
+	select {}
+}
+
 func serverRunNormal(jwtToken string) error {
 	sockName, err := wshutil.ExtractUnverifiedSocketName(jwtToken)
 	if err != nil {
@@ -491,6 +606,20 @@ func serverRun(cmd *cobra.Command, args []string) error {
 		}
 		return err
 	}
+	if connServerRouterTCP {
+		jwtToken, err := askForJwtToken()
+		if err != nil {
+			if logFile != nil {
+				fmt.Fprintf(logFile, "askForJwtToken error: %v\n", err)
+			}
+			return err
+		}
+		err = serverRunRouterTCP(jwtToken)
+		if err != nil && logFile != nil {
+			fmt.Fprintf(logFile, "serverRunRouterTCP error: %v\n", err)
+		}
+		return err
+	}
 	jwtToken, err := askForJwtToken()
 	if err != nil {
 		if logFile != nil {
diff --git a/pkg/remote/conncontroller/conncontroller.go b/pkg/remote/conncontroller/conncontroller.go
index 280bac0958..be8696099f 100644
--- a/pkg/remote/conncontroller/conncontroller.go
+++ b/pkg/remote/conncontroller/conncontroller.go
@@ -302,21 +302,20 @@ func (conn *SSHConn) OpenDomainSocketListener(ctx context.Context) error {
 		return fmt.Errorf("cannot open domain socket for %q when status is %q", conn.GetName(), conn.GetStatus())
 	}
 	client := conn.GetClient()
-	randStr, err := utilfn.RandomHexString(16) // 64-bits of 
randomness + // Use TCP forwarding instead of Unix socket forwarding. + // sshd creates Unix socket forwards as root:root 0600 (pre-privilege-separation), + // making them inaccessible to the connecting user. TCP listeners are kernel-managed + // with no file permissions, so this avoids the permission denied error entirely. + listener, err := client.Listen("tcp", "127.0.0.1:0") if err != nil { - return fmt.Errorf("error generating random string: %w", err) - } - sockName := fmt.Sprintf("/tmp/waveterm-%s.sock", randStr) - conn.Infof(ctx, "generated domain socket name %s\n", sockName) - listener, err := client.ListenUnix(sockName) - if err != nil { - return fmt.Errorf("unable to request connection domain socket: %v", err) + return fmt.Errorf("unable to request tcp connection forward: %v", err) } + sockName := listener.Addr().String() // e.g. "127.0.0.1:54321" conn.WithLock(func() { conn.DomainSockName = sockName conn.DomainSockListener = listener }) - conn.Infof(ctx, "successfully connected domain socket\n") + conn.Infof(ctx, "successfully connected tcp forward on %s\n", sockName) go func() { defer func() { panichandler.PanicHandler("conncontroller:OpenDomainSocketListener", recover()) From 26f1f8c91f1762c305467910d93cc191312ce225 Mon Sep 17 00:00:00 2001 From: Ameno Osman Date: Mon, 8 Jun 2026 09:18:26 -0700 Subject: [PATCH 4/4] =?UTF-8?q?feat(ai):=20companion=20mode=20=E2=80=94=20?= =?UTF-8?q?term=5Fsend=5Fcommand=20tool=20+=20honest=20system=20prompt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add term_send_command tool that sends a shell command to an open terminal widget via ControllerInputCommand (the same PTY write path used by keystrokes). Requires user approval before execution. Optionally waits 2s and returns the first 50 lines of scrollback so the AI can report the result inline. 
Also enable the existing (commented-out) term_command_output tool which
returns the output of the last completed command when shell integration
is active.

Replace the system prompt language that told the model it "cannot execute
shell commands" (factually wrong with the new tool registered) with
accurate capability description: use term_send_command when a terminal is
open; ask the user to open one if not.

Co-Authored-By: Claude Sonnet 4.6
---
 pkg/aiusechat/tools.go           |   3 +-
 pkg/aiusechat/tools_term.go      | 125 +++++++++++++++++++++++++++++++
 pkg/aiusechat/usechat-prompts.go |   5 +-
 3 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/pkg/aiusechat/tools.go b/pkg/aiusechat/tools.go
index 550fec95cb..5b2b46a42d 100644
--- a/pkg/aiusechat/tools.go
+++ b/pkg/aiusechat/tools.go
@@ -190,7 +190,8 @@ func GenerateTabStateAndTools(ctx context.Context, tabid string, widgetAccess bo
 	}
 	if viewTypes["term"] {
 		tools = append(tools, GetTermGetScrollbackToolDefinition(tabid))
-		// tools = append(tools, GetTermCommandOutputToolDefinition(tabid))
+		tools = append(tools, GetTermCommandOutputToolDefinition(tabid))
+		tools = append(tools, GetTermSendCommandToolDefinition(tabid))
 	}
 	if viewTypes["web"] {
 		tools = append(tools, GetWebNavigateToolDefinition(tabid))
diff --git a/pkg/aiusechat/tools_term.go b/pkg/aiusechat/tools_term.go
index 7d0ad90507..074d3f9266 100644
--- a/pkg/aiusechat/tools_term.go
+++ b/pkg/aiusechat/tools_term.go
@@ -5,6 +5,7 @@ package aiusechat
 
 import (
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"strings"
@@ -241,6 +242,130 @@ func parseTermCommandOutputInput(input any) (*TermCommandOutputToolInput, error)
 	return result, nil
 }
 
+// TermSendCommandToolInput is the decoded input of the term_send_command tool.
+// WaitForOutput is a pointer so that an omitted field (nil, treated as true)
+// can be told apart from an explicit false.
+type TermSendCommandToolInput struct {
+	WidgetId      string `json:"widget_id"`
+	Command       string `json:"command"`
+	WaitForOutput *bool  `json:"wait_for_output,omitempty"`
+}
+
+// parseTermSendCommandInput converts the raw tool input into a
+// TermSendCommandToolInput via a JSON round-trip; widget_id and command are required.
+func parseTermSendCommandInput(input any) (*TermSendCommandToolInput, error) {
+	result := &TermSendCommandToolInput{}
+	if input == nil {
+		return nil, fmt.Errorf("widget_id and command are required")
+	}
+	inputBytes, err := json.Marshal(input)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal input: %w", err)
+	}
+	if err := json.Unmarshal(inputBytes, result); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal input: %w", err)
+	}
+	if result.WidgetId == "" {
+		return nil, fmt.Errorf("widget_id is required")
+	}
+	if result.Command == "" {
+		return nil, fmt.Errorf("command is required")
+	}
+	return result, nil
+}
+
+// GetTermSendCommandToolDefinition returns the term_send_command tool for tabId.
+// The tool always requires approval; it writes the command plus "\r" to the
+// widget's controller input and, unless wait_for_output is false, returns scrollback.
+func GetTermSendCommandToolDefinition(tabId string) uctypes.ToolDefinition {
+	return uctypes.ToolDefinition{
+		Name:        "term_send_command",
+		DisplayName: "Run Command in Terminal",
+		Description: "Execute a shell command in an open terminal widget. Sends the command text followed by Enter. If wait_for_output is true, returns the terminal scrollback after a short delay so you can see the result. Requires user approval before execution.",
+		ToolLogName: "term:sendcommand",
+		InputSchema: map[string]any{
+			"type": "object",
+			"properties": map[string]any{
+				"widget_id": map[string]any{
+					"type":        "string",
+					"description": "8-character widget ID of the terminal widget to run the command in",
+				},
+				"command": map[string]any{
+					"type":        "string",
+					"description": "The shell command to execute",
+				},
+				"wait_for_output": map[string]any{
+					"type":        "boolean",
+					"description": "If true, wait briefly and return terminal output after the command runs (default: true)",
+				},
+			},
+			"required":             []string{"widget_id", "command"},
+			"additionalProperties": false,
+		},
+		ToolCallDesc: func(input any, output any, toolUseData *uctypes.UIMessageDataToolUse) string {
+			parsed, err := parseTermSendCommandInput(input)
+			if err != nil {
+				return fmt.Sprintf("error parsing input: %v", err)
+			}
+			return fmt.Sprintf("running in terminal %s: %s", parsed.WidgetId, parsed.Command)
+		},
+		ToolApproval: func(input any) string {
+			return uctypes.ApprovalNeedsApproval
+		},
+		ToolAnyCallback: func(input any, toolUseData *uctypes.UIMessageDataToolUse) (any, error) {
+			parsed, err := parseTermSendCommandInput(input)
+			if err != nil {
+				return nil, err
+			}
+
+			ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second)
+			defer cancelFn()
+
+			fullBlockId, err := wcore.ResolveBlockIdFromPrefix(ctx, tabId, parsed.WidgetId)
+			if err != nil {
+				return nil, fmt.Errorf("terminal widget %q not found: %w", parsed.WidgetId, err)
+			}
+
+			inputBytes := []byte(parsed.Command + "\r")
+			inputData64 := base64.StdEncoding.EncodeToString(inputBytes)
+
+			rpcClient := wshclient.GetBareRpcClient()
+			err = wshclient.ControllerInputCommand(
+				rpcClient,
+				wshrpc.CommandBlockInputData{
+					BlockId:     fullBlockId,
+					InputData64: inputData64,
+				},
+				&wshrpc.RpcOpts{},
+			)
+			if err != nil {
+				return nil, fmt.Errorf("failed to send command to terminal: %w", err)
+			}
+
+			// wait_for_output defaults to true when omitted; only an explicit
+			// false skips the delay and the scrollback read.
+			waitForOutput := parsed.WaitForOutput == nil || *parsed.WaitForOutput
+			if waitForOutput {
+				time.Sleep(2 * time.Second)
+				output, err := getTermScrollbackOutput(
+					tabId,
+					parsed.WidgetId,
+					wshrpc.CommandTermGetScrollbackLinesData{
+						LineStart: 0,
+						LineEnd:   50,
+					},
+				)
+				if err != nil {
+					return map[string]any{"sent": true, "note": "command sent; could not read output"}, nil
+				}
+				return map[string]any{"sent": true, "output": output}, nil
+			}
+
+			return map[string]any{"sent": true}, nil
+		},
+	}
+}
+
 func GetTermCommandOutputToolDefinition(tabId string) uctypes.ToolDefinition {
 	return uctypes.ToolDefinition{
 		Name:        "term_command_output",
diff --git a/pkg/aiusechat/usechat-prompts.go b/pkg/aiusechat/usechat-prompts.go
index 2d479c5246..97c20056b7 100644
--- a/pkg/aiusechat/usechat-prompts.go
+++ b/pkg/aiusechat/usechat-prompts.go
@@ -35,9 +35,8 @@ var SystemPromptText_OpenAI = strings.Join([]string{
 	`If a request would execute dangerous or destructive actions, warn briefly and provide a safer alternative.`,
 	`If output is very long, prefer a brief summary plus a copy-ready fenced block or offer a follow-up chunking strategy.`,
 
-	`You can write 
and edit local files on disk using available tools, but you cannot execute shell commands, run code in the terminal, or read/write remote files.`, - `If the user asks you to execute commands or run code, or deal with remote files say that these features aren't available yet AND make sure to say that they are coming soon (stay tuned for updates).`, - `Instead, show them exactly what command or code they could copy-paste to run manually.`, + `You can read and write local files, capture screenshots, browse the web, read terminal scrollback, and execute shell commands in open terminal widgets using the term_send_command tool. Always prefer to gather context (read files, get scrollback) before acting. For destructive or side-effectful commands, briefly explain what the command will do before calling the tool — the user will be shown an approval prompt before execution.`, + `If the user asks you to run a command and a terminal widget is available, call term_send_command rather than just showing them the command to copy-paste. If no terminal widget is open, tell them to open one and then re-ask.`, // Final reminder `You have NO API access to widgets or Wave unless provided via an explicit tool.`,