diff --git a/crates/client/src/main.rs b/crates/client/src/main.rs index 8a848bc..02e5d0c 100644 --- a/crates/client/src/main.rs +++ b/crates/client/src/main.rs @@ -350,6 +350,20 @@ async fn handle_message( shell: Arc>, ) -> ClientMessage { match msg { + ServerMessage::WindowScreenshotRequest { request_id, window_id } => { + log_cmd!("๐Ÿ“ท", "screenshot window {}", window_id); + match screenshot::take_window_screenshot(window_id) { + Ok((image_base64, width, height)) => { + log_ok!("Done {} {}ร—{}", "ยท".dimmed(), width, height); + ClientMessage::ScreenshotResponse { request_id, image_base64, width, height } + } + Err(e) => { + log_err!("Window screenshot failed: {e}"); + ClientMessage::Error { request_id, message: format!("Window screenshot failed: {e}") } + } + } + } + ServerMessage::ScreenshotRequest { request_id } => { log_cmd!("๐Ÿ“ท", "screenshot"); match screenshot::take_screenshot() { diff --git a/crates/client/src/screenshot.rs b/crates/client/src/screenshot.rs index d867ea4..6edd20d 100644 --- a/crates/client/src/screenshot.rs +++ b/crates/client/src/screenshot.rs @@ -117,6 +117,76 @@ pub fn take_screenshot() -> Result<(String, u32, u32), String> { } } +/// Capture a specific window using PrintWindow (works even if occluded). +#[cfg(windows)] +pub fn take_window_screenshot(window_id: u64) -> Result<(String, u32, u32), String> { + use windows::Win32::Foundation::{HWND, RECT}; + use windows::Win32::Graphics::Gdi::{ + CreateCompatibleBitmap, CreateCompatibleDC, DeleteDC, DeleteObject, + GetDIBits, SelectObject, BITMAPINFO, BITMAPINFOHEADER, DIB_RGB_COLORS, + }; + use windows::Win32::UI::WindowsAndMessaging::{GetWindowRect, PrintWindow, PW_RENDERFULLCONTENT}; + use windows::Win32::Graphics::Gdi::GetDC; + use windows::Win32::Graphics::Gdi::ReleaseDC; + + let hwnd = HWND(window_id as isize); + + unsafe { + let mut rect = RECT::default(); + GetWindowRect(hwnd, &mut rect).map_err(|e| format!("GetWindowRect failed: {e}"))?; + let width = (rect.right - rect.left) as u32; + let height = (rect.bottom - rect.top) as u32; + if width == 0 || height == 0 { + return Err(format!("Window has zero size: {width}x{height}")); + } + + let hdc_screen = GetDC(None); + let hdc_mem = CreateCompatibleDC(hdc_screen); + let hbm = CreateCompatibleBitmap(hdc_screen, width as i32, height as i32); + let old_obj = SelectObject(hdc_mem, hbm); + + // PrintWindow captures the window even if it's behind others + PrintWindow(hwnd, hdc_mem, PW_RENDERFULLCONTENT); + + let mut bmi = BITMAPINFO { + bmiHeader: BITMAPINFOHEADER { + biSize: std::mem::size_of::() as u32, + biWidth: width as i32, + biHeight: -(height as i32), + biPlanes: 1, + biBitCount: 32, + biCompression: 0, + biSizeImage: 0, + biXPelsPerMeter: 0, + biYPelsPerMeter: 0, + biClrUsed: 0, + biClrImportant: 0, + }, + bmiColors: [Default::default()], + }; + + let mut pixel_buf: Vec = vec![0u8; (width * height * 4) as usize]; + GetDIBits(hdc_mem, hbm, 0, height, Some(pixel_buf.as_mut_ptr() as *mut _), &mut bmi, DIB_RGB_COLORS); + + SelectObject(hdc_mem, old_obj); + DeleteObject(hbm); + DeleteDC(hdc_mem); + ReleaseDC(None, hdc_screen); + + // BGRA โ†’ RGBA + for chunk in pixel_buf.chunks_exact_mut(4) { chunk.swap(0, 2); } + + let png_bytes = encode_png(&pixel_buf, width, height)?; + let b64 = base64::engine::general_purpose::STANDARD.encode(&png_bytes); + Ok((b64, width, height)) + } +} + +#[cfg(not(windows))] +pub fn take_window_screenshot(_window_id: u64) -> Result<(String, u32, u32), String> { + Err("Window screenshot only supported on Windows".to_string()) +} + #[cfg(not(windows))] pub fn take_screenshot() -> Result<(String, u32, u32), String> { // Stub for non-Windows builds diff --git a/crates/common/src/protocol.rs b/crates/common/src/protocol.rs index 4fddc0c..368c710 100644 --- a/crates/common/src/protocol.rs +++ b/crates/common/src/protocol.rs @@ -15,6 +15,8 @@ pub struct WindowInfo { pub enum ServerMessage { /// Request a screenshot from the client ScreenshotRequest { request_id: Uuid }, + /// Capture a specific window by its HWND (works even if behind other windows) + WindowScreenshotRequest { request_id: Uuid, window_id: u64 }, /// Show a MessageBox on the client asking the user to do something. /// Blocks until the user clicks OK โ€” use this when you need the user /// to perform a manual action before continuing. diff --git a/crates/server/src/api.rs b/crates/server/src/api.rs index e4a4e95..f7d4a02 100644 --- a/crates/server/src/api.rs +++ b/crates/server/src/api.rs @@ -114,6 +114,27 @@ pub async fn list_sessions(State(state): State) -> Json, + State(state): State, +) -> impl IntoResponse { + match dispatch(&state, &session_id, "window_screenshot", |rid| { + ServerMessage::WindowScreenshotRequest { request_id: rid, window_id } + }).await { + Ok(ClientMessage::ScreenshotResponse { image_base64, width, height, .. }) => ( + StatusCode::OK, + Json(serde_json::json!({ "image_base64": image_base64, "width": width, "height": height })), + ).into_response(), + Ok(ClientMessage::Error { message, .. }) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ "error": message })), + ).into_response(), + Ok(_) => (StatusCode::BAD_GATEWAY, Json(serde_json::json!({ "error": "Unexpected response" }))).into_response(), + Err(e) => e.into_response(), + } +} + /// POST /sessions/:id/screenshot pub async fn request_screenshot( Path(session_id): Path, diff --git a/crates/server/src/main.rs b/crates/server/src/main.rs index 8fbfc01..3ac2e45 100644 --- a/crates/server/src/main.rs +++ b/crates/server/src/main.rs @@ -54,6 +54,7 @@ async fn main() -> anyhow::Result<()> { .route("/sessions/:id/prompt", post(api::prompt_user)) .route("/sessions/:id/windows", get(api::list_windows)) .route("/sessions/:id/windows/minimize-all", post(api::minimize_all)) + .route("/sessions/:id/windows/:window_id/screenshot", post(api::window_screenshot)) .route("/sessions/:id/windows/:window_id/focus", post(api::focus_window)) .route("/sessions/:id/windows/:window_id/maximize", post(api::maximize_and_focus)) .route("/sessions/:id/version", get(api::client_version)) diff --git a/skills/remote.py b/skills/remote.py index 52a3657..bee5487 100644 --- a/skills/remote.py +++ b/skills/remote.py @@ -80,6 +80,24 @@ def resolve_session(session_id: str) -> str: raise SystemExit(f"[helios-remote] No session found with label '{session_id}'") +def resolve_window(sid: str, window_id_or_name: str) -> int: + """If window_id_or_name is a number, return it. Otherwise search by title substring.""" + if window_id_or_name.lstrip('-').isdigit(): + return int(window_id_or_name) + # Search by title + resp = _req("GET", f"/sessions/{sid}/windows") + windows = resp.json().get("windows", []) + query = window_id_or_name.lower() + matches = [w for w in windows if w.get("visible") and query in w.get("title", "").lower()] + if not matches: + raise SystemExit(f"[helios-remote] No visible window matching '{window_id_or_name}'") + if len(matches) > 1: + print(f"[helios-remote] Multiple matches for '{window_id_or_name}', using first:") + for w in matches: + print(f" {w['id']} {w['title']}") + return int(matches[0]["id"]) + + def cmd_sessions(_args): """List all connected sessions.""" resp = _req("GET", "/sessions") @@ -203,17 +221,36 @@ def cmd_minimize_all(args): def cmd_focus(args): - """Bring a window to the foreground on the remote session.""" + """Bring a window to the foreground (by ID or title substring).""" sid = resolve_session(args.session_id) - _req("POST", f"/sessions/{sid}/windows/{args.window_id}/focus") - print(f"Window {args.window_id} focused on session {sid!r}.") + wid = resolve_window(sid, args.window_id) + _req("POST", f"/sessions/{sid}/windows/{wid}/focus") + print(f"Window {wid} focused on session {sid!r}.") def cmd_maximize(args): - """Maximize and focus a window on the remote session.""" + """Maximize and focus a window (by ID or title substring).""" sid = resolve_session(args.session_id) - _req("POST", f"/sessions/{sid}/windows/{args.window_id}/maximize") - print(f"Window {args.window_id} maximized and focused on session {sid!r}.") + wid = resolve_window(sid, args.window_id) + _req("POST", f"/sessions/{sid}/windows/{wid}/maximize") + print(f"Window {wid} maximized on session {sid!r}.") + + +def cmd_screenshot_window(args): + """Capture a specific window by ID or title substring โ†’ /tmp/helios-remote-screenshot.png""" + sid = resolve_session(args.session_id) + wid = resolve_window(sid, args.window_id) + resp = _req("POST", f"/sessions/{sid}/windows/{wid}/screenshot") + data = resp.json() + if "error" in data: + sys.exit(f"[helios-remote] {data['error']}") + import base64, os + out_path = args.output or "/tmp/helios-remote-screenshot.png" + img_bytes = base64.b64decode(data["image_base64"]) + with open(out_path, "wb") as f: + f.write(img_bytes) + print(out_path) + return out_path def cmd_server_version(_args): @@ -364,6 +401,12 @@ def build_parser() -> argparse.ArgumentParser: sp = sub.add_parser("screenshot", help="Capture screenshot โ†’ /tmp/helios-remote-screenshot.png") sp.add_argument("session_id") + swp = sub.add_parser("screenshot-window", help="Capture a specific window (by ID or title)") + swp.add_argument("session_id") + swp.add_argument("window_id", help="Window ID (number) or title substring") + swp.add_argument("--output", default=None, help="Output path (default: /tmp/helios-remote-screenshot.png)") + swp.set_defaults(func=cmd_screenshot_window) + ep = sub.add_parser("exec", help="Run a shell command on the remote session") ep.add_argument("session_id") ep.add_argument("parts", nargs=argparse.REMAINDER, metavar="command", @@ -459,10 +502,13 @@ def main(): "version": cmd_version, "upload": cmd_upload, "download": cmd_download, - "find-window": cmd_find_window, - "run": cmd_run, - "clipboard-get": cmd_clipboard_get, - "clipboard-set": cmd_clipboard_set, + "screenshot-window": cmd_screenshot_window, + "find-window": cmd_find_window, + "wait-for-window": cmd_wait_for_window, + "run": cmd_run, + "prompt": cmd_prompt, + "clipboard-get": cmd_clipboard_get, + "clipboard-set": cmd_clipboard_set, }[args.subcmd](args)