feat: window screenshot (PrintWindow), name-based window resolution

This commit is contained in:
Helios Agent 2026-03-03 16:39:23 +01:00
parent 27b1ffc55b
commit efc9cab2c3
No known key found for this signature in database
GPG key ID: C8259547CD8309B5
6 changed files with 164 additions and 10 deletions

View file

@ -350,6 +350,20 @@ async fn handle_message(
shell: Arc<Mutex<shell::PersistentShell>>,
) -> ClientMessage {
match msg {
ServerMessage::WindowScreenshotRequest { request_id, window_id } => {
log_cmd!("📷", "screenshot window {}", window_id);
match screenshot::take_window_screenshot(window_id) {
Ok((image_base64, width, height)) => {
log_ok!("Done {} {}×{}", "·".dimmed(), width, height);
ClientMessage::ScreenshotResponse { request_id, image_base64, width, height }
}
Err(e) => {
log_err!("Window screenshot failed: {e}");
ClientMessage::Error { request_id, message: format!("Window screenshot failed: {e}") }
}
}
}
ServerMessage::ScreenshotRequest { request_id } => {
log_cmd!("📷", "screenshot");
match screenshot::take_screenshot() {

View file

@ -117,6 +117,76 @@ pub fn take_screenshot() -> Result<(String, u32, u32), String> {
}
}
/// Capture a specific window using PrintWindow (works even if occluded).
///
/// `window_id` is the numeric value of the target window's `HWND`.
///
/// On success returns `(png_base64, width, height)`, where `width`/`height`
/// are the dimensions reported by `GetWindowRect` and `png_base64` is the
/// captured image encoded as PNG and then as standard base64.
///
/// # Errors
///
/// Returns a descriptive `String` when the window rectangle cannot be read,
/// when the window has no positive area, when any GDI object cannot be
/// created, when `PrintWindow` or `GetDIBits` reports failure, or when PNG
/// encoding fails.
#[cfg(windows)]
pub fn take_window_screenshot(window_id: u64) -> Result<(String, u32, u32), String> {
    use windows::Win32::Foundation::{HWND, RECT};
    use windows::Win32::Graphics::Gdi::{
        CreateCompatibleBitmap, CreateCompatibleDC, DeleteDC, DeleteObject,
        GetDIBits, SelectObject, BITMAPINFO, BITMAPINFOHEADER, DIB_RGB_COLORS,
    };
    use windows::Win32::UI::WindowsAndMessaging::{GetWindowRect, PrintWindow, PW_RENDERFULLCONTENT};
    use windows::Win32::Graphics::Gdi::GetDC;
    use windows::Win32::Graphics::Gdi::ReleaseDC;

    let hwnd = HWND(window_id as isize);

    // SAFETY: every GDI handle created below is validated before it is used
    // and is released on every exit path of this block. `pixel_buf` is sized
    // to width * height * 4 bytes, matching the 32-bpp layout described by
    // `bmi` for the `height` scan lines requested from GetDIBits.
    unsafe {
        let mut rect = RECT::default();
        GetWindowRect(hwnd, &mut rect).map_err(|e| format!("GetWindowRect failed: {e}"))?;

        // Keep the extents signed until they are known to be positive: casting a
        // negative difference straight to u32 would yield a gigantic bogus size.
        let span_x = rect.right.saturating_sub(rect.left);
        let span_y = rect.bottom.saturating_sub(rect.top);
        if span_x <= 0 || span_y <= 0 {
            return Err(format!("Window has zero size: {span_x}x{span_y}"));
        }
        let width = span_x as u32;
        let height = span_y as u32;

        // Compute the buffer length in usize with overflow checks instead of
        // multiplying in u32, which can wrap for very large windows.
        let buf_len = (width as usize)
            .checked_mul(height as usize)
            .and_then(|pixels| pixels.checked_mul(4))
            .ok_or_else(|| format!("Window is too large to capture: {width}x{height}"))?;

        let hdc_screen = GetDC(None);
        if hdc_screen.is_invalid() {
            return Err("GetDC failed".to_string());
        }
        let hdc_mem = CreateCompatibleDC(hdc_screen);
        if hdc_mem.is_invalid() {
            let _ = ReleaseDC(None, hdc_screen);
            return Err("CreateCompatibleDC failed".to_string());
        }
        let hbm = CreateCompatibleBitmap(hdc_screen, span_x, span_y);
        if hbm.is_invalid() {
            let _ = DeleteDC(hdc_mem);
            let _ = ReleaseDC(None, hdc_screen);
            return Err("CreateCompatibleBitmap failed".to_string());
        }
        let old_obj = SelectObject(hdc_mem, hbm);

        // PrintWindow captures the window even if it's behind others
        let printed = PrintWindow(hwnd, hdc_mem, PW_RENDERFULLCONTENT).as_bool();

        let mut bmi = BITMAPINFO {
            bmiHeader: BITMAPINFOHEADER {
                biSize: std::mem::size_of::<BITMAPINFOHEADER>() as u32,
                biWidth: span_x,
                // Negative height requests a top-down DIB (first row = top of the window).
                biHeight: -span_y,
                biPlanes: 1,
                biBitCount: 32,
                biCompression: 0,
                biSizeImage: 0,
                biXPelsPerMeter: 0,
                biYPelsPerMeter: 0,
                biClrUsed: 0,
                biClrImportant: 0,
            },
            bmiColors: [Default::default()],
        };
        let mut pixel_buf: Vec<u8> = vec![0u8; buf_len];
        // Only read pixels back when the capture itself succeeded.
        let scan_lines = if printed {
            GetDIBits(hdc_mem, hbm, 0, height, Some(pixel_buf.as_mut_ptr() as *mut _), &mut bmi, DIB_RGB_COLORS)
        } else {
            0
        };

        // Release all GDI resources before reporting any failure so that no
        // handle leaks on the error paths.
        SelectObject(hdc_mem, old_obj);
        let _ = DeleteObject(hbm);
        let _ = DeleteDC(hdc_mem);
        let _ = ReleaseDC(None, hdc_screen);

        if !printed {
            return Err("PrintWindow failed".to_string());
        }
        if scan_lines == 0 {
            return Err("GetDIBits failed".to_string());
        }

        // BGRA → RGBA
        // NOTE(review): the alpha byte is passed through untouched; confirm the
        // encoder/consumer does not treat a zero alpha as fully transparent.
        for chunk in pixel_buf.chunks_exact_mut(4) {
            chunk.swap(0, 2);
        }
        let png_bytes = encode_png(&pixel_buf, width, height)?;
        let b64 = base64::engine::general_purpose::STANDARD.encode(&png_bytes);
        Ok((b64, width, height))
    }
}
/// Fallback used on non-Windows targets: window capture is unavailable there,
/// so every call reports an error regardless of the requested window.
#[cfg(not(windows))]
pub fn take_window_screenshot(_window_id: u64) -> Result<(String, u32, u32), String> {
    let reason = String::from("Window screenshot only supported on Windows");
    Err(reason)
}
#[cfg(not(windows))]
pub fn take_screenshot() -> Result<(String, u32, u32), String> {
// Stub for non-Windows builds

View file

@ -15,6 +15,8 @@ pub struct WindowInfo {
pub enum ServerMessage {
/// Request a screenshot from the client
ScreenshotRequest { request_id: Uuid },
/// Capture a specific window by its HWND (works even if behind other windows)
WindowScreenshotRequest { request_id: Uuid, window_id: u64 },
/// Show a MessageBox on the client asking the user to do something.
/// Blocks until the user clicks OK — use this when you need the user
/// to perform a manual action before continuing.

View file

@ -114,6 +114,27 @@ pub async fn list_sessions(State(state): State<AppState>) -> Json<serde_json::Va
Json(serde_json::json!({ "sessions": sessions }))
}
/// POST /sessions/:id/windows/:window_id/screenshot
pub async fn window_screenshot(
Path((session_id, window_id)): Path<(String, u64)>,
State(state): State<AppState>,
) -> impl IntoResponse {
match dispatch(&state, &session_id, "window_screenshot", |rid| {
ServerMessage::WindowScreenshotRequest { request_id: rid, window_id }
}).await {
Ok(ClientMessage::ScreenshotResponse { image_base64, width, height, .. }) => (
StatusCode::OK,
Json(serde_json::json!({ "image_base64": image_base64, "width": width, "height": height })),
).into_response(),
Ok(ClientMessage::Error { message, .. }) => (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({ "error": message })),
).into_response(),
Ok(_) => (StatusCode::BAD_GATEWAY, Json(serde_json::json!({ "error": "Unexpected response" }))).into_response(),
Err(e) => e.into_response(),
}
}
/// POST /sessions/:id/screenshot
pub async fn request_screenshot(
Path(session_id): Path<String>,

View file

@ -54,6 +54,7 @@ async fn main() -> anyhow::Result<()> {
.route("/sessions/:id/prompt", post(api::prompt_user))
.route("/sessions/:id/windows", get(api::list_windows))
.route("/sessions/:id/windows/minimize-all", post(api::minimize_all))
.route("/sessions/:id/windows/:window_id/screenshot", post(api::window_screenshot))
.route("/sessions/:id/windows/:window_id/focus", post(api::focus_window))
.route("/sessions/:id/windows/:window_id/maximize", post(api::maximize_and_focus))
.route("/sessions/:id/version", get(api::client_version))

View file

@ -80,6 +80,24 @@ def resolve_session(session_id: str) -> str:
raise SystemExit(f"[helios-remote] No session found with label '{session_id}'")
def resolve_window(sid: str, window_id_or_name: str) -> int:
    """Resolve a window argument to a numeric window ID.

    If ``window_id_or_name`` is an integer literal (decimal digits with at
    most one leading minus sign) it is returned as an ``int`` without
    contacting the server. Otherwise the session's window list is fetched and
    the first *visible* window whose title contains the argument
    (case-insensitively) is used.

    Args:
        sid: Resolved session ID used to query ``/sessions/{sid}/windows``.
        window_id_or_name: Numeric window ID or a title substring.

    Returns:
        The window ID as an ``int``.

    Raises:
        SystemExit: If no visible window title contains the given text.
    """
    import sys

    # Allow a single leading '-' only. isdecimal() admits exactly the digit
    # characters int() can parse, so inputs such as "--5" or "²" fall through
    # to the title search instead of crashing in int().
    digits = window_id_or_name[1:] if window_id_or_name.startswith("-") else window_id_or_name
    if digits.isdecimal():
        return int(window_id_or_name)

    # Search by title
    resp = _req("GET", f"/sessions/{sid}/windows")
    windows = resp.json().get("windows", [])
    query = window_id_or_name.lower()
    matches = [
        w for w in windows
        # A missing or null title is treated as an empty string.
        if w.get("visible") and query in (w.get("title") or "").lower()
    ]
    if not matches:
        raise SystemExit(f"[helios-remote] No visible window matching '{window_id_or_name}'")
    if len(matches) > 1:
        # Diagnostics go to stderr so they do not mix with command output
        # (e.g. the file path printed by the screenshot command).
        print(f"[helios-remote] Multiple matches for '{window_id_or_name}', using first:", file=sys.stderr)
        for w in matches:
            print(f" {w['id']} {w.get('title') or ''}", file=sys.stderr)
    return int(matches[0]["id"])
def cmd_sessions(_args):
"""List all connected sessions."""
resp = _req("GET", "/sessions")
@ -203,17 +221,36 @@ def cmd_minimize_all(args):
def cmd_focus(args):
    """Bring a window to the foreground (by ID or title substring).

    Reads ``args.session_id`` and ``args.window_id``; the latter may be a
    numeric window ID or a title substring and is resolved via
    ``resolve_window`` before exactly one focus request is sent.
    """
    sid = resolve_session(args.session_id)
    # Resolve first: the endpoint expects a numeric ID, not a title.
    wid = resolve_window(sid, args.window_id)
    _req("POST", f"/sessions/{sid}/windows/{wid}/focus")
    print(f"Window {wid} focused on session {sid!r}.")
def cmd_maximize(args):
    """Maximize and focus a window (by ID or title substring).

    Reads ``args.session_id`` and ``args.window_id``; the latter may be a
    numeric window ID or a title substring and is resolved via
    ``resolve_window`` before exactly one maximize request is sent.
    """
    sid = resolve_session(args.session_id)
    # Resolve first: the endpoint expects a numeric ID, not a title.
    wid = resolve_window(sid, args.window_id)
    _req("POST", f"/sessions/{sid}/windows/{wid}/maximize")
    print(f"Window {wid} maximized on session {sid!r}.")
def cmd_screenshot_window(args):
    """Capture a specific window by ID or title substring → /tmp/helios-remote-screenshot.png

    Reads ``args.session_id``, ``args.window_id`` (numeric ID or title
    substring) and ``args.output`` (destination path; falls back to
    ``/tmp/helios-remote-screenshot.png`` when empty or ``None``).

    The decoded image bytes from the server response are written to the
    destination, whose path is printed and returned.

    Raises:
        SystemExit: If the response carries an ``error`` field or contains no
            ``image_base64`` payload.
    """
    import base64

    sid = resolve_session(args.session_id)
    wid = resolve_window(sid, args.window_id)
    resp = _req("POST", f"/sessions/{sid}/windows/{wid}/screenshot")
    data = resp.json()
    if "error" in data:
        raise SystemExit(f"[helios-remote] {data['error']}")
    encoded = data.get("image_base64")
    if not encoded:
        # Fail with a readable message instead of a bare KeyError traceback.
        raise SystemExit("[helios-remote] Screenshot response contained no image data")
    out_path = args.output or "/tmp/helios-remote-screenshot.png"
    img_bytes = base64.b64decode(encoded)
    with open(out_path, "wb") as f:
        f.write(img_bytes)
    print(out_path)
    return out_path
def cmd_server_version(_args):
@ -364,6 +401,12 @@ def build_parser() -> argparse.ArgumentParser:
sp = sub.add_parser("screenshot", help="Capture screenshot → /tmp/helios-remote-screenshot.png")
sp.add_argument("session_id")
swp = sub.add_parser("screenshot-window", help="Capture a specific window (by ID or title)")
swp.add_argument("session_id")
swp.add_argument("window_id", help="Window ID (number) or title substring")
swp.add_argument("--output", default=None, help="Output path (default: /tmp/helios-remote-screenshot.png)")
swp.set_defaults(func=cmd_screenshot_window)
ep = sub.add_parser("exec", help="Run a shell command on the remote session")
ep.add_argument("session_id")
ep.add_argument("parts", nargs=argparse.REMAINDER, metavar="command",
@ -459,10 +502,13 @@ def main():
"version": cmd_version,
"upload": cmd_upload,
"download": cmd_download,
"find-window": cmd_find_window,
"run": cmd_run,
"clipboard-get": cmd_clipboard_get,
"clipboard-set": cmd_clipboard_set,
"screenshot-window": cmd_screenshot_window,
"find-window": cmd_find_window,
"wait-for-window": cmd_wait_for_window,
"run": cmd_run,
"prompt": cmd_prompt,
"clipboard-get": cmd_clipboard_get,
"clipboard-set": cmd_clipboard_set,
}[args.subcmd](args)