diff --git a/skills/SKILL.md b/skills/SKILL.md
index 26f9e5b..160a723 100644
--- a/skills/SKILL.md
+++ b/skills/SKILL.md
@@ -80,9 +80,21 @@ python $SKILL_DIR/remote.py label "Moritz PC" "Neues Label"
 4. `exec` / `click` / `type` → Aktion ausführen
 5. `screenshot` → Ergebnis prüfen
 
-## Wenn etwas geklickt werden muss das ich nicht finde
+```bash
+# Warten, bis ein Fenster erscheint (z.B. nach Programmstart)
+python $SKILL_DIR/remote.py wait-for-window "Moritz PC" "notepad" --timeout 10
+```
+
+## ⚠️ Klick-Regel (wichtig!)
+
+**Niemals blind klicken.** Pixel-Koordinaten aus Screenshots sind unzuverlässig.
+
+Wenn ich auf einen Button oder ein anderes UI-Element klicken muss:
+1. Erst `prompt` benutzen, um Moritz zu bitten, es selbst anzuklicken
+2. Dann weitermachen, sobald er OK drückt
 
 ```bash
-python $SKILL_DIR/remote.py prompt "Moritz PC" "Bitte klicke auf [X], dann drücke OK"
+python $SKILL_DIR/remote.py prompt "Moritz PC" "Bitte klicke auf [Speichern], dann drücke OK"
 ```
-→ Warte auf ACK, dann weitermachen.
+
+Ausnahme: Wenn ich die exakten Koordinaten eines Elements kenne (z.B. durch wiederholte Nutzung desselben UIs), darf ich direkt klicken.
diff --git a/skills/remote.py b/skills/remote.py
index be783a1..52a3657 100644
--- a/skills/remote.py
+++ b/skills/remote.py
@@ -302,6 +302,43 @@ def cmd_prompt(args):
     print(f"User confirmed prompt on session {sid!r}.")
 
 
+def cmd_wait_for_window(args):
+    """Poll until a visible window matching the given title appears (or timeout).
+
+    Reads ``args.session_id``, ``args.title`` (case-insensitive substring) and
+    ``args.timeout`` (seconds). Prints the matching windows as an ID/Title table
+    and returns on success; exits via ``sys.exit`` with an error message when
+    no match shows up in time.
+    """
+    import time
+    sid = resolve_session(args.session_id)
+    query = args.title.lower()
+    # Monotonic clock: wall-clock adjustments must not stretch or cut the wait.
+    deadline = time.monotonic() + args.timeout
+    interval = 1.0
+    print(f"Waiting for window matching '{args.title}' (timeout: {args.timeout}s)...")
+    while True:  # always poll at least once, even with --timeout 0
+        resp = _req("GET", f"/sessions/{sid}/windows")
+        # "windows" / "title" may be present but null; treat that like missing.
+        windows = resp.json().get("windows") or []
+        matches = [
+            w for w in windows
+            if w.get("visible") and query in (w.get("title") or "").lower()
+        ]
+        if matches:
+            print(f"{'ID':<20} Title")
+            print("-" * 70)
+            for w in matches:
+                print(f"{str(w.get('id', '?')):<20} {w.get('title') or ''}")
+            return
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            break
+        # Never sleep past the deadline; the next iteration is the final poll.
+        time.sleep(min(interval, remaining))
+    sys.exit(f"[helios-remote] Timeout: no window matching '{args.title}' appeared within {args.timeout}s")
+
+
 def cmd_run(args):
     """Launch a program on the remote session (fire-and-forget)."""
     sid = resolve_session(args.session_id)
@@ -391,6 +428,12 @@ def build_parser() -> argparse.ArgumentParser:
     fwp.add_argument("session_id")
     fwp.add_argument("title", help="Substring to search for (case-insensitive)")
 
+    wfwp = sub.add_parser("wait-for-window", help="Poll until a window with given title appears")
+    wfwp.add_argument("session_id")
+    wfwp.add_argument("title", help="Substring to wait for (case-insensitive)")
+    wfwp.add_argument("--timeout", type=int, default=30, metavar="SECONDS", help="Max wait time (default: 30s)")
+    wfwp.set_defaults(func=cmd_wait_for_window)
+
     pp = sub.add_parser("prompt", help="Show a MessageBox asking the user to do something manually")
     pp.add_argument("session_id")
     pp.add_argument("message", help="What to ask the user (e.g. 'Please click Save, then OK')")