Harvest PR #3051: voice input commands and hotbar integration
Port /voice, /voice-send, and /voice-control into the command strategy registry as groups/core/voice.rs. The handlers only flip App state (voice_enabled, voice_send_enabled, voice_control_enabled) and emit the new AppAction::VoiceCapture; the UI event loop performs the actual record + transcribe cycle so credentials come from the live Config (deepseek_api_key/deepseek_base_url) instead of auth fields cached on App, and no audio is ever recorded by the registry smoke tests.

- voice.toggle hotbar action dispatches the real /voice command and reports voice_enabled as its active state, replacing the placeholder.
- Recording uses sox/rec/arecord with RMS-based silence detection; transcription posts input_audio blocks to the provider chat completions API (async reqwest — the blocking client would panic inside the tokio event loop).
- Transcripts insert at the composer cursor via App::insert_str. With /voice-send enabled, a transcript ending in "send it" / 发送 strips the suffix and submits; a bare "send it" submits the current composer content. With /voice-control enabled, transcription runs through the AI dictation pipeline that sees the composer text.
- Failures (no recorder, no API key, short recording, network) surface as localized status messages and disarm voice input.
- Localized command help and status strings for all seven shipped locales; /voice now appears in the command palette.

Harvested from PR #3051 by @huqiantao

Co-authored-by: huqiantao <huqiantao@users.noreply.github.com>
Co-authored-by: Hunter B <hmbown@gmail.com>
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -10,6 +10,7 @@ mod hooks;
|
||||
mod provider;
|
||||
mod queue;
|
||||
mod stash;
|
||||
pub mod voice;
|
||||
|
||||
pub(in crate::commands) use self::core::reset_conversation_state;
|
||||
|
||||
@@ -43,6 +44,9 @@ impl CommandGroup for CoreCommands {
|
||||
Box::new(FunctionCommand::new(&PROFILE_INFO, run_profile)),
|
||||
Box::new(FunctionCommand::new(&RLM_INFO, run_rlm)),
|
||||
Box::new(FunctionCommand::new(&TRANSLATE_INFO, run_translate)),
|
||||
Box::new(FunctionCommand::new(&VOICE_INFO, run_voice)),
|
||||
Box::new(FunctionCommand::new(&VOICE_SEND_INFO, run_voice_send)),
|
||||
Box::new(FunctionCommand::new(&VOICE_CONTROL_INFO, run_voice_control)),
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -167,6 +171,24 @@ static TRANSLATE_INFO: CommandInfo = CommandInfo {
|
||||
usage: "/translate",
|
||||
description_id: MessageId::CmdTranslateDescription,
|
||||
};
|
||||
/// Registry metadata for `/voice`: canonical name, accepted aliases, usage
/// string, and the localized description shown in command help.
static VOICE_INFO: CommandInfo = CommandInfo {
    name: "voice",
    // Pinyin and Chinese spellings; the same strings are matched in `dispatch`.
    aliases: &["yuyin", "语音"],
    usage: "/voice",
    description_id: MessageId::CmdVoiceDescription,
};
|
||||
/// Registry metadata for the voice auto-send toggle.
///
/// The canonical name is `voicesend`; the hyphenated `/voice-send` spelling
/// is accepted as an alias.
static VOICE_SEND_INFO: CommandInfo = CommandInfo {
    name: "voicesend",
    // The same strings are matched in `dispatch`.
    aliases: &["voice-send", "yuyinsend", "语音发送"],
    usage: "/voicesend",
    description_id: MessageId::CmdVoiceSendDescription,
};
|
||||
/// Registry metadata for the voice-control (AI-assisted dictation) toggle.
///
/// The canonical name is `voicecontrol`; the hyphenated `/voice-control`
/// spelling is accepted as an alias.
static VOICE_CONTROL_INFO: CommandInfo = CommandInfo {
    name: "voicecontrol",
    // The same strings are matched in `dispatch`.
    aliases: &["voice-control", "yuyincontrol", "语音控制"],
    usage: "/voicecontrol",
    description_id: MessageId::CmdVoiceControlDescription,
};
|
||||
|
||||
fn run_registered(app: &mut App, name: &str, arg: Option<&str>) -> CommandResult {
|
||||
dispatch(app, name, arg).expect("registered core command should dispatch")
|
||||
@@ -232,6 +254,15 @@ fn run_rlm(app: &mut App, arg: Option<&str>) -> CommandResult {
|
||||
fn run_translate(app: &mut App, arg: Option<&str>) -> CommandResult {
|
||||
run_registered(app, "translate", arg)
|
||||
}
|
||||
/// Registry entry point for `/voice`; forwards to `dispatch` under the
/// canonical name `"voice"`.
fn run_voice(app: &mut App, arg: Option<&str>) -> CommandResult {
    run_registered(app, "voice", arg)
}
|
||||
/// Registry entry point for `/voicesend`; forwards to `dispatch` under the
/// canonical name `"voicesend"`.
fn run_voice_send(app: &mut App, arg: Option<&str>) -> CommandResult {
    run_registered(app, "voicesend", arg)
}
|
||||
/// Registry entry point for `/voicecontrol`; forwards to `dispatch` under
/// the canonical name `"voicecontrol"`.
fn run_voice_control(app: &mut App, arg: Option<&str>) -> CommandResult {
    run_registered(app, "voicecontrol", arg)
}
|
||||
|
||||
pub(in crate::commands) fn dispatch(
|
||||
app: &mut App,
|
||||
@@ -259,6 +290,11 @@ pub(in crate::commands) fn dispatch(
|
||||
"profile" | "dangan" => core::profile_switch(app, arg),
|
||||
"rlm" | "recursive" | "digui" => rlm(app, arg),
|
||||
"translate" | "translation" | "transale" => core::translate(app),
|
||||
"voice" | "yuyin" | "语音" => voice::voice(app),
|
||||
"voicesend" | "voice-send" | "yuyinsend" | "语音发送" => voice::voice_send(app),
|
||||
"voicecontrol" | "voice-control" | "yuyincontrol" | "语音控制" => {
|
||||
voice::voice_control(app)
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
Some(result)
|
||||
|
||||
@@ -0,0 +1,541 @@
|
||||
//! Voice input commands — `/voice`, `/voice-send`, `/voice-control`.
|
||||
//!
|
||||
//! Records audio from the default microphone, sends it to the configured
|
||||
//! provider's API for transcription, and inserts the transcribed text into
|
||||
//! the composer. The interaction model mirrors MiMo Code's voice UX:
|
||||
//!
|
||||
//! `/voice` — toggle voice input on/off (records when toggled on)
|
||||
//! `/voice-send` — toggle auto-send when the transcript ends with
|
||||
//! "send it" / "发送"
|
||||
//! `/voice-control` — toggle AI-assisted dictation that sees the current
|
||||
//! composer text
|
||||
//!
|
||||
//! The slash commands only flip state and emit [`AppAction::VoiceCapture`];
|
||||
//! the actual capture runs in the UI event loop where the live [`Config`]
|
||||
//! supplies provider credentials. That keeps the handlers side-effect free
|
||||
//! (the registry smoke tests execute every command) and avoids caching
|
||||
//! auth material on [`App`].
|
||||
//!
|
||||
//! ## Recording
|
||||
//!
|
||||
//! Uses platform-specific command-line tools (sox, rec, arecord) to capture
|
||||
//! 16kHz mono 16-bit PCM audio. Records until a silence gap is detected or
|
||||
//! the maximum duration is reached (default 10 s).
|
||||
|
||||
use std::process::{Command, Stdio};
|
||||
use std::sync::LazyLock;
|
||||
use std::time::Duration;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use crate::commands::CommandResult;
|
||||
use crate::config::Config;
|
||||
use crate::localization::{MessageId, tr};
|
||||
use crate::tui::app::{App, AppAction};
|
||||
|
||||
/// Transcription model requested from the provider's chat-completions API.
// NOTE(review): this is a MiMo model id, but `capture_and_transcribe` sends
// it with `Config::deepseek_api_key` / `deepseek_base_url` — confirm the
// configured endpoint actually serves this model.
const ASR_MODEL: &str = "mimo-v2.5-asr";
/// Model used for the AI-assisted voice-control pipeline.
// NOTE(review): same provider/model pairing question as `ASR_MODEL`.
const VOICE_CONTROL_MODEL: &str = "mimo-v2.5";
|
||||
|
||||
// --- Recorder detection ----------------------------------------------------
|
||||
|
||||
/// A command-line audio recorder and the arguments that make it stream raw
/// audio to stdout.
#[derive(Debug, Clone)]
struct Recorder {
    /// Executable name, resolved through `PATH` when spawned.
    cmd: &'static str,
    /// CLI arguments for piping raw 16kHz mono S16_LE PCM to stdout.
    pipe_args: &'static [&'static str],
}

/// Return the first recorder for the current platform that can be launched.
///
/// Candidates are tried in preference order (macOS: `sox`, `rec`; Linux:
/// `arecord`, `sox`; Windows: `sox`; nothing elsewhere). A candidate counts
/// as present when `<cmd> --version` can be started; its exit code is not
/// inspected.
///
/// The probe uses [`Command::status`] rather than `spawn` so the probe
/// process is waited on. Dropping an un-awaited `Child` leaves a zombie on
/// Unix, and this function runs on every `/voice` toggle and every capture.
fn detect_recorder() -> Option<Recorder> {
    /// Arguments shared by every `sox` invocation: read the default device
    /// (`-d`) and write headerless PCM to stdout (`-`).
    const SOX_PIPE_ARGS: &[&str] = &["-d", "-r", "16000", "-c", "1", "-b", "16", "-t", "raw", "-"];

    let candidates: &[Recorder] = if cfg!(target_os = "macos") {
        &[
            Recorder {
                cmd: "sox",
                pipe_args: SOX_PIPE_ARGS,
            },
            Recorder {
                cmd: "rec",
                pipe_args: &["-r", "16000", "-c", "1", "-b", "16", "-t", "raw", "-"],
            },
        ]
    } else if cfg!(target_os = "linux") {
        &[
            Recorder {
                cmd: "arecord",
                pipe_args: &["-f", "S16_LE", "-r", "16000", "-c", "1", "-t", "raw"],
            },
            Recorder {
                cmd: "sox",
                pipe_args: SOX_PIPE_ARGS,
            },
        ]
    } else if cfg!(target_os = "windows") {
        &[Recorder {
            cmd: "sox",
            pipe_args: SOX_PIPE_ARGS,
        }]
    } else {
        &[]
    };

    candidates
        .iter()
        .find(|r| {
            Command::new(r.cmd)
                .arg("--version")
                .stdin(Stdio::null())
                .stdout(Stdio::null())
                .stderr(Stdio::null())
                // `status()` spawns *and* waits, so the probe child is reaped.
                .status()
                .is_ok()
        })
        .cloned()
}
|
||||
|
||||
/// Check whether voice recording is available on this system.
///
/// Returns `true` when `detect_recorder` finds a launchable recorder for the
/// current platform. Each call re-runs the probe (it starts a subprocess),
/// so the result is not cached.
pub fn is_available() -> bool {
    detect_recorder().is_some()
}
|
||||
|
||||
// --- WAV encoding ----------------------------------------------------------
|
||||
|
||||
/// Wrap raw 16 kHz mono signed 16-bit samples in a canonical 44-byte
/// RIFF/WAVE header and return the complete file image.
///
/// All multi-byte fields, including the samples themselves, are written
/// little-endian.
fn encode_wav(samples: &[i16]) -> Vec<u8> {
    const HEADER_LEN: usize = 44;
    const FMT_CHUNK_LEN: u32 = 16;
    const FORMAT_PCM: u16 = 1;
    const CHANNELS: u16 = 1;
    const SAMPLE_RATE: u32 = 16000;
    const BLOCK_ALIGN: u16 = 2;
    const BITS_PER_SAMPLE: u16 = 16;

    let payload_len = (samples.len() * 2) as u32;
    let mut wav = Vec::with_capacity(HEADER_LEN + payload_len as usize);

    // RIFF container: the size field counts everything after itself.
    wav.extend_from_slice(b"RIFF");
    wav.extend_from_slice(&(36 + payload_len).to_le_bytes());
    wav.extend_from_slice(b"WAVE");

    // Format description.
    wav.extend_from_slice(b"fmt ");
    wav.extend_from_slice(&FMT_CHUNK_LEN.to_le_bytes());
    wav.extend_from_slice(&FORMAT_PCM.to_le_bytes());
    wav.extend_from_slice(&CHANNELS.to_le_bytes());
    wav.extend_from_slice(&SAMPLE_RATE.to_le_bytes());
    wav.extend_from_slice(&(SAMPLE_RATE * 2).to_le_bytes()); // bytes per second
    wav.extend_from_slice(&BLOCK_ALIGN.to_le_bytes());
    wav.extend_from_slice(&BITS_PER_SAMPLE.to_le_bytes());

    // Sample payload.
    wav.extend_from_slice(b"data");
    wav.extend_from_slice(&payload_len.to_le_bytes());
    wav.extend(samples.iter().flat_map(|sample| sample.to_le_bytes()));

    wav
}
|
||||
|
||||
// --- Recording -------------------------------------------------------------
|
||||
|
||||
/// Maximum recording duration in seconds before auto-stopping.
///
/// `record_audio` checks this only after each successfully read chunk, so
/// the limit applies while the recorder keeps producing data.
const MAX_RECORD_SECS: u64 = 10;
/// Minimum segment duration in seconds to consider as valid speech.
///
/// `record_audio` compares this against the number of captured samples,
/// which starts at the first loud chunk and includes any trailing silence.
// NOTE(review): a single loud chunk followed by the 1 s silence window
// already exceeds 0.3 s, so this check rarely rejects anything — confirm
// whether trailing silence should be excluded.
const MIN_SEGMENT_SECS: f64 = 0.3;
|
||||
|
||||
/// Record audio from the default microphone.
|
||||
///
|
||||
/// Returns raw 16kHz mono S16_LE PCM samples. Returns `None` if no recorder
|
||||
/// is available, the recording failed, or no speech was detected.
|
||||
fn record_audio() -> Option<(Vec<i16>, Duration)> {
|
||||
let recorder = detect_recorder()?;
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let mut child = Command::new(recorder.cmd)
|
||||
.args(recorder.pipe_args)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::null())
|
||||
.spawn()
|
||||
.ok()?;
|
||||
|
||||
let stdout = child.stdout.take()?;
|
||||
let mut reader = std::io::BufReader::new(stdout);
|
||||
let mut all_samples: Vec<i16> = Vec::with_capacity(16000 * MAX_RECORD_SECS as usize);
|
||||
|
||||
// Read until timeout or silence
|
||||
let mut buf = [0u8; 320]; // 10ms of 16kHz S16_LE
|
||||
let max_duration = Duration::from_secs(MAX_RECORD_SECS);
|
||||
let mut silence_samples = 0u32;
|
||||
let mut had_speech = false;
|
||||
let speech_threshold: i16 = 500; // RMS-based speech detection threshold
|
||||
let silence_duration_samples = 16000u32; // 1 second of silence to stop
|
||||
|
||||
loop {
|
||||
use std::io::Read;
|
||||
match reader.read_exact(&mut buf) {
|
||||
Ok(()) => {
|
||||
let chunk: Vec<i16> = buf
|
||||
.chunks_exact(2)
|
||||
.map(|b| i16::from_le_bytes([b[0], b[1]]))
|
||||
.collect();
|
||||
|
||||
// Simple RMS-based VAD
|
||||
let rms = (chunk.iter().map(|&s| (s as f64) * (s as f64)).sum::<f64>()
|
||||
/ chunk.len() as f64)
|
||||
.sqrt();
|
||||
let is_speech = rms > speech_threshold as f64;
|
||||
|
||||
if is_speech {
|
||||
had_speech = true;
|
||||
silence_samples = 0;
|
||||
} else if had_speech {
|
||||
silence_samples += chunk.len() as u32;
|
||||
}
|
||||
|
||||
if had_speech {
|
||||
all_samples.extend_from_slice(&chunk);
|
||||
}
|
||||
|
||||
if start.elapsed() > max_duration {
|
||||
let _ = child.kill();
|
||||
break;
|
||||
}
|
||||
if had_speech && silence_samples >= silence_duration_samples {
|
||||
let _ = child.kill();
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break,
|
||||
Err(_) => {
|
||||
let _ = child.kill();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let _ = child.wait();
|
||||
let elapsed = start.elapsed();
|
||||
|
||||
let min_samples = (MIN_SEGMENT_SECS * 16000.0) as usize;
|
||||
if all_samples.len() < min_samples {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((all_samples, elapsed))
|
||||
}
|
||||
|
||||
// --- Auto-send suffix ------------------------------------------------------
|
||||
|
||||
/// Matches an explicit send instruction at the end of transcribed text:
/// "send it" (any spacing/case) or 发送/發送, with trailing punctuation.
///
/// The instruction must sit at the very start of the text or follow
/// whitespace/punctuation, and only whitespace or sentence punctuation may
/// come after it. The match includes the leading separator run, so slicing
/// the text at the match start drops the separator as well.
static SEND_SUFFIX_RE: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a compile-time literal, so `unwrap` can only fail on a
    // programming error.
    Regex::new(r"(?i)(?:^|[\s,,.。!!??]+)(?:send\s*it|发送|發送)[\s.。!!??]*$").unwrap()
});
|
||||
|
||||
/// Split a transcript into the message remainder and whether it ended with an
|
||||
/// explicit send instruction. `"ship the fix, send it"` → `("ship the fix", true)`.
|
||||
fn split_send_suffix(text: &str) -> (&str, bool) {
|
||||
match SEND_SUFFIX_RE.find(text) {
|
||||
Some(found) => (text[..found.start()].trim(), true),
|
||||
None => (text.trim(), false),
|
||||
}
|
||||
}
|
||||
|
||||
// --- Transcription ---------------------------------------------------------
|
||||
|
||||
/// Encode `data` with the `base64` crate's `STANDARD` engine.
fn base64_encode(data: &[u8]) -> String {
    // The `Engine` trait must be in scope for `.encode`.
    use base64::Engine;
    base64::engine::general_purpose::STANDARD.encode(data)
}
|
||||
|
||||
/// Build the chat-completions endpoint for `base_url`, tolerating any
/// number of trailing slashes on the base.
fn chat_completions_url(base_url: &str) -> String {
    let mut url = base_url.trim_end_matches('/').to_owned();
    url.push_str("/chat/completions");
    url
}
|
||||
|
||||
async fn post_chat_completions(
|
||||
api_key: &str,
|
||||
base_url: &str,
|
||||
body: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let client = crate::tls::reqwest_client();
|
||||
let resp = client
|
||||
.post(chat_completions_url(base_url))
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Authorization", format!("Bearer {api_key}"))
|
||||
.timeout(Duration::from_secs(30))
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("request failed: {e}"))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
return Err(format!("API returned status {}", resp.status()));
|
||||
}
|
||||
|
||||
resp.json()
|
||||
.await
|
||||
.map_err(|e| format!("failed to parse response: {e}"))
|
||||
}
|
||||
|
||||
/// Send audio to the provider's API for plain transcription.
|
||||
///
|
||||
/// Uses the chat completions endpoint with `input_audio` content blocks.
|
||||
async fn transcribe(
|
||||
api_key: &str,
|
||||
base_url: &str,
|
||||
audio_samples: &[i16],
|
||||
) -> Result<String, String> {
|
||||
let wav = encode_wav(audio_samples);
|
||||
let data_url = format!("data:audio/wav;base64,{}", base64_encode(&wav));
|
||||
|
||||
let body = serde_json::json!({
|
||||
"model": ASR_MODEL,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_audio",
|
||||
"input_audio": {
|
||||
"data": data_url
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"asr_options": {
|
||||
"language": "auto"
|
||||
}
|
||||
});
|
||||
|
||||
let data = post_chat_completions(api_key, base_url, body).await?;
|
||||
data["choices"][0]["message"]["content"]
|
||||
.as_str()
|
||||
.map(|s| s.trim().to_string())
|
||||
.ok_or_else(|| "no transcription in response".to_string())
|
||||
}
|
||||
|
||||
/// Process audio through the voice-control pipeline: AI-assisted dictation
|
||||
/// that sees the current composer text, mirroring MiMo Code's
|
||||
/// `processVoiceControl`. Used when `/voice-control` is enabled.
|
||||
async fn process_voice_control(
|
||||
api_key: &str,
|
||||
base_url: &str,
|
||||
audio_samples: &[i16],
|
||||
current_text: &str,
|
||||
) -> Result<String, String> {
|
||||
let wav = encode_wav(audio_samples);
|
||||
let data_url = format!("data:audio/wav;base64,{}", base64_encode(&wav));
|
||||
|
||||
let user_context = serde_json::json!({
|
||||
"current_text": current_text,
|
||||
"cursor": "end",
|
||||
});
|
||||
|
||||
let body = serde_json::json!({
|
||||
"model": VOICE_CONTROL_MODEL,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a voice input assistant. Transcribe the user's speech. Output JSON: {\"text\": \"transcribed text\"}."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{ "type": "text", "text": user_context.to_string() },
|
||||
{ "type": "input_audio", "input_audio": { "data": data_url } }
|
||||
]
|
||||
}
|
||||
],
|
||||
"response_format": { "type": "json_object" }
|
||||
});
|
||||
|
||||
let data = post_chat_completions(api_key, base_url, body).await?;
|
||||
let content = data["choices"][0]["message"]["content"]
|
||||
.as_str()
|
||||
.ok_or_else(|| "no response content".to_string())?;
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(content)
|
||||
.map_err(|e| format!("failed to parse voice control JSON: {e}"))?;
|
||||
|
||||
parsed["text"]
|
||||
.as_str()
|
||||
.map(|s| s.to_string())
|
||||
.ok_or_else(|| "no text field in voice control response".to_string())
|
||||
}
|
||||
|
||||
// --- Capture orchestration (UI event loop) ---------------------------------
|
||||
|
||||
/// What the UI should do with a finished capture.
///
/// Produced by `capture_and_transcribe`; both variants carry non-empty,
/// trimmed text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VoiceCaptureOutcome {
    /// Insert the transcribed text into the composer at the cursor.
    Insert(String),
    /// Submit this text as a message (auto-send).
    Send(String),
}
|
||||
|
||||
/// Perform a complete record + transcribe cycle.
|
||||
///
|
||||
/// Runs in the UI event loop (see [`AppAction::VoiceCapture`]) so provider
|
||||
/// credentials come from the live [`Config`] rather than state cached on
|
||||
/// [`App`]. Recording happens on a blocking thread; transcription uses the
|
||||
/// shared async HTTP client. Every failure path returns a localized message
|
||||
/// so callers can surface it as a status line.
|
||||
pub async fn capture_and_transcribe(
|
||||
app: &mut App,
|
||||
config: &Config,
|
||||
) -> Result<VoiceCaptureOutcome, String> {
|
||||
let locale = app.ui_locale;
|
||||
|
||||
if !is_available() {
|
||||
return Err(tr(locale, MessageId::VoiceErrNoRecorder).to_string());
|
||||
}
|
||||
let api_key = config
|
||||
.deepseek_api_key()
|
||||
.map_err(|_| tr(locale, MessageId::VoiceErrNoAuth).to_string())?;
|
||||
let base_url = config.deepseek_base_url();
|
||||
|
||||
app.status_message = Some(tr(locale, MessageId::VoiceRecording).to_string());
|
||||
let (samples, _duration) = tokio::task::spawn_blocking(record_audio)
|
||||
.await
|
||||
.ok()
|
||||
.flatten()
|
||||
.ok_or_else(|| tr(locale, MessageId::VoiceErrTooShort).to_string())?;
|
||||
|
||||
app.status_message = Some(tr(locale, MessageId::VoiceProcessing).to_string());
|
||||
let text = if app.voice_control_enabled {
|
||||
process_voice_control(&api_key, &base_url, &samples, &app.composer.input).await
|
||||
} else {
|
||||
transcribe(&api_key, &base_url, &samples).await
|
||||
}
|
||||
.map_err(|e| format!("{}: {e}", tr(locale, MessageId::VoiceErrNetwork)))?;
|
||||
|
||||
let clean = text.trim();
|
||||
if app.voice_send_enabled {
|
||||
let (remainder, wants_send) = split_send_suffix(clean);
|
||||
if wants_send {
|
||||
// A bare "send it" submits whatever is already in the composer.
|
||||
let outgoing = if remainder.is_empty() {
|
||||
let existing = app.composer.input.trim().to_string();
|
||||
if !existing.is_empty() {
|
||||
app.clear_input();
|
||||
}
|
||||
existing
|
||||
} else {
|
||||
remainder.to_string()
|
||||
};
|
||||
if outgoing.is_empty() {
|
||||
return Err(tr(locale, MessageId::VoiceErrEmptySend).to_string());
|
||||
}
|
||||
return Ok(VoiceCaptureOutcome::Send(outgoing));
|
||||
}
|
||||
}
|
||||
if clean.is_empty() {
|
||||
return Err(tr(locale, MessageId::VoiceErrEmptySend).to_string());
|
||||
}
|
||||
Ok(VoiceCaptureOutcome::Insert(clean.to_string()))
|
||||
}
|
||||
|
||||
// --- Command handlers ------------------------------------------------------
|
||||
|
||||
/// Handle the `/voice` command: toggle voice input. Toggling on requests a
|
||||
/// one-shot recording + transcription via [`AppAction::VoiceCapture`].
|
||||
pub fn voice(app: &mut App) -> CommandResult {
|
||||
let locale = app.ui_locale;
|
||||
|
||||
if app.voice_enabled {
|
||||
app.voice_enabled = false;
|
||||
return CommandResult::message(tr(locale, MessageId::VoiceDisabled));
|
||||
}
|
||||
if !is_available() {
|
||||
return CommandResult::error(tr(locale, MessageId::VoiceErrNoRecorder));
|
||||
}
|
||||
app.voice_enabled = true;
|
||||
CommandResult::with_message_and_action(
|
||||
tr(locale, MessageId::VoiceEnabled),
|
||||
AppAction::VoiceCapture,
|
||||
)
|
||||
}
|
||||
|
||||
/// Handle the `/voice-send` command: toggle auto-send after transcription.
|
||||
pub fn voice_send(app: &mut App) -> CommandResult {
|
||||
let locale = app.ui_locale;
|
||||
app.voice_send_enabled = !app.voice_send_enabled;
|
||||
|
||||
let msg = if app.voice_send_enabled {
|
||||
tr(locale, MessageId::VoiceSendEnabled)
|
||||
} else {
|
||||
tr(locale, MessageId::VoiceSendDisabled)
|
||||
};
|
||||
CommandResult::message(msg)
|
||||
}
|
||||
|
||||
/// Handle the `/voice-control` command: toggle AI-assisted dictation.
|
||||
pub fn voice_control(app: &mut App) -> CommandResult {
|
||||
let locale = app.ui_locale;
|
||||
app.voice_control_enabled = !app.voice_control_enabled;
|
||||
|
||||
let msg = if app.voice_control_enabled {
|
||||
tr(locale, MessageId::VoiceControlEnabled)
|
||||
} else {
|
||||
tr(locale, MessageId::VoiceControlDisabled)
|
||||
};
|
||||
CommandResult::message(msg)
|
||||
}
|
||||
|
||||
/// Unit tests for the pure helpers in this module. None of them records
/// audio or touches the network.
#[cfg(test)]
mod tests {
    use super::*;

    /// The RIFF/WAVE/fmt magic values and the RIFF size field are correct
    /// for a one-second buffer.
    #[test]
    fn wav_encoding_produces_valid_header() {
        let samples = vec![0i16; 16000]; // 1 second of silence
        let wav = encode_wav(&samples);
        assert_eq!(&wav[0..4], b"RIFF");
        assert_eq!(&wav[8..12], b"WAVE");
        assert_eq!(&wav[12..16], b"fmt ");
        // data size = 16000 * 2 = 32000
        assert_eq!(&wav[4..8], &(36 + 32000u32).to_le_bytes());
    }

    /// An empty sample buffer still produces the full 44-byte header.
    #[test]
    fn wav_encoding_empty_is_minimal() {
        let wav = encode_wav(&[]);
        assert_eq!(wav.len(), 44);
        assert_eq!(&wav[4..8], &36u32.to_le_bytes());
    }

    /// A trailing send instruction is recognized, bare or after a message,
    /// regardless of case and trailing punctuation.
    #[test]
    fn send_suffix_detected_and_stripped() {
        assert_eq!(split_send_suffix("send it"), ("", true));
        assert_eq!(split_send_suffix("Send It!"), ("", true));
        assert_eq!(split_send_suffix("发送"), ("", true));
        assert_eq!(split_send_suffix("發送。"), ("", true));
        assert_eq!(
            split_send_suffix("ship the fix, send it"),
            ("ship the fix", true)
        );
        assert_eq!(
            split_send_suffix("修复这个问题,发送"),
            ("修复这个问题", true)
        );
    }

    /// The instruction words are left alone when they are not at the end of
    /// the transcript or are part of a longer word or phrase.
    #[test]
    fn send_suffix_leaves_plain_text_alone() {
        assert_eq!(split_send_suffix("send it now"), ("send it now", false));
        assert_eq!(
            split_send_suffix("帮我发送一封邮件"),
            ("帮我发送一封邮件", false)
        );
        assert_eq!(split_send_suffix("发送邮件"), ("发送邮件", false));
        assert_eq!(
            split_send_suffix("resend it to the queue"),
            ("resend it to the queue", false)
        );
    }

    /// Smoke test: the result depends on the host, so only absence of a
    /// panic is checked.
    #[test]
    fn recorder_detection_does_not_crash() {
        // Just verify the function runs without panicking
        let _ = is_available();
    }
}
|
||||
@@ -17,6 +17,9 @@ pub use traits::CommandInfo;
|
||||
// Long-standing public paths that predate the group layout.
|
||||
pub use groups::project::share;
|
||||
|
||||
// Voice capture plumbing shared with the hotbar and the UI event loop.
|
||||
pub use groups::core::voice;
|
||||
|
||||
use crate::tui::app::{App, AppAction};
|
||||
|
||||
/// Result of executing a command
|
||||
@@ -664,6 +667,49 @@ mod tests {
|
||||
assert!(result.message.unwrap().contains("off"));
|
||||
}
|
||||
|
||||
/// `/voicesend` and `/voicecontrol` (and their aliases) flip their flags,
/// return a message, and never request an action.
#[test]
fn voice_send_and_voice_control_commands_toggle_state() {
    let mut app = create_test_app();
    assert!(!app.voice_send_enabled);
    assert!(!app.voice_control_enabled);

    // Each spelling toggles the same flag once.
    for invocation in ["/voicesend", "/voice-send", "/yuyinsend", "/语音发送"] {
        let result = execute(invocation, &mut app);
        assert!(!result.is_error, "{invocation} should toggle cleanly");
        assert!(result.action.is_none());
        assert!(result.message.is_some());
    }
    // Four toggles land back at disabled.
    assert!(!app.voice_send_enabled);

    let result = execute("/voicecontrol", &mut app);
    assert!(!result.is_error);
    assert!(app.voice_control_enabled);
    let result = execute("/voice-control", &mut app);
    assert!(!result.is_error);
    assert!(!app.voice_control_enabled);
}
|
||||
|
||||
/// `/voice` defers the actual capture to the UI event loop via
/// `AppAction::VoiceCapture`, so executing it never records audio.
/// On hosts without a recorder it must fail gracefully instead.
#[test]
fn voice_command_toggles_on_and_off_or_fails_gracefully() {
    let mut app = create_test_app();
    let result = execute("/voice", &mut app);
    // Which branch runs depends on whether the host has a recorder installed.
    if app.voice_enabled {
        assert!(!result.is_error);
        assert!(matches!(result.action, Some(AppAction::VoiceCapture)));
        // Toggling off must not request another capture.
        let off = execute("/voice", &mut app);
        assert!(!off.is_error);
        assert!(off.action.is_none());
        assert!(!app.voice_enabled);
    } else {
        assert!(result.is_error);
        assert!(result.action.is_none());
    }
}
|
||||
|
||||
#[test]
|
||||
fn execute_sidebar_toggles_visibility() {
|
||||
let mut app = create_test_app();
|
||||
|
||||
@@ -636,6 +636,24 @@ pub enum MessageId {
|
||||
ToolFamilyVerify,
|
||||
ToolFamilyThink,
|
||||
ToolFamilyGeneric,
|
||||
// Voice commands (/voice, /voice-send, /voice-control)
|
||||
CmdVoiceDescription,
|
||||
CmdVoiceSendDescription,
|
||||
CmdVoiceControlDescription,
|
||||
VoiceEnabled,
|
||||
VoiceDisabled,
|
||||
VoiceSendEnabled,
|
||||
VoiceSendDisabled,
|
||||
VoiceControlEnabled,
|
||||
VoiceControlDisabled,
|
||||
VoiceErrNoAuth,
|
||||
VoiceErrNoRecorder,
|
||||
VoiceErrNetwork,
|
||||
VoiceErrEmptySend,
|
||||
VoiceErrTooShort,
|
||||
VoiceRecording,
|
||||
VoiceProcessing,
|
||||
VoiceTranscribed,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
@@ -1041,6 +1059,23 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[
|
||||
MessageId::ToolFamilyVerify,
|
||||
MessageId::ToolFamilyThink,
|
||||
MessageId::ToolFamilyGeneric,
|
||||
MessageId::CmdVoiceDescription,
|
||||
MessageId::CmdVoiceSendDescription,
|
||||
MessageId::CmdVoiceControlDescription,
|
||||
MessageId::VoiceEnabled,
|
||||
MessageId::VoiceDisabled,
|
||||
MessageId::VoiceSendEnabled,
|
||||
MessageId::VoiceSendDisabled,
|
||||
MessageId::VoiceControlEnabled,
|
||||
MessageId::VoiceControlDisabled,
|
||||
MessageId::VoiceErrNoAuth,
|
||||
MessageId::VoiceErrNoRecorder,
|
||||
MessageId::VoiceErrNetwork,
|
||||
MessageId::VoiceErrEmptySend,
|
||||
MessageId::VoiceErrTooShort,
|
||||
MessageId::VoiceRecording,
|
||||
MessageId::VoiceProcessing,
|
||||
MessageId::VoiceTranscribed,
|
||||
];
|
||||
|
||||
pub fn tr(locale: Locale, id: MessageId) -> &'static str {
|
||||
@@ -1774,6 +1809,32 @@ fn english(id: MessageId) -> &'static str {
|
||||
MessageId::ToolFamilyVerify => "verify",
|
||||
MessageId::ToolFamilyThink => "think",
|
||||
MessageId::ToolFamilyGeneric => "tool",
|
||||
// Voice commands
|
||||
MessageId::CmdVoiceDescription => {
|
||||
"Toggle voice input: record speech and transcribe into the composer"
|
||||
}
|
||||
MessageId::CmdVoiceSendDescription => {
|
||||
"Toggle voice auto-send: submit when the transcript ends with \"send it\""
|
||||
}
|
||||
MessageId::CmdVoiceControlDescription => {
|
||||
"Toggle voice control: AI-assisted dictation aware of the composer text"
|
||||
}
|
||||
MessageId::VoiceEnabled => "Voice input enabled. Speak to record.",
|
||||
MessageId::VoiceDisabled => "Voice input disabled.",
|
||||
MessageId::VoiceSendEnabled => "Voice auto-send enabled.",
|
||||
MessageId::VoiceSendDisabled => "Voice auto-send disabled.",
|
||||
MessageId::VoiceControlEnabled => "Voice control enabled.",
|
||||
MessageId::VoiceControlDisabled => "Voice control disabled.",
|
||||
MessageId::VoiceErrNoAuth => "Voice: no API key configured for the active provider",
|
||||
MessageId::VoiceErrNoRecorder => {
|
||||
"Voice: no recording tool found. Install sox, arecord, or rec."
|
||||
}
|
||||
MessageId::VoiceErrNetwork => "Voice: transcription request failed",
|
||||
MessageId::VoiceErrEmptySend => "Voice: nothing to send",
|
||||
MessageId::VoiceErrTooShort => "Voice: no speech detected, recording too short",
|
||||
MessageId::VoiceRecording => "🎙 Recording... speak now",
|
||||
MessageId::VoiceProcessing => "🎙 Transcribing...",
|
||||
MessageId::VoiceTranscribed => "🎙 Transcribed",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2375,6 +2436,32 @@ fn vietnamese(id: MessageId) -> Option<&'static str> {
|
||||
MessageId::ToolFamilyVerify => "xác minh",
|
||||
MessageId::ToolFamilyThink => "suy nghĩ",
|
||||
MessageId::ToolFamilyGeneric => "công cụ",
|
||||
// Voice commands
|
||||
MessageId::CmdVoiceDescription => {
|
||||
"Bật/tắt nhập liệu bằng giọng nói: ghi âm và chuyển thành văn bản"
|
||||
}
|
||||
MessageId::CmdVoiceSendDescription => {
|
||||
"Bật/tắt tự gửi bằng giọng nói: gửi khi bản ghi kết thúc bằng \"send it\""
|
||||
}
|
||||
MessageId::CmdVoiceControlDescription => {
|
||||
"Bật/tắt điều khiển giọng nói: đọc chính tả có AI hỗ trợ"
|
||||
}
|
||||
MessageId::VoiceEnabled => "Đã bật nhập liệu bằng giọng nói. Hãy nói để ghi âm.",
|
||||
MessageId::VoiceDisabled => "Đã tắt nhập liệu bằng giọng nói.",
|
||||
MessageId::VoiceSendEnabled => "Đã bật tự gửi bằng giọng nói.",
|
||||
MessageId::VoiceSendDisabled => "Đã tắt tự gửi bằng giọng nói.",
|
||||
MessageId::VoiceControlEnabled => "Đã bật điều khiển giọng nói.",
|
||||
MessageId::VoiceControlDisabled => "Đã tắt điều khiển giọng nói.",
|
||||
MessageId::VoiceErrNoAuth => "Giọng nói: nhà cung cấp hiện tại chưa có khóa API",
|
||||
MessageId::VoiceErrNoRecorder => {
|
||||
"Giọng nói: không tìm thấy công cụ ghi âm. Hãy cài sox, arecord hoặc rec."
|
||||
}
|
||||
MessageId::VoiceErrNetwork => "Giọng nói: yêu cầu chuyển giọng nói thất bại",
|
||||
MessageId::VoiceErrEmptySend => "Giọng nói: không có nội dung để gửi",
|
||||
MessageId::VoiceErrTooShort => "Giọng nói: không phát hiện giọng nói, bản ghi quá ngắn",
|
||||
MessageId::VoiceRecording => "🎙 Đang ghi âm... hãy nói",
|
||||
MessageId::VoiceProcessing => "🎙 Đang chuyển thành văn bản...",
|
||||
MessageId::VoiceTranscribed => "🎙 Đã chuyển xong",
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2530,6 +2617,28 @@ fn traditional_chinese(id: MessageId) -> Option<&'static str> {
|
||||
MessageId::ToolFamilyVerify => "驗證",
|
||||
MessageId::ToolFamilyThink => "思考",
|
||||
MessageId::ToolFamilyGeneric => "工具",
|
||||
// Voice commands
|
||||
MessageId::CmdVoiceDescription => "切換語音輸入:錄製語音並轉錄為文字",
|
||||
MessageId::CmdVoiceSendDescription => {
|
||||
"切換語音自動傳送:轉錄以「發送」或「send it」結尾時自動提交"
|
||||
}
|
||||
MessageId::CmdVoiceControlDescription => {
|
||||
"切換語音控制:AI 輔助的語音聽寫(結合當前輸入內容)"
|
||||
}
|
||||
MessageId::VoiceEnabled => "語音輸入已開啟,開始說話即可錄製",
|
||||
MessageId::VoiceDisabled => "語音輸入已關閉",
|
||||
MessageId::VoiceSendEnabled => "語音自動傳送已開啟",
|
||||
MessageId::VoiceSendDisabled => "語音自動傳送已關閉",
|
||||
MessageId::VoiceControlEnabled => "語音控制已開啟",
|
||||
MessageId::VoiceControlDisabled => "語音控制已關閉",
|
||||
MessageId::VoiceErrNoAuth => "語音:目前供應商未設定 API 金鑰",
|
||||
MessageId::VoiceErrNoRecorder => "語音:未找到錄音工具,請安裝 sox、arecord 或 rec",
|
||||
MessageId::VoiceErrNetwork => "語音:轉錄請求失敗",
|
||||
MessageId::VoiceErrEmptySend => "語音:沒有可傳送的內容",
|
||||
MessageId::VoiceErrTooShort => "語音:未偵測到有效語音,錄製時間過短",
|
||||
MessageId::VoiceRecording => "🎙 正在錄音...請說話",
|
||||
MessageId::VoiceProcessing => "🎙 正在轉錄...",
|
||||
MessageId::VoiceTranscribed => "🎙 轉錄完成",
|
||||
other => chinese_simplified(other)?,
|
||||
})
|
||||
}
|
||||
@@ -3090,6 +3199,32 @@ fn japanese(id: MessageId) -> Option<&'static str> {
|
||||
MessageId::ToolFamilyVerify => "検証",
|
||||
MessageId::ToolFamilyThink => "思考",
|
||||
MessageId::ToolFamilyGeneric => "ツール",
|
||||
// Voice commands
|
||||
MessageId::CmdVoiceDescription => "音声入力の切替:音声を録音してテキストに変換",
|
||||
MessageId::CmdVoiceSendDescription => {
|
||||
"音声自動送信の切替:転写が「send it」で終わると自動送信"
|
||||
}
|
||||
MessageId::CmdVoiceControlDescription => {
|
||||
"音声コントロールの切替:入力欄を考慮した AI 音声ディクテーション"
|
||||
}
|
||||
MessageId::VoiceEnabled => "音声入力を有効にしました。話すと録音されます。",
|
||||
MessageId::VoiceDisabled => "音声入力を無効にしました。",
|
||||
MessageId::VoiceSendEnabled => "音声自動送信を有効にしました。",
|
||||
MessageId::VoiceSendDisabled => "音声自動送信を無効にしました。",
|
||||
MessageId::VoiceControlEnabled => "音声コントロールを有効にしました。",
|
||||
MessageId::VoiceControlDisabled => "音声コントロールを無効にしました。",
|
||||
MessageId::VoiceErrNoAuth => {
|
||||
"音声:アクティブなプロバイダーに API キーが設定されていません"
|
||||
}
|
||||
MessageId::VoiceErrNoRecorder => {
|
||||
"音声:録音ツールが見つかりません。sox、arecord、rec のいずれかをインストールしてください"
|
||||
}
|
||||
MessageId::VoiceErrNetwork => "音声:文字起こしリクエストに失敗しました",
|
||||
MessageId::VoiceErrEmptySend => "音声:送信する内容がありません",
|
||||
MessageId::VoiceErrTooShort => "音声:音声が検出されませんでした。録音が短すぎます",
|
||||
MessageId::VoiceRecording => "🎙 録音中...お話しください",
|
||||
MessageId::VoiceProcessing => "🎙 文字起こし中...",
|
||||
MessageId::VoiceTranscribed => "🎙 文字起こし完了",
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3585,6 +3720,28 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> {
|
||||
MessageId::ToolFamilyVerify => "验证",
|
||||
MessageId::ToolFamilyThink => "思考",
|
||||
MessageId::ToolFamilyGeneric => "工具",
|
||||
// Voice commands
|
||||
MessageId::CmdVoiceDescription => "切换语音输入:录制语音并转录为文字",
|
||||
MessageId::CmdVoiceSendDescription => {
|
||||
"切换语音自动发送:转录以「发送」或「send it」结尾时自动提交"
|
||||
}
|
||||
MessageId::CmdVoiceControlDescription => {
|
||||
"切换语音控制:AI 辅助的语音听写(结合当前输入内容)"
|
||||
}
|
||||
MessageId::VoiceEnabled => "语音输入已开启,开始说话即可录制",
|
||||
MessageId::VoiceDisabled => "语音输入已关闭",
|
||||
MessageId::VoiceSendEnabled => "语音自动发送已开启",
|
||||
MessageId::VoiceSendDisabled => "语音自动发送已关闭",
|
||||
MessageId::VoiceControlEnabled => "语音控制已开启",
|
||||
MessageId::VoiceControlDisabled => "语音控制已关闭",
|
||||
MessageId::VoiceErrNoAuth => "语音:当前提供商未配置 API 密钥",
|
||||
MessageId::VoiceErrNoRecorder => "语音:未找到录音工具,请安装 sox、arecord 或 rec",
|
||||
MessageId::VoiceErrNetwork => "语音:转录请求失败",
|
||||
MessageId::VoiceErrEmptySend => "语音:没有可发送的内容",
|
||||
MessageId::VoiceErrTooShort => "语音:未检测到有效语音,录制时间过短",
|
||||
MessageId::VoiceRecording => "🎙 正在录音...请说话",
|
||||
MessageId::VoiceProcessing => "🎙 正在转录...",
|
||||
MessageId::VoiceTranscribed => "🎙 转录完成",
|
||||
})
|
||||
}
|
||||
|
||||
@@ -4170,6 +4327,32 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> {
|
||||
MessageId::ToolFamilyVerify => "verificar",
|
||||
MessageId::ToolFamilyThink => "pensar",
|
||||
MessageId::ToolFamilyGeneric => "ferramenta",
|
||||
// Voice commands
|
||||
MessageId::CmdVoiceDescription => {
|
||||
"Alternar entrada de voz: gravar fala e transcrever para texto"
|
||||
}
|
||||
MessageId::CmdVoiceSendDescription => {
|
||||
"Alternar envio automático por voz: envia quando a transcrição termina com \"send it\""
|
||||
}
|
||||
MessageId::CmdVoiceControlDescription => {
|
||||
"Alternar controle por voz: ditado assistido por IA"
|
||||
}
|
||||
MessageId::VoiceEnabled => "Entrada de voz ativada. Fale para gravar.",
|
||||
MessageId::VoiceDisabled => "Entrada de voz desativada.",
|
||||
MessageId::VoiceSendEnabled => "Envio automático por voz ativado.",
|
||||
MessageId::VoiceSendDisabled => "Envio automático por voz desativado.",
|
||||
MessageId::VoiceControlEnabled => "Controle por voz ativado.",
|
||||
MessageId::VoiceControlDisabled => "Controle por voz desativado.",
|
||||
MessageId::VoiceErrNoAuth => "Voz: nenhuma chave de API configurada para o provedor ativo",
|
||||
MessageId::VoiceErrNoRecorder => {
|
||||
"Voz: nenhuma ferramenta de gravação encontrada. Instale sox, arecord ou rec."
|
||||
}
|
||||
MessageId::VoiceErrNetwork => "Voz: falha na solicitação de transcrição",
|
||||
MessageId::VoiceErrEmptySend => "Voz: nada para enviar",
|
||||
MessageId::VoiceErrTooShort => "Voz: nenhuma fala detectada, gravação muito curta",
|
||||
MessageId::VoiceRecording => "🎙 Gravando... fale agora",
|
||||
MessageId::VoiceProcessing => "🎙 Transcrevendo...",
|
||||
MessageId::VoiceTranscribed => "🎙 Transcrito",
|
||||
})
|
||||
}
|
||||
|
||||
@@ -4765,6 +4948,34 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> {
|
||||
MessageId::ToolFamilyVerify => "verificar",
|
||||
MessageId::ToolFamilyThink => "pensar",
|
||||
MessageId::ToolFamilyGeneric => "herramienta",
|
||||
// Voice commands
|
||||
MessageId::CmdVoiceDescription => {
|
||||
"Alternar entrada de voz: grabar voz y transcribir a texto"
|
||||
}
|
||||
MessageId::CmdVoiceSendDescription => {
|
||||
"Alternar envío automático por voz: envía cuando la transcripción termina con \"send it\""
|
||||
}
|
||||
MessageId::CmdVoiceControlDescription => {
|
||||
"Alternar control por voz: dictado asistido por IA"
|
||||
}
|
||||
MessageId::VoiceEnabled => "Entrada de voz activada. Habla para grabar.",
|
||||
MessageId::VoiceDisabled => "Entrada de voz desactivada.",
|
||||
MessageId::VoiceSendEnabled => "Envío automático por voz activado.",
|
||||
MessageId::VoiceSendDisabled => "Envío automático por voz desactivado.",
|
||||
MessageId::VoiceControlEnabled => "Control por voz activado.",
|
||||
MessageId::VoiceControlDisabled => "Control por voz desactivado.",
|
||||
MessageId::VoiceErrNoAuth => {
|
||||
"Voz: no hay clave de API configurada para el proveedor activo"
|
||||
}
|
||||
MessageId::VoiceErrNoRecorder => {
|
||||
"Voz: no se encontró herramienta de grabación. Instala sox, arecord o rec."
|
||||
}
|
||||
MessageId::VoiceErrNetwork => "Voz: falló la solicitud de transcripción",
|
||||
MessageId::VoiceErrEmptySend => "Voz: nada que enviar",
|
||||
MessageId::VoiceErrTooShort => "Voz: no se detectó voz, grabación demasiado corta",
|
||||
MessageId::VoiceRecording => "🎙 Grabando... habla ahora",
|
||||
MessageId::VoiceProcessing => "🎙 Transcribiendo...",
|
||||
MessageId::VoiceTranscribed => "🎙 Transcrito",
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -1481,6 +1481,14 @@ pub struct App {
|
||||
pub cost_currency: CostCurrency,
|
||||
pub composer_density: ComposerDensity,
|
||||
pub composer_border: bool,
|
||||
/// Voice input state — toggled by `/voice` and the voice hotbar action.
|
||||
pub voice_enabled: bool,
|
||||
/// Auto-send after transcription when the transcript ends with an
|
||||
/// explicit send instruction ("send it" / "发送"). Toggled by `/voice-send`.
|
||||
pub voice_send_enabled: bool,
|
||||
/// AI-assisted dictation that sees the current composer text.
|
||||
/// Toggled by `/voice-control`.
|
||||
pub voice_control_enabled: bool,
|
||||
pub transcript_spacing: TranscriptSpacing,
|
||||
pub sidebar_width_percent: u16,
|
||||
pub sidebar_focus: SidebarFocus,
|
||||
@@ -2275,6 +2283,9 @@ impl App {
|
||||
cost_currency,
|
||||
composer_density,
|
||||
composer_border,
|
||||
voice_enabled: false,
|
||||
voice_send_enabled: false,
|
||||
voice_control_enabled: false,
|
||||
transcript_spacing,
|
||||
sidebar_width_percent,
|
||||
sidebar_focus,
|
||||
@@ -5323,6 +5334,11 @@ pub enum AppAction {
|
||||
SwitchWorkspace {
|
||||
workspace: PathBuf,
|
||||
},
|
||||
/// Record from the microphone and route the transcription into the
|
||||
/// composer (or auto-send it). Emitted by `/voice` and the voice hotbar
|
||||
/// action; handled in the UI event loop where the live `Config` supplies
|
||||
/// provider credentials.
|
||||
VoiceCapture,
|
||||
/// Export and share the current session as a web URL.
|
||||
ShareSession {
|
||||
history_len: usize,
|
||||
|
||||
@@ -1141,7 +1141,7 @@ mod tests {
|
||||
|
||||
assert!(command_labels.contains(&"/config"));
|
||||
assert!(command_labels.contains(&"/links"));
|
||||
assert!(!command_labels.contains(&"/voice"));
|
||||
assert!(command_labels.contains(&"/voice"));
|
||||
assert!(!command_labels.contains(&"/set"));
|
||||
assert!(!command_labels.contains(&"/deepseek"));
|
||||
}
|
||||
|
||||
@@ -181,7 +181,7 @@ impl HotbarAction for AppHotbarAction {
|
||||
|
||||
fn is_active(&self, app: &App) -> bool {
|
||||
match self.kind {
|
||||
AppHotbarKind::VoiceToggle => false,
|
||||
AppHotbarKind::VoiceToggle => app.voice_enabled,
|
||||
AppHotbarKind::SessionCompact => app.is_compacting,
|
||||
AppHotbarKind::Mode(mode) => app.mode == mode,
|
||||
AppHotbarKind::ReasoningCycle => {
|
||||
@@ -197,9 +197,12 @@ impl HotbarAction for AppHotbarAction {
|
||||
fn dispatch(&self, app: &mut App) -> Result<HotbarDispatch> {
|
||||
match self.kind {
|
||||
AppHotbarKind::VoiceToggle => {
|
||||
app.status_message =
|
||||
Some("Voice input is not available in this terminal session yet.".to_string());
|
||||
Ok(HotbarDispatch::Handled)
|
||||
let result = crate::commands::voice::voice(app);
|
||||
app.status_message = result.message;
|
||||
match result.action {
|
||||
Some(action) => Ok(HotbarDispatch::AppAction(action)),
|
||||
None => Ok(HotbarDispatch::Handled),
|
||||
}
|
||||
}
|
||||
AppHotbarKind::SessionCompact => {
|
||||
if app.is_compacting {
|
||||
@@ -539,19 +542,36 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn voice_toggle_is_safe_until_voice_input_lands() {
|
||||
fn voice_toggle_dispatches_the_voice_command() {
|
||||
let registry = HotbarActionRegistry::with_builtins();
|
||||
let voice = registry.get("voice.toggle").expect("voice action");
|
||||
let mut app = test_app();
|
||||
|
||||
assert!(!voice.is_active(&app));
|
||||
assert_eq!(
|
||||
voice.dispatch(&mut app).expect("dispatch voice"),
|
||||
HotbarDispatch::Handled
|
||||
);
|
||||
assert_eq!(
|
||||
// The toggle is wired to the /voice command. With a recorder on the
|
||||
// host it arms voice input and defers capture to the UI event loop;
|
||||
// without one it fails gracefully with a localized error. No audio
|
||||
// is recorded in either case.
|
||||
let result = voice.dispatch(&mut app).expect("dispatch voice");
|
||||
assert!(app.status_message.is_some());
|
||||
// The old placeholder message must be gone — voice is implemented.
|
||||
assert_ne!(
|
||||
app.status_message.as_deref(),
|
||||
Some("Voice input is not available in this terminal session yet.")
|
||||
);
|
||||
if app.voice_enabled {
|
||||
assert_eq!(
|
||||
result,
|
||||
HotbarDispatch::AppAction(crate::tui::app::AppAction::VoiceCapture)
|
||||
);
|
||||
assert!(voice.is_active(&app));
|
||||
// A second press toggles voice input back off.
|
||||
let off = voice.dispatch(&mut app).expect("dispatch voice off");
|
||||
assert_eq!(off, HotbarDispatch::Handled);
|
||||
assert!(!app.voice_enabled);
|
||||
assert!(!voice.is_active(&app));
|
||||
} else {
|
||||
assert_eq!(result, HotbarDispatch::Handled);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6431,6 +6431,28 @@ async fn apply_command_result(
|
||||
let queued = build_queued_message(app, content);
|
||||
submit_or_steer_message(app, config, engine_handle, queued).await?;
|
||||
}
|
||||
AppAction::VoiceCapture => {
|
||||
use commands::voice::VoiceCaptureOutcome;
|
||||
match commands::voice::capture_and_transcribe(app, config).await {
|
||||
Ok(VoiceCaptureOutcome::Insert(text)) => {
|
||||
app.insert_str(&text);
|
||||
app.status_message = Some(format!(
|
||||
"{}: {text}",
|
||||
tr(app.ui_locale, MessageId::VoiceTranscribed)
|
||||
));
|
||||
}
|
||||
Ok(VoiceCaptureOutcome::Send(content)) => {
|
||||
app.status_message =
|
||||
Some(tr(app.ui_locale, MessageId::VoiceTranscribed).to_string());
|
||||
let queued = build_queued_message(app, content);
|
||||
submit_or_steer_message(app, config, engine_handle, queued).await?;
|
||||
}
|
||||
Err(err) => {
|
||||
app.voice_enabled = false;
|
||||
app.status_message = Some(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
AppAction::ListSubAgents => {
|
||||
let _ = engine_handle.send(Op::ListSubAgents).await;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user