From 6ada07a61f6ef2351384e33cf71ea8e89fcc8e08 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Wed, 18 Jun 2025 09:23:31 -0300 Subject: [PATCH] refactor: remove conditional voice mode support from backend and frontend (#8597) * revert changes that made voice mode conditional to the presence of webrtcvad * refactor: lazy-load webrtcvad dependency * refactor: remove try-except block and directly import voice_mode_router * [autofix.ci] apply automated fixes * feat: add LRU caching to voice activity detector initialization --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- src/backend/base/langflow/api/router.py | 9 ++------- src/backend/base/langflow/api/utils.py | 8 -------- src/backend/base/langflow/api/v1/__init__.py | 9 ++------- src/backend/base/langflow/api/v1/endpoints.py | 3 +-- src/backend/base/langflow/api/v1/schemas.py | 1 - .../base/langflow/api/v1/voice_mode.py | 20 +++++++++++++++---- .../API/queries/config/use-get-config.ts | 6 +----- .../components/voice-button.tsx | 8 -------- src/frontend/src/stores/flowsManagerStore.ts | 2 -- .../src/types/zustand/flowsManager/index.ts | 2 -- 10 files changed, 22 insertions(+), 46 deletions(-) diff --git a/src/backend/base/langflow/api/router.py b/src/backend/base/langflow/api/router.py index c236f5ae5..df7b2aebe 100644 --- a/src/backend/base/langflow/api/router.py +++ b/src/backend/base/langflow/api/router.py @@ -19,6 +19,7 @@ from langflow.api.v1 import ( validate_router, variables_router, ) +from langflow.api.v1.voice_mode import router as voice_mode_router from langflow.api.v2 import files_router as files_router_v2 from langflow.api.v2 import mcp_router as mcp_router_v2 @@ -45,18 +46,12 @@ router_v1.include_router(folders_router) router_v1.include_router(projects_router) router_v1.include_router(starter_projects_router) router_v1.include_router(mcp_router) +router_v1.include_router(voice_mode_router) router_v1.include_router(mcp_projects_router) router_v2.include_router(files_router_v2) router_v2.include_router(mcp_router_v2) -try: - from langflow.api.v1.voice_mode import router as voice_mode_router - - router_v1.include_router(voice_mode_router) -except ImportError: - pass - router = APIRouter( prefix="/api", ) diff --git a/src/backend/base/langflow/api/utils.py b/src/backend/base/langflow/api/utils.py index 406f61257..d78edd6cd 100644 --- a/src/backend/base/langflow/api/utils.py +++ b/src/backend/base/langflow/api/utils.py @@ -378,11 +378,3 @@ async def verify_public_flow_and_get_user(flow_id: uuid.UUID, client_id: str | N raise HTTPException(status_code=403, detail=msg) return user, new_flow_id - - -def get_voice_mode_enabled() -> bool: - try: - import webrtcvad # noqa: F401 - except ImportError: - return False - return True diff --git a/src/backend/base/langflow/api/v1/__init__.py b/src/backend/base/langflow/api/v1/__init__.py index 95e33c715..ad276df48 100644 --- a/src/backend/base/langflow/api/v1/__init__.py +++ b/src/backend/base/langflow/api/v1/__init__.py @@ -14,6 +14,7 @@ from langflow.api.v1.store import router as store_router from langflow.api.v1.users import router as users_router from langflow.api.v1.validate import router as validate_router from langflow.api.v1.variable import router as variables_router +from langflow.api.v1.voice_mode import router as voice_mode_router __all__ = [ "api_key_router", @@ -32,11 +33,5 @@ __all__ = [ "users_router", "validate_router", "variables_router", + "voice_mode_router", ] - -try: - from langflow.api.v1.voice_mode import router as voice_mode_router - - __all__ += ["voice_mode_router"] -except ImportError: - pass diff --git a/src/backend/base/langflow/api/v1/endpoints.py b/src/backend/base/langflow/api/v1/endpoints.py index 011a78556..3e8585148 100644 --- a/src/backend/base/langflow/api/v1/endpoints.py +++ b/src/backend/base/langflow/api/v1/endpoints.py @@ -14,7 +14,7 @@ from fastapi.responses import StreamingResponse from loguru import logger from sqlmodel import select -from langflow.api.utils import CurrentActiveUser, DbSession, get_voice_mode_enabled, parse_value +from langflow.api.utils import CurrentActiveUser, DbSession, parse_value from langflow.api.v1.schemas import ( ConfigResponse, CustomComponentRequest, @@ -752,7 +752,6 @@ async def get_config(): return { "feature_flags": FEATURE_FLAGS, **settings_service.settings.model_dump(), - "voice_mode_enabled": get_voice_mode_enabled(), } except Exception as exc: raise HTTPException(status_code=500, detail=str(exc)) from exc diff --git a/src/backend/base/langflow/api/v1/schemas.py b/src/backend/base/langflow/api/v1/schemas.py index d828d7feb..1bef87e9d 100644 --- a/src/backend/base/langflow/api/v1/schemas.py +++ b/src/backend/base/langflow/api/v1/schemas.py @@ -388,7 +388,6 @@ class ConfigResponse(BaseModel): public_flow_cleanup_interval: int public_flow_expiration: int event_delivery: Literal["polling", "streaming", "direct"] - voice_mode_enabled: bool class CancelFlowResponse(BaseModel): diff --git a/src/backend/base/langflow/api/v1/voice_mode.py b/src/backend/base/langflow/api/v1/voice_mode.py index 262c1237c..429b94800 100644 --- a/src/backend/base/langflow/api/v1/voice_mode.py +++ b/src/backend/base/langflow/api/v1/voice_mode.py @@ -7,14 +7,13 @@ import traceback import uuid from collections import defaultdict from datetime import datetime, timezone -from functools import partial +from functools import lru_cache, partial from typing import Any from uuid import UUID, uuid4 import numpy as np import requests import sqlalchemy -import webrtcvad import websockets from cryptography.fernet import InvalidToken from elevenlabs import ElevenLabs @@ -83,6 +82,13 @@ CLIENT_TO_LF = "Client → LF" # --- Helper Functions --- +@lru_cache(maxsize=1) +def get_vad(): + import webrtcvad + + return webrtcvad.Vad(mode=3) + + async def authenticate_and_get_openai_key(session: DbSession, user: User, websocket: WebSocket): """Authenticate the user using a token or API key and retrieve the OpenAI API key. @@ -129,7 +135,7 @@ class VoiceConfig: self.elevenlabs_model = "eleven_multilingual_v2" self.elevenlabs_client = None self.elevenlabs_key = None - self.barge_in_enabled = False + self._barge_in_enabled = False self.progress_enabled = True self.default_openai_realtime_session = { @@ -151,6 +157,12 @@ class VoiceConfig: } self.openai_realtime_session: dict[str, Any] = {} + @property + def barge_in_enabled(self): + # Later on we may want to tie this value + # to the availability of the webrtcvad package. + return self._barge_in_enabled + def get_session_dict(self): return dict(self.default_openai_realtime_session) @@ -765,11 +777,11 @@ async def flow_as_tool_websocket( vad_queue: asyncio.Queue = asyncio.Queue() vad_audio_buffer = bytearray() bot_speaking_flag = [False] - vad = webrtcvad.Vad(mode=3) async def process_vad_audio() -> None: nonlocal vad_audio_buffer last_speech_time = datetime.now(tz=timezone.utc) + vad = get_vad() while True: base64_data = await vad_queue.get() raw_chunk_24k = base64.b64decode(base64_data) diff --git a/src/frontend/src/controllers/API/queries/config/use-get-config.ts b/src/frontend/src/controllers/API/queries/config/use-get-config.ts index dfd6eaa66..53c66064f 100644 --- a/src/frontend/src/controllers/API/queries/config/use-get-config.ts +++ b/src/frontend/src/controllers/API/queries/config/use-get-config.ts @@ -21,7 +21,6 @@ export interface ConfigResponse { webhook_polling_interval: number; serialization_max_items_length: number; event_delivery: EventDeliveryType; - voice_mode_enabled: boolean; } export const useGetConfig: useQueryFunctionType = ( @@ -45,9 +44,7 @@ export const useGetConfig: useQueryFunctionType = ( (state) => state.setWebhookPollingInterval, ); const setEventDelivery = useUtilityStore((state) => state.setEventDelivery); - const setVoiceModeEnabled = useFlowsManagerStore( - (state) => state.setVoiceModeEnabled, - ); + const { query } = UseRequestProcessor(); const getConfigFn = async () => { @@ -69,7 +66,6 @@ export const useGetConfig: useQueryFunctionType = ( data.webhook_polling_interval ?? DEFAULT_POLLING_INTERVAL, ); setEventDelivery(data.event_delivery ?? EventDeliveryType.POLLING); - setVoiceModeEnabled(Boolean(data.voice_mode_enabled)); } return data; }; diff --git a/src/frontend/src/modals/IOModal/components/chatView/chatInput/components/voice-assistant/components/voice-button.tsx b/src/frontend/src/modals/IOModal/components/chatView/chatInput/components/voice-assistant/components/voice-button.tsx index 561bc7606..be7383b4c 100644 --- a/src/frontend/src/modals/IOModal/components/chatView/chatInput/components/voice-assistant/components/voice-button.tsx +++ b/src/frontend/src/modals/IOModal/components/chatView/chatInput/components/voice-assistant/components/voice-button.tsx @@ -1,7 +1,6 @@ import ForwardedIconComponent from "@/components/common/genericIconComponent"; import { Button } from "@/components/ui/button"; import { ICON_STROKE_WIDTH } from "@/constants/constants"; -import useFlowsManagerStore from "@/stores/flowsManagerStore"; import { useVoiceStore } from "@/stores/voiceStore"; interface VoiceButtonProps { @@ -12,13 +11,6 @@ const VoiceButton = ({ toggleRecording }: VoiceButtonProps) => { const setNewSessionCloseVoiceAssistant = useVoiceStore( (state) => state.setNewSessionCloseVoiceAssistant, ); - const voiceModeEnabled = useFlowsManagerStore( - (state) => state.voiceModeEnabled, - ); - - if (!voiceModeEnabled) { - return null; - } return ( <> diff --git a/src/frontend/src/stores/flowsManagerStore.ts b/src/frontend/src/stores/flowsManagerStore.ts index 2b6309f46..4336080c3 100644 --- a/src/frontend/src/stores/flowsManagerStore.ts +++ b/src/frontend/src/stores/flowsManagerStore.ts @@ -17,8 +17,6 @@ const past = {}; const future = {}; const useFlowsManagerStore = create((set, get) => ({ - voiceModeEnabled: false, - setVoiceModeEnabled: (voiceModeEnabled: boolean) => set({ voiceModeEnabled }), IOModalOpen: false, setIOModalOpen: (IOModalOpen: boolean) => { set({ IOModalOpen }); diff --git a/src/frontend/src/types/zustand/flowsManager/index.ts b/src/frontend/src/types/zustand/flowsManager/index.ts index e236944a7..5a15565f4 100644 --- a/src/frontend/src/types/zustand/flowsManager/index.ts +++ b/src/frontend/src/types/zustand/flowsManager/index.ts @@ -29,8 +29,6 @@ export type FlowsManagerStoreType = { IOModalOpen: boolean; setIOModalOpen: (IOModalOpen: boolean) => void; resetStore: () => void; - voiceModeEnabled: boolean; - setVoiceModeEnabled: (voiceModeEnabled: boolean) => void; }; export type UseUndoRedoOptions = {