Skip to content

Commit

Permalink
ops: Add OTEL metric for STT latency
Browse files Browse the repository at this point in the history
  • Loading branch information
clemlesne committed Dec 14, 2024
1 parent beb893e commit a2b484c
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
22 changes: 20 additions & 2 deletions app/helpers/call_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
SpanAttributeEnum,
call_answer_latency,
call_cutoff_latency,
call_stt_complete_latency,
gauge_set,
tracer,
)
Expand Down Expand Up @@ -233,6 +234,17 @@ async def _commit_answer(
if wait:
await last_chat

async def _compute_stt_metrics() -> None:
    """
    Measure how long the complete recognition takes to arrive, then publish it.

    The clock starts when this coroutine begins executing and stops as soon as `stt_complete_gate.wait()` returns. The elapsed time, in seconds, is recorded on the `call_stt_complete_latency` gauge.
    """
    # Monotonic clock, so the measurement is not affected by wall-clock adjustments
    began_at = time.monotonic()

    # Suspend until the gate opens (presumably set once the recognition is complete; confirm against the code that sets it)
    await stt_complete_gate.wait()

    # Publish the time spent waiting
    elapsed = time.monotonic() - began_at
    gauge_set(metric=call_stt_complete_latency, value=elapsed)

async def _response_callback(_retry: bool = False) -> None:
"""
Triggered when the audio buffer needs to be processed.
Expand All @@ -243,6 +255,9 @@ async def _response_callback(_retry: bool = False) -> None:
nonlocal answer_start
answer_start = time.monotonic()

# Report the STT metrics
stt_metrics_task = asyncio.create_task(_compute_stt_metrics())

# Wait for the complete recognition, for 50ms maximum
try:
await asyncio.wait_for(stt_complete_gate.wait(), timeout=0.05)
Expand Down Expand Up @@ -277,8 +292,11 @@ async def _response_callback(_retry: bool = False) -> None:
)
)

# Process the response
await _commit_answer(wait=True)
# Process the response and wait for latency metrics
await asyncio.gather(
_commit_answer(wait=False),
stt_metrics_task,
)

# First call
if len(call.messages) <= 1:
Expand Down
3 changes: 3 additions & 0 deletions app/helpers/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class SpanMeterEnum(str, Enum):
"""Audio frames in latency in seconds."""
CALL_FRAMES_OUT_LATENCY = "call.frames.out.latency"
"""Audio frames out latency in seconds."""
CALL_STT_COMPLETE_LATENCY = "call.stt.complete.latency"
"""Speech-to-text missed complete latency in seconds."""

def counter(
self,
Expand Down Expand Up @@ -127,6 +129,7 @@ def gauge(
call_cutoff_latency = SpanMeterEnum.CALL_CUTOFF_LATENCY.gauge("s")
call_frames_in_latency = SpanMeterEnum.CALL_FRAMES_IN_LATENCY.gauge("s")
call_frames_out_latency = SpanMeterEnum.CALL_FRAMES_OUT_LATENCY.gauge("s")
call_stt_complete_latency = SpanMeterEnum.CALL_STT_COMPLETE_LATENCY.gauge("s")


def gauge_set(
Expand Down

0 comments on commit a2b484c

Please sign in to comment.