From 8c0f314a1ea2c8584d86ed55ba6d6b25aa1c57d6 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 08:27:54 -0500 Subject: [PATCH 01/20] PYTHON-5846 Consolidate CMAP, heartbeat, and SDAM telemetry into _telemetry.py Add _CmapTelemetry, _HeartbeatTelemetry, and _SdamTelemetry classes to eliminate the repetitive if-enabled_for_cmap / if-logger.isEnabledFor boilerplate spread across pool.py, monitor.py, topology.py, and server.py. --- pymongo/_telemetry.py | 340 ++++++++++++++++++++++++++++++- pymongo/asynchronous/monitor.py | 67 ++---- pymongo/asynchronous/pool.py | 285 ++++---------------------- pymongo/asynchronous/server.py | 26 +-- pymongo/asynchronous/topology.py | 120 ++--------- pymongo/synchronous/monitor.py | 67 ++---- pymongo/synchronous/pool.py | 285 ++++---------------------- pymongo/synchronous/server.py | 26 +-- pymongo/synchronous/topology.py | 120 ++--------- 9 files changed, 466 insertions(+), 870 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 471a013eb9..c796041487 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -18,16 +18,26 @@ import datetime import logging +import queue from collections.abc import MutableMapping from typing import TYPE_CHECKING, Any, Optional -from pymongo.logger import _COMMAND_LOGGER, _CommandStatusMessage, _debug_log +from pymongo.logger import ( + _COMMAND_LOGGER, + _CONNECTION_LOGGER, + _SDAM_LOGGER, + _CommandStatusMessage, + _ConnectionStatusMessage, + _debug_log, + _SDAMStatusMessage, + _verbose_connection_error_reason, +) from pymongo.pool_shared import _ConnectionTelemetryInfo if TYPE_CHECKING: from bson.objectid import ObjectId from pymongo.monitoring import _EventListeners - from pymongo.typings import _DocumentOut + from pymongo.typings import _Address, _DocumentOut class _CommandTelemetry: @@ -183,3 +193,329 @@ def failed( service_id=self._conn.service_id, database_name=self._dbname, ) + + +class _CmapTelemetry: + """Combines CMAP structured logging and APM event publishing for pool and connection events.""" + + __slots__ = ("_address", "_client_id", "_listeners", "_should_log", "_should_publish") + + def __init__( + self, + client_id: Optional[ObjectId], + address: _Address, + listeners: Optional[_EventListeners], + is_sdam: bool, + ) -> None: + self._client_id = client_id + self._address = address + self._listeners = listeners + self._should_publish = not is_sdam and listeners is not None and listeners.enabled_for_cmap + self._should_log = not is_sdam + + def _log(self, message: _ConnectionStatusMessage, **extra: Any) -> None: + if self._should_log and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _CONNECTION_LOGGER, + message=message, + clientId=self._client_id, + serverHost=self._address[0], + serverPort=self._address[1], + **extra, + ) + + def pool_created(self, non_default_options: dict[str, Any]) -> None: + # Log before publishing to prevent potential listener preemption in tests. + self._log(_ConnectionStatusMessage.POOL_CREATED, **non_default_options) + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_pool_created(self._address, non_default_options) + + def pool_ready(self) -> None: + # Log before publishing to prevent potential listener preemption in tests. + self._log(_ConnectionStatusMessage.POOL_READY) + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_pool_ready(self._address) + + def pool_cleared(self, service_id: Optional[ObjectId], interrupt_connections: bool) -> None: + # Log before publishing to prevent potential listener preemption in tests. + self._log(_ConnectionStatusMessage.POOL_CLEARED, serviceId=service_id) + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_pool_cleared( + self._address, + service_id=service_id, + interrupt_connections=interrupt_connections, + ) + + def pool_closed(self) -> None: + # Log before publishing to prevent potential listener preemption in tests. + self._log(_ConnectionStatusMessage.POOL_CLOSED) + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_pool_closed(self._address) + + def connection_created(self, conn_id: int) -> None: + # Log before publishing to prevent potential listener preemption in tests. + self._log(_ConnectionStatusMessage.CONN_CREATED, driverConnectionId=conn_id) + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_connection_created(self._address, conn_id) + + def connection_ready(self, conn_id: int, duration: float) -> None: + # Log before publishing to prevent potential listener preemption in tests. + self._log( + _ConnectionStatusMessage.CONN_READY, + driverConnectionId=conn_id, + durationMS=duration, + ) + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_connection_ready(self._address, conn_id, duration) + + def connection_closed(self, conn_id: int, reason: str) -> None: + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_connection_closed(self._address, conn_id, reason) + self._log( + _ConnectionStatusMessage.CONN_CLOSED, + driverConnectionId=conn_id, + reason=_verbose_connection_error_reason(reason), + error=reason, + ) + + def checkout_started(self) -> None: + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_connection_check_out_started(self._address) + self._log(_ConnectionStatusMessage.CHECKOUT_STARTED) + + def checkout_succeeded(self, conn_id: int, duration: float) -> None: + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_connection_checked_out(self._address, conn_id, duration) + self._log( + _ConnectionStatusMessage.CHECKOUT_SUCCEEDED, + driverConnectionId=conn_id, + durationMS=duration, + ) + + def checkout_failed(self, reason: str, error: str, duration: float) -> None: + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_connection_check_out_failed(self._address, error, duration) + self._log( + _ConnectionStatusMessage.CHECKOUT_FAILED, + reason=reason, + error=error, + durationMS=duration, + ) + + def checked_in(self, conn_id: int) -> None: + if self._should_publish: + assert self._listeners is not None + self._listeners.publish_connection_checked_in(self._address, conn_id) + self._log(_ConnectionStatusMessage.CHECKEDIN, driverConnectionId=conn_id) + + +class _HeartbeatTelemetry: + """Combines SDAM structured logging and APM event publishing for server heartbeats. + + The APM started event is published before connection checkout (no conn_id yet); + the log entry for started is emitted after checkout once the conn_id is known. + Call :meth:`apm_started` first, then :meth:`log_started` inside the checkout + context, then :meth:`succeeded` or :meth:`failed` when the outcome is known. + """ + + __slots__ = ("_address", "_awaited", "_listeners", "_publish", "_topology_id") + + def __init__( + self, + topology_id: ObjectId, + address: _Address, + listeners: Optional[_EventListeners], + publish: bool, + awaited: bool, + ) -> None: + self._topology_id = topology_id + self._address = address + self._listeners = listeners + self._publish = publish + self._awaited = awaited + + def apm_started(self) -> None: + """Publish the APM heartbeat-started event (before connection checkout).""" + if self._publish: + assert self._listeners is not None + self._listeners.publish_server_heartbeat_started(self._address, self._awaited) + + def log_started(self, conn_id: int, server_conn_id: Optional[int]) -> None: + """Emit the log entry for heartbeat started (after connection checkout).""" + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.HEARTBEAT_START, + topologyId=self._topology_id, + driverConnectionId=conn_id, + serverConnectionId=server_conn_id, + serverHost=self._address[0], + serverPort=self._address[1], + awaited=self._awaited, + ) + + def succeeded( + self, + round_trip_time: float, + response: Any, + conn_id: int, + server_conn_id: Optional[int], + ) -> None: + """Emit the SUCCEEDED log entry and APM event.""" + if self._publish: + assert self._listeners is not None + self._listeners.publish_server_heartbeat_succeeded( + self._address, round_trip_time, response, self._awaited + ) + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.HEARTBEAT_SUCCESS, + topologyId=self._topology_id, + driverConnectionId=conn_id, + serverConnectionId=server_conn_id, + serverHost=self._address[0], + serverPort=self._address[1], + awaited=self._awaited, + durationMS=round_trip_time * 1000, + reply=response.document, + ) + + def failed(self, duration: float, error: Exception, conn_id: Optional[int]) -> None: + """Emit the FAILED log entry and APM event.""" + if self._publish: + assert self._listeners is not None + self._listeners.publish_server_heartbeat_failed( + self._address, duration, error, self._awaited + ) + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.HEARTBEAT_FAIL, + topologyId=self._topology_id, + serverHost=self._address[0], + serverPort=self._address[1], + awaited=self._awaited, + durationMS=duration * 1000, + failure=error, + driverConnectionId=conn_id, + ) + + +class _SdamTelemetry: + """Combines SDAM structured logging and APM event publishing for topology and server events. + + Topology events are queued for asynchronous delivery; log entries are emitted inline. + """ + + __slots__ = ("_events", "_listeners", "_publish_server", "_publish_tp", "_topology_id") + + def __init__( + self, + topology_id: ObjectId, + listeners: Optional[_EventListeners], + events: Optional[queue.Queue[Any]], + ) -> None: + self._topology_id = topology_id + self._listeners = listeners + self._events = events + self._publish_server = listeners is not None and listeners.enabled_for_server + self._publish_tp = listeners is not None and listeners.enabled_for_topology + + def _enqueue(self, fn: Any, args: tuple[Any, ...]) -> None: + if self._events is not None: + self._events.put((fn, args)) + + def topology_opened(self) -> None: + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.START_TOPOLOGY, + topologyId=self._topology_id, + ) + if self._publish_tp: + assert self._listeners is not None + self._enqueue(self._listeners.publish_topology_opened, (self._topology_id,)) + + def topology_description_changed(self, old_td: Any, new_td: Any) -> None: + if self._publish_tp: + assert self._listeners is not None + self._enqueue( + self._listeners.publish_topology_description_changed, + (old_td, new_td, self._topology_id), + ) + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.TOPOLOGY_CHANGE, + topologyId=self._topology_id, + previousDescription=repr(old_td), + newDescription=repr(new_td), + ) + + def topology_closed(self, old_td: Any, new_td: Any) -> None: + """Emit APM and log events for topology description change + topology closed.""" + if self._publish_tp: + assert self._listeners is not None + self._enqueue( + self._listeners.publish_topology_description_changed, + (old_td, new_td, self._topology_id), + ) + self._enqueue(self._listeners.publish_topology_closed, (self._topology_id,)) + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.TOPOLOGY_CHANGE, + topologyId=self._topology_id, + previousDescription=repr(old_td), + newDescription=repr(new_td), + ) + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.STOP_TOPOLOGY, + topologyId=self._topology_id, + ) + + def server_opened(self, address: _Address) -> None: + if self._publish_server: + assert self._listeners is not None + self._enqueue(self._listeners.publish_server_opened, (address, self._topology_id)) + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.START_SERVER, + topologyId=self._topology_id, + serverHost=address[0], + serverPort=address[1], + ) + + def server_description_changed(self, sd_old: Any, sd_new: Any, address: _Address) -> None: + if self._publish_server: + assert self._listeners is not None + self._enqueue( + self._listeners.publish_server_description_changed, + (sd_old, sd_new, address, self._topology_id), + ) + + def server_closed(self, address: _Address) -> None: + if self._publish_server: + assert self._listeners is not None + self._enqueue(self._listeners.publish_server_closed, (address, self._topology_id)) + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=_SDAMStatusMessage.STOP_SERVER, + topologyId=self._topology_id, + serverHost=address[0], + serverPort=address[1], + ) diff --git a/pymongo/asynchronous/monitor.py b/pymongo/asynchronous/monitor.py index 45c12b219f..f6b636a7ae 100644 --- a/pymongo/asynchronous/monitor.py +++ b/pymongo/asynchronous/monitor.py @@ -18,18 +18,18 @@ import asyncio import atexit -import logging import time import weakref from typing import TYPE_CHECKING, Any, Optional from pymongo import common, periodic_executor from pymongo._csot import MovingMinimum +from pymongo._telemetry import _HeartbeatTelemetry from pymongo.asynchronous.srv_resolver import _SrvResolver from pymongo.errors import NetworkTimeout, _OperationCancelled from pymongo.hello import Hello from pymongo.lock import _async_create_lock -from pymongo.logger import _SDAM_LOGGER, _debug_log, _SDAMStatusMessage +from pymongo.logger import _SDAM_LOGGER, _debug_log from pymongo.periodic_executor import _shutdown_executors from pymongo.pool_options import _is_faas from pymongo.read_preferences import MovingAverage @@ -154,6 +154,7 @@ def __init__( self._publish = self._listeners is not None and self._listeners.enabled_for_server_heartbeat self._cancel_context: Optional[_CancellationContext] = None self._conn_id: Optional[int] = None + self._current_hb: Optional[_HeartbeatTelemetry] = None self._rtt_monitor = _RttMonitor( topology, topology_settings, @@ -257,6 +258,7 @@ async def _check_server(self) -> ServerDescription: Returns a ServerDescription. """ self._conn_id = None + self._current_hb = None start = time.monotonic() try: return await self._check_once() @@ -264,25 +266,10 @@ async def _check_server(self) -> ServerDescription: raise except Exception as error: _sanitize(error) - sd = self._server_description - address = sd.address + address = self._server_description.address duration = _monotonic_duration(start) - awaited = bool(self._stream and sd.is_server_type_known and sd.topology_version) - if self._publish: - assert self._listeners is not None - self._listeners.publish_server_heartbeat_failed(address, duration, error, awaited) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_FAIL, - topologyId=self._topology._topology_id, - serverHost=address[0], - serverPort=address[1], - awaited=awaited, - durationMS=duration * 1000, - failure=error, - driverConnectionId=self._conn_id, - ) + if self._current_hb is not None: + self._current_hb.failed(duration, error, self._conn_id) await self._reset_connection() if isinstance(error, _OperationCancelled): raise @@ -303,25 +290,16 @@ async def _check_once(self) -> ServerDescription: awaited = bool( self._pool.conns and self._stream and sd.is_server_type_known and sd.topology_version ) - if self._publish: - assert self._listeners is not None - self._listeners.publish_server_heartbeat_started(address, awaited) + hb = _HeartbeatTelemetry( + self._topology._topology_id, address, self._listeners, self._publish, awaited + ) + self._current_hb = hb + hb.apm_started() if self._cancel_context and self._cancel_context.cancelled: await self._reset_connection() async with self._pool.checkout() as conn: - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_START, - topologyId=self._topology._topology_id, - driverConnectionId=conn.id, - serverConnectionId=conn.server_connection_id, - serverHost=address[0], - serverPort=address[1], - awaited=awaited, - ) - + hb.log_started(conn.id, conn.server_connection_id) self._cancel_context = conn.cancel_context # Record the connection id so we can later attach it to the failed log message. self._conn_id = conn.id @@ -331,24 +309,7 @@ async def _check_once(self) -> ServerDescription: avg_rtt, min_rtt = await self._rtt_monitor.get() sd = ServerDescription(address, response, avg_rtt, min_round_trip_time=min_rtt) - if self._publish: - assert self._listeners is not None - self._listeners.publish_server_heartbeat_succeeded( - address, round_trip_time, response, response.awaitable - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_SUCCESS, - topologyId=self._topology._topology_id, - driverConnectionId=conn.id, - serverConnectionId=conn.server_connection_id, - serverHost=address[0], - serverPort=address[1], - awaited=awaited, - durationMS=round_trip_time * 1000, - reply=response.document, - ) + hb.succeeded(round_trip_time, response, conn.id, conn.server_connection_id) return sd async def _check_with_socket(self, conn: AsyncConnection) -> tuple[Hello, float]: # type: ignore[type-arg] diff --git a/pymongo/asynchronous/pool.py b/pymongo/asynchronous/pool.py index 4ed3b85dbf..d2f2356a47 100644 --- a/pymongo/asynchronous/pool.py +++ b/pymongo/asynchronous/pool.py @@ -17,7 +17,6 @@ import asyncio import collections import contextlib -import logging import os import socket import ssl @@ -35,6 +34,7 @@ from bson import DEFAULT_CODEC_OPTIONS from pymongo import _csot, helpers_shared +from pymongo._telemetry import _CmapTelemetry from pymongo.asynchronous.client_session import _validate_session_write_concern from pymongo.asynchronous.command_runner import run_command from pymongo.asynchronous.helpers import _handle_reauth @@ -65,12 +65,6 @@ _async_create_condition, _async_create_lock, ) -from pymongo.logger import ( - _CONNECTION_LOGGER, - _ConnectionStatusMessage, - _debug_log, - _verbose_connection_error_reason, -) from pymongo.monitoring import ( ConnectionCheckOutFailedReason, ConnectionClosedReason, @@ -145,8 +139,7 @@ def __init__( self.hello_ok: bool = False self.is_mongos = False self.listeners = pool.opts._event_listeners - self.enabled_for_cmap = pool.enabled_for_cmap - self.enabled_for_logging = pool.enabled_for_logging + self._telemetry = pool._telemetry self.compression_settings = pool.opts._compression_settings self.compression_context: Union[SnappyContext, ZlibContext, ZstdContext, None] = None self.socket_checker: SocketChecker = SocketChecker() @@ -173,7 +166,6 @@ def __init__( self.active = False self.last_timeout = self.opts.socket_timeout self.connect_rtt = 0.0 - self._client_id = pool._client_id self.creation_time = time.monotonic() # For gossiping $clusterTime from the connection handshake to the client. self._cluster_time = None @@ -476,20 +468,7 @@ async def authenticate(self, reauthenticate: bool = False) -> None: await auth.authenticate(creds, self, reauthenticate=reauthenticate) self.ready = True duration = time.monotonic() - self.creation_time - # Log before publishing event to prevent potential listener preemption in tests - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_READY, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=self.id, - durationMS=duration, - ) - if self.enabled_for_cmap: - assert self.listeners is not None - self.listeners.publish_connection_ready(self.address, self.id, duration) + self._telemetry.connection_ready(self.id, duration) def validate_session( self, client: Optional[AsyncMongoClient[Any]], session: Optional[AsyncClientSession] @@ -510,20 +489,7 @@ async def close_conn(self, reason: Optional[str]) -> None: return await self._close_conn() if reason: - if self.enabled_for_cmap: - assert self.listeners is not None - self.listeners.publish_connection_closed(self.address, self.id, reason) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_CLOSED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=self.id, - reason=_verbose_connection_error_reason(reason), - error=reason, - ) + self._telemetry.connection_closed(self.id, reason) async def _close_conn(self) -> None: """Close this connection.""" @@ -699,13 +665,6 @@ def __init__( self.address = address self.opts = options self.is_sdam = is_sdam - # Don't publish events or logs in Monitor pools. - self.enabled_for_cmap = ( - not self.is_sdam - and self.opts._event_listeners is not None - and self.opts._event_listeners.enabled_for_cmap - ) - self.enabled_for_logging = not self.is_sdam # The first portion of the wait queue. # Enforces: maxPoolSize @@ -721,25 +680,11 @@ def __init__( self._max_connecting_cond = _async_create_condition(self.lock) self._pending = 0 self._max_connecting = self.opts.max_connecting - self._client_id = client_id self._ssl_session_cache: Optional[list[Any]] = ( [None] if self.opts._ssl_context is not None else None ) - # Log before publishing event to prevent potential listener preemption in tests - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.POOL_CREATED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - **self.opts.non_default_options, - ) - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_pool_created( - self.address, self.opts.non_default_options - ) + self._telemetry = _CmapTelemetry(client_id, address, options._event_listeners, is_sdam) + self._telemetry.pool_created(self.opts.non_default_options) # Similar to active_sockets but includes threads in the wait queue. self.operation_count: int = 0 # Retain references to pinned connections to prevent the CPython GC @@ -754,17 +699,7 @@ async def ready(self) -> None: async with self.lock: if self.state != PoolState.READY: self.state = PoolState.READY - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.POOL_READY, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - ) - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_pool_ready(self.address) + self._telemetry.pool_ready() @property def closed(self) -> bool: @@ -812,23 +747,7 @@ async def _reset( # and free-threaded Python causes ConnectionCheckOutFailedEvent to # arrive before PoolClearedEvent (PYTHON-3519). if not close and old_state != PoolState.PAUSED: - _listeners = self.opts._event_listeners - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.POOL_CLEARED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - serviceId=service_id, - ) - if self.enabled_for_cmap: - assert _listeners is not None - _listeners.publish_pool_cleared( - self.address, - service_id=service_id, - interrupt_connections=interrupt_connections, - ) + self._telemetry.pool_cleared(service_id, interrupt_connections) # Clear the wait queue self._max_connecting_cond.notify_all() @@ -838,7 +757,6 @@ async def _reset( for context in self.active_contexts: context.cancel() - listeners = self.opts._event_listeners # CMAP spec says that close() MUST close sockets before publishing the # PoolClosedEvent but that reset() SHOULD close sockets *after* # publishing the PoolClearedEvent. @@ -851,17 +769,7 @@ async def _reset( else: for conn in sockets: await conn.close_conn(ConnectionClosedReason.POOL_CLOSED) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.POOL_CLOSED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - ) - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_pool_closed(self.address) + self._telemetry.pool_closed() else: if not _IS_SYNC: await asyncio.gather( @@ -996,20 +904,7 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A tmp_context = _CancellationContext() self.active_contexts.add(tmp_context) - listeners = self.opts._event_listeners - # Log before publishing event to prevent potential listener preemption in tests - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_CREATED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn_id, - ) - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_created(self.address, conn_id) + self._telemetry.connection_created(conn_id) try: networking_interface = await _configured_protocol_interface( @@ -1019,22 +914,7 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A except BaseException as error: async with self.lock: self.active_contexts.discard(tmp_context) - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_closed( - self.address, conn_id, ConnectionClosedReason.ERROR - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_CLOSED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn_id, - reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR), - error=ConnectionClosedReason.ERROR, - ) + self._telemetry.connection_closed(conn_id, ConnectionClosedReason.ERROR) if isinstance(error, (IOError, OSError, *SSLErrors)): details = _get_timeout_details(self.opts) # Wrap to AutoReconnect/NetworkTimeout BEFORE labeling so the @@ -1096,36 +976,13 @@ async def checkout( :param handler: A _MongoClientErrorHandler. """ - listeners = self.opts._event_listeners checkout_started_time = time.monotonic() - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_check_out_started(self.address) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_STARTED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - ) + self._telemetry.checkout_started() conn = await self._get_conn(checkout_started_time, handler=handler) duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_checked_out(self.address, conn.id, duration) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_SUCCEEDED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn.id, - durationMS=duration, - ) + self._telemetry.checkout_succeeded(conn.id, duration) try: async with self.lock: self.active_contexts.add(conn.cancel_context) @@ -1160,22 +1017,11 @@ def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) -> if self.state != PoolState.READY: if emit_event: duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_connection_check_out_failed( - self.address, ConnectionCheckOutFailedReason.CONN_ERROR, duration - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_FAILED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - reason="An error occurred while trying to establish a new connection", - error=ConnectionCheckOutFailedReason.CONN_ERROR, - durationMS=duration, - ) + self._telemetry.checkout_failed( + "An error occurred while trying to establish a new connection", + ConnectionCheckOutFailedReason.CONN_ERROR, + duration, + ) details = _get_timeout_details(self.opts) _raise_connection_failure( @@ -1194,22 +1040,11 @@ async def _get_conn( if self.closed: duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_connection_check_out_failed( - self.address, ConnectionCheckOutFailedReason.POOL_CLOSED, duration - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_FAILED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - reason="Connection pool was closed", - error=ConnectionCheckOutFailedReason.POOL_CLOSED, - durationMS=duration, - ) + self._telemetry.checkout_failed( + "Connection pool was closed", + ConnectionCheckOutFailedReason.POOL_CLOSED, + duration, + ) raise _PoolClosedError( "Attempted to check out a connection from closed connection pool" ) @@ -1290,22 +1125,11 @@ async def _get_conn( if not emitted_event: duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_connection_check_out_failed( - self.address, ConnectionCheckOutFailedReason.CONN_ERROR, duration - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_FAILED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - reason="An error occurred while trying to establish a new connection", - error=ConnectionCheckOutFailedReason.CONN_ERROR, - durationMS=duration, - ) + self._telemetry.checkout_failed( + "An error occurred while trying to establish a new connection", + ConnectionCheckOutFailedReason.CONN_ERROR, + duration, + ) raise conn.active = True @@ -1322,21 +1146,9 @@ async def checkin(self, conn: AsyncConnection) -> None: conn.pinned_txn = False conn.pinned_cursor = False self.__pinned_sockets.discard(conn) - listeners = self.opts._event_listeners async with self.lock: self.active_contexts.discard(conn.cancel_context) - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_checked_in(self.address, conn.id) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKEDIN, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn.id, - ) + self._telemetry.checked_in(conn.id) if self.pid != os.getpid(): await self.reset_without_pause() else: @@ -1344,22 +1156,7 @@ async def checkin(self, conn: AsyncConnection) -> None: await conn.close_conn(ConnectionClosedReason.POOL_CLOSED) elif conn.closed: # CMAP requires the closed event be emitted after the check in. - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_closed( - self.address, conn.id, ConnectionClosedReason.ERROR - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_CLOSED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn.id, - reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR), - error=ConnectionClosedReason.ERROR, - ) + self._telemetry.connection_closed(conn.id, ConnectionClosedReason.ERROR) else: close_conn = False async with self.lock: @@ -1424,24 +1221,12 @@ async def _perished(self, conn: AsyncConnection) -> bool: return False def _raise_wait_queue_timeout(self, checkout_started_time: float) -> NoReturn: - listeners = self.opts._event_listeners duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_check_out_failed( - self.address, ConnectionCheckOutFailedReason.TIMEOUT, duration - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_FAILED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - reason="Wait queue timeout elapsed without a connection becoming available", - error=ConnectionCheckOutFailedReason.TIMEOUT, - durationMS=duration, - ) + self._telemetry.checkout_failed( + "Wait queue timeout elapsed without a connection becoming available", + ConnectionCheckOutFailedReason.TIMEOUT, + duration, + ) timeout = _csot.get_timeout() or self.opts.wait_queue_timeout if self.opts.load_balanced: other_ops = self.active_sockets - self.ncursors - self.ntxns diff --git a/pymongo/asynchronous/server.py b/pymongo/asynchronous/server.py index 57158dfc44..dff297dc58 100644 --- a/pymongo/asynchronous/server.py +++ b/pymongo/asynchronous/server.py @@ -16,7 +16,6 @@ from __future__ import annotations -import logging from contextlib import AbstractAsyncContextManager from typing import ( TYPE_CHECKING, @@ -26,13 +25,9 @@ Union, ) +from pymongo._telemetry import _SdamTelemetry from pymongo.asynchronous.command_runner import run_cursor_command from pymongo.asynchronous.helpers import _handle_reauth -from pymongo.logger import ( - _SDAM_LOGGER, - _debug_log, - _SDAMStatusMessage, -) from pymongo.message import _GetMore, _OpMsg, _Query from pymongo.response import PinnedResponse, Response @@ -74,6 +69,7 @@ def __init__( self._events = None if self._publish: self._events = events() # type: ignore[misc] + self._sdam = _SdamTelemetry(topology_id, listeners, self._events) # type: ignore[arg-type] async def open(self) -> None: """Start monitoring, or restart after a fork. @@ -92,23 +88,7 @@ async def close(self) -> None: Reconnect with open(). """ - if self._publish: - assert self._listener is not None - assert self._events is not None - self._events.put( - ( - self._listener.publish_server_closed, - (self._description.address, self._topology_id), - ) - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.STOP_SERVER, - topologyId=self._topology_id, - serverHost=self._description.address[0], - serverPort=self._description.address[1], - ) + self._sdam.server_closed(self._description.address) await self._monitor.close() await self._pool.close() diff --git a/pymongo/asynchronous/topology.py b/pymongo/asynchronous/topology.py index 6af075cdc2..b3046a7b21 100644 --- a/pymongo/asynchronous/topology.py +++ b/pymongo/asynchronous/topology.py @@ -30,6 +30,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, cast from pymongo import _csot, common, helpers_shared, periodic_executor +from pymongo._telemetry import _SdamTelemetry from pymongo.asynchronous.client_session import _ServerSession, _ServerSessionPool from pymongo.asynchronous.monitor import MonitorBase, SrvMonitor from pymongo.asynchronous.pool import Pool @@ -52,10 +53,8 @@ _async_create_lock, ) from pymongo.logger import ( - _SDAM_LOGGER, _SERVER_SELECTION_LOGGER, _debug_log, - _SDAMStatusMessage, _ServerSelectionStatusMessage, ) from pymongo.pool_options import PoolOptions @@ -118,17 +117,9 @@ def __init__(self, topology_settings: TopologySettings): if self._publish_server or self._publish_tp: self._events = queue.Queue(maxsize=100) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.START_TOPOLOGY, - topologyId=self._topology_id, - ) + self._sdam = _SdamTelemetry(self._topology_id, self._listeners, self._events) + self._sdam.topology_opened() - if self._publish_tp: - assert self._events is not None - assert self._listeners is not None - self._events.put((self._listeners.publish_topology_opened, (self._topology_id,))) self._settings = topology_settings topology_description = TopologyDescription( topology_settings.get_topology_type(), @@ -143,37 +134,10 @@ def __init__(self, topology_settings: TopologySettings): initial_td = TopologyDescription( TOPOLOGY_TYPE.Unknown, {}, None, None, None, self._settings ) - if self._publish_tp: - assert self._events is not None - assert self._listeners is not None - self._events.put( - ( - self._listeners.publish_topology_description_changed, - (initial_td, self._description, self._topology_id), - ) - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(initial_td), - newDescription=repr(self._description), - ) + self._sdam.topology_description_changed(initial_td, self._description) for seed in topology_settings.seeds: - if self._publish_server: - assert self._events is not None - assert self._listeners is not None - self._events.put((self._listeners.publish_server_opened, (seed, self._topology_id))) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.START_SERVER, - topologyId=self._topology_id, - serverHost=seed[0], - serverPort=seed[1], - ) + self._sdam.server_opened(seed) # Store the seed list to help diagnose errors in _error_message(). self._seed_addresses = list(topology_description.server_descriptions()) @@ -508,36 +472,16 @@ async def _process_change( await server.pool.ready() suppress_event = sd_old == server_description - if self._publish_server and not suppress_event: - assert self._events is not None - assert self._listeners is not None - self._events.put( - ( - self._listeners.publish_server_description_changed, - (sd_old, server_description, server_description.address, self._topology_id), - ) + if not suppress_event: + self._sdam.server_description_changed( + sd_old, server_description, server_description.address ) self._description = new_td await self._update_servers() - if self._publish_tp and not suppress_event: - assert self._events is not None - assert self._listeners is not None - self._events.put( - ( - self._listeners.publish_topology_description_changed, - (td_old, self._description, self._topology_id), - ) - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG) and not suppress_event: - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(td_old), - newDescription=repr(self._description), - ) + if not suppress_event: + self._sdam.topology_description_changed(td_old, self._description) # Shutdown SRV polling for unsupported cluster types. # This is only applicable if the old topology was Unknown, and the @@ -588,24 +532,7 @@ async def _process_srv_update(self, seedlist: list[tuple[str, Any]]) -> None: self._description = _updated_topology_description_srv_polling(self._description, seedlist) await self._update_servers() - - if self._publish_tp: - assert self._events is not None - assert self._listeners is not None - self._events.put( - ( - self._listeners.publish_topology_description_changed, - (td_old, self._description, self._topology_id), - ) - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(td_old), - newDescription=repr(self._description), - ) + self._sdam.topology_description_changed(td_old, self._description) async def on_srv_update(self, seedlist: list[tuple[str, Any]]) -> None: """Process a new list of nodes obtained from scanning SRV records.""" @@ -744,8 +671,6 @@ async def close(self) -> None: # Publish only after releasing the lock. if self._publish_tp: - assert self._events is not None - assert self._listeners is not None self._description = TopologyDescription( TOPOLOGY_TYPE.Unknown, {}, @@ -754,28 +679,7 @@ async def close(self) -> None: self._description.max_election_id, self._description._topology_settings, ) - self._events.put( - ( - self._listeners.publish_topology_description_changed, - ( - old_td, - self._description, - self._topology_id, - ), - ) - ) - self._events.put((self._listeners.publish_topology_closed, (self._topology_id,))) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(old_td), - newDescription=repr(self._description), - ) - _debug_log( - _SDAM_LOGGER, message=_SDAMStatusMessage.STOP_TOPOLOGY, topologyId=self._topology_id - ) + self._sdam.topology_closed(old_td, self._description) if self._publish_server or self._publish_tp: # Make sure the events executor thread is fully closed before publishing the remaining events diff --git a/pymongo/synchronous/monitor.py b/pymongo/synchronous/monitor.py index f395588814..053ec8d8b4 100644 --- a/pymongo/synchronous/monitor.py +++ b/pymongo/synchronous/monitor.py @@ -18,17 +18,17 @@ import asyncio import atexit -import logging import time import weakref from typing import TYPE_CHECKING, Any, Optional from pymongo import common, periodic_executor from pymongo._csot import MovingMinimum +from pymongo._telemetry import _HeartbeatTelemetry from pymongo.errors import NetworkTimeout, _OperationCancelled from pymongo.hello import Hello from pymongo.lock import _create_lock -from pymongo.logger import _SDAM_LOGGER, _debug_log, _SDAMStatusMessage +from pymongo.logger import _SDAM_LOGGER, _debug_log from pymongo.periodic_executor import _shutdown_executors from pymongo.pool_options import _is_faas from pymongo.read_preferences import MovingAverage @@ -154,6 +154,7 @@ def __init__( self._publish = self._listeners is not None and self._listeners.enabled_for_server_heartbeat self._cancel_context: Optional[_CancellationContext] = None self._conn_id: Optional[int] = None + self._current_hb: Optional[_HeartbeatTelemetry] = None self._rtt_monitor = _RttMonitor( topology, topology_settings, @@ -255,6 +256,7 @@ def _check_server(self) -> ServerDescription: Returns a ServerDescription. """ self._conn_id = None + self._current_hb = None start = time.monotonic() try: return self._check_once() @@ -262,25 +264,10 @@ def _check_server(self) -> ServerDescription: raise except Exception as error: _sanitize(error) - sd = self._server_description - address = sd.address + address = self._server_description.address duration = _monotonic_duration(start) - awaited = bool(self._stream and sd.is_server_type_known and sd.topology_version) - if self._publish: - assert self._listeners is not None - self._listeners.publish_server_heartbeat_failed(address, duration, error, awaited) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_FAIL, - topologyId=self._topology._topology_id, - serverHost=address[0], - serverPort=address[1], - awaited=awaited, - durationMS=duration * 1000, - failure=error, - driverConnectionId=self._conn_id, - ) + if self._current_hb is not None: + self._current_hb.failed(duration, error, self._conn_id) self._reset_connection() if isinstance(error, _OperationCancelled): raise @@ -301,25 +288,16 @@ def _check_once(self) -> ServerDescription: awaited = bool( self._pool.conns and self._stream and sd.is_server_type_known and sd.topology_version ) - if self._publish: - assert self._listeners is not None - self._listeners.publish_server_heartbeat_started(address, awaited) + hb = _HeartbeatTelemetry( + self._topology._topology_id, address, self._listeners, self._publish, awaited + ) + self._current_hb = hb + hb.apm_started() if self._cancel_context and self._cancel_context.cancelled: self._reset_connection() with self._pool.checkout() as conn: - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_START, - topologyId=self._topology._topology_id, - driverConnectionId=conn.id, - serverConnectionId=conn.server_connection_id, - serverHost=address[0], - serverPort=address[1], - awaited=awaited, - ) - + hb.log_started(conn.id, conn.server_connection_id) self._cancel_context = conn.cancel_context # Record the connection id so we can later attach it to the failed log message. self._conn_id = conn.id @@ -329,24 +307,7 @@ def _check_once(self) -> ServerDescription: avg_rtt, min_rtt = self._rtt_monitor.get() sd = ServerDescription(address, response, avg_rtt, min_round_trip_time=min_rtt) - if self._publish: - assert self._listeners is not None - self._listeners.publish_server_heartbeat_succeeded( - address, round_trip_time, response, response.awaitable - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_SUCCESS, - topologyId=self._topology._topology_id, - driverConnectionId=conn.id, - serverConnectionId=conn.server_connection_id, - serverHost=address[0], - serverPort=address[1], - awaited=awaited, - durationMS=round_trip_time * 1000, - reply=response.document, - ) + hb.succeeded(round_trip_time, response, conn.id, conn.server_connection_id) return sd def _check_with_socket(self, conn: Connection) -> tuple[Hello, float]: # type: ignore[type-arg] diff --git a/pymongo/synchronous/pool.py b/pymongo/synchronous/pool.py index 1006735444..7eb6125326 100644 --- a/pymongo/synchronous/pool.py +++ b/pymongo/synchronous/pool.py @@ -17,7 +17,6 @@ import asyncio import collections import contextlib -import logging import os import socket import ssl @@ -35,6 +34,7 @@ from bson import DEFAULT_CODEC_OPTIONS from pymongo import _csot, helpers_shared +from pymongo._telemetry import _CmapTelemetry from pymongo.common import ( MAX_BSON_SIZE, MAX_MESSAGE_SIZE, @@ -62,12 +62,6 @@ _create_condition, _create_lock, ) -from pymongo.logger import ( - _CONNECTION_LOGGER, - _ConnectionStatusMessage, - _debug_log, - _verbose_connection_error_reason, -) from pymongo.monitoring import ( ConnectionCheckOutFailedReason, ConnectionClosedReason, @@ -145,8 +139,7 @@ def __init__( self.hello_ok: bool = False self.is_mongos = False self.listeners = pool.opts._event_listeners - self.enabled_for_cmap = pool.enabled_for_cmap - self.enabled_for_logging = pool.enabled_for_logging + self._telemetry = pool._telemetry self.compression_settings = pool.opts._compression_settings self.compression_context: Union[SnappyContext, ZlibContext, ZstdContext, None] = None self.socket_checker: SocketChecker = SocketChecker() @@ -173,7 +166,6 @@ def __init__( self.active = False self.last_timeout = self.opts.socket_timeout self.connect_rtt = 0.0 - self._client_id = pool._client_id self.creation_time = time.monotonic() # For gossiping $clusterTime from the connection handshake to the client. self._cluster_time = None @@ -476,20 +468,7 @@ def authenticate(self, reauthenticate: bool = False) -> None: auth.authenticate(creds, self, reauthenticate=reauthenticate) self.ready = True duration = time.monotonic() - self.creation_time - # Log before publishing event to prevent potential listener preemption in tests - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_READY, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=self.id, - durationMS=duration, - ) - if self.enabled_for_cmap: - assert self.listeners is not None - self.listeners.publish_connection_ready(self.address, self.id, duration) + self._telemetry.connection_ready(self.id, duration) def validate_session( self, client: Optional[MongoClient[Any]], session: Optional[ClientSession] @@ -508,20 +487,7 @@ def close_conn(self, reason: Optional[str]) -> None: return self._close_conn() if reason: - if self.enabled_for_cmap: - assert self.listeners is not None - self.listeners.publish_connection_closed(self.address, self.id, reason) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_CLOSED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=self.id, - reason=_verbose_connection_error_reason(reason), - error=reason, - ) + self._telemetry.connection_closed(self.id, reason) def _close_conn(self) -> None: """Close this connection.""" @@ -697,13 +663,6 @@ def __init__( self.address = address self.opts = options self.is_sdam = is_sdam - # Don't publish events or logs in Monitor pools. - self.enabled_for_cmap = ( - not self.is_sdam - and self.opts._event_listeners is not None - and self.opts._event_listeners.enabled_for_cmap - ) - self.enabled_for_logging = not self.is_sdam # The first portion of the wait queue. # Enforces: maxPoolSize @@ -719,25 +678,11 @@ def __init__( self._max_connecting_cond = _create_condition(self.lock) self._pending = 0 self._max_connecting = self.opts.max_connecting - self._client_id = client_id self._ssl_session_cache: Optional[list[Any]] = ( [None] if self.opts._ssl_context is not None else None ) - # Log before publishing event to prevent potential listener preemption in tests - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.POOL_CREATED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - **self.opts.non_default_options, - ) - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_pool_created( - self.address, self.opts.non_default_options - ) + self._telemetry = _CmapTelemetry(client_id, address, options._event_listeners, is_sdam) + self._telemetry.pool_created(self.opts.non_default_options) # Similar to active_sockets but includes threads in the wait queue. self.operation_count: int = 0 # Retain references to pinned connections to prevent the CPython GC @@ -752,17 +697,7 @@ def ready(self) -> None: with self.lock: if self.state != PoolState.READY: self.state = PoolState.READY - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.POOL_READY, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - ) - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_pool_ready(self.address) + self._telemetry.pool_ready() @property def closed(self) -> bool: @@ -810,23 +745,7 @@ def _reset( # and free-threaded Python causes ConnectionCheckOutFailedEvent to # arrive before PoolClearedEvent (PYTHON-3519). if not close and old_state != PoolState.PAUSED: - _listeners = self.opts._event_listeners - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.POOL_CLEARED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - serviceId=service_id, - ) - if self.enabled_for_cmap: - assert _listeners is not None - _listeners.publish_pool_cleared( - self.address, - service_id=service_id, - interrupt_connections=interrupt_connections, - ) + self._telemetry.pool_cleared(service_id, interrupt_connections) # Clear the wait queue self._max_connecting_cond.notify_all() @@ -836,7 +755,6 @@ def _reset( for context in self.active_contexts: context.cancel() - listeners = self.opts._event_listeners # CMAP spec says that close() MUST close sockets before publishing the # PoolClosedEvent but that reset() SHOULD close sockets *after* # publishing the PoolClearedEvent. @@ -849,17 +767,7 @@ def _reset( else: for conn in sockets: conn.close_conn(ConnectionClosedReason.POOL_CLOSED) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.POOL_CLOSED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - ) - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_pool_closed(self.address) + self._telemetry.pool_closed() else: if not _IS_SYNC: asyncio.gather( @@ -992,20 +900,7 @@ def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connect tmp_context = _CancellationContext() self.active_contexts.add(tmp_context) - listeners = self.opts._event_listeners - # Log before publishing event to prevent potential listener preemption in tests - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_CREATED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn_id, - ) - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_created(self.address, conn_id) + self._telemetry.connection_created(conn_id) try: networking_interface = _configured_socket_interface( @@ -1015,22 +910,7 @@ def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connect except BaseException as error: with self.lock: self.active_contexts.discard(tmp_context) - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_closed( - self.address, conn_id, ConnectionClosedReason.ERROR - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_CLOSED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn_id, - reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR), - error=ConnectionClosedReason.ERROR, - ) + self._telemetry.connection_closed(conn_id, ConnectionClosedReason.ERROR) if isinstance(error, (IOError, OSError, *SSLErrors)): details = _get_timeout_details(self.opts) # Wrap to AutoReconnect/NetworkTimeout BEFORE labeling so the @@ -1092,36 +972,13 @@ def checkout( :param handler: A _MongoClientErrorHandler. """ - listeners = self.opts._event_listeners checkout_started_time = time.monotonic() - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_check_out_started(self.address) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_STARTED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - ) + self._telemetry.checkout_started() conn = self._get_conn(checkout_started_time, handler=handler) duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_checked_out(self.address, conn.id, duration) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_SUCCEEDED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn.id, - durationMS=duration, - ) + self._telemetry.checkout_succeeded(conn.id, duration) try: with self.lock: self.active_contexts.add(conn.cancel_context) @@ -1156,22 +1013,11 @@ def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) -> if self.state != PoolState.READY: if emit_event: duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_connection_check_out_failed( - self.address, ConnectionCheckOutFailedReason.CONN_ERROR, duration - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_FAILED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - reason="An error occurred while trying to establish a new connection", - error=ConnectionCheckOutFailedReason.CONN_ERROR, - durationMS=duration, - ) + self._telemetry.checkout_failed( + "An error occurred while trying to establish a new connection", + ConnectionCheckOutFailedReason.CONN_ERROR, + duration, + ) details = _get_timeout_details(self.opts) _raise_connection_failure( @@ -1190,22 +1036,11 @@ def _get_conn( if self.closed: duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_connection_check_out_failed( - self.address, ConnectionCheckOutFailedReason.POOL_CLOSED, duration - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_FAILED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - reason="Connection pool was closed", - error=ConnectionCheckOutFailedReason.POOL_CLOSED, - durationMS=duration, - ) + self._telemetry.checkout_failed( + "Connection pool was closed", + ConnectionCheckOutFailedReason.POOL_CLOSED, + duration, + ) raise _PoolClosedError( "Attempted to check out a connection from closed connection pool" ) @@ -1286,22 +1121,11 @@ def _get_conn( if not emitted_event: duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert self.opts._event_listeners is not None - self.opts._event_listeners.publish_connection_check_out_failed( - self.address, ConnectionCheckOutFailedReason.CONN_ERROR, duration - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_FAILED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - reason="An error occurred while trying to establish a new connection", - error=ConnectionCheckOutFailedReason.CONN_ERROR, - durationMS=duration, - ) + self._telemetry.checkout_failed( + "An error occurred while trying to establish a new connection", + ConnectionCheckOutFailedReason.CONN_ERROR, + duration, + ) raise conn.active = True @@ -1318,21 +1142,9 @@ def checkin(self, conn: Connection) -> None: conn.pinned_txn = False conn.pinned_cursor = False self.__pinned_sockets.discard(conn) - listeners = self.opts._event_listeners with self.lock: self.active_contexts.discard(conn.cancel_context) - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_checked_in(self.address, conn.id) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKEDIN, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn.id, - ) + self._telemetry.checked_in(conn.id) if self.pid != os.getpid(): self.reset_without_pause() else: @@ -1340,22 +1152,7 @@ def checkin(self, conn: Connection) -> None: conn.close_conn(ConnectionClosedReason.POOL_CLOSED) elif conn.closed: # CMAP requires the closed event be emitted after the check in. - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_closed( - self.address, conn.id, ConnectionClosedReason.ERROR - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CONN_CLOSED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - driverConnectionId=conn.id, - reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR), - error=ConnectionClosedReason.ERROR, - ) + self._telemetry.connection_closed(conn.id, ConnectionClosedReason.ERROR) else: close_conn = False with self.lock: @@ -1420,24 +1217,12 @@ def _perished(self, conn: Connection) -> bool: return False def _raise_wait_queue_timeout(self, checkout_started_time: float) -> NoReturn: - listeners = self.opts._event_listeners duration = time.monotonic() - checkout_started_time - if self.enabled_for_cmap: - assert listeners is not None - listeners.publish_connection_check_out_failed( - self.address, ConnectionCheckOutFailedReason.TIMEOUT, duration - ) - if self.enabled_for_logging and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _CONNECTION_LOGGER, - message=_ConnectionStatusMessage.CHECKOUT_FAILED, - clientId=self._client_id, - serverHost=self.address[0], - serverPort=self.address[1], - reason="Wait queue timeout elapsed without a connection becoming available", - error=ConnectionCheckOutFailedReason.TIMEOUT, - durationMS=duration, - ) + self._telemetry.checkout_failed( + "Wait queue timeout elapsed without a connection becoming available", + ConnectionCheckOutFailedReason.TIMEOUT, + duration, + ) timeout = _csot.get_timeout() or self.opts.wait_queue_timeout if self.opts.load_balanced: other_ops = self.active_sockets - self.ncursors - self.ntxns diff --git a/pymongo/synchronous/server.py b/pymongo/synchronous/server.py index 09d8fb75e1..8564b84750 100644 --- a/pymongo/synchronous/server.py +++ b/pymongo/synchronous/server.py @@ -16,7 +16,6 @@ from __future__ import annotations -import logging from contextlib import AbstractContextManager from typing import ( TYPE_CHECKING, @@ -26,11 +25,7 @@ Union, ) -from pymongo.logger import ( - _SDAM_LOGGER, - _debug_log, - _SDAMStatusMessage, -) +from pymongo._telemetry import _SdamTelemetry from pymongo.message import _GetMore, _OpMsg, _Query from pymongo.response import PinnedResponse, Response from pymongo.synchronous.command_runner import run_cursor_command @@ -74,6 +69,7 @@ def __init__( self._events = None if self._publish: self._events = events() # type: ignore[misc] + self._sdam = _SdamTelemetry(topology_id, listeners, self._events) # type: ignore[arg-type] def open(self) -> None: """Start monitoring, or restart after a fork. @@ -92,23 +88,7 @@ def close(self) -> None: Reconnect with open(). """ - if self._publish: - assert self._listener is not None - assert self._events is not None - self._events.put( - ( - self._listener.publish_server_closed, - (self._description.address, self._topology_id), - ) - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.STOP_SERVER, - topologyId=self._topology_id, - serverHost=self._description.address[0], - serverPort=self._description.address[1], - ) + self._sdam.server_closed(self._description.address) self._monitor.close() self._pool.close() diff --git a/pymongo/synchronous/topology.py b/pymongo/synchronous/topology.py index b419833256..4f273f60f0 100644 --- a/pymongo/synchronous/topology.py +++ b/pymongo/synchronous/topology.py @@ -30,6 +30,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, cast from pymongo import _csot, common, helpers_shared, periodic_executor +from pymongo._telemetry import _SdamTelemetry from pymongo.errors import ( ConnectionFailure, InvalidOperation, @@ -48,10 +49,8 @@ _create_lock, ) from pymongo.logger import ( - _SDAM_LOGGER, _SERVER_SELECTION_LOGGER, _debug_log, - _SDAMStatusMessage, _ServerSelectionStatusMessage, ) from pymongo.pool_options import PoolOptions @@ -118,17 +117,9 @@ def __init__(self, topology_settings: TopologySettings): if self._publish_server or self._publish_tp: self._events = queue.Queue(maxsize=100) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.START_TOPOLOGY, - topologyId=self._topology_id, - ) + self._sdam = _SdamTelemetry(self._topology_id, self._listeners, self._events) + self._sdam.topology_opened() - if self._publish_tp: - assert self._events is not None - assert self._listeners is not None - self._events.put((self._listeners.publish_topology_opened, (self._topology_id,))) self._settings = topology_settings topology_description = TopologyDescription( topology_settings.get_topology_type(), @@ -143,37 +134,10 @@ def __init__(self, topology_settings: TopologySettings): initial_td = TopologyDescription( TOPOLOGY_TYPE.Unknown, {}, None, None, None, self._settings ) - if self._publish_tp: - assert self._events is not None - assert self._listeners is not None - self._events.put( - ( - self._listeners.publish_topology_description_changed, - (initial_td, self._description, self._topology_id), - ) - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(initial_td), - newDescription=repr(self._description), - ) + self._sdam.topology_description_changed(initial_td, self._description) for seed in topology_settings.seeds: - if self._publish_server: - assert self._events is not None - assert self._listeners is not None - self._events.put((self._listeners.publish_server_opened, (seed, self._topology_id))) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.START_SERVER, - topologyId=self._topology_id, - serverHost=seed[0], - serverPort=seed[1], - ) + self._sdam.server_opened(seed) # Store the seed list to help diagnose errors in _error_message(). self._seed_addresses = list(topology_description.server_descriptions()) @@ -508,36 +472,16 @@ def _process_change( server.pool.ready() suppress_event = sd_old == server_description - if self._publish_server and not suppress_event: - assert self._events is not None - assert self._listeners is not None - self._events.put( - ( - self._listeners.publish_server_description_changed, - (sd_old, server_description, server_description.address, self._topology_id), - ) + if not suppress_event: + self._sdam.server_description_changed( + sd_old, server_description, server_description.address ) self._description = new_td self._update_servers() - if self._publish_tp and not suppress_event: - assert self._events is not None - assert self._listeners is not None - self._events.put( - ( - self._listeners.publish_topology_description_changed, - (td_old, self._description, self._topology_id), - ) - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG) and not suppress_event: - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(td_old), - newDescription=repr(self._description), - ) + if not suppress_event: + self._sdam.topology_description_changed(td_old, self._description) # Shutdown SRV polling for unsupported cluster types. # This is only applicable if the old topology was Unknown, and the @@ -588,24 +532,7 @@ def _process_srv_update(self, seedlist: list[tuple[str, Any]]) -> None: self._description = _updated_topology_description_srv_polling(self._description, seedlist) self._update_servers() - - if self._publish_tp: - assert self._events is not None - assert self._listeners is not None - self._events.put( - ( - self._listeners.publish_topology_description_changed, - (td_old, self._description, self._topology_id), - ) - ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(td_old), - newDescription=repr(self._description), - ) + self._sdam.topology_description_changed(td_old, self._description) def on_srv_update(self, seedlist: list[tuple[str, Any]]) -> None: """Process a new list of nodes obtained from scanning SRV records.""" @@ -742,8 +669,6 @@ def close(self) -> None: # Publish only after releasing the lock. if self._publish_tp: - assert self._events is not None - assert self._listeners is not None self._description = TopologyDescription( TOPOLOGY_TYPE.Unknown, {}, @@ -752,28 +677,7 @@ def close(self) -> None: self._description.max_election_id, self._description._topology_settings, ) - self._events.put( - ( - self._listeners.publish_topology_description_changed, - ( - old_td, - self._description, - self._topology_id, - ), - ) - ) - self._events.put((self._listeners.publish_topology_closed, (self._topology_id,))) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(old_td), - newDescription=repr(self._description), - ) - _debug_log( - _SDAM_LOGGER, message=_SDAMStatusMessage.STOP_TOPOLOGY, topologyId=self._topology_id - ) + self._sdam.topology_closed(old_td, self._description) if self._publish_server or self._publish_tp: # Make sure the events executor thread is fully closed before publishing the remaining events From 265971cd353b51795b65740729e4f058400dfc31 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 14:06:53 -0500 Subject: [PATCH 02/20] PYTHON-5846 Refine _CmapTelemetry and _HeartbeatTelemetry API - Rename _CmapTelemetry._log -> _emit_log for consistency with _CommandTelemetry - Rename _HeartbeatTelemetry.apm_started -> started to match started/succeeded/failed lifecycle - Rename _HeartbeatTelemetry.log_started -> emit_started_log to signal it is the deferred log-only half - Replace is_sdam flag with separate publish/log bool parameters on _CmapTelemetry --- pymongo/_telemetry.py | 35 +++++++++++++++++---------------- pymongo/asynchronous/monitor.py | 4 ++-- pymongo/asynchronous/pool.py | 4 +++- pymongo/synchronous/monitor.py | 4 ++-- pymongo/synchronous/pool.py | 4 +++- 5 files changed, 28 insertions(+), 23 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index c796041487..c358dc62f1 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -205,15 +205,16 @@ def __init__( client_id: Optional[ObjectId], address: _Address, listeners: Optional[_EventListeners], - is_sdam: bool, + publish: bool, + log: bool, ) -> None: self._client_id = client_id self._address = address self._listeners = listeners - self._should_publish = not is_sdam and listeners is not None and listeners.enabled_for_cmap - self._should_log = not is_sdam + self._should_publish = publish and listeners is not None and listeners.enabled_for_cmap + self._should_log = log - def _log(self, message: _ConnectionStatusMessage, **extra: Any) -> None: + def _emit_log(self, message: _ConnectionStatusMessage, **extra: Any) -> None: if self._should_log and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): _debug_log( _CONNECTION_LOGGER, @@ -226,21 +227,21 @@ def _log(self, message: _ConnectionStatusMessage, **extra: Any) -> None: def pool_created(self, non_default_options: dict[str, Any]) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._log(_ConnectionStatusMessage.POOL_CREATED, **non_default_options) + self._emit_log(_ConnectionStatusMessage.POOL_CREATED, **non_default_options) if self._should_publish: assert self._listeners is not None self._listeners.publish_pool_created(self._address, non_default_options) def pool_ready(self) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._log(_ConnectionStatusMessage.POOL_READY) + self._emit_log(_ConnectionStatusMessage.POOL_READY) if self._should_publish: assert self._listeners is not None self._listeners.publish_pool_ready(self._address) def pool_cleared(self, service_id: Optional[ObjectId], interrupt_connections: bool) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._log(_ConnectionStatusMessage.POOL_CLEARED, serviceId=service_id) + self._emit_log(_ConnectionStatusMessage.POOL_CLEARED, serviceId=service_id) if self._should_publish: assert self._listeners is not None self._listeners.publish_pool_cleared( @@ -251,21 +252,21 @@ def pool_cleared(self, service_id: Optional[ObjectId], interrupt_connections: bo def pool_closed(self) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._log(_ConnectionStatusMessage.POOL_CLOSED) + self._emit_log(_ConnectionStatusMessage.POOL_CLOSED) if self._should_publish: assert self._listeners is not None self._listeners.publish_pool_closed(self._address) def connection_created(self, conn_id: int) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._log(_ConnectionStatusMessage.CONN_CREATED, driverConnectionId=conn_id) + self._emit_log(_ConnectionStatusMessage.CONN_CREATED, driverConnectionId=conn_id) if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_created(self._address, conn_id) def connection_ready(self, conn_id: int, duration: float) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._log( + self._emit_log( _ConnectionStatusMessage.CONN_READY, driverConnectionId=conn_id, durationMS=duration, @@ -278,7 +279,7 @@ def connection_closed(self, conn_id: int, reason: str) -> None: if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_closed(self._address, conn_id, reason) - self._log( + self._emit_log( _ConnectionStatusMessage.CONN_CLOSED, driverConnectionId=conn_id, reason=_verbose_connection_error_reason(reason), @@ -289,13 +290,13 @@ def checkout_started(self) -> None: if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_started(self._address) - self._log(_ConnectionStatusMessage.CHECKOUT_STARTED) + self._emit_log(_ConnectionStatusMessage.CHECKOUT_STARTED) def checkout_succeeded(self, conn_id: int, duration: float) -> None: if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_checked_out(self._address, conn_id, duration) - self._log( + self._emit_log( _ConnectionStatusMessage.CHECKOUT_SUCCEEDED, driverConnectionId=conn_id, durationMS=duration, @@ -305,7 +306,7 @@ def checkout_failed(self, reason: str, error: str, duration: float) -> None: if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_failed(self._address, error, duration) - self._log( + self._emit_log( _ConnectionStatusMessage.CHECKOUT_FAILED, reason=reason, error=error, @@ -316,7 +317,7 @@ def checked_in(self, conn_id: int) -> None: if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_checked_in(self._address, conn_id) - self._log(_ConnectionStatusMessage.CHECKEDIN, driverConnectionId=conn_id) + self._emit_log(_ConnectionStatusMessage.CHECKEDIN, driverConnectionId=conn_id) class _HeartbeatTelemetry: @@ -344,13 +345,13 @@ def __init__( self._publish = publish self._awaited = awaited - def apm_started(self) -> None: + def started(self) -> None: """Publish the APM heartbeat-started event (before connection checkout).""" if self._publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_started(self._address, self._awaited) - def log_started(self, conn_id: int, server_conn_id: Optional[int]) -> None: + def emit_started_log(self, conn_id: int, server_conn_id: Optional[int]) -> None: """Emit the log entry for heartbeat started (after connection checkout).""" if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): _debug_log( diff --git a/pymongo/asynchronous/monitor.py b/pymongo/asynchronous/monitor.py index f6b636a7ae..e97d645c2c 100644 --- a/pymongo/asynchronous/monitor.py +++ b/pymongo/asynchronous/monitor.py @@ -294,12 +294,12 @@ async def _check_once(self) -> ServerDescription: self._topology._topology_id, address, self._listeners, self._publish, awaited ) self._current_hb = hb - hb.apm_started() + hb.started() if self._cancel_context and self._cancel_context.cancelled: await self._reset_connection() async with self._pool.checkout() as conn: - hb.log_started(conn.id, conn.server_connection_id) + hb.emit_started_log(conn.id, conn.server_connection_id) self._cancel_context = conn.cancel_context # Record the connection id so we can later attach it to the failed log message. self._conn_id = conn.id diff --git a/pymongo/asynchronous/pool.py b/pymongo/asynchronous/pool.py index d2f2356a47..fbd140baf5 100644 --- a/pymongo/asynchronous/pool.py +++ b/pymongo/asynchronous/pool.py @@ -683,7 +683,9 @@ def __init__( self._ssl_session_cache: Optional[list[Any]] = ( [None] if self.opts._ssl_context is not None else None ) - self._telemetry = _CmapTelemetry(client_id, address, options._event_listeners, is_sdam) + self._telemetry = _CmapTelemetry( + client_id, address, options._event_listeners, publish=not is_sdam, log=not is_sdam + ) self._telemetry.pool_created(self.opts.non_default_options) # Similar to active_sockets but includes threads in the wait queue. self.operation_count: int = 0 diff --git a/pymongo/synchronous/monitor.py b/pymongo/synchronous/monitor.py index 053ec8d8b4..946cbe08f2 100644 --- a/pymongo/synchronous/monitor.py +++ b/pymongo/synchronous/monitor.py @@ -292,12 +292,12 @@ def _check_once(self) -> ServerDescription: self._topology._topology_id, address, self._listeners, self._publish, awaited ) self._current_hb = hb - hb.apm_started() + hb.started() if self._cancel_context and self._cancel_context.cancelled: self._reset_connection() with self._pool.checkout() as conn: - hb.log_started(conn.id, conn.server_connection_id) + hb.emit_started_log(conn.id, conn.server_connection_id) self._cancel_context = conn.cancel_context # Record the connection id so we can later attach it to the failed log message. self._conn_id = conn.id diff --git a/pymongo/synchronous/pool.py b/pymongo/synchronous/pool.py index 7eb6125326..66a500855b 100644 --- a/pymongo/synchronous/pool.py +++ b/pymongo/synchronous/pool.py @@ -681,7 +681,9 @@ def __init__( self._ssl_session_cache: Optional[list[Any]] = ( [None] if self.opts._ssl_context is not None else None ) - self._telemetry = _CmapTelemetry(client_id, address, options._event_listeners, is_sdam) + self._telemetry = _CmapTelemetry( + client_id, address, options._event_listeners, publish=not is_sdam, log=not is_sdam + ) self._telemetry.pool_created(self.opts.non_default_options) # Similar to active_sockets but includes threads in the wait queue. self.operation_count: int = 0 From 55b21ecb157fa15c10be6d8228211062f3e61a61 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 16:27:42 -0500 Subject: [PATCH 03/20] PYTHON-5846 Move duration tracking into telemetry classes _CmapTelemetry now owns connection-creation and checkout durations: connection_created() starts the clock, connection_ready() computes it; checkout_started() starts the clock, checkout_succeeded/failed() compute it. _HeartbeatTelemetry.started() starts the clock; failed() computes its own duration instead of receiving it as a parameter. Removes checkout_started_time from Pool._get_conn, _raise_if_not_ready, and _raise_wait_queue_timeout, and removes AsyncConnection.creation_time. --- pymongo/_telemetry.py | 28 ++++++++++++++++++------ pymongo/asynchronous/monitor.py | 4 +--- pymongo/asynchronous/pool.py | 36 +++++++++++-------------------- pymongo/synchronous/monitor.py | 4 +--- pymongo/synchronous/pool.py | 38 +++++++++++---------------------- 5 files changed, 48 insertions(+), 62 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index c358dc62f1..ce5a6646bb 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -19,6 +19,7 @@ import datetime import logging import queue +import time from collections.abc import MutableMapping from typing import TYPE_CHECKING, Any, Optional @@ -198,7 +199,15 @@ def failed( class _CmapTelemetry: """Combines CMAP structured logging and APM event publishing for pool and connection events.""" - __slots__ = ("_address", "_client_id", "_listeners", "_should_log", "_should_publish") + __slots__ = ( + "_address", + "_checkout_start", + "_client_id", + "_conn_created_start", + "_listeners", + "_should_log", + "_should_publish", + ) def __init__( self, @@ -258,13 +267,15 @@ def pool_closed(self) -> None: self._listeners.publish_pool_closed(self._address) def connection_created(self, conn_id: int) -> None: + self._conn_created_start = time.monotonic() # Log before publishing to prevent potential listener preemption in tests. self._emit_log(_ConnectionStatusMessage.CONN_CREATED, driverConnectionId=conn_id) if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_created(self._address, conn_id) - def connection_ready(self, conn_id: int, duration: float) -> None: + def connection_ready(self, conn_id: int) -> None: + duration = max(0.0, time.monotonic() - self._conn_created_start) # Log before publishing to prevent potential listener preemption in tests. self._emit_log( _ConnectionStatusMessage.CONN_READY, @@ -287,12 +298,14 @@ def connection_closed(self, conn_id: int, reason: str) -> None: ) def checkout_started(self) -> None: + self._checkout_start = time.monotonic() if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_started(self._address) self._emit_log(_ConnectionStatusMessage.CHECKOUT_STARTED) - def checkout_succeeded(self, conn_id: int, duration: float) -> None: + def checkout_succeeded(self, conn_id: int) -> None: + duration = max(0.0, time.monotonic() - self._checkout_start) if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_checked_out(self._address, conn_id, duration) @@ -302,7 +315,8 @@ def checkout_succeeded(self, conn_id: int, duration: float) -> None: durationMS=duration, ) - def checkout_failed(self, reason: str, error: str, duration: float) -> None: + def checkout_failed(self, reason: str, error: str) -> None: + duration = max(0.0, time.monotonic() - self._checkout_start) if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_failed(self._address, error, duration) @@ -329,7 +343,7 @@ class _HeartbeatTelemetry: context, then :meth:`succeeded` or :meth:`failed` when the outcome is known. """ - __slots__ = ("_address", "_awaited", "_listeners", "_publish", "_topology_id") + __slots__ = ("_address", "_awaited", "_listeners", "_publish", "_start", "_topology_id") def __init__( self, @@ -347,6 +361,7 @@ def __init__( def started(self) -> None: """Publish the APM heartbeat-started event (before connection checkout).""" + self._start = time.monotonic() if self._publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_started(self._address, self._awaited) @@ -392,8 +407,9 @@ def succeeded( reply=response.document, ) - def failed(self, duration: float, error: Exception, conn_id: Optional[int]) -> None: + def failed(self, error: Exception, conn_id: Optional[int]) -> None: """Emit the FAILED log entry and APM event.""" + duration = max(0.0, time.monotonic() - self._start) if self._publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_failed( diff --git a/pymongo/asynchronous/monitor.py b/pymongo/asynchronous/monitor.py index e97d645c2c..913f497a13 100644 --- a/pymongo/asynchronous/monitor.py +++ b/pymongo/asynchronous/monitor.py @@ -259,7 +259,6 @@ async def _check_server(self) -> ServerDescription: """ self._conn_id = None self._current_hb = None - start = time.monotonic() try: return await self._check_once() except ReferenceError: @@ -267,9 +266,8 @@ async def _check_server(self) -> ServerDescription: except Exception as error: _sanitize(error) address = self._server_description.address - duration = _monotonic_duration(start) if self._current_hb is not None: - self._current_hb.failed(duration, error, self._conn_id) + self._current_hb.failed(error, self._conn_id) await self._reset_connection() if isinstance(error, _OperationCancelled): raise diff --git a/pymongo/asynchronous/pool.py b/pymongo/asynchronous/pool.py index fbd140baf5..41058343a8 100644 --- a/pymongo/asynchronous/pool.py +++ b/pymongo/asynchronous/pool.py @@ -166,7 +166,6 @@ def __init__( self.active = False self.last_timeout = self.opts.socket_timeout self.connect_rtt = 0.0 - self.creation_time = time.monotonic() # For gossiping $clusterTime from the connection handshake to the client. self._cluster_time = None @@ -467,8 +466,7 @@ async def authenticate(self, reauthenticate: bool = False) -> None: await auth.authenticate(creds, self, reauthenticate=reauthenticate) self.ready = True - duration = time.monotonic() - self.creation_time - self._telemetry.connection_ready(self.id, duration) + self._telemetry.connection_ready(self.id) def validate_session( self, client: Optional[AsyncMongoClient[Any]], session: Optional[AsyncClientSession] @@ -978,13 +976,11 @@ async def checkout( :param handler: A _MongoClientErrorHandler. """ - checkout_started_time = time.monotonic() self._telemetry.checkout_started() - conn = await self._get_conn(checkout_started_time, handler=handler) + conn = await self._get_conn(handler=handler) - duration = time.monotonic() - checkout_started_time - self._telemetry.checkout_succeeded(conn.id, duration) + self._telemetry.checkout_succeeded(conn.id) try: async with self.lock: self.active_contexts.add(conn.cancel_context) @@ -1015,14 +1011,12 @@ async def checkout( elif conn.active: await self.checkin(conn) - def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) -> None: + def _raise_if_not_ready(self, emit_event: bool) -> None: if self.state != PoolState.READY: if emit_event: - duration = time.monotonic() - checkout_started_time self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, - duration, ) details = _get_timeout_details(self.opts) @@ -1031,7 +1025,7 @@ def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) -> ) async def _get_conn( - self, checkout_started_time: float, handler: Optional[_MongoClientErrorHandler] = None + self, handler: Optional[_MongoClientErrorHandler] = None ) -> AsyncConnection: """Get or create a AsyncConnection. Can raise ConnectionFailure.""" # We use the pid here to avoid issues with fork / multiprocessing. @@ -1041,11 +1035,9 @@ async def _get_conn( await self.reset_without_pause() if self.closed: - duration = time.monotonic() - checkout_started_time self._telemetry.checkout_failed( "Connection pool was closed", ConnectionCheckOutFailedReason.POOL_CLOSED, - duration, ) raise _PoolClosedError( "Attempted to check out a connection from closed connection pool" @@ -1063,7 +1055,7 @@ async def _get_conn( deadline = None async with self.size_cond: - self._raise_if_not_ready(checkout_started_time, emit_event=True) + self._raise_if_not_ready(emit_event=True) while not (self.requests < self.max_pool_size): timeout = deadline - time.monotonic() if deadline else None if not await _async_cond_wait(self.size_cond, timeout): @@ -1071,8 +1063,8 @@ async def _get_conn( # timeout doesn't consume the condition. if self.requests < self.max_pool_size: self.size_cond.notify() - self._raise_wait_queue_timeout(checkout_started_time) - self._raise_if_not_ready(checkout_started_time, emit_event=True) + self._raise_wait_queue_timeout() + self._raise_if_not_ready(emit_event=True) self.requests += 1 # We've now acquired the semaphore and must release it on error. @@ -1087,7 +1079,7 @@ async def _get_conn( # CMAP: we MUST wait for either maxConnecting OR for a socket # to be checked back into the pool. async with self._max_connecting_cond: - self._raise_if_not_ready(checkout_started_time, emit_event=False) + self._raise_if_not_ready(emit_event=False) while not (self.conns or self._pending < self._max_connecting): timeout = deadline - time.monotonic() if deadline else None if not await _async_cond_wait(self._max_connecting_cond, timeout): @@ -1096,8 +1088,8 @@ async def _get_conn( if self.conns or self._pending < self._max_connecting: self._max_connecting_cond.notify() emitted_event = True - self._raise_wait_queue_timeout(checkout_started_time) - self._raise_if_not_ready(checkout_started_time, emit_event=False) + self._raise_wait_queue_timeout() + self._raise_if_not_ready(emit_event=False) try: conn = self.conns.popleft() @@ -1126,11 +1118,9 @@ async def _get_conn( self.size_cond.notify() if not emitted_event: - duration = time.monotonic() - checkout_started_time self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, - duration, ) raise @@ -1222,12 +1212,10 @@ async def _perished(self, conn: AsyncConnection) -> bool: return False - def _raise_wait_queue_timeout(self, checkout_started_time: float) -> NoReturn: - duration = time.monotonic() - checkout_started_time + def _raise_wait_queue_timeout(self) -> NoReturn: self._telemetry.checkout_failed( "Wait queue timeout elapsed without a connection becoming available", ConnectionCheckOutFailedReason.TIMEOUT, - duration, ) timeout = _csot.get_timeout() or self.opts.wait_queue_timeout if self.opts.load_balanced: diff --git a/pymongo/synchronous/monitor.py b/pymongo/synchronous/monitor.py index 946cbe08f2..583748b897 100644 --- a/pymongo/synchronous/monitor.py +++ b/pymongo/synchronous/monitor.py @@ -257,7 +257,6 @@ def _check_server(self) -> ServerDescription: """ self._conn_id = None self._current_hb = None - start = time.monotonic() try: return self._check_once() except ReferenceError: @@ -265,9 +264,8 @@ def _check_server(self) -> ServerDescription: except Exception as error: _sanitize(error) address = self._server_description.address - duration = _monotonic_duration(start) if self._current_hb is not None: - self._current_hb.failed(duration, error, self._conn_id) + self._current_hb.failed(error, self._conn_id) self._reset_connection() if isinstance(error, _OperationCancelled): raise diff --git a/pymongo/synchronous/pool.py b/pymongo/synchronous/pool.py index 66a500855b..6dc7ea47a5 100644 --- a/pymongo/synchronous/pool.py +++ b/pymongo/synchronous/pool.py @@ -166,7 +166,6 @@ def __init__( self.active = False self.last_timeout = self.opts.socket_timeout self.connect_rtt = 0.0 - self.creation_time = time.monotonic() # For gossiping $clusterTime from the connection handshake to the client. self._cluster_time = None @@ -467,8 +466,7 @@ def authenticate(self, reauthenticate: bool = False) -> None: auth.authenticate(creds, self, reauthenticate=reauthenticate) self.ready = True - duration = time.monotonic() - self.creation_time - self._telemetry.connection_ready(self.id, duration) + self._telemetry.connection_ready(self.id) def validate_session( self, client: Optional[MongoClient[Any]], session: Optional[ClientSession] @@ -974,13 +972,11 @@ def checkout( :param handler: A _MongoClientErrorHandler. """ - checkout_started_time = time.monotonic() self._telemetry.checkout_started() - conn = self._get_conn(checkout_started_time, handler=handler) + conn = self._get_conn(handler=handler) - duration = time.monotonic() - checkout_started_time - self._telemetry.checkout_succeeded(conn.id, duration) + self._telemetry.checkout_succeeded(conn.id) try: with self.lock: self.active_contexts.add(conn.cancel_context) @@ -1011,14 +1007,12 @@ def checkout( elif conn.active: self.checkin(conn) - def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) -> None: + def _raise_if_not_ready(self, emit_event: bool) -> None: if self.state != PoolState.READY: if emit_event: - duration = time.monotonic() - checkout_started_time self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, - duration, ) details = _get_timeout_details(self.opts) @@ -1026,9 +1020,7 @@ def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) -> self.address, AutoReconnect("connection pool paused"), timeout_details=details ) - def _get_conn( - self, checkout_started_time: float, handler: Optional[_MongoClientErrorHandler] = None - ) -> Connection: + def _get_conn(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connection: """Get or create a Connection. Can raise ConnectionFailure.""" # We use the pid here to avoid issues with fork / multiprocessing. # See test.test_client:TestClient.test_fork for an example of @@ -1037,11 +1029,9 @@ def _get_conn( self.reset_without_pause() if self.closed: - duration = time.monotonic() - checkout_started_time self._telemetry.checkout_failed( "Connection pool was closed", ConnectionCheckOutFailedReason.POOL_CLOSED, - duration, ) raise _PoolClosedError( "Attempted to check out a connection from closed connection pool" @@ -1059,7 +1049,7 @@ def _get_conn( deadline = None with self.size_cond: - self._raise_if_not_ready(checkout_started_time, emit_event=True) + self._raise_if_not_ready(emit_event=True) while not (self.requests < self.max_pool_size): timeout = deadline - time.monotonic() if deadline else None if not _cond_wait(self.size_cond, timeout): @@ -1067,8 +1057,8 @@ def _get_conn( # timeout doesn't consume the condition. if self.requests < self.max_pool_size: self.size_cond.notify() - self._raise_wait_queue_timeout(checkout_started_time) - self._raise_if_not_ready(checkout_started_time, emit_event=True) + self._raise_wait_queue_timeout() + self._raise_if_not_ready(emit_event=True) self.requests += 1 # We've now acquired the semaphore and must release it on error. @@ -1083,7 +1073,7 @@ def _get_conn( # CMAP: we MUST wait for either maxConnecting OR for a socket # to be checked back into the pool. with self._max_connecting_cond: - self._raise_if_not_ready(checkout_started_time, emit_event=False) + self._raise_if_not_ready(emit_event=False) while not (self.conns or self._pending < self._max_connecting): timeout = deadline - time.monotonic() if deadline else None if not _cond_wait(self._max_connecting_cond, timeout): @@ -1092,8 +1082,8 @@ def _get_conn( if self.conns or self._pending < self._max_connecting: self._max_connecting_cond.notify() emitted_event = True - self._raise_wait_queue_timeout(checkout_started_time) - self._raise_if_not_ready(checkout_started_time, emit_event=False) + self._raise_wait_queue_timeout() + self._raise_if_not_ready(emit_event=False) try: conn = self.conns.popleft() @@ -1122,11 +1112,9 @@ def _get_conn( self.size_cond.notify() if not emitted_event: - duration = time.monotonic() - checkout_started_time self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, - duration, ) raise @@ -1218,12 +1206,10 @@ def _perished(self, conn: Connection) -> bool: return False - def _raise_wait_queue_timeout(self, checkout_started_time: float) -> NoReturn: - duration = time.monotonic() - checkout_started_time + def _raise_wait_queue_timeout(self) -> NoReturn: self._telemetry.checkout_failed( "Wait queue timeout elapsed without a connection becoming available", ConnectionCheckOutFailedReason.TIMEOUT, - duration, ) timeout = _csot.get_timeout() or self.opts.wait_queue_timeout if self.opts.load_balanced: From a30d380289c372970eb7d5309fb6996d52ba6da4 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 16:37:45 -0500 Subject: [PATCH 04/20] PYTHON-5846 Fix awaited value in heartbeat succeeded APM event Use response.awaitable (ground truth from wire) rather than self._awaited (pre-computed before response) for ServerHeartbeatSucceededEvent.awaited. Also fix stale method references in _HeartbeatTelemetry docstring. --- pymongo/_telemetry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index ce5a6646bb..0598345b2e 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -339,7 +339,7 @@ class _HeartbeatTelemetry: The APM started event is published before connection checkout (no conn_id yet); the log entry for started is emitted after checkout once the conn_id is known. - Call :meth:`apm_started` first, then :meth:`log_started` inside the checkout + Call :meth:`started` first, then :meth:`emit_started_log` inside the checkout context, then :meth:`succeeded` or :meth:`failed` when the outcome is known. """ @@ -391,7 +391,7 @@ def succeeded( if self._publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_succeeded( - self._address, round_trip_time, response, self._awaited + self._address, round_trip_time, response, response.awaitable ) if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): _debug_log( From 577b594da30732070427e8597f490e484d96c005 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 18:48:22 -0500 Subject: [PATCH 05/20] PYTHON-5846 Add _emit_log helpers to _HeartbeatTelemetry and _SdamTelemetry --- pymongo/_telemetry.py | 132 ++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 75 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 0598345b2e..111d11b1e7 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -359,6 +359,18 @@ def __init__( self._publish = publish self._awaited = awaited + def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log( + _SDAM_LOGGER, + message=message, + topologyId=self._topology_id, + serverHost=self._address[0], + serverPort=self._address[1], + awaited=self._awaited, + **extra, + ) + def started(self) -> None: """Publish the APM heartbeat-started event (before connection checkout).""" self._start = time.monotonic() @@ -368,17 +380,11 @@ def started(self) -> None: def emit_started_log(self, conn_id: int, server_conn_id: Optional[int]) -> None: """Emit the log entry for heartbeat started (after connection checkout).""" - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_START, - topologyId=self._topology_id, - driverConnectionId=conn_id, - serverConnectionId=server_conn_id, - serverHost=self._address[0], - serverPort=self._address[1], - awaited=self._awaited, - ) + self._emit_log( + _SDAMStatusMessage.HEARTBEAT_START, + driverConnectionId=conn_id, + serverConnectionId=server_conn_id, + ) def succeeded( self, @@ -393,19 +399,13 @@ def succeeded( self._listeners.publish_server_heartbeat_succeeded( self._address, round_trip_time, response, response.awaitable ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_SUCCESS, - topologyId=self._topology_id, - driverConnectionId=conn_id, - serverConnectionId=server_conn_id, - serverHost=self._address[0], - serverPort=self._address[1], - awaited=self._awaited, - durationMS=round_trip_time * 1000, - reply=response.document, - ) + self._emit_log( + _SDAMStatusMessage.HEARTBEAT_SUCCESS, + driverConnectionId=conn_id, + serverConnectionId=server_conn_id, + durationMS=round_trip_time * 1000, + reply=response.document, + ) def failed(self, error: Exception, conn_id: Optional[int]) -> None: """Emit the FAILED log entry and APM event.""" @@ -415,18 +415,12 @@ def failed(self, error: Exception, conn_id: Optional[int]) -> None: self._listeners.publish_server_heartbeat_failed( self._address, duration, error, self._awaited ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.HEARTBEAT_FAIL, - topologyId=self._topology_id, - serverHost=self._address[0], - serverPort=self._address[1], - awaited=self._awaited, - durationMS=duration * 1000, - failure=error, - driverConnectionId=conn_id, - ) + self._emit_log( + _SDAMStatusMessage.HEARTBEAT_FAIL, + durationMS=duration * 1000, + failure=error, + driverConnectionId=conn_id, + ) class _SdamTelemetry: @@ -453,13 +447,17 @@ def _enqueue(self, fn: Any, args: tuple[Any, ...]) -> None: if self._events is not None: self._events.put((fn, args)) - def topology_opened(self) -> None: + def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): _debug_log( _SDAM_LOGGER, - message=_SDAMStatusMessage.START_TOPOLOGY, + message=message, topologyId=self._topology_id, + **extra, ) + + def topology_opened(self) -> None: + self._emit_log(_SDAMStatusMessage.START_TOPOLOGY) if self._publish_tp: assert self._listeners is not None self._enqueue(self._listeners.publish_topology_opened, (self._topology_id,)) @@ -471,14 +469,11 @@ def topology_description_changed(self, old_td: Any, new_td: Any) -> None: self._listeners.publish_topology_description_changed, (old_td, new_td, self._topology_id), ) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(old_td), - newDescription=repr(new_td), - ) + self._emit_log( + _SDAMStatusMessage.TOPOLOGY_CHANGE, + previousDescription=repr(old_td), + newDescription=repr(new_td), + ) def topology_closed(self, old_td: Any, new_td: Any) -> None: """Emit APM and log events for topology description change + topology closed.""" @@ -489,32 +484,22 @@ def topology_closed(self, old_td: Any, new_td: Any) -> None: (old_td, new_td, self._topology_id), ) self._enqueue(self._listeners.publish_topology_closed, (self._topology_id,)) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.TOPOLOGY_CHANGE, - topologyId=self._topology_id, - previousDescription=repr(old_td), - newDescription=repr(new_td), - ) - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.STOP_TOPOLOGY, - topologyId=self._topology_id, - ) + self._emit_log( + _SDAMStatusMessage.TOPOLOGY_CHANGE, + previousDescription=repr(old_td), + newDescription=repr(new_td), + ) + self._emit_log(_SDAMStatusMessage.STOP_TOPOLOGY) def server_opened(self, address: _Address) -> None: if self._publish_server: assert self._listeners is not None self._enqueue(self._listeners.publish_server_opened, (address, self._topology_id)) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.START_SERVER, - topologyId=self._topology_id, - serverHost=address[0], - serverPort=address[1], - ) + self._emit_log( + _SDAMStatusMessage.START_SERVER, + serverHost=address[0], + serverPort=address[1], + ) def server_description_changed(self, sd_old: Any, sd_new: Any, address: _Address) -> None: if self._publish_server: @@ -528,11 +513,8 @@ def server_closed(self, address: _Address) -> None: if self._publish_server: assert self._listeners is not None self._enqueue(self._listeners.publish_server_closed, (address, self._topology_id)) - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SDAM_LOGGER, - message=_SDAMStatusMessage.STOP_SERVER, - topologyId=self._topology_id, - serverHost=address[0], - serverPort=address[1], - ) + self._emit_log( + _SDAMStatusMessage.STOP_SERVER, + serverHost=address[0], + serverPort=address[1], + ) From fc143ffc77de4b2fcd4de0b60370a32a70dc222d Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 18:59:04 -0500 Subject: [PATCH 06/20] PYTHON-5846 Fix durationMS unit in CMAP log entries (seconds -> milliseconds) --- pymongo/_telemetry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 111d11b1e7..0724ad392e 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -280,7 +280,7 @@ def connection_ready(self, conn_id: int) -> None: self._emit_log( _ConnectionStatusMessage.CONN_READY, driverConnectionId=conn_id, - durationMS=duration, + durationMS=duration * 1000, ) if self._should_publish: assert self._listeners is not None @@ -312,7 +312,7 @@ def checkout_succeeded(self, conn_id: int) -> None: self._emit_log( _ConnectionStatusMessage.CHECKOUT_SUCCEEDED, driverConnectionId=conn_id, - durationMS=duration, + durationMS=duration * 1000, ) def checkout_failed(self, reason: str, error: str) -> None: @@ -324,7 +324,7 @@ def checkout_failed(self, reason: str, error: str) -> None: _ConnectionStatusMessage.CHECKOUT_FAILED, reason=reason, error=error, - durationMS=duration, + durationMS=duration * 1000, ) def checked_in(self, conn_id: int) -> None: From 4ea520532bb761185d29402fa650f5b34ebb3d5b Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 19:05:53 -0500 Subject: [PATCH 07/20] PYTHON-5846 Replace cached telemetry flags with dynamic properties _CmapTelemetry, _HeartbeatTelemetry, and _SdamTelemetry now compute _should_publish and _should_log as properties that check listener state and logger level at call time rather than caching derived booleans in the constructor. --- pymongo/_telemetry.py | 52 ++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 0724ad392e..dadd81ab03 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -205,8 +205,8 @@ class _CmapTelemetry: "_client_id", "_conn_created_start", "_listeners", - "_should_log", - "_should_publish", + "_log", + "_publish", ) def __init__( @@ -220,11 +220,19 @@ def __init__( self._client_id = client_id self._address = address self._listeners = listeners - self._should_publish = publish and listeners is not None and listeners.enabled_for_cmap - self._should_log = log + self._publish = publish + self._log = log + + @property + def _should_publish(self) -> bool: + return self._publish and self._listeners is not None and self._listeners.enabled_for_cmap + + @property + def _should_log(self) -> bool: + return self._log and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG) def _emit_log(self, message: _ConnectionStatusMessage, **extra: Any) -> None: - if self._should_log and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG): + if self._should_log: _debug_log( _CONNECTION_LOGGER, message=message, @@ -359,8 +367,16 @@ def __init__( self._publish = publish self._awaited = awaited + @property + def _should_publish(self) -> bool: + return self._publish and self._listeners is not None + + @property + def _should_log(self) -> bool: + return _SDAM_LOGGER.isEnabledFor(logging.DEBUG) + def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + if self._should_log: _debug_log( _SDAM_LOGGER, message=message, @@ -374,7 +390,7 @@ def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: def started(self) -> None: """Publish the APM heartbeat-started event (before connection checkout).""" self._start = time.monotonic() - if self._publish: + if self._should_publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_started(self._address, self._awaited) @@ -394,7 +410,7 @@ def succeeded( server_conn_id: Optional[int], ) -> None: """Emit the SUCCEEDED log entry and APM event.""" - if self._publish: + if self._should_publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_succeeded( self._address, round_trip_time, response, response.awaitable @@ -410,7 +426,7 @@ def succeeded( def failed(self, error: Exception, conn_id: Optional[int]) -> None: """Emit the FAILED log entry and APM event.""" duration = max(0.0, time.monotonic() - self._start) - if self._publish: + if self._should_publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_failed( self._address, duration, error, self._awaited @@ -429,7 +445,7 @@ class _SdamTelemetry: Topology events are queued for asynchronous delivery; log entries are emitted inline. """ - __slots__ = ("_events", "_listeners", "_publish_server", "_publish_tp", "_topology_id") + __slots__ = ("_events", "_listeners", "_topology_id") def __init__( self, @@ -440,15 +456,25 @@ def __init__( self._topology_id = topology_id self._listeners = listeners self._events = events - self._publish_server = listeners is not None and listeners.enabled_for_server - self._publish_tp = listeners is not None and listeners.enabled_for_topology + + @property + def _publish_server(self) -> bool: + return self._listeners is not None and self._listeners.enabled_for_server + + @property + def _publish_tp(self) -> bool: + return self._listeners is not None and self._listeners.enabled_for_topology + + @property + def _should_log(self) -> bool: + return _SDAM_LOGGER.isEnabledFor(logging.DEBUG) def _enqueue(self, fn: Any, args: tuple[Any, ...]) -> None: if self._events is not None: self._events.put((fn, args)) def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: - if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + if self._should_log: _debug_log( _SDAM_LOGGER, message=message, From 553dc313265411e6355992d4e7fd91e6a53bc1a0 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 19:56:40 -0500 Subject: [PATCH 08/20] PYTHON-5846 Drop publish param from _HeartbeatTelemetry, derive from listeners --- pymongo/_telemetry.py | 6 ++---- pymongo/asynchronous/monitor.py | 5 +---- pymongo/synchronous/monitor.py | 5 +---- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index dadd81ab03..fbcb85e39e 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -351,25 +351,23 @@ class _HeartbeatTelemetry: context, then :meth:`succeeded` or :meth:`failed` when the outcome is known. """ - __slots__ = ("_address", "_awaited", "_listeners", "_publish", "_start", "_topology_id") + __slots__ = ("_address", "_awaited", "_listeners", "_start", "_topology_id") def __init__( self, topology_id: ObjectId, address: _Address, listeners: Optional[_EventListeners], - publish: bool, awaited: bool, ) -> None: self._topology_id = topology_id self._address = address self._listeners = listeners - self._publish = publish self._awaited = awaited @property def _should_publish(self) -> bool: - return self._publish and self._listeners is not None + return self._listeners is not None and self._listeners.enabled_for_server_heartbeat @property def _should_log(self) -> bool: diff --git a/pymongo/asynchronous/monitor.py b/pymongo/asynchronous/monitor.py index 913f497a13..a3528c7723 100644 --- a/pymongo/asynchronous/monitor.py +++ b/pymongo/asynchronous/monitor.py @@ -151,7 +151,6 @@ def __init__( self._pool = pool self._settings = topology_settings self._listeners = self._settings._pool_options._event_listeners - self._publish = self._listeners is not None and self._listeners.enabled_for_server_heartbeat self._cancel_context: Optional[_CancellationContext] = None self._conn_id: Optional[int] = None self._current_hb: Optional[_HeartbeatTelemetry] = None @@ -288,9 +287,7 @@ async def _check_once(self) -> ServerDescription: awaited = bool( self._pool.conns and self._stream and sd.is_server_type_known and sd.topology_version ) - hb = _HeartbeatTelemetry( - self._topology._topology_id, address, self._listeners, self._publish, awaited - ) + hb = _HeartbeatTelemetry(self._topology._topology_id, address, self._listeners, awaited) self._current_hb = hb hb.started() diff --git a/pymongo/synchronous/monitor.py b/pymongo/synchronous/monitor.py index 583748b897..b31ef516fb 100644 --- a/pymongo/synchronous/monitor.py +++ b/pymongo/synchronous/monitor.py @@ -151,7 +151,6 @@ def __init__( self._pool = pool self._settings = topology_settings self._listeners = self._settings._pool_options._event_listeners - self._publish = self._listeners is not None and self._listeners.enabled_for_server_heartbeat self._cancel_context: Optional[_CancellationContext] = None self._conn_id: Optional[int] = None self._current_hb: Optional[_HeartbeatTelemetry] = None @@ -286,9 +285,7 @@ def _check_once(self) -> ServerDescription: awaited = bool( self._pool.conns and self._stream and sd.is_server_type_known and sd.topology_version ) - hb = _HeartbeatTelemetry( - self._topology._topology_id, address, self._listeners, self._publish, awaited - ) + hb = _HeartbeatTelemetry(self._topology._topology_id, address, self._listeners, awaited) self._current_hb = hb hb.started() From 0f33a1cdf8464263d087c53c194e75daedd11574 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 20:05:18 -0500 Subject: [PATCH 09/20] PYTHON-5846 Decompose Hello response in _HeartbeatTelemetry.succeeded --- pymongo/_telemetry.py | 7 ++++--- pymongo/asynchronous/monitor.py | 8 +++++++- pymongo/synchronous/monitor.py | 8 +++++++- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index fbcb85e39e..2408109fc4 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -403,7 +403,8 @@ def emit_started_log(self, conn_id: int, server_conn_id: Optional[int]) -> None: def succeeded( self, round_trip_time: float, - response: Any, + reply: _DocumentOut, + awaited: bool, conn_id: int, server_conn_id: Optional[int], ) -> None: @@ -411,14 +412,14 @@ def succeeded( if self._should_publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_succeeded( - self._address, round_trip_time, response, response.awaitable + self._address, round_trip_time, reply, awaited ) self._emit_log( _SDAMStatusMessage.HEARTBEAT_SUCCESS, driverConnectionId=conn_id, serverConnectionId=server_conn_id, durationMS=round_trip_time * 1000, - reply=response.document, + reply=reply, ) def failed(self, error: Exception, conn_id: Optional[int]) -> None: diff --git a/pymongo/asynchronous/monitor.py b/pymongo/asynchronous/monitor.py index a3528c7723..931a7fe348 100644 --- a/pymongo/asynchronous/monitor.py +++ b/pymongo/asynchronous/monitor.py @@ -304,7 +304,13 @@ async def _check_once(self) -> ServerDescription: avg_rtt, min_rtt = await self._rtt_monitor.get() sd = ServerDescription(address, response, avg_rtt, min_round_trip_time=min_rtt) - hb.succeeded(round_trip_time, response, conn.id, conn.server_connection_id) + hb.succeeded( + round_trip_time, + response.document, + response.awaitable, + conn.id, + conn.server_connection_id, + ) return sd async def _check_with_socket(self, conn: AsyncConnection) -> tuple[Hello, float]: # type: ignore[type-arg] diff --git a/pymongo/synchronous/monitor.py b/pymongo/synchronous/monitor.py index b31ef516fb..c70a3db82f 100644 --- a/pymongo/synchronous/monitor.py +++ b/pymongo/synchronous/monitor.py @@ -302,7 +302,13 @@ def _check_once(self) -> ServerDescription: avg_rtt, min_rtt = self._rtt_monitor.get() sd = ServerDescription(address, response, avg_rtt, min_round_trip_time=min_rtt) - hb.succeeded(round_trip_time, response, conn.id, conn.server_connection_id) + hb.succeeded( + round_trip_time, + response.document, + response.awaitable, + conn.id, + conn.server_connection_id, + ) return sd def _check_with_socket(self, conn: Connection) -> tuple[Hello, float]: # type: ignore[type-arg] From 3eefcf7b92b480cfa52445e63de8601ce881adc2 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 30 Jun 2026 20:22:26 -0500 Subject: [PATCH 10/20] PYTHON-5846 Fix typing: use Hello type in _HeartbeatTelemetry.succeeded --- pymongo/_telemetry.py | 8 ++++---- pymongo/asynchronous/monitor.py | 8 +------- pymongo/synchronous/monitor.py | 8 +------- 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 2408109fc4..b99d3e90e4 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -37,6 +37,7 @@ if TYPE_CHECKING: from bson.objectid import ObjectId + from pymongo.hello import Hello from pymongo.monitoring import _EventListeners from pymongo.typings import _Address, _DocumentOut @@ -403,8 +404,7 @@ def emit_started_log(self, conn_id: int, server_conn_id: Optional[int]) -> None: def succeeded( self, round_trip_time: float, - reply: _DocumentOut, - awaited: bool, + response: Hello[Any], conn_id: int, server_conn_id: Optional[int], ) -> None: @@ -412,14 +412,14 @@ def succeeded( if self._should_publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_succeeded( - self._address, round_trip_time, reply, awaited + self._address, round_trip_time, response, response.awaitable ) self._emit_log( _SDAMStatusMessage.HEARTBEAT_SUCCESS, driverConnectionId=conn_id, serverConnectionId=server_conn_id, durationMS=round_trip_time * 1000, - reply=reply, + reply=response.document, ) def failed(self, error: Exception, conn_id: Optional[int]) -> None: diff --git a/pymongo/asynchronous/monitor.py b/pymongo/asynchronous/monitor.py index 931a7fe348..a3528c7723 100644 --- a/pymongo/asynchronous/monitor.py +++ b/pymongo/asynchronous/monitor.py @@ -304,13 +304,7 @@ async def _check_once(self) -> ServerDescription: avg_rtt, min_rtt = await self._rtt_monitor.get() sd = ServerDescription(address, response, avg_rtt, min_round_trip_time=min_rtt) - hb.succeeded( - round_trip_time, - response.document, - response.awaitable, - conn.id, - conn.server_connection_id, - ) + hb.succeeded(round_trip_time, response, conn.id, conn.server_connection_id) return sd async def _check_with_socket(self, conn: AsyncConnection) -> tuple[Hello, float]: # type: ignore[type-arg] diff --git a/pymongo/synchronous/monitor.py b/pymongo/synchronous/monitor.py index c70a3db82f..b31ef516fb 100644 --- a/pymongo/synchronous/monitor.py +++ b/pymongo/synchronous/monitor.py @@ -302,13 +302,7 @@ def _check_once(self) -> ServerDescription: avg_rtt, min_rtt = self._rtt_monitor.get() sd = ServerDescription(address, response, avg_rtt, min_round_trip_time=min_rtt) - hb.succeeded( - round_trip_time, - response.document, - response.awaitable, - conn.id, - conn.server_connection_id, - ) + hb.succeeded(round_trip_time, response, conn.id, conn.server_connection_id) return sd def _check_with_socket(self, conn: Connection) -> tuple[Hello, float]: # type: ignore[type-arg] From d844557881ff503a536dfe43435714e9f797b5a6 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 06:22:08 -0500 Subject: [PATCH 11/20] PYTHON-5846 Consolidate server selection logging and retry logging into _telemetry.py --- pymongo/_telemetry.py | 90 ++++++++++++++++++++++++++++ pymongo/asynchronous/mongo_client.py | 27 ++++----- pymongo/asynchronous/topology.py | 77 ++++++------------------ pymongo/synchronous/mongo_client.py | 27 ++++----- pymongo/synchronous/topology.py | 77 ++++++------------------ 5 files changed, 152 insertions(+), 146 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index b99d3e90e4..4a111e96a8 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -27,10 +27,12 @@ _COMMAND_LOGGER, _CONNECTION_LOGGER, _SDAM_LOGGER, + _SERVER_SELECTION_LOGGER, _CommandStatusMessage, _ConnectionStatusMessage, _debug_log, _SDAMStatusMessage, + _ServerSelectionStatusMessage, _verbose_connection_error_reason, ) from pymongo.pool_shared import _ConnectionTelemetryInfo @@ -543,3 +545,91 @@ def server_closed(self, address: _Address) -> None: serverHost=address[0], serverPort=address[1], ) + + +class _ServerSelectionTelemetry: + """Structured logging for server selection events. + + The server selection spec defines only log entries, not APM events, so this + class has no publish methods. + + Construct once per :meth:`select_server` call. + """ + + __slots__ = ( + "_operation", + "_operation_id", + "_selector", + "_topology_description", + "_topology_id", + ) + + def __init__( + self, + topology_id: Any, + selector: Any, + operation: str, + operation_id: Optional[int], + topology_description: Any, + ) -> None: + self._topology_id = topology_id + self._selector = selector + self._operation = operation + self._operation_id = operation_id + self._topology_description = topology_description + + @property + def _should_log(self) -> bool: + return _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG) + + def _emit_log(self, message: _ServerSelectionStatusMessage, **extra: Any) -> None: + if self._should_log: + _debug_log( + _SERVER_SELECTION_LOGGER, + message=message, + clientId=self._topology_id, + selector=self._selector, + operation=self._operation, + operationId=self._operation_id, + topologyDescription=self._topology_description, + **extra, + ) + + def started(self) -> None: + """Emit the server selection STARTED log entry.""" + self._emit_log(_ServerSelectionStatusMessage.STARTED) + + def waiting(self, remaining_time_ms: int) -> None: + """Emit the server selection WAITING log entry.""" + self._emit_log(_ServerSelectionStatusMessage.WAITING, remainingTimeMS=remaining_time_ms) + + def failed(self, failure: str) -> None: + """Emit the server selection FAILED log entry.""" + self._emit_log(_ServerSelectionStatusMessage.FAILED, failure=failure) + + def succeeded(self, server_host: str, server_port: Optional[int]) -> None: + """Emit the server selection SUCCEEDED log entry.""" + self._emit_log( + _ServerSelectionStatusMessage.SUCCEEDED, + serverHost=server_host, + serverPort=server_port, + ) + + +def log_command_retry( + topology_id: Any, + command_name: str, + operation_id: Optional[int], + attempt_number: int, + is_write: bool, +) -> None: + """Emit a command-retry log entry.""" + if _COMMAND_LOGGER.isEnabledFor(logging.DEBUG): + op = "write" if is_write else "read" + _debug_log( + _COMMAND_LOGGER, + message=f"Retrying {op} attempt number {attempt_number}", + clientId=topology_id, + commandName=command_name, + operationId=operation_id, + ) diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index a558f96356..678e62e6ff 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -57,6 +57,7 @@ from bson.codec_options import DEFAULT_CODEC_OPTIONS, CodecOptions, TypeRegistry from bson.timestamp import Timestamp from pymongo import _csot, common, helpers_shared, periodic_executor +from pymongo._telemetry import log_command_retry from pymongo.asynchronous import client_session, database, uri_parser from pymongo.asynchronous.change_stream import AsyncChangeStream, AsyncClusterChangeStream from pymongo.asynchronous.client_bulk import _AsyncClientBulk @@ -90,8 +91,6 @@ ) from pymongo.logger import ( _CLIENT_LOGGER, - _COMMAND_LOGGER, - _debug_log, _log_client_error, _log_or_warn, ) @@ -3014,12 +3013,12 @@ async def _write(self) -> T: self._check_last_error() self._retryable = False if self._retrying: - _debug_log( - _COMMAND_LOGGER, - message=f"Retrying write attempt number {self._attempt_number}", - clientId=self._client._topology_id, - commandName=self._operation, - operationId=self._operation_id, + log_command_retry( + self._client._topology_id, + self._operation, + self._operation_id, + self._attempt_number, + is_write=True, ) return await self._func(self._session, conn, self._retryable) # type: ignore except PyMongoError as exc: @@ -3043,12 +3042,12 @@ async def _read(self) -> T: if self._retrying and not self._retryable and not self._always_retryable: self._check_last_error() if self._retrying: - _debug_log( - _COMMAND_LOGGER, - message=f"Retrying read attempt number {self._attempt_number}", - clientId=self._client._topology_settings._topology_id, - commandName=self._operation, - operationId=self._operation_id, + log_command_retry( + self._client._topology_settings._topology_id, + self._operation, + self._operation_id, + self._attempt_number, + is_write=False, ) return await self._func(self._session, self._server, conn, read_pref) # type: ignore diff --git a/pymongo/asynchronous/topology.py b/pymongo/asynchronous/topology.py index b3046a7b21..c0982e247a 100644 --- a/pymongo/asynchronous/topology.py +++ b/pymongo/asynchronous/topology.py @@ -17,7 +17,6 @@ from __future__ import annotations import asyncio -import logging import os import queue import random @@ -30,7 +29,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, cast from pymongo import _csot, common, helpers_shared, periodic_executor -from pymongo._telemetry import _SdamTelemetry +from pymongo._telemetry import _SdamTelemetry, _ServerSelectionTelemetry from pymongo.asynchronous.client_session import _ServerSession, _ServerSessionPool from pymongo.asynchronous.monitor import MonitorBase, SrvMonitor from pymongo.asynchronous.pool import Pool @@ -52,11 +51,6 @@ _async_create_condition, _async_create_lock, ) -from pymongo.logger import ( - _SERVER_SELECTION_LOGGER, - _debug_log, - _ServerSelectionStatusMessage, -) from pymongo.pool_options import PoolOptions from pymongo.server_description import ServerDescription from pymongo.server_selectors import ( @@ -107,14 +101,14 @@ class Topology: def __init__(self, topology_settings: TopologySettings): self._topology_id = topology_settings._topology_id self._listeners = topology_settings._pool_options._event_listeners - self._publish_server = self._listeners is not None and self._listeners.enabled_for_server - self._publish_tp = self._listeners is not None and self._listeners.enabled_for_topology # Create events queue if there are publishers. self._events: queue.Queue[Any] | None = None self.__events_executor: Any = None - if self._publish_server or self._publish_tp: + publish_server = self._listeners is not None and self._listeners.enabled_for_server + publish_tp = self._listeners is not None and self._listeners.enabled_for_topology + if publish_server or publish_tp: self._events = queue.Queue(maxsize=100) self._sdam = _SdamTelemetry(self._topology_id, self._listeners, self._events) @@ -152,7 +146,7 @@ def __init__(self, topology_settings: TopologySettings): self._max_cluster_time: Optional[ClusterTime] = None self._session_pool = _ServerSessionPool() - if self._publish_server or self._publish_tp: + if self._sdam._publish_server or self._sdam._publish_tp: assert self._events is not None weak: weakref.ReferenceType[queue.Queue[Any]] @@ -285,17 +279,10 @@ async def _select_servers_loop( now = time.monotonic() end_time = now + timeout logged_waiting = False - - if _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SERVER_SELECTION_LOGGER, - message=_ServerSelectionStatusMessage.STARTED, - selector=selector, - operation=operation, - operationId=operation_id, - topologyDescription=self.description, - clientId=self.description._topology_settings._topology_id, - ) + ss = _ServerSelectionTelemetry( + self._topology_id, selector, operation, operation_id, self.description + ) + ss.started() server_descriptions = self._description.apply_selector( selector, @@ -309,32 +296,13 @@ async def _select_servers_loop( while not server_descriptions: # No suitable servers. if timeout == 0 or now > end_time: - if _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SERVER_SELECTION_LOGGER, - message=_ServerSelectionStatusMessage.FAILED, - selector=selector, - operation=operation, - operationId=operation_id, - topologyDescription=self.description, - clientId=self.description._topology_settings._topology_id, - failure=self._error_message(selector), - ) + ss.failed(self._error_message(selector)) raise ServerSelectionTimeoutError( f"{self._error_message(selector)}, Timeout: {timeout}s, Topology Description: {self.description!r}" ) if not logged_waiting: - _debug_log( - _SERVER_SELECTION_LOGGER, - message=_ServerSelectionStatusMessage.WAITING, - selector=selector, - operation=operation, - operationId=operation_id, - topologyDescription=self.description, - clientId=self.description._topology_settings._topology_id, - remainingTimeMS=int(1000 * (end_time - time.monotonic())), - ) + ss.waiting(int(1000 * (end_time - time.monotonic()))) logged_waiting = True await self._ensure_opened() @@ -399,18 +367,9 @@ async def select_server( ) if _csot.get_timeout(): _csot.set_rtt(server.description.min_round_trip_time) - if _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SERVER_SELECTION_LOGGER, - message=_ServerSelectionStatusMessage.SUCCEEDED, - selector=selector, - operation=operation, - operationId=operation_id, - topologyDescription=self.description, - clientId=self.description._topology_settings._topology_id, - serverHost=server.description.address[0], - serverPort=server.description.address[1], - ) + _ServerSelectionTelemetry( + self._topology_id, selector, operation, operation_id, self.description + ).succeeded(server.description.address[0], server.description.address[1]) return server async def select_server_by_address( @@ -670,7 +629,7 @@ async def close(self) -> None: self._closed = True # Publish only after releasing the lock. - if self._publish_tp: + if self._sdam._publish_tp: self._description = TopologyDescription( TOPOLOGY_TYPE.Unknown, {}, @@ -681,7 +640,7 @@ async def close(self) -> None: ) self._sdam.topology_closed(old_td, self._description) - if self._publish_server or self._publish_tp: + if self._sdam._publish_server or self._sdam._publish_tp: # Make sure the events executor thread is fully closed before publishing the remaining events self.__events_executor.close() await self.__events_executor.join(1) @@ -722,7 +681,7 @@ async def _ensure_opened(self) -> None: await self._update_servers() # Start or restart the events publishing thread. - if self._publish_tp or self._publish_server: + if self._sdam._publish_tp or self._sdam._publish_server: self.__events_executor.open() # Start the SRV polling thread. @@ -861,7 +820,7 @@ async def _update_servers(self) -> None: ) weak = None - if self._publish_server and self._events is not None: + if self._sdam._publish_server and self._events is not None: weak = weakref.ref(self._events) server = Server( server_description=sd, diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 5f321afe5c..2458324e6e 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -57,6 +57,7 @@ from bson.codec_options import DEFAULT_CODEC_OPTIONS, CodecOptions, TypeRegistry from bson.timestamp import Timestamp from pymongo import _csot, common, helpers_shared, periodic_executor +from pymongo._telemetry import log_command_retry from pymongo.client_options import ClientOptions from pymongo.driver_info import DriverInfo from pymongo.errors import ( @@ -80,8 +81,6 @@ ) from pymongo.logger import ( _CLIENT_LOGGER, - _COMMAND_LOGGER, - _debug_log, _log_client_error, _log_or_warn, ) @@ -3005,12 +3004,12 @@ def _write(self) -> T: self._check_last_error() self._retryable = False if self._retrying: - _debug_log( - _COMMAND_LOGGER, - message=f"Retrying write attempt number {self._attempt_number}", - clientId=self._client._topology_id, - commandName=self._operation, - operationId=self._operation_id, + log_command_retry( + self._client._topology_id, + self._operation, + self._operation_id, + self._attempt_number, + is_write=True, ) return self._func(self._session, conn, self._retryable) # type: ignore except PyMongoError as exc: @@ -3034,12 +3033,12 @@ def _read(self) -> T: if self._retrying and not self._retryable and not self._always_retryable: self._check_last_error() if self._retrying: - _debug_log( - _COMMAND_LOGGER, - message=f"Retrying read attempt number {self._attempt_number}", - clientId=self._client._topology_settings._topology_id, - commandName=self._operation, - operationId=self._operation_id, + log_command_retry( + self._client._topology_settings._topology_id, + self._operation, + self._operation_id, + self._attempt_number, + is_write=False, ) return self._func(self._session, self._server, conn, read_pref) # type: ignore diff --git a/pymongo/synchronous/topology.py b/pymongo/synchronous/topology.py index 4f273f60f0..483f89d901 100644 --- a/pymongo/synchronous/topology.py +++ b/pymongo/synchronous/topology.py @@ -17,7 +17,6 @@ from __future__ import annotations import asyncio -import logging import os import queue import random @@ -30,7 +29,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, cast from pymongo import _csot, common, helpers_shared, periodic_executor -from pymongo._telemetry import _SdamTelemetry +from pymongo._telemetry import _SdamTelemetry, _ServerSelectionTelemetry from pymongo.errors import ( ConnectionFailure, InvalidOperation, @@ -48,11 +47,6 @@ _create_condition, _create_lock, ) -from pymongo.logger import ( - _SERVER_SELECTION_LOGGER, - _debug_log, - _ServerSelectionStatusMessage, -) from pymongo.pool_options import PoolOptions from pymongo.server_description import ServerDescription from pymongo.server_selectors import ( @@ -107,14 +101,14 @@ class Topology: def __init__(self, topology_settings: TopologySettings): self._topology_id = topology_settings._topology_id self._listeners = topology_settings._pool_options._event_listeners - self._publish_server = self._listeners is not None and self._listeners.enabled_for_server - self._publish_tp = self._listeners is not None and self._listeners.enabled_for_topology # Create events queue if there are publishers. self._events: queue.Queue[Any] | None = None self.__events_executor: Any = None - if self._publish_server or self._publish_tp: + publish_server = self._listeners is not None and self._listeners.enabled_for_server + publish_tp = self._listeners is not None and self._listeners.enabled_for_topology + if publish_server or publish_tp: self._events = queue.Queue(maxsize=100) self._sdam = _SdamTelemetry(self._topology_id, self._listeners, self._events) @@ -152,7 +146,7 @@ def __init__(self, topology_settings: TopologySettings): self._max_cluster_time: Optional[ClusterTime] = None self._session_pool = _ServerSessionPool() - if self._publish_server or self._publish_tp: + if self._sdam._publish_server or self._sdam._publish_tp: assert self._events is not None weak: weakref.ReferenceType[queue.Queue[Any]] @@ -285,17 +279,10 @@ def _select_servers_loop( now = time.monotonic() end_time = now + timeout logged_waiting = False - - if _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SERVER_SELECTION_LOGGER, - message=_ServerSelectionStatusMessage.STARTED, - selector=selector, - operation=operation, - operationId=operation_id, - topologyDescription=self.description, - clientId=self.description._topology_settings._topology_id, - ) + ss = _ServerSelectionTelemetry( + self._topology_id, selector, operation, operation_id, self.description + ) + ss.started() server_descriptions = self._description.apply_selector( selector, @@ -309,32 +296,13 @@ def _select_servers_loop( while not server_descriptions: # No suitable servers. if timeout == 0 or now > end_time: - if _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SERVER_SELECTION_LOGGER, - message=_ServerSelectionStatusMessage.FAILED, - selector=selector, - operation=operation, - operationId=operation_id, - topologyDescription=self.description, - clientId=self.description._topology_settings._topology_id, - failure=self._error_message(selector), - ) + ss.failed(self._error_message(selector)) raise ServerSelectionTimeoutError( f"{self._error_message(selector)}, Timeout: {timeout}s, Topology Description: {self.description!r}" ) if not logged_waiting: - _debug_log( - _SERVER_SELECTION_LOGGER, - message=_ServerSelectionStatusMessage.WAITING, - selector=selector, - operation=operation, - operationId=operation_id, - topologyDescription=self.description, - clientId=self.description._topology_settings._topology_id, - remainingTimeMS=int(1000 * (end_time - time.monotonic())), - ) + ss.waiting(int(1000 * (end_time - time.monotonic()))) logged_waiting = True self._ensure_opened() @@ -399,18 +367,9 @@ def select_server( ) if _csot.get_timeout(): _csot.set_rtt(server.description.min_round_trip_time) - if _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG): - _debug_log( - _SERVER_SELECTION_LOGGER, - message=_ServerSelectionStatusMessage.SUCCEEDED, - selector=selector, - operation=operation, - operationId=operation_id, - topologyDescription=self.description, - clientId=self.description._topology_settings._topology_id, - serverHost=server.description.address[0], - serverPort=server.description.address[1], - ) + _ServerSelectionTelemetry( + self._topology_id, selector, operation, operation_id, self.description + ).succeeded(server.description.address[0], server.description.address[1]) return server def select_server_by_address( @@ -668,7 +627,7 @@ def close(self) -> None: self._closed = True # Publish only after releasing the lock. - if self._publish_tp: + if self._sdam._publish_tp: self._description = TopologyDescription( TOPOLOGY_TYPE.Unknown, {}, @@ -679,7 +638,7 @@ def close(self) -> None: ) self._sdam.topology_closed(old_td, self._description) - if self._publish_server or self._publish_tp: + if self._sdam._publish_server or self._sdam._publish_tp: # Make sure the events executor thread is fully closed before publishing the remaining events self.__events_executor.close() self.__events_executor.join(1) @@ -720,7 +679,7 @@ def _ensure_opened(self) -> None: self._update_servers() # Start or restart the events publishing thread. - if self._publish_tp or self._publish_server: + if self._sdam._publish_tp or self._sdam._publish_server: self.__events_executor.open() # Start the SRV polling thread. @@ -859,7 +818,7 @@ def _update_servers(self) -> None: ) weak = None - if self._publish_server and self._events is not None: + if self._sdam._publish_server and self._events is not None: weak = weakref.ref(self._events) server = Server( server_description=sd, From 57f4e504001696b95bc591e7a2c890d20d354c27 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 07:54:22 -0500 Subject: [PATCH 12/20] PYTHON-5846 Move _should_log check to call site in all telemetry classes --- pymongo/_telemetry.py | 247 +++++++++++++++++++++++------------------- 1 file changed, 133 insertions(+), 114 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 4a111e96a8..73c0a5e102 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -235,33 +235,35 @@ def _should_log(self) -> bool: return self._log and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG) def _emit_log(self, message: _ConnectionStatusMessage, **extra: Any) -> None: - if self._should_log: - _debug_log( - _CONNECTION_LOGGER, - message=message, - clientId=self._client_id, - serverHost=self._address[0], - serverPort=self._address[1], - **extra, - ) + _debug_log( + _CONNECTION_LOGGER, + message=message, + clientId=self._client_id, + serverHost=self._address[0], + serverPort=self._address[1], + **extra, + ) def pool_created(self, non_default_options: dict[str, Any]) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._emit_log(_ConnectionStatusMessage.POOL_CREATED, **non_default_options) + if self._should_log: + self._emit_log(_ConnectionStatusMessage.POOL_CREATED, **non_default_options) if self._should_publish: assert self._listeners is not None self._listeners.publish_pool_created(self._address, non_default_options) def pool_ready(self) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._emit_log(_ConnectionStatusMessage.POOL_READY) + if self._should_log: + self._emit_log(_ConnectionStatusMessage.POOL_READY) if self._should_publish: assert self._listeners is not None self._listeners.publish_pool_ready(self._address) def pool_cleared(self, service_id: Optional[ObjectId], interrupt_connections: bool) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._emit_log(_ConnectionStatusMessage.POOL_CLEARED, serviceId=service_id) + if self._should_log: + self._emit_log(_ConnectionStatusMessage.POOL_CLEARED, serviceId=service_id) if self._should_publish: assert self._listeners is not None self._listeners.publish_pool_cleared( @@ -272,7 +274,8 @@ def pool_cleared(self, service_id: Optional[ObjectId], interrupt_connections: bo def pool_closed(self) -> None: # Log before publishing to prevent potential listener preemption in tests. - self._emit_log(_ConnectionStatusMessage.POOL_CLOSED) + if self._should_log: + self._emit_log(_ConnectionStatusMessage.POOL_CLOSED) if self._should_publish: assert self._listeners is not None self._listeners.publish_pool_closed(self._address) @@ -280,7 +283,8 @@ def pool_closed(self) -> None: def connection_created(self, conn_id: int) -> None: self._conn_created_start = time.monotonic() # Log before publishing to prevent potential listener preemption in tests. - self._emit_log(_ConnectionStatusMessage.CONN_CREATED, driverConnectionId=conn_id) + if self._should_log: + self._emit_log(_ConnectionStatusMessage.CONN_CREATED, driverConnectionId=conn_id) if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_created(self._address, conn_id) @@ -288,11 +292,12 @@ def connection_created(self, conn_id: int) -> None: def connection_ready(self, conn_id: int) -> None: duration = max(0.0, time.monotonic() - self._conn_created_start) # Log before publishing to prevent potential listener preemption in tests. - self._emit_log( - _ConnectionStatusMessage.CONN_READY, - driverConnectionId=conn_id, - durationMS=duration * 1000, - ) + if self._should_log: + self._emit_log( + _ConnectionStatusMessage.CONN_READY, + driverConnectionId=conn_id, + durationMS=duration * 1000, + ) if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_ready(self._address, conn_id, duration) @@ -301,48 +306,53 @@ def connection_closed(self, conn_id: int, reason: str) -> None: if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_closed(self._address, conn_id, reason) - self._emit_log( - _ConnectionStatusMessage.CONN_CLOSED, - driverConnectionId=conn_id, - reason=_verbose_connection_error_reason(reason), - error=reason, - ) + if self._should_log: + self._emit_log( + _ConnectionStatusMessage.CONN_CLOSED, + driverConnectionId=conn_id, + reason=_verbose_connection_error_reason(reason), + error=reason, + ) def checkout_started(self) -> None: self._checkout_start = time.monotonic() if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_started(self._address) - self._emit_log(_ConnectionStatusMessage.CHECKOUT_STARTED) + if self._should_log: + self._emit_log(_ConnectionStatusMessage.CHECKOUT_STARTED) def checkout_succeeded(self, conn_id: int) -> None: duration = max(0.0, time.monotonic() - self._checkout_start) if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_checked_out(self._address, conn_id, duration) - self._emit_log( - _ConnectionStatusMessage.CHECKOUT_SUCCEEDED, - driverConnectionId=conn_id, - durationMS=duration * 1000, - ) + if self._should_log: + self._emit_log( + _ConnectionStatusMessage.CHECKOUT_SUCCEEDED, + driverConnectionId=conn_id, + durationMS=duration * 1000, + ) def checkout_failed(self, reason: str, error: str) -> None: duration = max(0.0, time.monotonic() - self._checkout_start) if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_failed(self._address, error, duration) - self._emit_log( - _ConnectionStatusMessage.CHECKOUT_FAILED, - reason=reason, - error=error, - durationMS=duration * 1000, - ) + if self._should_log: + self._emit_log( + _ConnectionStatusMessage.CHECKOUT_FAILED, + reason=reason, + error=error, + durationMS=duration * 1000, + ) def checked_in(self, conn_id: int) -> None: if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_checked_in(self._address, conn_id) - self._emit_log(_ConnectionStatusMessage.CHECKEDIN, driverConnectionId=conn_id) + if self._should_log: + self._emit_log(_ConnectionStatusMessage.CHECKEDIN, driverConnectionId=conn_id) class _HeartbeatTelemetry: @@ -377,16 +387,15 @@ def _should_log(self) -> bool: return _SDAM_LOGGER.isEnabledFor(logging.DEBUG) def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: - if self._should_log: - _debug_log( - _SDAM_LOGGER, - message=message, - topologyId=self._topology_id, - serverHost=self._address[0], - serverPort=self._address[1], - awaited=self._awaited, - **extra, - ) + _debug_log( + _SDAM_LOGGER, + message=message, + topologyId=self._topology_id, + serverHost=self._address[0], + serverPort=self._address[1], + awaited=self._awaited, + **extra, + ) def started(self) -> None: """Publish the APM heartbeat-started event (before connection checkout).""" @@ -397,11 +406,12 @@ def started(self) -> None: def emit_started_log(self, conn_id: int, server_conn_id: Optional[int]) -> None: """Emit the log entry for heartbeat started (after connection checkout).""" - self._emit_log( - _SDAMStatusMessage.HEARTBEAT_START, - driverConnectionId=conn_id, - serverConnectionId=server_conn_id, - ) + if self._should_log: + self._emit_log( + _SDAMStatusMessage.HEARTBEAT_START, + driverConnectionId=conn_id, + serverConnectionId=server_conn_id, + ) def succeeded( self, @@ -416,13 +426,14 @@ def succeeded( self._listeners.publish_server_heartbeat_succeeded( self._address, round_trip_time, response, response.awaitable ) - self._emit_log( - _SDAMStatusMessage.HEARTBEAT_SUCCESS, - driverConnectionId=conn_id, - serverConnectionId=server_conn_id, - durationMS=round_trip_time * 1000, - reply=response.document, - ) + if self._should_log: + self._emit_log( + _SDAMStatusMessage.HEARTBEAT_SUCCESS, + driverConnectionId=conn_id, + serverConnectionId=server_conn_id, + durationMS=round_trip_time * 1000, + reply=response.document, + ) def failed(self, error: Exception, conn_id: Optional[int]) -> None: """Emit the FAILED log entry and APM event.""" @@ -432,12 +443,13 @@ def failed(self, error: Exception, conn_id: Optional[int]) -> None: self._listeners.publish_server_heartbeat_failed( self._address, duration, error, self._awaited ) - self._emit_log( - _SDAMStatusMessage.HEARTBEAT_FAIL, - durationMS=duration * 1000, - failure=error, - driverConnectionId=conn_id, - ) + if self._should_log: + self._emit_log( + _SDAMStatusMessage.HEARTBEAT_FAIL, + durationMS=duration * 1000, + failure=error, + driverConnectionId=conn_id, + ) class _SdamTelemetry: @@ -475,16 +487,16 @@ def _enqueue(self, fn: Any, args: tuple[Any, ...]) -> None: self._events.put((fn, args)) def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: - if self._should_log: - _debug_log( - _SDAM_LOGGER, - message=message, - topologyId=self._topology_id, - **extra, - ) + _debug_log( + _SDAM_LOGGER, + message=message, + topologyId=self._topology_id, + **extra, + ) def topology_opened(self) -> None: - self._emit_log(_SDAMStatusMessage.START_TOPOLOGY) + if self._should_log: + self._emit_log(_SDAMStatusMessage.START_TOPOLOGY) if self._publish_tp: assert self._listeners is not None self._enqueue(self._listeners.publish_topology_opened, (self._topology_id,)) @@ -496,11 +508,12 @@ def topology_description_changed(self, old_td: Any, new_td: Any) -> None: self._listeners.publish_topology_description_changed, (old_td, new_td, self._topology_id), ) - self._emit_log( - _SDAMStatusMessage.TOPOLOGY_CHANGE, - previousDescription=repr(old_td), - newDescription=repr(new_td), - ) + if self._should_log: + self._emit_log( + _SDAMStatusMessage.TOPOLOGY_CHANGE, + previousDescription=repr(old_td), + newDescription=repr(new_td), + ) def topology_closed(self, old_td: Any, new_td: Any) -> None: """Emit APM and log events for topology description change + topology closed.""" @@ -511,22 +524,24 @@ def topology_closed(self, old_td: Any, new_td: Any) -> None: (old_td, new_td, self._topology_id), ) self._enqueue(self._listeners.publish_topology_closed, (self._topology_id,)) - self._emit_log( - _SDAMStatusMessage.TOPOLOGY_CHANGE, - previousDescription=repr(old_td), - newDescription=repr(new_td), - ) - self._emit_log(_SDAMStatusMessage.STOP_TOPOLOGY) + if self._should_log: + self._emit_log( + _SDAMStatusMessage.TOPOLOGY_CHANGE, + previousDescription=repr(old_td), + newDescription=repr(new_td), + ) + self._emit_log(_SDAMStatusMessage.STOP_TOPOLOGY) def server_opened(self, address: _Address) -> None: if self._publish_server: assert self._listeners is not None self._enqueue(self._listeners.publish_server_opened, (address, self._topology_id)) - self._emit_log( - _SDAMStatusMessage.START_SERVER, - serverHost=address[0], - serverPort=address[1], - ) + if self._should_log: + self._emit_log( + _SDAMStatusMessage.START_SERVER, + serverHost=address[0], + serverPort=address[1], + ) def server_description_changed(self, sd_old: Any, sd_new: Any, address: _Address) -> None: if self._publish_server: @@ -540,11 +555,12 @@ def server_closed(self, address: _Address) -> None: if self._publish_server: assert self._listeners is not None self._enqueue(self._listeners.publish_server_closed, (address, self._topology_id)) - self._emit_log( - _SDAMStatusMessage.STOP_SERVER, - serverHost=address[0], - serverPort=address[1], - ) + if self._should_log: + self._emit_log( + _SDAMStatusMessage.STOP_SERVER, + serverHost=address[0], + serverPort=address[1], + ) class _ServerSelectionTelemetry: @@ -583,37 +599,40 @@ def _should_log(self) -> bool: return _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG) def _emit_log(self, message: _ServerSelectionStatusMessage, **extra: Any) -> None: - if self._should_log: - _debug_log( - _SERVER_SELECTION_LOGGER, - message=message, - clientId=self._topology_id, - selector=self._selector, - operation=self._operation, - operationId=self._operation_id, - topologyDescription=self._topology_description, - **extra, - ) + _debug_log( + _SERVER_SELECTION_LOGGER, + message=message, + clientId=self._topology_id, + selector=self._selector, + operation=self._operation, + operationId=self._operation_id, + topologyDescription=self._topology_description, + **extra, + ) def started(self) -> None: """Emit the server selection STARTED log entry.""" - self._emit_log(_ServerSelectionStatusMessage.STARTED) + if self._should_log: + self._emit_log(_ServerSelectionStatusMessage.STARTED) def waiting(self, remaining_time_ms: int) -> None: """Emit the server selection WAITING log entry.""" - self._emit_log(_ServerSelectionStatusMessage.WAITING, remainingTimeMS=remaining_time_ms) + if self._should_log: + self._emit_log(_ServerSelectionStatusMessage.WAITING, remainingTimeMS=remaining_time_ms) def failed(self, failure: str) -> None: """Emit the server selection FAILED log entry.""" - self._emit_log(_ServerSelectionStatusMessage.FAILED, failure=failure) + if self._should_log: + self._emit_log(_ServerSelectionStatusMessage.FAILED, failure=failure) def succeeded(self, server_host: str, server_port: Optional[int]) -> None: """Emit the server selection SUCCEEDED log entry.""" - self._emit_log( - _ServerSelectionStatusMessage.SUCCEEDED, - serverHost=server_host, - serverPort=server_port, - ) + if self._should_log: + self._emit_log( + _ServerSelectionStatusMessage.SUCCEEDED, + serverHost=server_host, + serverPort=server_port, + ) def log_command_retry( From d51f38b5806c92324d4e2c6031cbb0f65a08c8ed Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 08:07:25 -0500 Subject: [PATCH 13/20] PYTHON-5846 Cache _should_log/_should_publish in short-lived telemetry classes; document why long-lived classes use properties --- pymongo/_telemetry.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 73c0a5e102..73712d9737 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -228,10 +228,12 @@ def __init__( @property def _should_publish(self) -> bool: + """Computed per-call because listener registration can change while the pool is open.""" return self._publish and self._listeners is not None and self._listeners.enabled_for_cmap @property def _should_log(self) -> bool: + """Computed per-call because logging level can be reconfigured at runtime.""" return self._log and _CONNECTION_LOGGER.isEnabledFor(logging.DEBUG) def _emit_log(self, message: _ConnectionStatusMessage, **extra: Any) -> None: @@ -364,7 +366,15 @@ class _HeartbeatTelemetry: context, then :meth:`succeeded` or :meth:`failed` when the outcome is known. """ - __slots__ = ("_address", "_awaited", "_listeners", "_start", "_topology_id") + __slots__ = ( + "_address", + "_awaited", + "_listeners", + "_should_log", + "_should_publish", + "_start", + "_topology_id", + ) def __init__( self, @@ -377,14 +387,10 @@ def __init__( self._address = address self._listeners = listeners self._awaited = awaited - - @property - def _should_publish(self) -> bool: - return self._listeners is not None and self._listeners.enabled_for_server_heartbeat - - @property - def _should_log(self) -> bool: - return _SDAM_LOGGER.isEnabledFor(logging.DEBUG) + # Cached at construction: this object is short-lived (one heartbeat check) so + # listener registration and logging level are stable for its lifetime. + self._should_publish = listeners is not None and listeners.enabled_for_server_heartbeat + self._should_log = _SDAM_LOGGER.isEnabledFor(logging.DEBUG) def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: _debug_log( @@ -472,14 +478,17 @@ def __init__( @property def _publish_server(self) -> bool: + """Computed per-call because listener registration can change while the topology is open.""" return self._listeners is not None and self._listeners.enabled_for_server @property def _publish_tp(self) -> bool: + """Computed per-call because listener registration can change while the topology is open.""" return self._listeners is not None and self._listeners.enabled_for_topology @property def _should_log(self) -> bool: + """Computed per-call because logging level can be reconfigured at runtime.""" return _SDAM_LOGGER.isEnabledFor(logging.DEBUG) def _enqueue(self, fn: Any, args: tuple[Any, ...]) -> None: @@ -576,6 +585,7 @@ class has no publish methods. "_operation", "_operation_id", "_selector", + "_should_log", "_topology_description", "_topology_id", ) @@ -593,10 +603,9 @@ def __init__( self._operation = operation self._operation_id = operation_id self._topology_description = topology_description - - @property - def _should_log(self) -> bool: - return _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG) + # Cached at construction: this object is short-lived (one select_server call) so + # logging level is stable for its lifetime. + self._should_log = _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG) def _emit_log(self, message: _ServerSelectionStatusMessage, **extra: Any) -> None: _debug_log( From de14677f702e655cfdc03b266499dd0694b91af7 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 08:38:12 -0500 Subject: [PATCH 14/20] PYTHON-5846 Cache _should_log/_should_publish locally and guard duration computation in _CmapTelemetry and _HeartbeatTelemetry --- pymongo/_telemetry.py | 44 +++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 73712d9737..6f989aa946 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -292,23 +292,29 @@ def connection_created(self, conn_id: int) -> None: self._listeners.publish_connection_created(self._address, conn_id) def connection_ready(self, conn_id: int) -> None: + should_log = self._should_log + should_publish = self._should_publish + if not should_log and not should_publish: + return duration = max(0.0, time.monotonic() - self._conn_created_start) # Log before publishing to prevent potential listener preemption in tests. - if self._should_log: + if should_log: self._emit_log( _ConnectionStatusMessage.CONN_READY, driverConnectionId=conn_id, durationMS=duration * 1000, ) - if self._should_publish: + if should_publish: assert self._listeners is not None self._listeners.publish_connection_ready(self._address, conn_id, duration) def connection_closed(self, conn_id: int, reason: str) -> None: - if self._should_publish: + should_log = self._should_log + should_publish = self._should_publish + if should_publish: assert self._listeners is not None self._listeners.publish_connection_closed(self._address, conn_id, reason) - if self._should_log: + if should_log: self._emit_log( _ConnectionStatusMessage.CONN_CLOSED, driverConnectionId=conn_id, @@ -317,19 +323,25 @@ def connection_closed(self, conn_id: int, reason: str) -> None: ) def checkout_started(self) -> None: + should_log = self._should_log + should_publish = self._should_publish self._checkout_start = time.monotonic() - if self._should_publish: + if should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_started(self._address) - if self._should_log: + if should_log: self._emit_log(_ConnectionStatusMessage.CHECKOUT_STARTED) def checkout_succeeded(self, conn_id: int) -> None: + should_log = self._should_log + should_publish = self._should_publish + if not should_log and not should_publish: + return duration = max(0.0, time.monotonic() - self._checkout_start) - if self._should_publish: + if should_publish: assert self._listeners is not None self._listeners.publish_connection_checked_out(self._address, conn_id, duration) - if self._should_log: + if should_log: self._emit_log( _ConnectionStatusMessage.CHECKOUT_SUCCEEDED, driverConnectionId=conn_id, @@ -337,11 +349,15 @@ def checkout_succeeded(self, conn_id: int) -> None: ) def checkout_failed(self, reason: str, error: str) -> None: + should_log = self._should_log + should_publish = self._should_publish + if not should_log and not should_publish: + return duration = max(0.0, time.monotonic() - self._checkout_start) - if self._should_publish: + if should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_failed(self._address, error, duration) - if self._should_log: + if should_log: self._emit_log( _ConnectionStatusMessage.CHECKOUT_FAILED, reason=reason, @@ -443,13 +459,17 @@ def succeeded( def failed(self, error: Exception, conn_id: Optional[int]) -> None: """Emit the FAILED log entry and APM event.""" + should_publish = self._should_publish + should_log = self._should_log + if not should_publish and not should_log: + return duration = max(0.0, time.monotonic() - self._start) - if self._should_publish: + if should_publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_failed( self._address, duration, error, self._awaited ) - if self._should_log: + if should_log: self._emit_log( _SDAMStatusMessage.HEARTBEAT_FAIL, durationMS=duration * 1000, From 488c7507540e5183b44642d3237b852ff7d71491 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 12:01:16 -0500 Subject: [PATCH 15/20] PYTHON-5846 Add comments explaining unconditional start time recording in _CmapTelemetry --- pymongo/_telemetry.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 6f989aa946..1debd4368f 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -283,6 +283,8 @@ def pool_closed(self) -> None: self._listeners.publish_pool_closed(self._address) def connection_created(self, conn_id: int) -> None: + # Always record start time: logging or publishing may be enabled by the time + # connection_ready is called to compute the duration. self._conn_created_start = time.monotonic() # Log before publishing to prevent potential listener preemption in tests. if self._should_log: @@ -325,6 +327,8 @@ def connection_closed(self, conn_id: int, reason: str) -> None: def checkout_started(self) -> None: should_log = self._should_log should_publish = self._should_publish + # Always record start time: logging or publishing may be enabled by the time + # checkout_succeeded or checkout_failed is called to compute the duration. self._checkout_start = time.monotonic() if should_publish: assert self._listeners is not None From 58e966b920514182cee613875a55a26993b44b5c Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 12:47:15 -0500 Subject: [PATCH 16/20] PYTHON-5846 Only record heartbeat start time when logging or publishing is enabled --- pymongo/_telemetry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 1debd4368f..9c9851c101 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -425,7 +425,8 @@ def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: def started(self) -> None: """Publish the APM heartbeat-started event (before connection checkout).""" - self._start = time.monotonic() + if self._should_publish or self._should_log: + self._start = time.monotonic() if self._should_publish: assert self._listeners is not None self._listeners.publish_server_heartbeat_started(self._address, self._awaited) From 27463860f3e74936a558eaf43d1e86d82682e654 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 13:32:01 -0500 Subject: [PATCH 17/20] Update pymongo/asynchronous/mongo_client.py Co-authored-by: Noah Stapp --- pymongo/asynchronous/mongo_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 678e62e6ff..f45dd09f06 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -3043,7 +3043,7 @@ async def _read(self) -> T: self._check_last_error() if self._retrying: log_command_retry( - self._client._topology_settings._topology_id, + self._client._topology_id, self._operation, self._operation_id, self._attempt_number, From 012c143819ba6869d6d51d4bd3810a54e5c1a2b3 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 14:04:55 -0500 Subject: [PATCH 18/20] PYTHON-5846 Address PR review comments on telemetry refactor - Fix race condition in _CmapTelemetry: remove shared _conn_created_start and _checkout_start slots; connection_ready now takes creation_time from AsyncConnection.creation_time, checkout_started returns the start time and checkout_succeeded/failed accept it as a parameter - Move SRV monitor failure log into _telemetry.py as log_srv_monitor_failure - Add _log_retry helper to _ClientConnectionRetryable to deduplicate log_command_retry call sites - Fix _ServerSelectionTelemetry.failed to accept live topology_description at failure time instead of using the stale snapshot from construction --- pymongo/_telemetry.py | 64 +++++++++++++++++----------- pymongo/asynchronous/mongo_client.py | 25 +++++------ pymongo/asynchronous/monitor.py | 5 +-- pymongo/asynchronous/pool.py | 31 ++++++++------ pymongo/asynchronous/topology.py | 2 +- pymongo/synchronous/mongo_client.py | 25 +++++------ pymongo/synchronous/monitor.py | 5 +-- pymongo/synchronous/pool.py | 33 ++++++++------ pymongo/synchronous/topology.py | 2 +- 9 files changed, 104 insertions(+), 88 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 9c9851c101..29d116faaa 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -204,9 +204,7 @@ class _CmapTelemetry: __slots__ = ( "_address", - "_checkout_start", "_client_id", - "_conn_created_start", "_listeners", "_log", "_publish", @@ -283,9 +281,6 @@ def pool_closed(self) -> None: self._listeners.publish_pool_closed(self._address) def connection_created(self, conn_id: int) -> None: - # Always record start time: logging or publishing may be enabled by the time - # connection_ready is called to compute the duration. - self._conn_created_start = time.monotonic() # Log before publishing to prevent potential listener preemption in tests. if self._should_log: self._emit_log(_ConnectionStatusMessage.CONN_CREATED, driverConnectionId=conn_id) @@ -293,12 +288,12 @@ def connection_created(self, conn_id: int) -> None: assert self._listeners is not None self._listeners.publish_connection_created(self._address, conn_id) - def connection_ready(self, conn_id: int) -> None: + def connection_ready(self, conn_id: int, creation_time: float) -> None: should_log = self._should_log should_publish = self._should_publish if not should_log and not should_publish: return - duration = max(0.0, time.monotonic() - self._conn_created_start) + duration = max(0.0, time.monotonic() - creation_time) # Log before publishing to prevent potential listener preemption in tests. if should_log: self._emit_log( @@ -324,24 +319,22 @@ def connection_closed(self, conn_id: int, reason: str) -> None: error=reason, ) - def checkout_started(self) -> None: - should_log = self._should_log - should_publish = self._should_publish - # Always record start time: logging or publishing may be enabled by the time - # checkout_succeeded or checkout_failed is called to compute the duration. - self._checkout_start = time.monotonic() - if should_publish: + def checkout_started(self) -> float: + """Emit the checkout started event/log and return the start time for duration tracking.""" + start = time.monotonic() + if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_started(self._address) - if should_log: + if self._should_log: self._emit_log(_ConnectionStatusMessage.CHECKOUT_STARTED) + return start - def checkout_succeeded(self, conn_id: int) -> None: + def checkout_succeeded(self, conn_id: int, start: float) -> None: should_log = self._should_log should_publish = self._should_publish if not should_log and not should_publish: return - duration = max(0.0, time.monotonic() - self._checkout_start) + duration = max(0.0, time.monotonic() - start) if should_publish: assert self._listeners is not None self._listeners.publish_connection_checked_out(self._address, conn_id, duration) @@ -352,12 +345,12 @@ def checkout_succeeded(self, conn_id: int) -> None: durationMS=duration * 1000, ) - def checkout_failed(self, reason: str, error: str) -> None: + def checkout_failed(self, reason: str, error: str, start: float) -> None: should_log = self._should_log should_publish = self._should_publish if not should_log and not should_publish: return - duration = max(0.0, time.monotonic() - self._checkout_start) + duration = max(0.0, time.monotonic() - start) if should_publish: assert self._listeners is not None self._listeners.publish_connection_check_out_failed(self._address, error, duration) @@ -632,7 +625,9 @@ def __init__( # logging level is stable for its lifetime. self._should_log = _SERVER_SELECTION_LOGGER.isEnabledFor(logging.DEBUG) - def _emit_log(self, message: _ServerSelectionStatusMessage, **extra: Any) -> None: + def _emit_log( + self, message: _ServerSelectionStatusMessage, topology_description: Any, **extra: Any + ) -> None: _debug_log( _SERVER_SELECTION_LOGGER, message=message, @@ -640,35 +635,52 @@ def _emit_log(self, message: _ServerSelectionStatusMessage, **extra: Any) -> Non selector=self._selector, operation=self._operation, operationId=self._operation_id, - topologyDescription=self._topology_description, + topologyDescription=topology_description, **extra, ) def started(self) -> None: """Emit the server selection STARTED log entry.""" if self._should_log: - self._emit_log(_ServerSelectionStatusMessage.STARTED) + self._emit_log(_ServerSelectionStatusMessage.STARTED, self._topology_description) def waiting(self, remaining_time_ms: int) -> None: """Emit the server selection WAITING log entry.""" if self._should_log: - self._emit_log(_ServerSelectionStatusMessage.WAITING, remainingTimeMS=remaining_time_ms) + self._emit_log( + _ServerSelectionStatusMessage.WAITING, + self._topology_description, + remainingTimeMS=remaining_time_ms, + ) - def failed(self, failure: str) -> None: - """Emit the server selection FAILED log entry.""" + def failed(self, failure: str, topology_description: Any = None) -> None: + """Emit the server selection FAILED log entry with the current topology description.""" if self._should_log: - self._emit_log(_ServerSelectionStatusMessage.FAILED, failure=failure) + self._emit_log( + _ServerSelectionStatusMessage.FAILED, + topology_description + if topology_description is not None + else self._topology_description, + failure=failure, + ) def succeeded(self, server_host: str, server_port: Optional[int]) -> None: """Emit the server selection SUCCEEDED log entry.""" if self._should_log: self._emit_log( _ServerSelectionStatusMessage.SUCCEEDED, + self._topology_description, serverHost=server_host, serverPort=server_port, ) +def log_srv_monitor_failure(failure: Exception) -> None: + """Emit a log entry when the SRV monitor fails to poll DNS records.""" + if _SDAM_LOGGER.isEnabledFor(logging.DEBUG): + _debug_log(_SDAM_LOGGER, message="SRV monitor check failed", failure=repr(failure)) + + def log_command_retry( topology_id: Any, command_name: str, diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index f45dd09f06..3e811d3c0e 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -2990,6 +2990,15 @@ async def _get_server(self) -> Server: operation_id=self._operation_id, ) + def _log_retry(self, is_write: bool) -> None: + log_command_retry( + self._client._topology_id, + self._operation, + self._operation_id, + self._attempt_number, + is_write, + ) + async def _write(self) -> T: """Wrapper method for write-type retryable client executions @@ -3013,13 +3022,7 @@ async def _write(self) -> T: self._check_last_error() self._retryable = False if self._retrying: - log_command_retry( - self._client._topology_id, - self._operation, - self._operation_id, - self._attempt_number, - is_write=True, - ) + self._log_retry(is_write=True) return await self._func(self._session, conn, self._retryable) # type: ignore except PyMongoError as exc: if not self._retryable: @@ -3042,13 +3045,7 @@ async def _read(self) -> T: if self._retrying and not self._retryable and not self._always_retryable: self._check_last_error() if self._retrying: - log_command_retry( - self._client._topology_id, - self._operation, - self._operation_id, - self._attempt_number, - is_write=False, - ) + self._log_retry(is_write=False) return await self._func(self._session, self._server, conn, read_pref) # type: ignore diff --git a/pymongo/asynchronous/monitor.py b/pymongo/asynchronous/monitor.py index a3528c7723..f601f54a2d 100644 --- a/pymongo/asynchronous/monitor.py +++ b/pymongo/asynchronous/monitor.py @@ -24,12 +24,11 @@ from pymongo import common, periodic_executor from pymongo._csot import MovingMinimum -from pymongo._telemetry import _HeartbeatTelemetry +from pymongo._telemetry import _HeartbeatTelemetry, log_srv_monitor_failure from pymongo.asynchronous.srv_resolver import _SrvResolver from pymongo.errors import NetworkTimeout, _OperationCancelled from pymongo.hello import Hello from pymongo.lock import _async_create_lock -from pymongo.logger import _SDAM_LOGGER, _debug_log from pymongo.periodic_executor import _shutdown_executors from pymongo.pool_options import _is_faas from pymongo.read_preferences import MovingAverage @@ -385,7 +384,7 @@ async def _get_seedlist(self) -> Optional[list[tuple[str, Any]]]: # - SRV records must be rescanned every heartbeatFrequencyMS # - Topology must be left unchanged self.request_check() - _debug_log(_SDAM_LOGGER, message="SRV monitor check failed", failure=repr(exc)) + log_srv_monitor_failure(exc) return None else: self._executor.update_interval(max(ttl, common.MIN_SRV_RESCAN_INTERVAL)) diff --git a/pymongo/asynchronous/pool.py b/pymongo/asynchronous/pool.py index 41058343a8..c7a5a3e7d8 100644 --- a/pymongo/asynchronous/pool.py +++ b/pymongo/asynchronous/pool.py @@ -128,6 +128,7 @@ def __init__( self.id = id self.is_sdam = is_sdam self.closed = False + self.creation_time = time.monotonic() self.last_checkin_time = time.monotonic() self.performed_handshake = False self.is_writable: bool = False @@ -466,7 +467,7 @@ async def authenticate(self, reauthenticate: bool = False) -> None: await auth.authenticate(creds, self, reauthenticate=reauthenticate) self.ready = True - self._telemetry.connection_ready(self.id) + self._telemetry.connection_ready(self.id, self.creation_time) def validate_session( self, client: Optional[AsyncMongoClient[Any]], session: Optional[AsyncClientSession] @@ -976,11 +977,11 @@ async def checkout( :param handler: A _MongoClientErrorHandler. """ - self._telemetry.checkout_started() + start = self._telemetry.checkout_started() - conn = await self._get_conn(handler=handler) + conn = await self._get_conn(handler=handler, checkout_start=start) - self._telemetry.checkout_succeeded(conn.id) + self._telemetry.checkout_succeeded(conn.id, start) try: async with self.lock: self.active_contexts.add(conn.cancel_context) @@ -1011,12 +1012,13 @@ async def checkout( elif conn.active: await self.checkin(conn) - def _raise_if_not_ready(self, emit_event: bool) -> None: + def _raise_if_not_ready(self, emit_event: bool, checkout_start: float) -> None: if self.state != PoolState.READY: if emit_event: self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, + checkout_start, ) details = _get_timeout_details(self.opts) @@ -1025,7 +1027,7 @@ def _raise_if_not_ready(self, emit_event: bool) -> None: ) async def _get_conn( - self, handler: Optional[_MongoClientErrorHandler] = None + self, handler: Optional[_MongoClientErrorHandler] = None, checkout_start: float = 0.0 ) -> AsyncConnection: """Get or create a AsyncConnection. Can raise ConnectionFailure.""" # We use the pid here to avoid issues with fork / multiprocessing. @@ -1038,6 +1040,7 @@ async def _get_conn( self._telemetry.checkout_failed( "Connection pool was closed", ConnectionCheckOutFailedReason.POOL_CLOSED, + checkout_start, ) raise _PoolClosedError( "Attempted to check out a connection from closed connection pool" @@ -1055,7 +1058,7 @@ async def _get_conn( deadline = None async with self.size_cond: - self._raise_if_not_ready(emit_event=True) + self._raise_if_not_ready(emit_event=True, checkout_start=checkout_start) while not (self.requests < self.max_pool_size): timeout = deadline - time.monotonic() if deadline else None if not await _async_cond_wait(self.size_cond, timeout): @@ -1063,8 +1066,8 @@ async def _get_conn( # timeout doesn't consume the condition. if self.requests < self.max_pool_size: self.size_cond.notify() - self._raise_wait_queue_timeout() - self._raise_if_not_ready(emit_event=True) + self._raise_wait_queue_timeout(checkout_start) + self._raise_if_not_ready(emit_event=True, checkout_start=checkout_start) self.requests += 1 # We've now acquired the semaphore and must release it on error. @@ -1079,7 +1082,7 @@ async def _get_conn( # CMAP: we MUST wait for either maxConnecting OR for a socket # to be checked back into the pool. async with self._max_connecting_cond: - self._raise_if_not_ready(emit_event=False) + self._raise_if_not_ready(emit_event=False, checkout_start=checkout_start) while not (self.conns or self._pending < self._max_connecting): timeout = deadline - time.monotonic() if deadline else None if not await _async_cond_wait(self._max_connecting_cond, timeout): @@ -1088,8 +1091,8 @@ async def _get_conn( if self.conns or self._pending < self._max_connecting: self._max_connecting_cond.notify() emitted_event = True - self._raise_wait_queue_timeout() - self._raise_if_not_ready(emit_event=False) + self._raise_wait_queue_timeout(checkout_start) + self._raise_if_not_ready(emit_event=False, checkout_start=checkout_start) try: conn = self.conns.popleft() @@ -1121,6 +1124,7 @@ async def _get_conn( self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, + checkout_start, ) raise @@ -1212,10 +1216,11 @@ async def _perished(self, conn: AsyncConnection) -> bool: return False - def _raise_wait_queue_timeout(self) -> NoReturn: + def _raise_wait_queue_timeout(self, checkout_start: float) -> NoReturn: self._telemetry.checkout_failed( "Wait queue timeout elapsed without a connection becoming available", ConnectionCheckOutFailedReason.TIMEOUT, + checkout_start, ) timeout = _csot.get_timeout() or self.opts.wait_queue_timeout if self.opts.load_balanced: diff --git a/pymongo/asynchronous/topology.py b/pymongo/asynchronous/topology.py index c0982e247a..8f71861619 100644 --- a/pymongo/asynchronous/topology.py +++ b/pymongo/asynchronous/topology.py @@ -296,7 +296,7 @@ async def _select_servers_loop( while not server_descriptions: # No suitable servers. if timeout == 0 or now > end_time: - ss.failed(self._error_message(selector)) + ss.failed(self._error_message(selector), self.description) raise ServerSelectionTimeoutError( f"{self._error_message(selector)}, Timeout: {timeout}s, Topology Description: {self.description!r}" ) diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 2458324e6e..8a3c969242 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -2981,6 +2981,15 @@ def _get_server(self) -> Server: operation_id=self._operation_id, ) + def _log_retry(self, is_write: bool) -> None: + log_command_retry( + self._client._topology_id, + self._operation, + self._operation_id, + self._attempt_number, + is_write, + ) + def _write(self) -> T: """Wrapper method for write-type retryable client executions @@ -3004,13 +3013,7 @@ def _write(self) -> T: self._check_last_error() self._retryable = False if self._retrying: - log_command_retry( - self._client._topology_id, - self._operation, - self._operation_id, - self._attempt_number, - is_write=True, - ) + self._log_retry(is_write=True) return self._func(self._session, conn, self._retryable) # type: ignore except PyMongoError as exc: if not self._retryable: @@ -3033,13 +3036,7 @@ def _read(self) -> T: if self._retrying and not self._retryable and not self._always_retryable: self._check_last_error() if self._retrying: - log_command_retry( - self._client._topology_settings._topology_id, - self._operation, - self._operation_id, - self._attempt_number, - is_write=False, - ) + self._log_retry(is_write=False) return self._func(self._session, self._server, conn, read_pref) # type: ignore diff --git a/pymongo/synchronous/monitor.py b/pymongo/synchronous/monitor.py index b31ef516fb..694e1f8a09 100644 --- a/pymongo/synchronous/monitor.py +++ b/pymongo/synchronous/monitor.py @@ -24,11 +24,10 @@ from pymongo import common, periodic_executor from pymongo._csot import MovingMinimum -from pymongo._telemetry import _HeartbeatTelemetry +from pymongo._telemetry import _HeartbeatTelemetry, log_srv_monitor_failure from pymongo.errors import NetworkTimeout, _OperationCancelled from pymongo.hello import Hello from pymongo.lock import _create_lock -from pymongo.logger import _SDAM_LOGGER, _debug_log from pymongo.periodic_executor import _shutdown_executors from pymongo.pool_options import _is_faas from pymongo.read_preferences import MovingAverage @@ -383,7 +382,7 @@ def _get_seedlist(self) -> Optional[list[tuple[str, Any]]]: # - SRV records must be rescanned every heartbeatFrequencyMS # - Topology must be left unchanged self.request_check() - _debug_log(_SDAM_LOGGER, message="SRV monitor check failed", failure=repr(exc)) + log_srv_monitor_failure(exc) return None else: self._executor.update_interval(max(ttl, common.MIN_SRV_RESCAN_INTERVAL)) diff --git a/pymongo/synchronous/pool.py b/pymongo/synchronous/pool.py index 6dc7ea47a5..4dce8c73b0 100644 --- a/pymongo/synchronous/pool.py +++ b/pymongo/synchronous/pool.py @@ -128,6 +128,7 @@ def __init__( self.id = id self.is_sdam = is_sdam self.closed = False + self.creation_time = time.monotonic() self.last_checkin_time = time.monotonic() self.performed_handshake = False self.is_writable: bool = False @@ -466,7 +467,7 @@ def authenticate(self, reauthenticate: bool = False) -> None: auth.authenticate(creds, self, reauthenticate=reauthenticate) self.ready = True - self._telemetry.connection_ready(self.id) + self._telemetry.connection_ready(self.id, self.creation_time) def validate_session( self, client: Optional[MongoClient[Any]], session: Optional[ClientSession] @@ -972,11 +973,11 @@ def checkout( :param handler: A _MongoClientErrorHandler. """ - self._telemetry.checkout_started() + start = self._telemetry.checkout_started() - conn = self._get_conn(handler=handler) + conn = self._get_conn(handler=handler, checkout_start=start) - self._telemetry.checkout_succeeded(conn.id) + self._telemetry.checkout_succeeded(conn.id, start) try: with self.lock: self.active_contexts.add(conn.cancel_context) @@ -1007,12 +1008,13 @@ def checkout( elif conn.active: self.checkin(conn) - def _raise_if_not_ready(self, emit_event: bool) -> None: + def _raise_if_not_ready(self, emit_event: bool, checkout_start: float) -> None: if self.state != PoolState.READY: if emit_event: self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, + checkout_start, ) details = _get_timeout_details(self.opts) @@ -1020,7 +1022,9 @@ def _raise_if_not_ready(self, emit_event: bool) -> None: self.address, AutoReconnect("connection pool paused"), timeout_details=details ) - def _get_conn(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connection: + def _get_conn( + self, handler: Optional[_MongoClientErrorHandler] = None, checkout_start: float = 0.0 + ) -> Connection: """Get or create a Connection. Can raise ConnectionFailure.""" # We use the pid here to avoid issues with fork / multiprocessing. # See test.test_client:TestClient.test_fork for an example of @@ -1032,6 +1036,7 @@ def _get_conn(self, handler: Optional[_MongoClientErrorHandler] = None) -> Conne self._telemetry.checkout_failed( "Connection pool was closed", ConnectionCheckOutFailedReason.POOL_CLOSED, + checkout_start, ) raise _PoolClosedError( "Attempted to check out a connection from closed connection pool" @@ -1049,7 +1054,7 @@ def _get_conn(self, handler: Optional[_MongoClientErrorHandler] = None) -> Conne deadline = None with self.size_cond: - self._raise_if_not_ready(emit_event=True) + self._raise_if_not_ready(emit_event=True, checkout_start=checkout_start) while not (self.requests < self.max_pool_size): timeout = deadline - time.monotonic() if deadline else None if not _cond_wait(self.size_cond, timeout): @@ -1057,8 +1062,8 @@ def _get_conn(self, handler: Optional[_MongoClientErrorHandler] = None) -> Conne # timeout doesn't consume the condition. if self.requests < self.max_pool_size: self.size_cond.notify() - self._raise_wait_queue_timeout() - self._raise_if_not_ready(emit_event=True) + self._raise_wait_queue_timeout(checkout_start) + self._raise_if_not_ready(emit_event=True, checkout_start=checkout_start) self.requests += 1 # We've now acquired the semaphore and must release it on error. @@ -1073,7 +1078,7 @@ def _get_conn(self, handler: Optional[_MongoClientErrorHandler] = None) -> Conne # CMAP: we MUST wait for either maxConnecting OR for a socket # to be checked back into the pool. with self._max_connecting_cond: - self._raise_if_not_ready(emit_event=False) + self._raise_if_not_ready(emit_event=False, checkout_start=checkout_start) while not (self.conns or self._pending < self._max_connecting): timeout = deadline - time.monotonic() if deadline else None if not _cond_wait(self._max_connecting_cond, timeout): @@ -1082,8 +1087,8 @@ def _get_conn(self, handler: Optional[_MongoClientErrorHandler] = None) -> Conne if self.conns or self._pending < self._max_connecting: self._max_connecting_cond.notify() emitted_event = True - self._raise_wait_queue_timeout() - self._raise_if_not_ready(emit_event=False) + self._raise_wait_queue_timeout(checkout_start) + self._raise_if_not_ready(emit_event=False, checkout_start=checkout_start) try: conn = self.conns.popleft() @@ -1115,6 +1120,7 @@ def _get_conn(self, handler: Optional[_MongoClientErrorHandler] = None) -> Conne self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, + checkout_start, ) raise @@ -1206,10 +1212,11 @@ def _perished(self, conn: Connection) -> bool: return False - def _raise_wait_queue_timeout(self) -> NoReturn: + def _raise_wait_queue_timeout(self, checkout_start: float) -> NoReturn: self._telemetry.checkout_failed( "Wait queue timeout elapsed without a connection becoming available", ConnectionCheckOutFailedReason.TIMEOUT, + checkout_start, ) timeout = _csot.get_timeout() or self.opts.wait_queue_timeout if self.opts.load_balanced: diff --git a/pymongo/synchronous/topology.py b/pymongo/synchronous/topology.py index 483f89d901..37181c00b0 100644 --- a/pymongo/synchronous/topology.py +++ b/pymongo/synchronous/topology.py @@ -296,7 +296,7 @@ def _select_servers_loop( while not server_descriptions: # No suitable servers. if timeout == 0 or now > end_time: - ss.failed(self._error_message(selector)) + ss.failed(self._error_message(selector), self.description) raise ServerSelectionTimeoutError( f"{self._error_message(selector)}, Timeout: {timeout}s, Topology Description: {self.description!r}" ) From 9cca32b0ffe46cf778882e5354ff713a98229252 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 15:30:01 -0500 Subject: [PATCH 19/20] PYTHON-5846 Address further PR review comments - Restore original _raise_if_not_ready and _get_conn parameter names/order - Add docstrings to all public methods on _CmapTelemetry and _SdamTelemetry - Remove redundant _publish/_listener/_events attrs from Server.__init__ - Avoid creating two _ServerSelectionTelemetry per select_server call by threading ss through select_servers and _select_server - Make _ServerSelectionTelemetry.failed topology_description a required arg - Move SRV monitor failure log to telemetry.py (log_srv_monitor_failure) --- pymongo/_telemetry.py | 21 +++++++++++++++++---- pymongo/asynchronous/pool.py | 32 ++++++++++++++++---------------- pymongo/asynchronous/server.py | 8 ++------ pymongo/asynchronous/topology.py | 26 ++++++++++++-------------- pymongo/synchronous/pool.py | 32 ++++++++++++++++---------------- pymongo/synchronous/server.py | 8 ++------ pymongo/synchronous/topology.py | 26 ++++++++++++-------------- 7 files changed, 77 insertions(+), 76 deletions(-) diff --git a/pymongo/_telemetry.py b/pymongo/_telemetry.py index 29d116faaa..67cd6e4987 100644 --- a/pymongo/_telemetry.py +++ b/pymongo/_telemetry.py @@ -245,6 +245,7 @@ def _emit_log(self, message: _ConnectionStatusMessage, **extra: Any) -> None: ) def pool_created(self, non_default_options: dict[str, Any]) -> None: + """Emit the pool created log entry and APM event.""" # Log before publishing to prevent potential listener preemption in tests. if self._should_log: self._emit_log(_ConnectionStatusMessage.POOL_CREATED, **non_default_options) @@ -253,6 +254,7 @@ def pool_created(self, non_default_options: dict[str, Any]) -> None: self._listeners.publish_pool_created(self._address, non_default_options) def pool_ready(self) -> None: + """Emit the pool ready log entry and APM event.""" # Log before publishing to prevent potential listener preemption in tests. if self._should_log: self._emit_log(_ConnectionStatusMessage.POOL_READY) @@ -261,6 +263,7 @@ def pool_ready(self) -> None: self._listeners.publish_pool_ready(self._address) def pool_cleared(self, service_id: Optional[ObjectId], interrupt_connections: bool) -> None: + """Emit the pool cleared log entry and APM event.""" # Log before publishing to prevent potential listener preemption in tests. if self._should_log: self._emit_log(_ConnectionStatusMessage.POOL_CLEARED, serviceId=service_id) @@ -273,6 +276,7 @@ def pool_cleared(self, service_id: Optional[ObjectId], interrupt_connections: bo ) def pool_closed(self) -> None: + """Emit the pool closed log entry and APM event.""" # Log before publishing to prevent potential listener preemption in tests. if self._should_log: self._emit_log(_ConnectionStatusMessage.POOL_CLOSED) @@ -281,6 +285,7 @@ def pool_closed(self) -> None: self._listeners.publish_pool_closed(self._address) def connection_created(self, conn_id: int) -> None: + """Emit the connection created log entry and APM event.""" # Log before publishing to prevent potential listener preemption in tests. if self._should_log: self._emit_log(_ConnectionStatusMessage.CONN_CREATED, driverConnectionId=conn_id) @@ -289,6 +294,7 @@ def connection_created(self, conn_id: int) -> None: self._listeners.publish_connection_created(self._address, conn_id) def connection_ready(self, conn_id: int, creation_time: float) -> None: + """Emit the connection ready log entry and APM event.""" should_log = self._should_log should_publish = self._should_publish if not should_log and not should_publish: @@ -306,6 +312,7 @@ def connection_ready(self, conn_id: int, creation_time: float) -> None: self._listeners.publish_connection_ready(self._address, conn_id, duration) def connection_closed(self, conn_id: int, reason: str) -> None: + """Emit the connection closed log entry and APM event.""" should_log = self._should_log should_publish = self._should_publish if should_publish: @@ -330,6 +337,7 @@ def checkout_started(self) -> float: return start def checkout_succeeded(self, conn_id: int, start: float) -> None: + """Emit the checkout succeeded log entry and APM event.""" should_log = self._should_log should_publish = self._should_publish if not should_log and not should_publish: @@ -346,6 +354,7 @@ def checkout_succeeded(self, conn_id: int, start: float) -> None: ) def checkout_failed(self, reason: str, error: str, start: float) -> None: + """Emit the checkout failed log entry and APM event.""" should_log = self._should_log should_publish = self._should_publish if not should_log and not should_publish: @@ -363,6 +372,7 @@ def checkout_failed(self, reason: str, error: str, start: float) -> None: ) def checked_in(self, conn_id: int) -> None: + """Emit the connection checked-in log entry and APM event.""" if self._should_publish: assert self._listeners is not None self._listeners.publish_connection_checked_in(self._address, conn_id) @@ -522,6 +532,7 @@ def _emit_log(self, message: _SDAMStatusMessage, **extra: Any) -> None: ) def topology_opened(self) -> None: + """Emit the topology opened log entry and APM event.""" if self._should_log: self._emit_log(_SDAMStatusMessage.START_TOPOLOGY) if self._publish_tp: @@ -529,6 +540,7 @@ def topology_opened(self) -> None: self._enqueue(self._listeners.publish_topology_opened, (self._topology_id,)) def topology_description_changed(self, old_td: Any, new_td: Any) -> None: + """Emit the topology description changed APM event and log entry.""" if self._publish_tp: assert self._listeners is not None self._enqueue( @@ -560,6 +572,7 @@ def topology_closed(self, old_td: Any, new_td: Any) -> None: self._emit_log(_SDAMStatusMessage.STOP_TOPOLOGY) def server_opened(self, address: _Address) -> None: + """Emit the server opened log entry and APM event.""" if self._publish_server: assert self._listeners is not None self._enqueue(self._listeners.publish_server_opened, (address, self._topology_id)) @@ -571,6 +584,7 @@ def server_opened(self, address: _Address) -> None: ) def server_description_changed(self, sd_old: Any, sd_new: Any, address: _Address) -> None: + """Emit the server description changed APM event.""" if self._publish_server: assert self._listeners is not None self._enqueue( @@ -579,6 +593,7 @@ def server_description_changed(self, sd_old: Any, sd_new: Any, address: _Address ) def server_closed(self, address: _Address) -> None: + """Emit the server closed log entry and APM event.""" if self._publish_server: assert self._listeners is not None self._enqueue(self._listeners.publish_server_closed, (address, self._topology_id)) @@ -653,14 +668,12 @@ def waiting(self, remaining_time_ms: int) -> None: remainingTimeMS=remaining_time_ms, ) - def failed(self, failure: str, topology_description: Any = None) -> None: + def failed(self, failure: str, topology_description: Any) -> None: """Emit the server selection FAILED log entry with the current topology description.""" if self._should_log: self._emit_log( _ServerSelectionStatusMessage.FAILED, - topology_description - if topology_description is not None - else self._topology_description, + topology_description, failure=failure, ) diff --git a/pymongo/asynchronous/pool.py b/pymongo/asynchronous/pool.py index c7a5a3e7d8..b3cfae8a45 100644 --- a/pymongo/asynchronous/pool.py +++ b/pymongo/asynchronous/pool.py @@ -977,11 +977,11 @@ async def checkout( :param handler: A _MongoClientErrorHandler. """ - start = self._telemetry.checkout_started() + checkout_started_time = self._telemetry.checkout_started() - conn = await self._get_conn(handler=handler, checkout_start=start) + conn = await self._get_conn(checkout_started_time, handler=handler) - self._telemetry.checkout_succeeded(conn.id, start) + self._telemetry.checkout_succeeded(conn.id, checkout_started_time) try: async with self.lock: self.active_contexts.add(conn.cancel_context) @@ -1012,13 +1012,13 @@ async def checkout( elif conn.active: await self.checkin(conn) - def _raise_if_not_ready(self, emit_event: bool, checkout_start: float) -> None: + def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) -> None: if self.state != PoolState.READY: if emit_event: self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, - checkout_start, + checkout_started_time, ) details = _get_timeout_details(self.opts) @@ -1027,7 +1027,7 @@ def _raise_if_not_ready(self, emit_event: bool, checkout_start: float) -> None: ) async def _get_conn( - self, handler: Optional[_MongoClientErrorHandler] = None, checkout_start: float = 0.0 + self, checkout_started_time: float, handler: Optional[_MongoClientErrorHandler] = None ) -> AsyncConnection: """Get or create a AsyncConnection. Can raise ConnectionFailure.""" # We use the pid here to avoid issues with fork / multiprocessing. @@ -1040,7 +1040,7 @@ async def _get_conn( self._telemetry.checkout_failed( "Connection pool was closed", ConnectionCheckOutFailedReason.POOL_CLOSED, - checkout_start, + checkout_started_time, ) raise _PoolClosedError( "Attempted to check out a connection from closed connection pool" @@ -1058,7 +1058,7 @@ async def _get_conn( deadline = None async with self.size_cond: - self._raise_if_not_ready(emit_event=True, checkout_start=checkout_start) + self._raise_if_not_ready(checkout_started_time, emit_event=True) while not (self.requests < self.max_pool_size): timeout = deadline - time.monotonic() if deadline else None if not await _async_cond_wait(self.size_cond, timeout): @@ -1066,8 +1066,8 @@ async def _get_conn( # timeout doesn't consume the condition. if self.requests < self.max_pool_size: self.size_cond.notify() - self._raise_wait_queue_timeout(checkout_start) - self._raise_if_not_ready(emit_event=True, checkout_start=checkout_start) + self._raise_wait_queue_timeout(checkout_started_time) + self._raise_if_not_ready(checkout_started_time, emit_event=True) self.requests += 1 # We've now acquired the semaphore and must release it on error. @@ -1082,7 +1082,7 @@ async def _get_conn( # CMAP: we MUST wait for either maxConnecting OR for a socket # to be checked back into the pool. async with self._max_connecting_cond: - self._raise_if_not_ready(emit_event=False, checkout_start=checkout_start) + self._raise_if_not_ready(checkout_started_time, emit_event=False) while not (self.conns or self._pending < self._max_connecting): timeout = deadline - time.monotonic() if deadline else None if not await _async_cond_wait(self._max_connecting_cond, timeout): @@ -1091,8 +1091,8 @@ async def _get_conn( if self.conns or self._pending < self._max_connecting: self._max_connecting_cond.notify() emitted_event = True - self._raise_wait_queue_timeout(checkout_start) - self._raise_if_not_ready(emit_event=False, checkout_start=checkout_start) + self._raise_wait_queue_timeout(checkout_started_time) + self._raise_if_not_ready(checkout_started_time, emit_event=False) try: conn = self.conns.popleft() @@ -1124,7 +1124,7 @@ async def _get_conn( self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, - checkout_start, + checkout_started_time, ) raise @@ -1216,11 +1216,11 @@ async def _perished(self, conn: AsyncConnection) -> bool: return False - def _raise_wait_queue_timeout(self, checkout_start: float) -> NoReturn: + def _raise_wait_queue_timeout(self, checkout_started_time: float) -> NoReturn: self._telemetry.checkout_failed( "Wait queue timeout elapsed without a connection becoming available", ConnectionCheckOutFailedReason.TIMEOUT, - checkout_start, + checkout_started_time, ) timeout = _csot.get_timeout() or self.opts.wait_queue_timeout if self.opts.load_balanced: diff --git a/pymongo/asynchronous/server.py b/pymongo/asynchronous/server.py index dff297dc58..85b7fa1ad8 100644 --- a/pymongo/asynchronous/server.py +++ b/pymongo/asynchronous/server.py @@ -64,12 +64,8 @@ def __init__( self._pool = pool self._monitor = monitor self._topology_id = topology_id - self._publish = listeners is not None and listeners.enabled_for_server - self._listener = listeners - self._events = None - if self._publish: - self._events = events() # type: ignore[misc] - self._sdam = _SdamTelemetry(topology_id, listeners, self._events) # type: ignore[arg-type] + _events = events() if listeners is not None and listeners.enabled_for_server else None # type: ignore[misc] + self._sdam = _SdamTelemetry(topology_id, listeners, _events) # type: ignore[arg-type] async def open(self) -> None: """Start monitoring, or restart after a fork. diff --git a/pymongo/asynchronous/topology.py b/pymongo/asynchronous/topology.py index 8f71861619..f3032a6775 100644 --- a/pymongo/asynchronous/topology.py +++ b/pymongo/asynchronous/topology.py @@ -227,7 +227,7 @@ async def select_servers( address: Optional[_Address] = None, operation_id: Optional[int] = None, deprioritized_servers: Optional[list[Server]] = None, - ) -> list[Server]: + ) -> tuple[list[Server], _ServerSelectionTelemetry]: """Return a list of Servers matching selector, or time out. :param selector: function that takes a list of Servers and returns @@ -253,7 +253,7 @@ async def select_servers( await self.cleanup_monitors() async with self._lock: - server_descriptions = await self._select_servers_loop( + server_descriptions, ss = await self._select_servers_loop( selector, server_timeout, operation, @@ -264,7 +264,7 @@ async def select_servers( return [ cast(Server, self.get_server_by_address(sd.address)) for sd in server_descriptions - ] + ], ss async def _select_servers_loop( self, @@ -274,7 +274,7 @@ async def _select_servers_loop( operation_id: Optional[int], address: Optional[_Address], deprioritized_servers: Optional[list[Server]] = None, - ) -> list[ServerDescription]: + ) -> tuple[list[ServerDescription], _ServerSelectionTelemetry]: """select_servers() guts. Hold the lock when calling this.""" now = time.monotonic() end_time = now + timeout @@ -320,7 +320,7 @@ async def _select_servers_loop( ) self._description.check_compatible() - return server_descriptions + return server_descriptions, ss async def _select_server( self, @@ -330,8 +330,8 @@ async def _select_server( address: Optional[_Address] = None, deprioritized_servers: Optional[list[Server]] = None, operation_id: Optional[int] = None, - ) -> Server: - servers = await self.select_servers( + ) -> tuple[Server, _ServerSelectionTelemetry]: + servers, ss = await self.select_servers( selector, operation, server_selection_timeout, @@ -340,12 +340,12 @@ async def _select_server( deprioritized_servers, ) if len(servers) == 1: - return servers[0] + return servers[0], ss server1, server2 = random.sample(servers, 2) if server1.pool.operation_count <= server2.pool.operation_count: - return server1 + return server1, ss else: - return server2 + return server2, ss async def select_server( self, @@ -357,7 +357,7 @@ async def select_server( operation_id: Optional[int] = None, ) -> Server: """Like select_servers, but choose a random server if several match.""" - server = await self._select_server( + server, ss = await self._select_server( selector, operation, server_selection_timeout, @@ -367,9 +367,7 @@ async def select_server( ) if _csot.get_timeout(): _csot.set_rtt(server.description.min_round_trip_time) - _ServerSelectionTelemetry( - self._topology_id, selector, operation, operation_id, self.description - ).succeeded(server.description.address[0], server.description.address[1]) + ss.succeeded(server.description.address[0], server.description.address[1]) return server async def select_server_by_address( diff --git a/pymongo/synchronous/pool.py b/pymongo/synchronous/pool.py index 4dce8c73b0..9159ff704a 100644 --- a/pymongo/synchronous/pool.py +++ b/pymongo/synchronous/pool.py @@ -973,11 +973,11 @@ def checkout( :param handler: A _MongoClientErrorHandler. """ - start = self._telemetry.checkout_started() + checkout_started_time = self._telemetry.checkout_started() - conn = self._get_conn(handler=handler, checkout_start=start) + conn = self._get_conn(checkout_started_time, handler=handler) - self._telemetry.checkout_succeeded(conn.id, start) + self._telemetry.checkout_succeeded(conn.id, checkout_started_time) try: with self.lock: self.active_contexts.add(conn.cancel_context) @@ -1008,13 +1008,13 @@ def checkout( elif conn.active: self.checkin(conn) - def _raise_if_not_ready(self, emit_event: bool, checkout_start: float) -> None: + def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) -> None: if self.state != PoolState.READY: if emit_event: self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, - checkout_start, + checkout_started_time, ) details = _get_timeout_details(self.opts) @@ -1023,7 +1023,7 @@ def _raise_if_not_ready(self, emit_event: bool, checkout_start: float) -> None: ) def _get_conn( - self, handler: Optional[_MongoClientErrorHandler] = None, checkout_start: float = 0.0 + self, checkout_started_time: float, handler: Optional[_MongoClientErrorHandler] = None ) -> Connection: """Get or create a Connection. Can raise ConnectionFailure.""" # We use the pid here to avoid issues with fork / multiprocessing. @@ -1036,7 +1036,7 @@ def _get_conn( self._telemetry.checkout_failed( "Connection pool was closed", ConnectionCheckOutFailedReason.POOL_CLOSED, - checkout_start, + checkout_started_time, ) raise _PoolClosedError( "Attempted to check out a connection from closed connection pool" @@ -1054,7 +1054,7 @@ def _get_conn( deadline = None with self.size_cond: - self._raise_if_not_ready(emit_event=True, checkout_start=checkout_start) + self._raise_if_not_ready(checkout_started_time, emit_event=True) while not (self.requests < self.max_pool_size): timeout = deadline - time.monotonic() if deadline else None if not _cond_wait(self.size_cond, timeout): @@ -1062,8 +1062,8 @@ def _get_conn( # timeout doesn't consume the condition. if self.requests < self.max_pool_size: self.size_cond.notify() - self._raise_wait_queue_timeout(checkout_start) - self._raise_if_not_ready(emit_event=True, checkout_start=checkout_start) + self._raise_wait_queue_timeout(checkout_started_time) + self._raise_if_not_ready(checkout_started_time, emit_event=True) self.requests += 1 # We've now acquired the semaphore and must release it on error. @@ -1078,7 +1078,7 @@ def _get_conn( # CMAP: we MUST wait for either maxConnecting OR for a socket # to be checked back into the pool. with self._max_connecting_cond: - self._raise_if_not_ready(emit_event=False, checkout_start=checkout_start) + self._raise_if_not_ready(checkout_started_time, emit_event=False) while not (self.conns or self._pending < self._max_connecting): timeout = deadline - time.monotonic() if deadline else None if not _cond_wait(self._max_connecting_cond, timeout): @@ -1087,8 +1087,8 @@ def _get_conn( if self.conns or self._pending < self._max_connecting: self._max_connecting_cond.notify() emitted_event = True - self._raise_wait_queue_timeout(checkout_start) - self._raise_if_not_ready(emit_event=False, checkout_start=checkout_start) + self._raise_wait_queue_timeout(checkout_started_time) + self._raise_if_not_ready(checkout_started_time, emit_event=False) try: conn = self.conns.popleft() @@ -1120,7 +1120,7 @@ def _get_conn( self._telemetry.checkout_failed( "An error occurred while trying to establish a new connection", ConnectionCheckOutFailedReason.CONN_ERROR, - checkout_start, + checkout_started_time, ) raise @@ -1212,11 +1212,11 @@ def _perished(self, conn: Connection) -> bool: return False - def _raise_wait_queue_timeout(self, checkout_start: float) -> NoReturn: + def _raise_wait_queue_timeout(self, checkout_started_time: float) -> NoReturn: self._telemetry.checkout_failed( "Wait queue timeout elapsed without a connection becoming available", ConnectionCheckOutFailedReason.TIMEOUT, - checkout_start, + checkout_started_time, ) timeout = _csot.get_timeout() or self.opts.wait_queue_timeout if self.opts.load_balanced: diff --git a/pymongo/synchronous/server.py b/pymongo/synchronous/server.py index 8564b84750..9424b9794a 100644 --- a/pymongo/synchronous/server.py +++ b/pymongo/synchronous/server.py @@ -64,12 +64,8 @@ def __init__( self._pool = pool self._monitor = monitor self._topology_id = topology_id - self._publish = listeners is not None and listeners.enabled_for_server - self._listener = listeners - self._events = None - if self._publish: - self._events = events() # type: ignore[misc] - self._sdam = _SdamTelemetry(topology_id, listeners, self._events) # type: ignore[arg-type] + _events = events() if listeners is not None and listeners.enabled_for_server else None # type: ignore[misc] + self._sdam = _SdamTelemetry(topology_id, listeners, _events) # type: ignore[arg-type] def open(self) -> None: """Start monitoring, or restart after a fork. diff --git a/pymongo/synchronous/topology.py b/pymongo/synchronous/topology.py index 37181c00b0..5e868046c3 100644 --- a/pymongo/synchronous/topology.py +++ b/pymongo/synchronous/topology.py @@ -227,7 +227,7 @@ def select_servers( address: Optional[_Address] = None, operation_id: Optional[int] = None, deprioritized_servers: Optional[list[Server]] = None, - ) -> list[Server]: + ) -> tuple[list[Server], _ServerSelectionTelemetry]: """Return a list of Servers matching selector, or time out. :param selector: function that takes a list of Servers and returns @@ -253,7 +253,7 @@ def select_servers( self.cleanup_monitors() with self._lock: - server_descriptions = self._select_servers_loop( + server_descriptions, ss = self._select_servers_loop( selector, server_timeout, operation, @@ -264,7 +264,7 @@ def select_servers( return [ cast(Server, self.get_server_by_address(sd.address)) for sd in server_descriptions - ] + ], ss def _select_servers_loop( self, @@ -274,7 +274,7 @@ def _select_servers_loop( operation_id: Optional[int], address: Optional[_Address], deprioritized_servers: Optional[list[Server]] = None, - ) -> list[ServerDescription]: + ) -> tuple[list[ServerDescription], _ServerSelectionTelemetry]: """select_servers() guts. Hold the lock when calling this.""" now = time.monotonic() end_time = now + timeout @@ -320,7 +320,7 @@ def _select_servers_loop( ) self._description.check_compatible() - return server_descriptions + return server_descriptions, ss def _select_server( self, @@ -330,8 +330,8 @@ def _select_server( address: Optional[_Address] = None, deprioritized_servers: Optional[list[Server]] = None, operation_id: Optional[int] = None, - ) -> Server: - servers = self.select_servers( + ) -> tuple[Server, _ServerSelectionTelemetry]: + servers, ss = self.select_servers( selector, operation, server_selection_timeout, @@ -340,12 +340,12 @@ def _select_server( deprioritized_servers, ) if len(servers) == 1: - return servers[0] + return servers[0], ss server1, server2 = random.sample(servers, 2) if server1.pool.operation_count <= server2.pool.operation_count: - return server1 + return server1, ss else: - return server2 + return server2, ss def select_server( self, @@ -357,7 +357,7 @@ def select_server( operation_id: Optional[int] = None, ) -> Server: """Like select_servers, but choose a random server if several match.""" - server = self._select_server( + server, ss = self._select_server( selector, operation, server_selection_timeout, @@ -367,9 +367,7 @@ def select_server( ) if _csot.get_timeout(): _csot.set_rtt(server.description.min_round_trip_time) - _ServerSelectionTelemetry( - self._topology_id, selector, operation, operation_id, self.description - ).succeeded(server.description.address[0], server.description.address[1]) + ss.succeeded(server.description.address[0], server.description.address[1]) return server def select_server_by_address( From 9fab1db0010cbdc8b6f973c1272d84ae4233a327 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 1 Jul 2026 16:46:54 -0500 Subject: [PATCH 20/20] PYTHON-5846 Fix test callers to unpack (servers, ss) tuple from select_servers/_select_server --- test/asynchronous/test_mongos_load_balancing.py | 2 +- test/asynchronous/test_read_preferences.py | 6 +++--- test/asynchronous/utils.py | 10 +++------- test/asynchronous/utils_selection_tests.py | 4 ++-- test/test_mongos_load_balancing.py | 2 +- test/test_read_preferences.py | 4 +++- test/utils.py | 8 +++----- test/utils_selection_tests.py | 4 ++-- 8 files changed, 18 insertions(+), 22 deletions(-) diff --git a/test/asynchronous/test_mongos_load_balancing.py b/test/asynchronous/test_mongos_load_balancing.py index 028c9a5cdb..bb89e25842 100644 --- a/test/asynchronous/test_mongos_load_balancing.py +++ b/test/asynchronous/test_mongos_load_balancing.py @@ -61,7 +61,7 @@ async def do_simple_op(client, ntasks): async def writable_addresses(topology): return { server.description.address - for server in await topology.select_servers(writable_server_selector, _Op.TEST) + for server in (await topology.select_servers(writable_server_selector, _Op.TEST))[0] } diff --git a/test/asynchronous/test_read_preferences.py b/test/asynchronous/test_read_preferences.py index 9f92a39920..17c7a4a996 100644 --- a/test/asynchronous/test_read_preferences.py +++ b/test/asynchronous/test_read_preferences.py @@ -310,9 +310,9 @@ async def test_nearest(self): not_used = data_members.difference(used) latencies = ", ".join( "%s: %sms" % (server.description.address, server.description.round_trip_time) - for server in await (await c._get_topology()).select_servers( - readable_server_selector, _Op.TEST - ) + for server in ( + await (await c._get_topology()).select_servers(readable_server_selector, _Op.TEST) + )[0] ) self.assertFalse( diff --git a/test/asynchronous/utils.py b/test/asynchronous/utils.py index 5842224220..1e8ece6c25 100644 --- a/test/asynchronous/utils.py +++ b/test/asynchronous/utils.py @@ -43,18 +43,14 @@ async def async_get_pool(client: AsyncMongoClient) -> Pool: """Get the standalone, primary, or mongos pool.""" topology = await client._get_topology() - server = await topology._select_server(writable_server_selector, _Op.TEST) + server, _ = await topology._select_server(writable_server_selector, _Op.TEST) return server.pool async def async_get_pools(client: AsyncMongoClient) -> list[Pool]: """Get all pools.""" - return [ - server.pool - for server in await (await client._get_topology()).select_servers( - any_server_selector, _Op.TEST - ) - ] + servers, _ = await (await client._get_topology()).select_servers(any_server_selector, _Op.TEST) + return [server.pool for server in servers] async def async_wait_until(predicate, success_description, timeout=10): diff --git a/test/asynchronous/utils_selection_tests.py b/test/asynchronous/utils_selection_tests.py index eec6dc9719..81df672fb6 100644 --- a/test/asynchronous/utils_selection_tests.py +++ b/test/asynchronous/utils_selection_tests.py @@ -150,13 +150,13 @@ async def run_scenario(self): return - actual_suitable_s = await top_suitable.select_servers( + actual_suitable_s, _ = await top_suitable.select_servers( pref, _Op.TEST, server_selection_timeout=0, deprioritized_servers=top_suitable_deprioritized_servers, ) - actual_latency_s = await top_latency.select_servers( + actual_latency_s, _ = await top_latency.select_servers( pref, _Op.TEST, server_selection_timeout=0, diff --git a/test/test_mongos_load_balancing.py b/test/test_mongos_load_balancing.py index 362f67925c..e7d284f805 100644 --- a/test/test_mongos_load_balancing.py +++ b/test/test_mongos_load_balancing.py @@ -61,7 +61,7 @@ def do_simple_op(client, ntasks): def writable_addresses(topology): return { server.description.address - for server in topology.select_servers(writable_server_selector, _Op.TEST) + for server in (topology.select_servers(writable_server_selector, _Op.TEST))[0] } diff --git a/test/test_read_preferences.py b/test/test_read_preferences.py index 27c8d0704a..9e1b0fde25 100644 --- a/test/test_read_preferences.py +++ b/test/test_read_preferences.py @@ -292,7 +292,9 @@ def test_nearest(self): not_used = data_members.difference(used) latencies = ", ".join( "%s: %sms" % (server.description.address, server.description.round_trip_time) - for server in (c._get_topology()).select_servers(readable_server_selector, _Op.TEST) + for server in ((c._get_topology()).select_servers(readable_server_selector, _Op.TEST))[ + 0 + ] ) self.assertFalse( diff --git a/test/utils.py b/test/utils.py index 54b7e351e3..77d8549d8f 100644 --- a/test/utils.py +++ b/test/utils.py @@ -43,16 +43,14 @@ def get_pool(client: MongoClient) -> Pool: """Get the standalone, primary, or mongos pool.""" topology = client._get_topology() - server = topology._select_server(writable_server_selector, _Op.TEST) + server, _ = topology._select_server(writable_server_selector, _Op.TEST) return server.pool def get_pools(client: MongoClient) -> list[Pool]: """Get all pools.""" - return [ - server.pool - for server in (client._get_topology()).select_servers(any_server_selector, _Op.TEST) - ] + servers, _ = (client._get_topology()).select_servers(any_server_selector, _Op.TEST) + return [server.pool for server in servers] def wait_until(predicate, success_description, timeout=10): diff --git a/test/utils_selection_tests.py b/test/utils_selection_tests.py index 3754c74674..18e84c103a 100644 --- a/test/utils_selection_tests.py +++ b/test/utils_selection_tests.py @@ -150,13 +150,13 @@ def run_scenario(self): return - actual_suitable_s = top_suitable.select_servers( + actual_suitable_s, _ = top_suitable.select_servers( pref, _Op.TEST, server_selection_timeout=0, deprioritized_servers=top_suitable_deprioritized_servers, ) - actual_latency_s = top_latency.select_servers( + actual_latency_s, _ = top_latency.select_servers( pref, _Op.TEST, server_selection_timeout=0,