from __future__ import annotations import atexit import logging import os import threading from dataclasses import dataclass from typing import Any, Optional from .version import __version__ SDK_NAME = "starry-client-sdk" DEFAULT_SERVICE_NAME = "starry-python-sdk-consumer" DEFAULT_OTLP_ENDPOINT = "http://host.docker.internal:4318" _LOCK = threading.Lock() _HANDLES: Optional["TelemetryHandles"] = None _METER_PROVIDER: Any = None _LOGGER_PROVIDER: Any = None class _NoopCounter: def add(self, amount: int | float, attributes: Optional[dict[str, Any]] = None) -> None: return None class _NoopHistogram: def record(self, amount: int | float, attributes: Optional[dict[str, Any]] = None) -> None: return None @dataclass(frozen=True) class TelemetryHandles: request_counter: Any error_counter: Any duration_histogram: Any logger: logging.Logger enabled: bool def _truthy(value: str) -> bool: return value.strip().lower() not in {"0", "false", "f", "no", "n", "off", "disabled"} def telemetry_enabled() -> bool: return _truthy(os.getenv("STARRYSDK_TELEMETRY_ENABLED", "true")) def _base_endpoint() -> str: return os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", DEFAULT_OTLP_ENDPOINT).rstrip("/") def _metrics_endpoint() -> str: return os.getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", f"{_base_endpoint()}/v1/metrics") def _logs_endpoint() -> str: return os.getenv("OTEL_EXPORTER_OTLP_LOGS_ENDPOINT", f"{_base_endpoint()}/v1/logs") def _noop_handles() -> TelemetryHandles: return TelemetryHandles( request_counter=_NoopCounter(), error_counter=_NoopCounter(), duration_histogram=_NoopHistogram(), logger=logging.getLogger("starry_client_sdk"), enabled=False, ) def configure_telemetry(*, service_name: Optional[str] = None, sdk_version: str = __version__) -> TelemetryHandles: """Configure default-on, non-blocking telemetry for this SDK. This demo intentionally configures local OpenTelemetry providers owned by the SDK, so the SDK can be observable by default without overwriting an application's global OpenTelemetry configuration. Export failures must never break business calls; if setup fails, the SDK falls back to no-op meters. """ global _HANDLES, _METER_PROVIDER, _LOGGER_PROVIDER with _LOCK: if _HANDLES is not None: return _HANDLES logger = logging.getLogger("starry_client_sdk") logger.setLevel(logging.INFO) if not telemetry_enabled(): _HANDLES = _noop_handles() return _HANDLES try: from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.instrumentation.logging.handler import LoggingHandler from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.sdk._logs.export import BatchLogRecordProcessor from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader from opentelemetry.sdk.resources import Resource service_name = service_name or os.getenv("STARRYSDK_SERVICE_NAME", DEFAULT_SERVICE_NAME) export_interval_ms = int(os.getenv("STARRYSDK_METRIC_EXPORT_INTERVAL_MS", "5000")) resource = Resource.create( { "service.name": service_name, "sdk.name": SDK_NAME, "sdk.version": sdk_version, "telemetry.source": "client-sdk", } ) metric_exporter = OTLPMetricExporter(endpoint=_metrics_endpoint()) metric_reader = PeriodicExportingMetricReader( metric_exporter, export_interval_millis=export_interval_ms, ) _METER_PROVIDER = MeterProvider(resource=resource, metric_readers=[metric_reader]) meter = _METER_PROVIDER.get_meter(SDK_NAME, sdk_version) _LOGGER_PROVIDER = LoggerProvider(resource=resource) log_exporter = OTLPLogExporter(endpoint=_logs_endpoint()) _LOGGER_PROVIDER.add_log_record_processor(BatchLogRecordProcessor(log_exporter)) # Attach only one OTLP handler to the SDK logger. Do not attach to root logger. if not any(getattr(handler, "_starry_sdk_otel_handler", False) for handler in logger.handlers): otel_handler = LoggingHandler(level=logging.INFO, logger_provider=_LOGGER_PROVIDER) setattr(otel_handler, "_starry_sdk_otel_handler", True) logger.addHandler(otel_handler) _HANDLES = TelemetryHandles( request_counter=meter.create_counter( "starry.sdk.client.requests", unit="1", description="Total SDK client calls.", ), error_counter=meter.create_counter( "starry.sdk.client.errors", unit="1", description="Total SDK client calls ending in an exception.", ), duration_histogram=meter.create_histogram( "starry.sdk.client.request.duration.ms", unit="ms", description="SDK client call latency in milliseconds.", ), logger=logger, enabled=True, ) atexit.register(shutdown_telemetry) return _HANDLES except Exception: # Telemetry must not break SDK business behavior. logging.getLogger("starry_client_sdk.telemetry").debug("SDK telemetry setup failed", exc_info=True) _HANDLES = _noop_handles() return _HANDLES def force_flush(timeout_millis: int = 5000) -> None: """Flush telemetry buffers. Useful in short-lived CLI/demo processes.""" for provider in (_METER_PROVIDER, _LOGGER_PROVIDER): if provider is None: continue try: provider.force_flush(timeout_millis=timeout_millis) except Exception: logging.getLogger("starry_client_sdk.telemetry").debug("Telemetry force_flush failed", exc_info=True) def shutdown_telemetry() -> None: for provider in (_METER_PROVIDER, _LOGGER_PROVIDER): if provider is None: continue try: provider.shutdown() except Exception: logging.getLogger("starry_client_sdk.telemetry").debug("Telemetry shutdown failed", exc_info=True)