From 024bbfc79fd314341abeff75c4760c75ab1f0ff1 Mon Sep 17 00:00:00 2001 From: xkm Date: Thu, 30 Apr 2026 00:48:34 +0800 Subject: [PATCH] demo with sdk and backend --- .gitignore | 1 + demo-app/Dockerfile | 15 ++ demo-app/main.py | 46 ++++++ demo-app/requirements.txt | 1 + docker-compose.yml | 123 +++++++++++++++ fluent-bit.conf | 69 +++++++++ starry-sdk/README.md | 12 ++ starry-sdk/pyproject.toml | 21 +++ starry-sdk/starry_client_sdk/__init__.py | 11 ++ starry-sdk/starry_client_sdk/client.py | 125 +++++++++++++++ starry-sdk/starry_client_sdk/telemetry.py | 178 ++++++++++++++++++++++ starry-sdk/starry_client_sdk/version.py | 1 + 12 files changed, 603 insertions(+) create mode 100644 .gitignore create mode 100644 demo-app/Dockerfile create mode 100644 demo-app/main.py create mode 100644 demo-app/requirements.txt create mode 100644 docker-compose.yml create mode 100644 fluent-bit.conf create mode 100644 starry-sdk/README.md create mode 100644 starry-sdk/pyproject.toml create mode 100644 starry-sdk/starry_client_sdk/__init__.py create mode 100644 starry-sdk/starry_client_sdk/client.py create mode 100644 starry-sdk/starry_client_sdk/telemetry.py create mode 100644 starry-sdk/starry_client_sdk/version.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e43b0f9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.DS_Store diff --git a/demo-app/Dockerfile b/demo-app/Dockerfile new file mode 100644 index 0000000..ee2a453 --- /dev/null +++ b/demo-app/Dockerfile @@ -0,0 +1,15 @@ +FROM docker.1ms.run/python:3.12-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +COPY starry-sdk /app/starry-sdk +COPY demo-app/requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt && \ + pip install --no-cache-dir -e /app/starry-sdk + +COPY demo-app/main.py /app/main.py + +CMD ["python", "/app/main.py"] diff --git a/demo-app/main.py b/demo-app/main.py new file mode 100644 index 0000000..630572f --- /dev/null 
+++ b/demo-app/main.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import os
+import time
+
+from starry_client_sdk import StarryClient, StarryNotFoundError, force_flush
+
+_FALSE_WORDS = frozenset({"0", "false", "f", "no", "n", "off"})
+
+
+def _bool_env(name: str, default: bool) -> bool:
+    """Read a boolean flag from the environment.
+
+    An unset variable yields ``default``. A set variable is False only when its
+    trimmed, lower-cased text is one of the known "off" spellings.
+    """
+    value = os.getenv(name)
+    if value is not None:
+        return value.strip().lower() not in _FALSE_WORDS
+    return default
+
+
+def _paths() -> list[str]:
+    """Return the comma-separated ``DEMO_PATHS`` entries, whitespace-trimmed.
+
+    Empty entries are kept: the default value yields ``""`` followed by
+    ``"__sdk_demo_not_found__"``.
+    """
+    configured = os.getenv("DEMO_PATHS", ",__sdk_demo_not_found__")
+    return list(map(str.strip, configured.split(",")))
+
+
+def _probe(client: StarryClient, path: str) -> None:
+    """Request one path and print a single status line for the outcome."""
+    try:
+        text = client.get(path)
+        print(f"OK path={path!r} bytes={len(text)}")
+    except StarryNotFoundError as exc:
+        print(f"NOT_FOUND path={path!r} error={exc}")
+    except Exception as exc:
+        print(f"ERROR path={path!r} type={exc.__class__.__name__} error={exc}")
+
+
+def main() -> None:
+    """Probe the configured paths once, or repeatedly while ``DEMO_LOOP`` is on."""
+    client = StarryClient()
+    keep_looping = _bool_env("DEMO_LOOP", True)
+    pause_seconds = float(os.getenv("DEMO_INTERVAL_SECONDS", "5"))
+    targets = _paths()
+
+    while True:
+        for target in targets:
+            _probe(client, target)
+
+        # Demo app is short-looping, so flush to make logs/metrics visible quickly.
+        force_flush()
+
+        if not keep_looping:
+            return
+        time.sleep(pause_seconds)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/demo-app/requirements.txt b/demo-app/requirements.txt
new file mode 100644
index 0000000..30d3758
--- /dev/null
+++ b/demo-app/requirements.txt
@@ -0,0 +1 @@
+# SDK dependencies are declared in starry-sdk/pyproject.toml.
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..b97f001
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,123 @@
+services:
+  fluentbit:
+    depends_on:
+      vlagent:
+        condition: service_started
+        required: true
+    image: cr.fluentbit.io/fluent/fluent-bit:3.1.7
+    restart: always
+    networks:
+      default: null
+    ports:
+      - 4318:4318
+    volumes:
+      - ./fluent-bit.conf:/fluent-bit/etc/fluent-bit.conf
+  victorialogs-1:
+    command:
+      - -storageDataPath=/vlogs
+      - -loggerFormat=json
+      - -datadog.streamFields=service,hostname,ddsource
+      - -journald.streamFields=_HOSTNAME,_SYSTEMD_UNIT,_PID
+      - -journald.ignoreFields=MESSAGE_ID,INVOCATION_ID,USER_INVOCATION_ID
+      - -journald.ignoreFields=_BOOT_ID,_MACHINE_ID,_SYSTEMD_INVOCATION_ID,_STREAM_ID,_UID
+    deploy:
+      replicas: 1
+    healthcheck:
+      test:
+        - CMD
+        - wget
+        - -qO-
+        - http://127.0.0.1:9428/health
+      timeout: 1s
+      interval: 1s
+      retries: 10
+    image: docker.io/victoriametrics/victoria-logs:v1.50.0
+    networks:
+      default: null
+    ports:
+      - mode: ingress
+        target: 9428
+        published: "9428"
+        protocol: tcp
+    volumes:
+      - type: volume
+        source: victorialogs-1
+        target: /vlogs
+        volume: {}
+  victoriametrics:
+    command:
+      - -storageDataPath=/vmsingle
+      - -loggerFormat=json
+    healthcheck:
+      test:
+        - CMD
+        - wget
+        - -qO-
+        - http://127.0.0.1:8428/health
+      timeout: 1s
+      interval: 1s
+      retries: 10
+    image: victoriametrics/victoria-metrics:v1.132.0
+    networks:
+      default: null
+    ports:
+      - 8428:8428
+    volumes:
+      - type: volume
+        source: victoriametrics
+        target: /vmsingle
+        volume: {}
+  vlagent:
+    command:
+      - --remoteWrite.tmpDataPath=/vlagent
+      - --remoteWrite.url=http://victorialogs-1:9428/insert/native
+      - --syslog.listenAddr.tcp=0.0.0.0:8094
+    depends_on:
+      victorialogs-1:
+        condition: service_healthy
+        required: true
+    healthcheck:
+      test:
+        - CMD
+        - wget
+        - -qO-
+        - http://127.0.0.1:9429/health
+      timeout: 1s
+      interval: 1s
+      retries: 10
+    image: victoriametrics/vlagent:v1.50.0
+    networks:
+      default: null
+    volumes:
+      - type: volume
+        source: vlagent
+        target: /vlagent
+        volume: {}
+  demo-app:
+    build:
+      context: .
+      dockerfile: demo-app/Dockerfile
+    environment:
+      STARRYSDK_TELEMETRY_ENABLED: "true"
+      STARRYSDK_SERVICE_NAME: starry-python-sdk-demo
+      STARRYSDK_METRIC_EXPORT_INTERVAL_MS: "5000"
+      # The SDK reads OTEL_EXPORTER_OTLP_ENDPOINT (not DEFAULT_OTLP_ENDPOINT) for its OTLP/HTTP target.
+      OTEL_EXPORTER_OTLP_ENDPOINT: "http://fluentbit:4318"
+      DEMO_LOOP: "true"
+      DEMO_INTERVAL_SECONDS: "5"
+      DEMO_PATHS: ",__sdk_demo_not_found__"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+networks:
+  default:
+    name: fluentbit-oltp_default
+volumes:
+  victorialogs-1:
+    name: fluentbit-oltp_victorialogs-1
+    external: true
+  victoriametrics:
+    name: fluentbit-oltp_victoriametrics
+    external: true
+  vlagent:
+    name: fluentbit-oltp_vlagent
+    external: true
diff --git a/fluent-bit.conf b/fluent-bit.conf
new file mode 100644
index 0000000..c937b55
--- /dev/null
+++ b/fluent-bit.conf
@@ -0,0 +1,69 @@
+[SERVICE]
+    Flush 1
+    Log_Level info
+    Parsers_File parsers.conf
+
+    # Handy for debugging Fluent Bit's own state; do not expose this port to the public internet.
+    HTTP_Server On
+    HTTP_Listen 0.0.0.0
+    HTTP_Port 2020
+
+    # Optional: filesystem buffer directory for Fluent Bit; mount a volume at /buffers in compose to persist it.
+    storage.path /buffers
+    storage.sync normal
+    storage.checksum off
+    storage.backlog.mem_limit 64M
+
+# The Python SDK / OpenTelemetry SDK posts OTLP/HTTP to this input:
+#   http://fluentbit:4318/v1/logs
+#   http://fluentbit:4318/v1/metrics
+# A vmauth proxy (e.g. http://vmauth:8427) may sit in front and forward to fluentbit:4318.
+[INPUT]
+    Name opentelemetry
+    Listen 0.0.0.0
+    Port 4318
+
+    # OTLP/HTTP clients usually expect 200; Fluent Bit's default is 201.
+    Successful_Response_Code 200
+
+    # Keep the default tagging: /v1/logs -> v1_logs, /v1/metrics -> v1_metrics.
+    Tag_From_Uri true
+
+    Buffer_Chunk_Size 1M
+    Buffer_Max_Size 10M
+    Threaded On
+
+# Metrics from the Python SDK (tag v1_metrics) -> VictoriaMetrics remote_write
+[OUTPUT]
+    Name prometheus_remote_write
+    Match v1_metrics
+    Host victoriametrics
+    Port 8428
+    Uri /api/v1/write
+
+    # Optional common label to tell sources apart.
+    Add_Label otel_pipeline fluent-bit
+    Workers 2
+
+# Logs from the Python SDK (tag v1_logs) -> vlagent -> VictoriaLogs; metrics must not be routed here.
+[OUTPUT]
+    Name opentelemetry
+    Match v1_logs
+    Host vlagent
+    Port 9429
+
+    Logs_Uri /insert/opentelemetry/v1/logs
+
+    # Field mapping used by VictoriaLogs when ingesting logs.
+    # The Python OTel log body is usually handled automatically; listing several candidates is more robust.
+    Header VL-Msg-Field body,Body,message,msg,log,_msg
+
+    # Use only low-cardinality fields as stream fields, so fields like path or status_code cannot explode the stream count.
+    Header VL-Stream-Fields service.name,service.namespace,service.version,deployment.environment,host.name
+
+    Compress gzip
+    Workers 2
+
+[OUTPUT]
+    Name stdout
+    Match *
diff --git a/starry-sdk/README.md b/starry-sdk/README.md
new file mode 100644
index 0000000..e0dadf4
--- /dev/null
+++ b/starry-sdk/README.md
@@ -0,0 +1,12 @@
+# starry-client-sdk demo
+
+Demo SDK: request `https://blog.starryskymeow.top/{path}`, return response text, and raise `StarryNotFoundError` on HTTP 404. Telemetry is enabled by default and exported with OTLP/HTTP.
+
+Environment variables:
+
+- `STARRYSDK_TELEMETRY_ENABLED`: default `true`; set `false` to disable SDK telemetry.
+- `STARRYSDK_SERVICE_NAME`: default `starry-python-sdk-consumer`.
+- `STARRYSDK_METRIC_EXPORT_INTERVAL_MS`: default `5000`.
+- `OTEL_EXPORTER_OTLP_ENDPOINT`: default `http://host.docker.internal:4318`; SDK appends `/v1/metrics` and `/v1/logs` for OTLP/HTTP.
+- `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`: optional explicit metrics endpoint.
+- `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT`: optional explicit logs endpoint.
diff --git a/starry-sdk/pyproject.toml b/starry-sdk/pyproject.toml
new file mode 100644
index 0000000..c37295f
--- /dev/null
+++ b/starry-sdk/pyproject.toml
@@ -0,0 +1,21 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "starry-client-sdk"
+version = "0.1.0"
+description = "Demo Python SDK with default-on OpenTelemetry metrics and exception logs."
+readme = "README.md"
+requires-python = ">=3.9"
+dependencies = [
+    "requests>=2.32.0,<3.0",
+    "opentelemetry-api==1.41.1",
+    "opentelemetry-sdk==1.41.1",
+    "opentelemetry-exporter-otlp-proto-http==1.41.1",
+    "opentelemetry-instrumentation-logging==0.62b1"
+]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["starry_client_sdk*"]
diff --git a/starry-sdk/starry_client_sdk/__init__.py b/starry-sdk/starry_client_sdk/__init__.py
new file mode 100644
index 0000000..3de61a0
--- /dev/null
+++ b/starry-sdk/starry_client_sdk/__init__.py
@@ -0,0 +1,11 @@
+from .client import StarryClient, StarryNotFoundError, StarrySdkError
+from .telemetry import force_flush
+from .version import __version__
+
+__all__ = [
+    "StarryClient",
+    "StarrySdkError",
+    "StarryNotFoundError",
+    "force_flush",
+    "__version__",
+]
diff --git a/starry-sdk/starry_client_sdk/client.py b/starry-sdk/starry_client_sdk/client.py
new file mode 100644
index 0000000..8e1ec93
--- /dev/null
+++ b/starry-sdk/starry_client_sdk/client.py
@@ -0,0 +1,141 @@
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass
+from typing import Any, Optional
+
+import requests
+
+from .telemetry import SDK_NAME, configure_telemetry
+from .version import __version__
+
+DEFAULT_BASE_URL = "https://blog.starryskymeow.top"
+
+
+class StarrySdkError(Exception):
+    """Base exception for this demo SDK."""
+
+
+class StarryNotFoundError(StarrySdkError):
+    """Raised when the upstream service returns HTTP 404."""
+
+    def __init__(self, *, path: str, url: str) -> None:
+        self.path = path
+        self.url = url
+        self.status_code = 404
+        super().__init__(f"Resource not found: path={path!r}, url={url!r}")
+
+
+@dataclass(frozen=True)
+class _CallContext:
+    """Immutable description of one SDK call, used to label telemetry."""
+
+    path: str
+    url: str
+    sdk_interface: str = "get"
+    http_method: str = "GET"
+
+
+def _normalize_path(path: str) -> str:
+    """Return ``path`` as a string without leading slashes; reject ``None``."""
+    if path is None:
+        raise ValueError("path must be a string, got None")
+    return str(path).lstrip("/")
+
+
+def _metric_path(path: str) -> str:
+    """Return the path label used in telemetry attributes."""
+    # Keep metrics low-cardinality: drop query strings and collapse an empty path to "/".
+    normalized = _normalize_path(path).split("?", 1)[0]
+    return f"/{normalized}" if normalized else "/"
+
+
+def _build_url(base_url: str, path: str) -> str:
+    """Join ``base_url`` and ``path`` with exactly one slash between them."""
+    normalized = _normalize_path(path)
+    if normalized:
+        return f"{base_url.rstrip('/')}/{normalized}"
+    return f"{base_url.rstrip('/')}/"
+
+
+class StarryClient:
+    """Demo client SDK.
+
+    `get(path)` requests `https://blog.starryskymeow.top/{path}` and returns `str`.
+    HTTP 404 is converted to `StarryNotFoundError` and logged through OpenTelemetry.
+    """
+
+    def __init__(
+        self,
+        *,
+        base_url: str = DEFAULT_BASE_URL,
+        timeout_seconds: float = 10.0,
+        sdk_version: str = __version__,
+        service_name: Optional[str] = None,
+    ) -> None:
+        self.base_url = base_url.rstrip("/")
+        self.timeout_seconds = timeout_seconds
+        self.sdk_version = sdk_version
+        self._telemetry = configure_telemetry(service_name=service_name, sdk_version=sdk_version)
+
+    def get(self, path: str = "") -> str:
+        """Fetch ``{base_url}/{path}`` and return the response body as text.
+
+        Raises:
+            ValueError: if ``path`` is ``None``.
+            StarryNotFoundError: if the server answers HTTP 404.
+            requests.RequestException: for transport failures and other HTTP
+                error statuses (via ``raise_for_status``).
+
+        Each attempted request increments the request counter and records the
+        duration histogram; failures also increment the error counter and are logged.
+        """
+        normalized_path = _normalize_path(path)
+        url = _build_url(self.base_url, normalized_path)
+        context = _CallContext(path=normalized_path, url=url)
+
+        attrs: dict[str, Any] = {
+            "sdk_name": SDK_NAME,
+            "sdk_version": self.sdk_version,
+            "sdk_interface": context.sdk_interface,
+            "http_method": context.http_method,
+            "url_path": _metric_path(normalized_path),
+            "outcome": "unknown",
+            "http_status_code": 0,
+        }
+
+        start = time.perf_counter()
+        try:
+            response = requests.get(url, timeout=self.timeout_seconds)
+            attrs["http_status_code"] = response.status_code
+
+            if response.status_code == 404:
+                attrs["outcome"] = "error"
+                attrs["error_type"] = "StarryNotFoundError"
+                raise StarryNotFoundError(path=normalized_path, url=url)
+
+            response.raise_for_status()
+            attrs["outcome"] = "success"
+            return response.text
+        except Exception as exc:
+            attrs["outcome"] = "error"
+            attrs.setdefault("error_type", exc.__class__.__name__)
+            self._telemetry.logger.exception(
+                "Starry SDK request error",
+                extra={
+                    "sdk_name": SDK_NAME,
+                    "sdk_version": self.sdk_version,
+                    "sdk_interface": context.sdk_interface,
+                    "http_method": context.http_method,
+                    "http_status_code": attrs.get("http_status_code", 0),
+                    "url_path": attrs["url_path"],
+                    "error_type": attrs["error_type"],
+                },
+            )
+            raise
+        finally:
+            duration_ms = (time.perf_counter() - start) * 1000.0
+            self._telemetry.request_counter.add(1, attributes=attrs)
+            self._telemetry.duration_histogram.record(duration_ms, attributes=attrs)
+            if attrs.get("outcome") == "error":
+                self._telemetry.error_counter.add(1, attributes=attrs)
diff --git a/starry-sdk/starry_client_sdk/telemetry.py b/starry-sdk/starry_client_sdk/telemetry.py
new file mode 100644
index 0000000..5375bfa
--- /dev/null
+++ b/starry-sdk/starry_client_sdk/telemetry.py
@@ -0,0 +1,233 @@
+from __future__ import annotations
+
+import atexit
+import logging
+import os
+import threading
+from dataclasses import dataclass
+from typing import Any, Optional
+
+from .version import __version__
+
+SDK_NAME = "starry-client-sdk"
+DEFAULT_SERVICE_NAME = "starry-python-sdk-consumer"
+DEFAULT_OTLP_ENDPOINT = "http://host.docker.internal:4318"
+DEFAULT_METRIC_EXPORT_INTERVAL_MS = 5000
+
+_LOCK = threading.Lock()
+_HANDLES: Optional["TelemetryHandles"] = None
+_METER_PROVIDER: Any = None
+_LOGGER_PROVIDER: Any = None
+
+
+class _NoopCounter:
+    """Counter stand-in used when telemetry is disabled or failed to start."""
+
+    def add(self, amount: int | float, attributes: Optional[dict[str, Any]] = None) -> None:
+        return None
+
+
+class _NoopHistogram:
+    """Histogram stand-in used when telemetry is disabled or failed to start."""
+
+    def record(self, amount: int | float, attributes: Optional[dict[str, Any]] = None) -> None:
+        return None
+
+
+@dataclass(frozen=True)
+class TelemetryHandles:
+    """Instruments and logger handed to the client; ``enabled`` is False for no-ops."""
+
+    request_counter: Any
+    error_counter: Any
+    duration_histogram: Any
+    logger: logging.Logger
+    enabled: bool
+
+
+def _truthy(value: str) -> bool:
+    """Return False only for a known "off" spelling; any other text counts as True."""
+    return value.strip().lower() not in {"0", "false", "f", "no", "n", "off", "disabled"}
+
+
+def telemetry_enabled() -> bool:
+    """Return whether ``STARRYSDK_TELEMETRY_ENABLED`` allows telemetry (default: yes)."""
+    return _truthy(os.getenv("STARRYSDK_TELEMETRY_ENABLED", "true"))
+
+
+def _base_endpoint() -> str:
+    """Return the OTLP base URL from the environment, without a trailing slash."""
+    return os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", DEFAULT_OTLP_ENDPOINT).rstrip("/")
+
+
+def _metrics_endpoint() -> str:
+    """Return the explicit metrics endpoint, or the base URL plus ``/v1/metrics``."""
+    return os.getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", f"{_base_endpoint()}/v1/metrics")
+
+
+def _logs_endpoint() -> str:
+    """Return the explicit logs endpoint, or the base URL plus ``/v1/logs``."""
+    return os.getenv("OTEL_EXPORTER_OTLP_LOGS_ENDPOINT", f"{_base_endpoint()}/v1/logs")
+
+
+def _metric_export_interval_ms() -> int:
+    """Return the metric export interval in milliseconds.
+
+    Reads ``STARRYSDK_METRIC_EXPORT_INTERVAL_MS``. A value that is not a positive integer
+    falls back to the default, so one mistyped setting does not switch telemetry off.
+    """
+    raw = os.getenv("STARRYSDK_METRIC_EXPORT_INTERVAL_MS")
+    if raw is None:
+        return DEFAULT_METRIC_EXPORT_INTERVAL_MS
+    try:
+        interval_ms = int(raw)
+    except ValueError:
+        interval_ms = 0
+    if interval_ms <= 0:
+        logging.getLogger("starry_client_sdk.telemetry").debug(
+            "Ignoring invalid STARRYSDK_METRIC_EXPORT_INTERVAL_MS=%r", raw
+        )
+        return DEFAULT_METRIC_EXPORT_INTERVAL_MS
+    return interval_ms
+
+
+def _noop_handles() -> TelemetryHandles:
+    """Build handles whose instruments do nothing."""
+    return TelemetryHandles(
+        request_counter=_NoopCounter(),
+        error_counter=_NoopCounter(),
+        duration_histogram=_NoopHistogram(),
+        logger=logging.getLogger("starry_client_sdk"),
+        enabled=False,
+    )
+
+
+def _discard_partial_setup(logger: logging.Logger) -> None:
+    """Undo a failed setup so no started provider or OTLP handler is left behind."""
+    global _METER_PROVIDER, _LOGGER_PROVIDER
+
+    for handler in list(logger.handlers):
+        if getattr(handler, "_starry_sdk_otel_handler", False):
+            logger.removeHandler(handler)
+    shutdown_telemetry()
+    _METER_PROVIDER = None
+    _LOGGER_PROVIDER = None
+
+
+def configure_telemetry(*, service_name: Optional[str] = None, sdk_version: str = __version__) -> TelemetryHandles:
+    """Configure default-on, non-blocking telemetry for this SDK.
+
+    This demo intentionally configures local OpenTelemetry providers owned by the SDK, so the SDK can
+    be observable by default without overwriting an application's global OpenTelemetry configuration.
+    Export failures must never break business calls; if setup fails, the SDK falls back to no-op meters.
+
+    The first call decides the configuration for the whole process: later calls return the same
+    handles and ignore their arguments.
+    """
+
+    global _HANDLES, _METER_PROVIDER, _LOGGER_PROVIDER
+
+    with _LOCK:
+        if _HANDLES is not None:
+            return _HANDLES
+
+        logger = logging.getLogger("starry_client_sdk")
+        logger.setLevel(logging.INFO)
+        # Library convention: with a NullHandler attached, SDK log records never fall through to
+        # logging's stderr "last resort" handler when the application has not configured logging.
+        if not logger.handlers:
+            logger.addHandler(logging.NullHandler())
+
+        if not telemetry_enabled():
+            _HANDLES = _noop_handles()
+            return _HANDLES
+
+        try:
+            from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
+            from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
+            from opentelemetry.instrumentation.logging.handler import LoggingHandler
+            from opentelemetry.sdk._logs import LoggerProvider
+            from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
+            from opentelemetry.sdk.metrics import MeterProvider
+            from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+            from opentelemetry.sdk.resources import Resource
+
+            service_name = service_name or os.getenv("STARRYSDK_SERVICE_NAME", DEFAULT_SERVICE_NAME)
+            export_interval_ms = _metric_export_interval_ms()
+
+            resource = Resource.create(
+                {
+                    "service.name": service_name,
+                    "sdk.name": SDK_NAME,
+                    "sdk.version": sdk_version,
+                    "telemetry.source": "client-sdk",
+                }
+            )
+
+            metric_exporter = OTLPMetricExporter(endpoint=_metrics_endpoint())
+            metric_reader = PeriodicExportingMetricReader(
+                metric_exporter,
+                export_interval_millis=export_interval_ms,
+            )
+            _METER_PROVIDER = MeterProvider(resource=resource, metric_readers=[metric_reader])
+            meter = _METER_PROVIDER.get_meter(SDK_NAME, sdk_version)
+
+            _LOGGER_PROVIDER = LoggerProvider(resource=resource)
+            log_exporter = OTLPLogExporter(endpoint=_logs_endpoint())
+            _LOGGER_PROVIDER.add_log_record_processor(BatchLogRecordProcessor(log_exporter))
+
+            # Attach only one OTLP handler to the SDK logger. Do not attach to root logger.
+            if not any(getattr(handler, "_starry_sdk_otel_handler", False) for handler in logger.handlers):
+                otel_handler = LoggingHandler(level=logging.INFO, logger_provider=_LOGGER_PROVIDER)
+                setattr(otel_handler, "_starry_sdk_otel_handler", True)
+                logger.addHandler(otel_handler)
+
+            _HANDLES = TelemetryHandles(
+                request_counter=meter.create_counter(
+                    "starry.sdk.client.requests",
+                    unit="1",
+                    description="Total SDK client calls.",
+                ),
+                error_counter=meter.create_counter(
+                    "starry.sdk.client.errors",
+                    unit="1",
+                    description="Total SDK client calls ending in an exception.",
+                ),
+                duration_histogram=meter.create_histogram(
+                    "starry.sdk.client.request.duration.ms",
+                    unit="ms",
+                    description="SDK client call latency in milliseconds.",
+                ),
+                logger=logger,
+                enabled=True,
+            )
+            atexit.register(shutdown_telemetry)
+            return _HANDLES
+        except Exception:  # Telemetry must not break SDK business behavior.
+            logging.getLogger("starry_client_sdk.telemetry").debug("SDK telemetry setup failed", exc_info=True)
+            _discard_partial_setup(logger)
+            _HANDLES = _noop_handles()
+            return _HANDLES
+
+
+def force_flush(timeout_millis: int = 5000) -> None:
+    """Flush telemetry buffers. Useful in short-lived CLI/demo processes."""
+
+    for provider in (_METER_PROVIDER, _LOGGER_PROVIDER):
+        if provider is None:
+            continue
+        try:
+            provider.force_flush(timeout_millis=timeout_millis)
+        except Exception:
+            logging.getLogger("starry_client_sdk.telemetry").debug("Telemetry force_flush failed", exc_info=True)
+
+
+def shutdown_telemetry() -> None:
+    """Shut down the SDK-owned providers; failures are logged at debug level and swallowed."""
+    for provider in (_METER_PROVIDER, _LOGGER_PROVIDER):
+        if provider is None:
+            continue
+        try:
+            provider.shutdown()
+        except Exception:
+            logging.getLogger("starry_client_sdk.telemetry").debug("Telemetry shutdown failed", exc_info=True)
diff --git a/starry-sdk/starry_client_sdk/version.py b/starry-sdk/starry_client_sdk/version.py
new file mode 100644
index 0000000..3dc1f76
--- /dev/null
+++ b/starry-sdk/starry_client_sdk/version.py
@@ -0,0 +1 @@
+__version__ = "0.1.0"