demo with sdk and backend

This commit is contained in:
xkm
2026-04-30 00:48:34 +08:00
commit 024bbfc79f
12 changed files with 603 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.DS_Store

15
demo-app/Dockerfile Normal file
View File

@@ -0,0 +1,15 @@
# Image for the demo application; the Python 3.12 slim base is pulled from the docker.1ms.run registry.
FROM docker.1ms.run/python:3.12-slim
# Do not write .pyc files, and keep stdout/stderr unbuffered so container logs show up immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
WORKDIR /app
# Source paths are relative to the build context (docker-compose.yml builds with `context: .`).
COPY starry-sdk /app/starry-sdk
COPY demo-app/requirements.txt /app/requirements.txt
# Install the app requirements first, then the local SDK in editable mode together with its own dependencies.
RUN pip install --no-cache-dir -r /app/requirements.txt && \
    pip install --no-cache-dir -e /app/starry-sdk
COPY demo-app/main.py /app/main.py
CMD ["python", "/app/main.py"]

46
demo-app/main.py Normal file
View File

@@ -0,0 +1,46 @@
from __future__ import annotations
import os
import time
from starry_client_sdk import StarryClient, StarryNotFoundError, force_flush
def _bool_env(name: str, default: bool) -> bool:
    """Read a boolean flag from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        ``False`` for the usual "off" spellings (``0``, ``false``, ``f``, ``no``,
        ``n``, ``off``; case-insensitive, surrounding whitespace ignored) and
        ``True`` for any other non-blank value.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    value = raw.strip().lower()
    if not value:
        # `VAR=` (set but empty) expresses no choice; honour the caller's default
        # instead of silently switching the flag on.
        return default
    return value not in {"0", "false", "f", "no", "n", "off"}
def _paths() -> list[str]:
    """Return the request paths configured through ``DEMO_PATHS``.

    The variable holds a comma-separated list. Every entry is stripped of
    surrounding whitespace and empty entries are kept. When the variable is
    unset the list is ``["", "__sdk_demo_not_found__"]``.
    """
    configured = os.getenv("DEMO_PATHS", ",__sdk_demo_not_found__")
    return list(map(str.strip, configured.split(",")))
def main() -> None:
    """Request every configured path through the SDK, once or in an endless loop.

    Environment:
        DEMO_LOOP: repeat the round after a pause (default on).
        DEMO_INTERVAL_SECONDS: pause between rounds in seconds (default ``5``).
        DEMO_PATHS: comma-separated paths to request.

    Each request prints one ``OK`` / ``NOT_FOUND`` / ``ERROR`` line; no exception
    raised by the SDK call escapes the loop.
    """
    sdk = StarryClient()
    repeat = _bool_env("DEMO_LOOP", True)
    pause_seconds = float(os.getenv("DEMO_INTERVAL_SECONDS", "5"))
    targets = _paths()

    keep_going = True
    while keep_going:
        for target in targets:
            try:
                body = sdk.get(target)
                print(f"OK path={target!r} bytes={len(body)}")
            except StarryNotFoundError as exc:
                print(f"NOT_FOUND path={target!r} error={exc}")
            except Exception as exc:
                print(f"ERROR path={target!r} type={exc.__class__.__name__} error={exc}")

        # Demo app is short-looping, so flush to make logs/metrics visible quickly.
        force_flush()

        keep_going = repeat
        if keep_going:
            time.sleep(pause_seconds)
# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1 @@
# SDK dependencies are declared in starry-sdk/pyproject.toml.

123
docker-compose.yml Normal file
View File

@@ -0,0 +1,123 @@
services:
# Fluent Bit: publishes port 4318 (the OTLP/HTTP input in fluent-bit.conf) and starts after vlagent.
fluentbit:
  depends_on:
    vlagent:
      condition: service_started
      required: true
  image: cr.fluentbit.io/fluent/fluent-bit:3.1.7
  restart: always
  networks:
    default: null
  ports:
    - 4318:4318
  volumes:
    # Only the main config file is mounted into the image's config directory.
    - ./fluent-bit.conf:/fluent-bit/etc/fluent-bit.conf
# VictoriaLogs single node: stores logs under /vlogs and publishes its HTTP port 9428.
victorialogs-1:
  command:
    - -storageDataPath=/vlogs
    - -loggerFormat=json
    - -datadog.streamFields=service,hostname,ddsource
    - -journald.streamFields=_HOSTNAME,_SYSTEMD_UNIT,_PID
    - -journald.ignoreFields=MESSAGE_ID,INVOCATION_ID,USER_INVOCATION_ID
    - -journald.ignoreFields=_BOOT_ID,_MACHINE_ID,_SYSTEMD_INVOCATION_ID,_STREAM_ID,_UID
  deploy:
    replicas: 1
  # vlagent waits for this check to pass (its depends_on uses `service_healthy`).
  healthcheck:
    test:
      - CMD
      - wget
      - -qO-
      - http://127.0.0.1:9428/health
    timeout: 1s
    interval: 1s
    retries: 10
  image: docker.io/victoriametrics/victoria-logs:v1.50.0
  networks:
    default: null
  ports:
    - mode: ingress
      target: 9428
      published: "9428"
      protocol: tcp
  volumes:
    - type: volume
      source: victorialogs-1
      target: /vlogs
      volume: {}
# VictoriaMetrics single node: stores metrics under /vmsingle and publishes port 8428
# (fluent-bit.conf writes to it via /api/v1/write).
victoriametrics:
  command:
    - -storageDataPath=/vmsingle
    - -loggerFormat=json
  healthcheck:
    test:
      - CMD
      - wget
      - -qO-
      - http://127.0.0.1:8428/health
    timeout: 1s
    interval: 1s
    retries: 10
  image: victoriametrics/victoria-metrics:v1.132.0
  networks:
    default: null
  ports:
    - 8428:8428
  volumes:
    - type: volume
      source: victoriametrics
      target: /vmsingle
      volume: {}
# vlagent: buffers received logs under /vlagent and forwards them to victorialogs-1.
# Its HTTP port 9429 is only reachable inside the compose network (no `ports` entry).
vlagent:
  command:
    - --remoteWrite.tmpDataPath=/vlagent
    - --remoteWrite.url=http://victorialogs-1:9428/insert/native
    - --syslog.listenAddr.tcp=0.0.0.0:8094
  # Start only once VictoriaLogs reports healthy.
  depends_on:
    victorialogs-1:
      condition: service_healthy
      required: true
  healthcheck:
    test:
      - CMD
      - wget
      - -qO-
      - http://127.0.0.1:9429/health
    timeout: 1s
    interval: 1s
    retries: 10
  image: victoriametrics/vlagent:v1.50.0
  networks:
    default: null
  volumes:
    - type: volume
      source: vlagent
      target: /vlagent
      volume: {}
# Demo application built from demo-app/Dockerfile; it calls the SDK in a loop and exports telemetry over OTLP/HTTP.
demo-app:
  build:
    context: .
    dockerfile: demo-app/Dockerfile
  environment:
    STARRYSDK_TELEMETRY_ENABLED: "true"
    STARRYSDK_SERVICE_NAME: starry-python-sdk-demo
    STARRYSDK_METRIC_EXPORT_INTERVAL_MS: "5000"
    # The SDK reads OTEL_EXPORTER_OTLP_ENDPOINT; DEFAULT_OTLP_ENDPOINT is only a Python constant
    # inside the SDK, so setting it as an environment variable had no effect.
    # Point the exporter straight at the fluentbit service on the compose network.
    OTEL_EXPORTER_OTLP_ENDPOINT: "http://fluentbit:4318"
    DEMO_LOOP: "true"
    DEMO_INTERVAL_SECONDS: "5"
    DEMO_PATHS: ",__sdk_demo_not_found__"
  # Kept so the SDK's built-in default endpoint (host.docker.internal:4318) still resolves
  # if the variable above is removed.
  extra_hosts:
    - "host.docker.internal:host-gateway"
networks:
  default:
    name: fluentbit-oltp_default
# All three volumes are declared `external`: Compose does not create them, so they must
# already exist under these names before `docker compose up`.
volumes:
  victorialogs-1:
    name: fluentbit-oltp_victorialogs-1
    external: true
  victoriametrics:
    name: fluentbit-oltp_victoriametrics
    external: true
  vlagent:
    name: fluentbit-oltp_vlagent
    external: true

69
fluent-bit.conf Normal file
View File

@@ -0,0 +1,69 @@
[SERVICE]
    Flush 1
    Log_Level info
    Parsers_File parsers.conf
    # Built-in HTTP server for inspecting Fluent Bit's own state while debugging; do not expose it to the public internet.
    HTTP_Server On
    HTTP_Listen 0.0.0.0
    HTTP_Port 2020
    # Optional: directory for Fluent Bit's filesystem buffering; meant to be paired with a volume in the compose file.
    storage.path /buffers
    storage.sync normal
    storage.checksum off
    storage.backlog.mem_limit 64M
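# The Python SDK / OpenTelemetry SDK sends its telemetry here:
# http://vmauth:8427/v1/logs
# http://vmauth:8427/v1/metrics
# vmauth forwards those requests to fluentbit:4318.
# NOTE: the accompanying docker-compose.yml defines no vmauth service; in that setup
# clients reach this input on port 4318 directly.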
[INPUT]
    Name opentelemetry
    Listen 0.0.0.0
    Port 4318
    # OTLP/HTTP clients usually expect 200; Fluent Bit's default is 201.
    Successful_Response_Code 200
    # Keep the default behaviour: /v1/logs -> tag v1_logs, /v1/metrics -> tag v1_metrics.
    Tag_From_Uri true
    Buffer_Chunk_Size 1M
    Buffer_Max_Size 10M
    Threaded On
# Metrics sent by the Python SDK -> VictoriaMetrics remote_write.
[OUTPUT]
    Name prometheus_remote_write
    # Route only the metrics stream: the OTLP input tags /v1/metrics requests as v1_metrics
    # (Tag_From_Uri), so there is no need to match every record with `*`.
    Match v1_metrics
    Host victoriametrics
    Port 8428
    Uri /api/v1/write
    # Optional common label that makes the source of the series easy to identify.
    Add_Label otel_pipeline fluent-bit
    Workers 2
# Logs sent by the Python SDK -> vlagent -> VictoriaLogs.
[OUTPUT]
    Name opentelemetry
    # Route only the log stream (tag v1_logs from Tag_From_Uri). With `Match *` this output
    # also received the v1_metrics stream and tried to deliver metrics to vlagent,
    # which is configured here purely as a log endpoint.
    Match v1_logs
    Host vlagent
    Port 9429
    Logs_Uri /insert/opentelemetry/v1/logs
    # Field mapping used by VictoriaLogs when ingesting the logs.
    # The Python OTel log body is usually handled automatically; listing several candidate fields is more robust.
    Header VL-Msg-Field body,Body,message,msg,log,_msg
    # Use only low-cardinality fields as stream fields, to avoid a stream explosion from fields such as path or status_code.
    Header VL-Stream-Fields service.name,service.namespace,service.version,deployment.environment,host.name
    Compress gzip
    Workers 2
# Debug aid: additionally print every record to Fluent Bit's own stdout.
[OUTPUT]
    Name stdout
    Match *

12
starry-sdk/README.md Normal file
View File

@@ -0,0 +1,12 @@
# starry-client-sdk demo
Demo SDK: request `https://blog.starryskymeow.top/{path}`, return response text, and raise `StarryNotFoundError` on HTTP 404. Telemetry is enabled by default and exported with OTLP/HTTP.
Environment variables:
- `STARRYSDK_TELEMETRY_ENABLED`: default `true`; set `false` to disable SDK telemetry.
- `STARRYSDK_SERVICE_NAME`: default `starry-python-sdk-consumer`.
- `STARRYSDK_METRIC_EXPORT_INTERVAL_MS`: default `5000`.
- `OTEL_EXPORTER_OTLP_ENDPOINT`: default `http://host.docker.internal:4318`; the SDK appends `/v1/metrics` and `/v1/logs` for OTLP/HTTP.
- `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`: optional explicit metrics endpoint.
- `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT`: optional explicit logs endpoint.

21
starry-sdk/pyproject.toml Normal file
View File

@@ -0,0 +1,21 @@
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "starry-client-sdk"
# Duplicated in the package's version module (__version__); keep both in sync.
version = "0.1.0"
description = "Demo Python SDK with default-on OpenTelemetry metrics and exception logs."
readme = "README.md"
requires-python = ">=3.9"
dependencies = [
    "requests>=2.32.0,<3.0",
    # The OpenTelemetry packages are pinned to exact releases.
    "opentelemetry-api==1.41.1",
    "opentelemetry-sdk==1.41.1",
    "opentelemetry-exporter-otlp-proto-http==1.41.1",
    "opentelemetry-instrumentation-logging==0.62b1"
]
[tool.setuptools.packages.find]
where = ["."]
include = ["starry_client_sdk*"]

View File

@@ -0,0 +1,11 @@
from .client import StarryClient, StarryNotFoundError, StarrySdkError
from .telemetry import force_flush
from .version import __version__
# Names re-exported as the package's public API.
__all__ = [
    "StarryClient",
    "StarrySdkError",
    "StarryNotFoundError",
    "force_flush",
    "__version__",
]

View File

@@ -0,0 +1,125 @@
from __future__ import annotations
import time
from dataclasses import dataclass
from typing import Any, Optional
import requests
from .telemetry import SDK_NAME, configure_telemetry
from .version import __version__
# Base URL used when StarryClient is constructed without an explicit `base_url`.
DEFAULT_BASE_URL = "https://blog.starryskymeow.top"
class StarrySdkError(Exception):
    """Base exception for this demo SDK.

    SDK-specific errors such as `StarryNotFoundError` derive from it.
    """
class StarryNotFoundError(StarrySdkError):
    """Signals that the upstream service answered a request with HTTP 404.

    Attributes:
        path: Path that was requested.
        url: Full URL that was requested.
        status_code: Always ``404``.
    """

    def __init__(self, *, path: str, url: str) -> None:
        message = f"Resource not found: path={path!r}, url={url!r}"
        super().__init__(message)
        self.status_code = 404
        self.url = url
        self.path = path
@dataclass(frozen=True)
class _CallContext:
    """Immutable description of a single SDK call."""

    # Request path as passed by the caller of this dataclass (StarryClient.get passes the normalized path).
    path: str
    # Full request URL.
    url: str
    # Name of the SDK method being executed; reported as `sdk_interface` in telemetry.
    sdk_interface: str = "get"
    # HTTP verb of the request; reported as `http_method` in telemetry.
    http_method: str = "GET"
def _normalize_path(path: str) -> str:
    """Coerce *path* to ``str`` and strip every leading ``/``.

    Raises:
        ValueError: If *path* is ``None``.
    """
    if path is not None:
        return str(path).lstrip("/")
    raise ValueError("path must be a string, got None")
def _metric_path(path: str) -> str:
    """Return the path label used on metrics and log records.

    Keeps the label low-cardinality: the query string and the fragment are
    dropped, and an empty path collapses to ``"/"``.

    Args:
        path: Request path, with or without leading slashes.

    Returns:
        The path with exactly one leading ``/`` and without ``?query`` or
        ``#fragment``.

    Raises:
        ValueError: If *path* is ``None`` (raised by ``_normalize_path``).
    """
    normalized = _normalize_path(path)
    # A fragment is never part of the requested resource, and a query string
    # multiplies label values; cut at whichever delimiter comes first.
    normalized = normalized.split("?", 1)[0].split("#", 1)[0]
    return f"/{normalized}" if normalized else "/"
def _build_url(base_url: str, path: str) -> str:
    """Join *base_url* and *path* with exactly one ``/`` between them.

    Trailing slashes of *base_url* and leading slashes of *path* are removed
    before joining; an empty path therefore yields the base URL followed by a
    single ``/``.
    """
    suffix = _normalize_path(path)
    root = base_url.rstrip("/")
    return f"{root}/{suffix}"
class StarryClient:
    """Demo client SDK.

    `get(path)` requests `https://blog.starryskymeow.top/{path}` and returns `str`.
    HTTP 404 is converted to `StarryNotFoundError` and logged through OpenTelemetry.
    """

    def __init__(
        self,
        *,
        base_url: str = DEFAULT_BASE_URL,
        timeout_seconds: float = 10.0,
        sdk_version: str = __version__,
        service_name: Optional[str] = None,
    ) -> None:
        """Create a client.

        Args:
            base_url: Root URL of the upstream service; trailing slashes are removed.
            timeout_seconds: Timeout passed to every HTTP request.
            sdk_version: Version string attached to telemetry.
            service_name: Optional service name forwarded to ``configure_telemetry``.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout_seconds = timeout_seconds
        self.sdk_version = sdk_version
        self._telemetry = configure_telemetry(service_name=service_name, sdk_version=sdk_version)

    def get(self, path: str = "") -> str:
        """Fetch ``{base_url}/{path}`` and return the response body as text.

        Every call records a request count and a duration; failed calls also
        record an error count and an exception log record.

        Args:
            path: Path below the base URL; leading slashes are ignored.

        Returns:
            The response text.

        Raises:
            ValueError: If *path* is ``None``.
            StarryNotFoundError: If the upstream service answers HTTP 404.
            Exception: Any error raised by ``requests`` (connection problems,
                timeouts, non-404 error statuses) is logged and re-raised unchanged.
        """
        normalized_path = _normalize_path(path)
        url = _build_url(self.base_url, normalized_path)
        context = _CallContext(path=normalized_path, url=url)
        attrs: dict[str, Any] = {
            "sdk_name": SDK_NAME,
            "sdk_version": self.sdk_version,
            "sdk_interface": context.sdk_interface,
            "http_method": context.http_method,
            "url_path": _metric_path(normalized_path),
            "outcome": "unknown",
            "http_status_code": 0,
        }
        start = time.perf_counter()
        try:
            response = requests.get(url, timeout=self.timeout_seconds)
            attrs["http_status_code"] = response.status_code
            if response.status_code == 404:
                # Set before raising so the generic handler below keeps this name.
                attrs["error_type"] = "StarryNotFoundError"
                raise StarryNotFoundError(path=normalized_path, url=url)
            response.raise_for_status()
            attrs["outcome"] = "success"
            return response.text
        except Exception as exc:
            attrs["outcome"] = "error"
            attrs.setdefault("error_type", exc.__class__.__name__)
            self._log_failure(context, attrs)
            raise
        finally:
            duration_ms = (time.perf_counter() - start) * 1000.0
            self._record_metrics(attrs, duration_ms)

    def _log_failure(self, context: _CallContext, attrs: dict[str, Any]) -> None:
        """Emit the exception log record; call only from inside an ``except`` block."""
        try:
            self._telemetry.logger.exception(
                "Starry SDK request error",
                extra={
                    "sdk_name": SDK_NAME,
                    "sdk_version": self.sdk_version,
                    "sdk_interface": context.sdk_interface,
                    "http_method": context.http_method,
                    "http_status_code": attrs.get("http_status_code", 0),
                    "url_path": attrs["url_path"],
                    "error_type": attrs["error_type"],
                },
            )
        except Exception:
            # Deliberate best effort: a failing log handler must not replace the
            # business exception that the caller is about to re-raise.
            pass

    def _record_metrics(self, attrs: dict[str, Any], duration_ms: float) -> None:
        """Record request count, duration and (for failures) error count."""
        try:
            self._telemetry.request_counter.add(1, attributes=attrs)
            self._telemetry.duration_histogram.record(duration_ms, attributes=attrs)
            if attrs.get("outcome") == "error":
                self._telemetry.error_counter.add(1, attributes=attrs)
        except Exception:
            # Deliberate best effort: this runs in a `finally` block, where an
            # exception would discard the return value or mask the real error.
            pass

View File

@@ -0,0 +1,178 @@
from __future__ import annotations
import atexit
import logging
import os
import threading
from dataclasses import dataclass
from typing import Any, Optional
from .version import __version__
# Identifier of this SDK; used as the meter name and as the `sdk.name` / `sdk_name` attribute.
SDK_NAME = "starry-client-sdk"
# `service.name` used when neither the caller nor STARRYSDK_SERVICE_NAME supplies one.
DEFAULT_SERVICE_NAME = "starry-python-sdk-consumer"
# OTLP/HTTP base endpoint used when OTEL_EXPORTER_OTLP_ENDPOINT is not set.
DEFAULT_OTLP_ENDPOINT = "http://host.docker.internal:4318"
# Serializes configure_telemetry() so the setup runs once per process.
_LOCK = threading.Lock()
# Cached result of configure_telemetry(); None until the first call.
_HANDLES: Optional["TelemetryHandles"] = None
# Providers created by configure_telemetry(); read by force_flush() and shutdown_telemetry().
_METER_PROVIDER: Any = None
_LOGGER_PROVIDER: Any = None
class _NoopCounter:
    """Counter stand-in that accepts and discards every measurement."""

    def add(self, amount: int | float, attributes: Optional[dict[str, Any]] = None) -> None:
        """Ignore *amount* and *attributes*."""
class _NoopHistogram:
    """Histogram stand-in that accepts and discards every measurement."""

    def record(self, amount: int | float, attributes: Optional[dict[str, Any]] = None) -> None:
        """Ignore *amount* and *attributes*."""
@dataclass(frozen=True)
class TelemetryHandles:
    """Immutable bundle of the instruments and logger the SDK records telemetry with."""

    # Counts every SDK call; exposes `add(amount, attributes=...)`.
    request_counter: Any
    # Counts SDK calls that ended in an exception; exposes `add(amount, attributes=...)`.
    error_counter: Any
    # Records call latency in milliseconds; exposes `record(amount, attributes=...)`.
    duration_histogram: Any
    # The "starry_client_sdk" logger used for exception records.
    logger: logging.Logger
    # False when the instruments are no-ops (telemetry disabled or setup failed).
    enabled: bool
def _truthy(value: str) -> bool:
    """Return ``False`` only for a recognized "off" spelling.

    The comparison ignores case and surrounding whitespace; every other value,
    including the empty string, counts as true.
    """
    normalized = value.strip().lower()
    switched_off = normalized in {"0", "false", "f", "no", "n", "off", "disabled"}
    return not switched_off
def telemetry_enabled() -> bool:
    """Report whether SDK telemetry is switched on.

    Reads ``STARRYSDK_TELEMETRY_ENABLED``; an unset variable means enabled.
    """
    flag = os.getenv("STARRYSDK_TELEMETRY_ENABLED", "true")
    return _truthy(flag)
def _base_endpoint() -> str:
    """Return the OTLP/HTTP base endpoint without a trailing slash.

    Uses ``OTEL_EXPORTER_OTLP_ENDPOINT`` when it holds a non-blank value and
    ``DEFAULT_OTLP_ENDPOINT`` otherwise.
    """
    # An empty value (e.g. `OTEL_EXPORTER_OTLP_ENDPOINT=`) is treated like an unset
    # variable; otherwise the signal endpoints would degrade to a bare "/v1/...".
    configured = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "").strip()
    return (configured or DEFAULT_OTLP_ENDPOINT).rstrip("/")
def _metrics_endpoint() -> str:
    """Return the full OTLP/HTTP metrics endpoint.

    A non-blank ``OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`` is used verbatim;
    otherwise ``/v1/metrics`` is appended to the base endpoint.
    """
    # Blank means "not configured": fall back instead of exporting to an empty URL.
    explicit = os.getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "").strip()
    return explicit or f"{_base_endpoint()}/v1/metrics"
def _logs_endpoint() -> str:
    """Return the full OTLP/HTTP logs endpoint.

    A non-blank ``OTEL_EXPORTER_OTLP_LOGS_ENDPOINT`` is used verbatim;
    otherwise ``/v1/logs`` is appended to the base endpoint.
    """
    # Blank means "not configured": fall back instead of exporting to an empty URL.
    explicit = os.getenv("OTEL_EXPORTER_OTLP_LOGS_ENDPOINT", "").strip()
    return explicit or f"{_base_endpoint()}/v1/logs"
def _noop_handles() -> TelemetryHandles:
    """Build handles whose instruments discard every measurement.

    The logger is the plain ``starry_client_sdk`` logger and ``enabled`` is
    ``False``.
    """
    sdk_logger = logging.getLogger("starry_client_sdk")
    requests_noop = _NoopCounter()
    errors_noop = _NoopCounter()
    duration_noop = _NoopHistogram()
    return TelemetryHandles(
        enabled=False,
        logger=sdk_logger,
        duration_histogram=duration_noop,
        error_counter=errors_noop,
        request_counter=requests_noop,
    )
def configure_telemetry(*, service_name: Optional[str] = None, sdk_version: str = __version__) -> TelemetryHandles:
    """Configure default-on, non-blocking telemetry for this SDK.

    This demo intentionally configures local OpenTelemetry providers owned by the SDK, so the SDK can
    be observable by default without overwriting an application's global OpenTelemetry configuration.
    Export failures must never break business calls; if setup fails, the SDK falls back to no-op meters.

    The first call fixes the configuration for the whole process; later calls return the cached
    handles and ignore their arguments.

    Args:
        service_name: Value for the ``service.name`` resource attribute. Falls back to
            ``STARRYSDK_SERVICE_NAME`` and then to ``DEFAULT_SERVICE_NAME``.
        sdk_version: Version reported in the resource and used as the meter version.

    Returns:
        Live handles when telemetry is enabled and setup succeeded, no-op handles otherwise.
    """
    global _HANDLES, _METER_PROVIDER, _LOGGER_PROVIDER
    with _LOCK:
        if _HANDLES is not None:
            return _HANDLES
        logger = logging.getLogger("starry_client_sdk")
        logger.setLevel(logging.INFO)
        if not telemetry_enabled():
            _HANDLES = _noop_handles()
            return _HANDLES

        # Build everything in locals and publish to the module globals only after the
        # whole setup succeeded, so a failure halfway never leaves a half-configured
        # provider behind for force_flush()/shutdown_telemetry().
        meter_provider: Any = None
        logger_provider: Any = None
        try:
            from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
            from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
            from opentelemetry.instrumentation.logging.handler import LoggingHandler
            from opentelemetry.sdk._logs import LoggerProvider
            from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
            from opentelemetry.sdk.metrics import MeterProvider
            from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
            from opentelemetry.sdk.resources import Resource

            service_name = service_name or os.getenv("STARRYSDK_SERVICE_NAME", DEFAULT_SERVICE_NAME)
            export_interval_ms = int(os.getenv("STARRYSDK_METRIC_EXPORT_INTERVAL_MS", "5000"))
            resource = Resource.create(
                {
                    "service.name": service_name,
                    "sdk.name": SDK_NAME,
                    "sdk.version": sdk_version,
                    "telemetry.source": "client-sdk",
                }
            )

            metric_exporter = OTLPMetricExporter(endpoint=_metrics_endpoint())
            metric_reader = PeriodicExportingMetricReader(
                metric_exporter,
                export_interval_millis=export_interval_ms,
            )
            meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
            meter = meter_provider.get_meter(SDK_NAME, sdk_version)

            logger_provider = LoggerProvider(resource=resource)
            log_exporter = OTLPLogExporter(endpoint=_logs_endpoint())
            logger_provider.add_log_record_processor(BatchLogRecordProcessor(log_exporter))

            handles = TelemetryHandles(
                request_counter=meter.create_counter(
                    "starry.sdk.client.requests",
                    unit="1",
                    description="Total SDK client calls.",
                ),
                error_counter=meter.create_counter(
                    "starry.sdk.client.errors",
                    unit="1",
                    description="Total SDK client calls ending in an exception.",
                ),
                duration_histogram=meter.create_histogram(
                    "starry.sdk.client.request.duration.ms",
                    unit="ms",
                    description="SDK client call latency in milliseconds.",
                ),
                logger=logger,
                enabled=True,
            )

            # Attach only one OTLP handler to the SDK logger. Do not attach to root logger.
            # Done as the last fallible step so a failed setup never leaves a handler
            # bound to a provider that is shut down below.
            if not any(getattr(handler, "_starry_sdk_otel_handler", False) for handler in logger.handlers):
                otel_handler = LoggingHandler(level=logging.INFO, logger_provider=logger_provider)
                setattr(otel_handler, "_starry_sdk_otel_handler", True)
                logger.addHandler(otel_handler)
        except Exception:  # Telemetry must not break SDK business behavior.
            diagnostics = logging.getLogger("starry_client_sdk.telemetry")
            # WARNING rather than DEBUG: this logger inherits the INFO level set on its
            # parent above, so a DEBUG record would be dropped and the failure never seen.
            diagnostics.warning("SDK telemetry setup failed; continuing without telemetry", exc_info=True)
            # Stop whatever was started before the failure so nothing keeps exporting.
            for provider in (meter_provider, logger_provider):
                if provider is None:
                    continue
                try:
                    provider.shutdown()
                except Exception:
                    diagnostics.debug("Telemetry shutdown failed", exc_info=True)
            _HANDLES = _noop_handles()
            return _HANDLES

        _METER_PROVIDER = meter_provider
        _LOGGER_PROVIDER = logger_provider
        _HANDLES = handles
        atexit.register(shutdown_telemetry)
        return _HANDLES
def force_flush(timeout_millis: int = 5000) -> None:
    """Push buffered telemetry out now; handy for short-lived CLI/demo processes.

    Each configured provider is flushed in turn (meter provider first). A
    failure is logged at debug level and never raised to the caller.

    Args:
        timeout_millis: Timeout handed to each provider's ``force_flush``.
    """
    configured = filter(lambda candidate: candidate is not None, (_METER_PROVIDER, _LOGGER_PROVIDER))
    for provider in configured:
        try:
            provider.force_flush(timeout_millis=timeout_millis)
        except Exception:
            diagnostics = logging.getLogger("starry_client_sdk.telemetry")
            diagnostics.debug("Telemetry force_flush failed", exc_info=True)
def shutdown_telemetry() -> None:
    """Shut down the SDK-owned providers (meter provider first).

    Registered with ``atexit`` by ``configure_telemetry``. A failure is logged
    at debug level and never raised to the caller.
    """
    configured = filter(lambda candidate: candidate is not None, (_METER_PROVIDER, _LOGGER_PROVIDER))
    for provider in configured:
        try:
            provider.shutdown()
        except Exception:
            diagnostics = logging.getLogger("starry_client_sdk.telemetry")
            diagnostics.debug("Telemetry shutdown failed", exc_info=True)

View File

@@ -0,0 +1 @@
# SDK version reported in telemetry; pyproject.toml declares the same value for the package.
__version__ = "0.1.0"