Monitoring
Application Performance Monitoring
- Logging Configuration
python
# logging_config.py
import logging
import logging.config
import json
from datetime import datetime
# Formatters. Only 'json' is referenced by the handlers below; 'standard' is
# defined but not attached to any handler in this configuration.
_FORMATTERS = {
    'json': {
        'class': 'pythonjsonlogger.jsonlogger.JsonFormatter',
        'format': '%(asctime)s %(name)s %(levelname)s %(message)s',
    },
    'standard': {
        'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    },
}

# Handlers: console output at INFO and a size-rotated log file at DEBUG.
_HANDLERS = {
    'console': {
        'class': 'logging.StreamHandler',
        'formatter': 'json',
        'level': 'INFO',
    },
    'file': {
        'class': 'logging.handlers.RotatingFileHandler',
        'filename': '/var/log/myapp/app.log',
        'maxBytes': 10 * 1024 * 1024,  # 10MB
        'backupCount': 5,
        'formatter': 'json',
        'level': 'DEBUG',
    },
}

# Loggers: the '' entry and the 'myapp' entry both write to every handler
# defined above; they differ only in their level threshold.
_LOGGERS = {
    '': {
        'handlers': list(_HANDLERS),
        'level': 'INFO',
        'propagate': False,
    },
    'myapp': {
        'handlers': list(_HANDLERS),
        'level': 'DEBUG',
        'propagate': False,
    },
}

# Complete dictConfig schema (version 1); loggers created before this call
# stay enabled because 'disable_existing_loggers' is False.
LOGGING_CONFIG = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': _FORMATTERS,
    'handlers': _HANDLERS,
    'loggers': _LOGGERS,
}

# Apply the configuration as soon as this module is imported.
logging.config.dictConfig(LOGGING_CONFIG)
- Metrics Collection
python
# metrics.py
import time
import psutil
import threading
from prometheus_client import Counter, Histogram, Gauge, start_http_server
# Prometheus metrics exposed by this module.
# Request counter, partitioned by the 'method' and 'endpoint' labels.
REQUEST_COUNT = Counter(
    name='app_requests_total',
    documentation='Total app requests',
    labelnames=['method', 'endpoint'],
)
# Unlabelled request-latency histogram.
REQUEST_LATENCY = Histogram(
    name='app_request_duration_seconds',
    documentation='Request latency',
)
# Unlabelled gauges for connection count and resource usage.
ACTIVE_CONNECTIONS = Gauge(
    name='app_active_connections',
    documentation='Active connections',
)
MEMORY_USAGE = Gauge(
    name='app_memory_usage_bytes',
    documentation='Memory usage in bytes',
)
CPU_USAGE = Gauge(
    name='app_cpu_usage_percent',
    documentation='CPU usage percentage',
)
def collect_system_metrics(interval=10):
    """Publish process memory and CPU usage to the Prometheus gauges forever.

    The function never returns: it loops endlessly, so it is intended to be
    used as the target of a background thread rather than called inline.

    Args:
        interval: Seconds to sleep between two consecutive samples.
            Defaults to 10, the previously hard-coded period.
    """
    # The process handle does not change between samples, so build it once
    # instead of on every iteration.
    current_process = psutil.Process()
    while True:
        # Resident set size of the current process, in bytes.
        MEMORY_USAGE.set(current_process.memory_info().rss)
        # Called without an interval, so this returns immediately; per the
        # psutil documentation the value covers the time since the previous
        # call, and the very first reading is a meaningless 0.0.
        CPU_USAGE.set(psutil.cpu_percent())
        time.sleep(interval)
# Run the collector in a daemon thread so it does not keep the interpreter
# alive at shutdown.
metrics_thread = threading.Thread(
    daemon=True,
    target=collect_system_metrics,
)
metrics_thread.start()

# Serve the Prometheus scrape endpoint on port 8000.
METRICS_PORT = 8000
start_http_server(METRICS_PORT)
- Health Check Endpoints
python
# health.py
import time

import psutil
import psycopg2
import redis
from flask import Flask, jsonify
# Flask application object; the @app.route decorators in this module register
# their endpoints on it.
app = Flask(__name__)
@app.route('/health')
def health_check():
    """Return a static 'healthy' status together with the current Unix time.

    No dependencies are probed here.
    """
    now = time.time()
    return jsonify(status='healthy', timestamp=now)
@app.route('/health/detailed')
def detailed_health_check():
    """Detailed health check covering dependencies and system resources.

    Probes the database and Redis, then samples memory and CPU usage.

    Returns:
        A JSON response with three keys:
        - 'status': 'unhealthy' if the database or Redis probe failed;
          otherwise 'degraded' if memory or CPU usage is above 90%;
          otherwise 'healthy'.
        - 'timestamp': Unix time at which the check started.
        - 'checks': per-check results ('database', 'redis', 'memory', 'cpu').
    """
    # NOTE(review): DATABASE_URL and redis_client are not defined in this
    # snippet; presumably they come from application configuration - confirm.
    health_status = {
        'status': 'healthy',
        'timestamp': time.time(),
        'checks': {},
    }
    checks = health_status['checks']

    # Check database connectivity: open a connection and close it right away.
    # NOTE(review): the exception text is returned to the caller verbatim;
    # confirm this endpoint is not reachable by untrusted clients.
    try:
        conn = psycopg2.connect(DATABASE_URL)
        conn.close()
        checks['database'] = 'healthy'
    except Exception as e:
        checks['database'] = f'unhealthy: {e}'
        health_status['status'] = 'unhealthy'

    # Check Redis connectivity
    try:
        redis_client.ping()
        checks['redis'] = 'healthy'
    except Exception as e:
        checks['redis'] = f'unhealthy: {e}'
        health_status['status'] = 'unhealthy'

    # Check system resources
    memory_percent = psutil.virtual_memory().percent
    cpu_percent = psutil.cpu_percent()
    checks['memory'] = f'{memory_percent}%'
    checks['cpu'] = f'{cpu_percent}%'

    # Resource pressure only downgrades an otherwise healthy service. It must
    # not overwrite 'unhealthy', which would hide a failed dependency behind
    # the milder 'degraded' status.
    resources_strained = memory_percent > 90 or cpu_percent > 90
    if resources_strained and health_status['status'] == 'healthy':
        health_status['status'] = 'degraded'

    return jsonify(health_status)
@app.route('/ready')
def readiness_check():
    """Kubernetes readiness probe; always answers with ready=True."""
    return jsonify(ready=True)
@app.route('/live')
def liveness_check():
    """Kubernetes liveness probe; always answers with alive=True."""
    return jsonify(alive=True)
Observability Integration
- OpenTelemetry Configuration
python
# tracing.py
from opentelemetry import trace
from opentelemetry.exporter.jaeger.thrift import JaegerExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.instrumentation.flask import FlaskInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.instrumentation.psycopg2 import Psycopg2Instrumentor
# Register an SDK tracer provider globally and obtain this module's tracer.
trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer(__name__)

# Send finished spans, in batches, to the Jaeger agent at localhost:6831.
jaeger_exporter = JaegerExporter(agent_host_name="localhost", agent_port=6831)
span_processor = BatchSpanProcessor(jaeger_exporter)
_active_provider = trace.get_tracer_provider()
_active_provider.add_span_processor(span_processor)

# Auto-instrument Flask, requests, and psycopg2, in that order.
for _instrumentor_cls in (
    FlaskInstrumentor,
    RequestsInstrumentor,
    Psycopg2Instrumentor,
):
    _instrumentor_cls().instrument()