Skip to main content

Support

Common Issues and Solutions

1. Issue: MLflow Server Won't Start


bash

# Show the 50 most recent journal entries for the mlflow unit
journalctl --unit=mlflow --lines=50
# Confirm the database accepts a trivial query
psql --username=mlflow_user --dbname=mlflow_db --command="SELECT 1;"
# See whether a TCP listener already occupies port 5000
netstat --tcp --listening --numeric --programs | grep :5000
# Bounce the service
systemctl restart mlflow

2. Issue: High Memory Usage

bash
# Check memory usage, highest %MEM (column 4) first.
# The [m] bracket expression stops grep from matching its own process entry.
ps aux | grep '[m]lflow' | sort -k4 -nr
# Monitor memory over time (refreshes every second)
watch -n 1 'free -h && ps aux | grep "[m]lflow" | head -5'
# Restart with memory limits
systemctl edit mlflow
# Add:
# [Service]
# MemoryMax=2G
# (MemoryLimit= is the deprecated legacy spelling; systemd documents
#  MemoryMax= as its replacement.)
# Restart so the service runs under the new limit
systemctl restart mlflow

3. Issue: Database Connection Errors

bash

# Is the PostgreSQL service running?
systemctl status postgresql
# Count the sessions currently listed in pg_stat_activity
psql --username=mlflow_user --dbname=mlflow_db --command="SELECT count(*) FROM pg_stat_activity;"
# Terminate every backend attached to mlflow_db (run as the postgres role)
psql --username=postgres --command="SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='mlflow_db';"

4. Issue: Slow API Response

bash

# Check database performance: the ten statements with the highest mean
# execution time. Requires the pg_stat_statements extension.
# PostgreSQL 13 renamed the column mean_time to mean_exec_time; on
# PostgreSQL 12 and older, substitute mean_time in both places.
psql -U mlflow_user -d mlflow_db -c "SELECT query, calls, mean_exec_time FROM pg_stat_statements ORDER BY mean_exec_time DESC LIMIT 10;"
# Optimize database (reclaim dead rows and refresh planner statistics)
psql -U mlflow_user -d mlflow_db -c "VACUUM ANALYZE;"
# Check indexes defined in the public schema
psql -U mlflow_user -d mlflow_db -c "SELECT schemaname, tablename, indexname FROM pg_indexes WHERE schemaname='public';"

5. Troubleshooting Scripts

python

# mlflow_diagnostics.py
import mlflow
import requests
import psycopg2
from mlflow.tracking import MlflowClient
def check_mlflow_health():
    """Run a best-effort health check of the local MLflow deployment.

    Three independent probes are attempted; a failure in one does not
    prevent the others from running:

    * ``server``   - HTTP GET of ``http://localhost:5000/health`` answers 200.
    * ``database`` - a psycopg2 connection to ``mlflow_db`` on localhost can
      be opened and closed as ``mlflow_user``.
    * ``api``      - an ``MlflowClient`` can list experiments.

    The database password is read from the ``MLFLOW_DB_PASSWORD``
    environment variable and falls back to the placeholder ``"password"``
    when that variable is unset.

    Returns:
        dict: keys ``'server'``, ``'database'`` and ``'api'``, each mapped
        to ``True`` when the probe succeeded and ``False`` otherwise.
    """
    import os  # local import: only needed for the password lookup

    results = {}

    # Check server health
    try:
        response = requests.get("http://localhost:5000/health", timeout=10)
        results['server'] = response.status_code == 200
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit
        # still interrupt the script.
        results['server'] = False

    # Check database connectivity
    try:
        conn = psycopg2.connect(
            host="localhost",
            database="mlflow_db",
            user="mlflow_user",
            password=os.environ.get("MLFLOW_DB_PASSWORD", "password"),
        )
        conn.close()
        results['database'] = True
    except Exception:
        results['database'] = False

    # Check API functionality
    try:
        # NOTE(review): the client is built without arguments, so it
        # presumably targets whatever MLFLOW_TRACKING_URI selects - confirm
        # it points at the same server probed above.
        client = MlflowClient()
        # list_experiments() was removed in MLflow 2.0; prefer
        # search_experiments() and fall back only on older clients.
        list_experiments = (
            getattr(client, "search_experiments", None)
            or client.list_experiments
        )
        list_experiments()
        results['api'] = True
    except Exception:
        results['api'] = False

    return results


if __name__ == "__main__":
    health = check_mlflow_health()
    print(f"Health Check Results: {health}")

6. Support Contacts