Skip to main content

Maintenance

Daily Maintenance Tasks

bash
#!/bin/bash
# daily_maintenance.sh
#
# Daily health check for the MLflow host. Runs three independent checks:
#   1. restarts the mlflow systemd unit if it is not active,
#   2. prunes artifacts older than 30 days when /opt/mlflow is over 80% full,
#   3. verifies that the PostgreSQL backend store accepts connections.
# A failing check does not stop the remaining ones; the script exits
# non-zero if any check failed.

# No `set -e` on purpose: each check must run even if an earlier one fails.
set -uo pipefail

readonly MLFLOW_ROOT="/opt/mlflow"
readonly ARTIFACT_DIR="${MLFLOW_ROOT}/artifacts"
readonly DISK_LIMIT_PCT=80
readonly ARTIFACT_MAX_AGE_DAYS=30

status=0

# Check service status
# `is-active --quiet` reports the unit state through its exit code, so there
# is no need to parse the human-readable `systemctl status` output.
if ! systemctl is-active --quiet mlflow; then
  echo "MLflow service is not running"
  if ! systemctl restart mlflow; then
    echo "Failed to restart MLflow service" >&2
    status=1
  fi
fi

# Check disk space
# `df -P` guarantees one line per filesystem (no wrapping on long device
# names), so the second line is always the data row.
disk_usage=$(df -P "$MLFLOW_ROOT" | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')
if [[ ! "$disk_usage" =~ ^[0-9]+$ ]]; then
  # df failed or produced something unexpected; do not guess.
  echo "Could not determine disk usage for ${MLFLOW_ROOT}" >&2
  status=1
elif ((disk_usage > DISK_LIMIT_PCT)); then
  echo "Disk usage is above ${DISK_LIMIT_PCT}%: ${disk_usage}%"
  # Clean old artifacts
  find "$ARTIFACT_DIR" -type f -mtime +"$ARTIFACT_MAX_AGE_DAYS" -delete || status=1
fi

# Check database connections
# pg_isready only probes whether the server accepts connections; it needs no
# password, so no credentials end up on the command line.
if ! pg_isready -q -h localhost -p 5432 -d mlflow_db -U mlflow_user; then
  echo "PostgreSQL backend store is not accepting connections" >&2
  status=1
fi

exit "$status"

Weekly Maintenance Tasks


bash
#!/bin/bash
# weekly_maintenance.sh
#
# Weekly housekeeping for MLflow:
#   1. VACUUM ANALYZE on the backend database,
#   2. delete runs whose start time is more than 90 days old,
#   3. force a rotation of the MLflow logs.
# The steps are independent, so a failure in one is recorded and the others
# still run; the script exits non-zero if any step failed.

# No `set -e` on purpose: see the note above about independent steps.
set -uo pipefail

status=0

# Database maintenance
psql -U mlflow_user -d mlflow_db -c "VACUUM ANALYZE;" || status=1

# Clean old experiment runs
# The delimiter is quoted so the shell passes the Python source through
# verbatim (no $-expansion or command substitution inside the heredoc).
python3 <<'EOF' || status=1
from datetime import datetime, timedelta

from mlflow.tracking import MlflowClient

RETENTION_DAYS = 90

client = MlflowClient()
# run.info.start_time is compared as epoch milliseconds.
cutoff_ms = int((datetime.now() - timedelta(days=RETENTION_DAYS)).timestamp() * 1000)

# search_experiments() replaces list_experiments(), which no longer exists
# in current MLflow releases.
for exp in client.search_experiments():
    # Collect first, delete afterwards: deleting while paging could shift
    # the result window and cause runs to be skipped.
    stale_run_ids = []
    page_token = None
    while True:
        page = client.search_runs(
            experiment_ids=[exp.experiment_id],
            page_token=page_token,
        )
        stale_run_ids.extend(
            run.info.run_id
            for run in page
            if run.info.start_time is not None and run.info.start_time < cutoff_ms
        )
        page_token = page.token
        if not page_token:
            break

    # Delete old runs
    for run_id in stale_run_ids:
        client.delete_run(run_id)
EOF

# Log rotation
logrotate -f /etc/logrotate.d/mlflow || status=1

exit "$status"

Monthly Maintenance Tasks

bash

#!/bin/bash
# monthly_maintenance.sh
#
# Monthly maintenance for the MLflow host, in this order:
#   1. full plain-SQL dump of mlflow_db into /backup,
#   2. removal of empty directories under the artifact store,
#   3. MLflow upgrade to the pinned version,
#   4. OS package upgrades.
# The script stops at the first failure so that nothing is upgraded unless
# the database backup completed.
set -euo pipefail

readonly BACKUP_ROOT="/backup"
readonly ARTIFACT_DIR="/opt/mlflow/artifacts"

dump_file="${BACKUP_ROOT}/mlflow_db_$(date +%Y%m%d).sql"

# Full database backup
# Dump to a temporary name and rename on success, so a failed pg_dump never
# leaves a truncated file that looks like a valid backup.
mkdir -p "$BACKUP_ROOT"
if ! pg_dump -U mlflow_user mlflow_db >"${dump_file}.partial"; then
  rm -f -- "${dump_file}.partial"
  echo "Database backup failed; aborting before any upgrade" >&2
  exit 1
fi
mv -- "${dump_file}.partial" "$dump_file"

# Artifact storage cleanup
find "$ARTIFACT_DIR" -type d -empty -delete

# Update MLflow
pip install --upgrade mlflow==3.0.1

# Security updates
# apt-get is the script-stable front end (apt is meant for interactive use);
# the noninteractive frontend keeps package prompts from blocking the run.
export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get upgrade -y

Backup Strategy


bash

#!/bin/bash
# backup_mlflow.sh
#
# Backs up an MLflow installation into /backup/mlflow:
#   - gzip-compressed SQL dump of mlflow_db,
#   - tarball of /opt/mlflow/config,
#   - rsync mirror of the artifact store (plus an S3 sync when the aws CLI
#     is installed),
# then prunes database and config archives older than 30 days.
# The script stops at the first failure, so old backups are never pruned
# after a failed run.
set -euo pipefail

readonly BACKUP_DIR="/backup/mlflow"
readonly ARTIFACT_SRC="/opt/mlflow/artifacts"
readonly S3_TARGET="s3://mlflow-backup-bucket/artifacts/"
readonly RETENTION_DAYS=30

DATE=$(date +%Y%m%d_%H%M%S)
readonly DATE

# Create backup directory
mkdir -p "$BACKUP_DIR"

# Database backup
# With pipefail the pipeline fails when pg_dump fails, not only when gzip
# does; the incomplete archive is removed so it cannot pass for a backup.
db_dump="${BACKUP_DIR}/mlflow_db_${DATE}.sql.gz"
if ! pg_dump -U mlflow_user mlflow_db | gzip >"$db_dump"; then
  rm -f -- "$db_dump"
  echo "Database backup failed" >&2
  exit 1
fi

# Configuration backup
# -C / with a relative path stores the same member names tar would produce
# after stripping the leading "/", without the warning on stderr.
tar -czf "${BACKUP_DIR}/mlflow_config_${DATE}.tar.gz" -C / opt/mlflow/config

# Artifacts backup (mirror)
# NOTE: --delete removes files from the mirror once they disappear from the
# source, so this copy does not retain artifacts deleted upstream.
rsync -av --delete "${ARTIFACT_SRC}/" "${BACKUP_DIR}/artifacts/"

# S3 backup (if configured)
if command -v aws >/dev/null 2>&1; then
  aws s3 sync "$ARTIFACT_SRC" "$S3_TARGET"
else
  echo "aws CLI not found; skipping S3 backup"
fi

# Cleanup old backups
# -maxdepth 1 keeps find out of the artifacts/ mirror, where it would
# otherwise delete mirrored artifact files that happen to end in .sql.gz or
# .tar.gz.
find "$BACKUP_DIR" -maxdepth 1 -type f -name 'mlflow_db_*.sql.gz' \
  -mtime +"$RETENTION_DAYS" -delete
find "$BACKUP_DIR" -maxdepth 1 -type f -name 'mlflow_config_*.tar.gz' \
  -mtime +"$RETENTION_DAYS" -delete