Backup & Restore
This guide covers backup strategies, procedures, and restore operations for NopeSight, ensuring data protection and business continuity.
Backup Strategy
Overview
Backup Schedule
# Backup cadence and retention policy for every NopeSight data tier.
# NOTE(review): full and incremental backups both run at 02:00 — confirm
# the scheduler skips the incremental on Sundays when the full runs.
backup_schedule:
  full_backup:
    frequency: weekly
    day: sunday
    time: "02:00"
    retention: 4_weeks
  incremental_backup:
    frequency: daily
    time: "02:00"
    retention: 7_days
  transaction_logs:
    frequency: hourly
    retention: 24_hours
  snapshots:
    frequency: every_4_hours
    retention: 48_hours
Database Backup
MongoDB Backup
#!/bin/bash
# MongoDB backup script
#
# Dumps all NopeSight MongoDB databases (with the oplog, so the dump is
# consistent even while writes continue), compresses the dump into one
# tarball, and uploads it to S3. Requires MONGODB_URI in the environment
# and AWS credentials configured for the aws CLI.
#
# Fail fast: abort on any command failure, unset variable, or broken pipe,
# so a failed mongodump can never be followed by an upload of a bad archive.
set -euo pipefail

BACKUP_DIR="/backup/mongodb"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_NAME="nopesight_${TIMESTAMP}"

# Create backup directory
mkdir -p "${BACKUP_DIR}/${BACKUP_NAME}"

# Perform backup: --gzip compresses each collection on the fly,
# --oplog captures operations made during the dump for consistency.
mongodump \
    --uri="${MONGODB_URI}" \
    --out="${BACKUP_DIR}/${BACKUP_NAME}" \
    --gzip \
    --oplog

# Create archive (-C keeps paths inside the tarball relative)
tar -czf "${BACKUP_DIR}/${BACKUP_NAME}.tar.gz" \
    -C "${BACKUP_DIR}" "${BACKUP_NAME}"

# Remove uncompressed backup; ${VAR:?} guards against an empty variable
# ever expanding this into "rm -rf /"
rm -rf "${BACKUP_DIR:?}/${BACKUP_NAME}"

# Upload to cloud storage
aws s3 cp "${BACKUP_DIR}/${BACKUP_NAME}.tar.gz" \
    s3://nopesight-backups/mongodb/
PostgreSQL Backup
#!/bin/bash
# PostgreSQL backup script
#
# Dumps the NopeSight PostgreSQL database to a gzip-compressed SQL file
# and uploads it to S3. Requires DB_HOST, DB_USER, DB_NAME in the
# environment; authentication comes from ~/.pgpass or PGPASSWORD.
#
# pipefail is essential here: without it a failing pg_dump in the pipeline
# is masked by gzip's successful exit status and a truncated backup is kept.
set -euo pipefail

BACKUP_DIR="/backup/postgresql"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_NAME="nopesight_${TIMESTAMP}.sql.gz"

# Create backup directory
mkdir -p "${BACKUP_DIR}"

# Perform backup; --no-owner/--no-privileges keep the dump restorable
# under a different role than the one owning the source objects.
pg_dump \
    -h "${DB_HOST}" \
    -U "${DB_USER}" \
    -d "${DB_NAME}" \
    --no-owner \
    --no-privileges \
    | gzip > "${BACKUP_DIR}/${BACKUP_NAME}"

# Upload to cloud storage
aws s3 cp "${BACKUP_DIR}/${BACKUP_NAME}" \
    s3://nopesight-backups/postgresql/
Application Backup
Configuration Files
#!/bin/bash
# Configuration backup script
#
# Archives the NopeSight configuration directories, encrypts the archive
# with AES-256-CBC, and deletes the plaintext copy. Requires
# BACKUP_PASSWORD in the environment.
set -euo pipefail

CONFIG_DIRS=(
    "/etc/nopesight"
    "/opt/nopesight/config"
    "/var/lib/nopesight/custom"
)
BACKUP_DIR="/backup/config"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_NAME="config_${TIMESTAMP}.tar.gz"

# Create backup. The --exclude patterns are placed before the path list
# so they reliably apply to every archived directory.
tar -czf "${BACKUP_DIR}/${BACKUP_NAME}" \
    --exclude='*.log' \
    --exclude='*.tmp' \
    "${CONFIG_DIRS[@]}"

# Encrypt backup.
#   -pbkdf2 -iter: replaces OpenSSL's weak legacy key derivation.
#   -pass env:   : reads the password from the environment instead of
#                  passing it on the command line (-k), where any user
#                  could read it from the process list.
openssl enc -aes-256-cbc \
    -salt \
    -pbkdf2 -iter 100000 \
    -in "${BACKUP_DIR}/${BACKUP_NAME}" \
    -out "${BACKUP_DIR}/${BACKUP_NAME}.enc" \
    -pass env:BACKUP_PASSWORD

# Remove unencrypted backup
rm "${BACKUP_DIR}/${BACKUP_NAME}"
File Attachments
#!/usr/bin/env python3
# File attachment backup script
import os
import boto3
import hashlib
from datetime import datetime
class AttachmentBackup:
    """Back up NopeSight file attachments to S3.

    Every file under ``source_dir`` is uploaded with a SHA-256 checksum
    and backup date stamped as S3 object metadata, and a JSON manifest
    of everything uploaded is stored for later verification.
    """

    def __init__(self):
        self.s3 = boto3.client('s3')
        self.bucket = 'nopesight-backups'
        self.source_dir = '/var/lib/nopesight/attachments'

    def backup_attachments(self):
        """Backup all attachments to S3 and save a manifest."""
        backup_manifest = []
        for root, dirs, files in os.walk(self.source_dir):
            for name in files:
                file_path = os.path.join(root, name)
                relative_path = os.path.relpath(file_path, self.source_dir)

                # Checksum lets the verifier detect corrupted uploads later.
                checksum = self.calculate_checksum(file_path)

                # Upload to S3 with integrity metadata on the object itself.
                s3_key = f"attachments/{relative_path}"
                self.s3.upload_file(
                    file_path,
                    self.bucket,
                    s3_key,
                    ExtraArgs={
                        'Metadata': {
                            'checksum': checksum,
                            'backup_date': datetime.now().isoformat()
                        }
                    }
                )
                backup_manifest.append({
                    'file': relative_path,
                    'checksum': checksum,
                    's3_key': s3_key
                })

        # Save manifest
        self.save_manifest(backup_manifest)

    def calculate_checksum(self, file_path):
        """Return the SHA-256 hex digest of *file_path*.

        The file is streamed in 4 KiB chunks so large attachments are
        never loaded into memory whole.
        """
        sha256_hash = hashlib.sha256()
        with open(file_path, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()

    def save_manifest(self, manifest):
        """Persist the backup manifest as timestamped JSON in S3.

        The original script called this method without ever defining it,
        so every run crashed after the uploads finished; this stores the
        manifest next to the backups for the verification step.
        """
        import json  # local import: the script header does not import json
        key = f"manifests/attachments_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        self.s3.put_object(
            Bucket=self.bucket,
            Key=key,
            Body=json.dumps(manifest, indent=2).encode('utf-8'),
        )
if __name__ == "__main__":
    # Script entry point: back up every attachment under the source dir.
    # (Indentation restored: the guard body must be indented to execute.)
    backup = AttachmentBackup()
    backup.backup_attachments()
Automated Backup
Backup Automation Script
#!/bin/bash
# Master backup automation script
#
# Orchestrates database, configuration, and attachment backups, then
# verifies the results and prunes expired archives. Emails the admin on
# both failure (via the ERR trap) and success.
set -euo pipefail

# Configuration
BACKUP_ROOT="/backup"
LOG_FILE="${BACKUP_ROOT}/backup.log"
NOTIFICATION_EMAIL="admin@company.com"

# Logging function: timestamped line to stdout and the log file.
log() {
    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" | tee -a "${LOG_FILE}"
}

# Error handling: log the failing line number, notify, and abort.
handle_error() {
    log "ERROR: Backup failed at line $1"
    send_notification "Backup Failed" "Backup process failed. Check logs for details."
    exit 1
}
trap 'handle_error $LINENO' ERR

# Send notification email; $1 = subject suffix, $2 = message body.
send_notification() {
    local subject="$1"
    local message="$2"
    echo "${message}" | mail -s "NopeSight Backup: ${subject}" "${NOTIFICATION_EMAIL}"
}

# Main backup process; each step is a separate script so failures are
# attributable and steps can be run by hand.
main() {
    log "Starting backup process"

    # Database backup
    log "Backing up database"
    /opt/nopesight/scripts/backup-database.sh

    # Configuration backup
    log "Backing up configuration"
    /opt/nopesight/scripts/backup-config.sh

    # Attachments backup
    log "Backing up attachments"
    python3 /opt/nopesight/scripts/backup-attachments.py

    # Verify backups
    log "Verifying backups"
    /opt/nopesight/scripts/verify-backups.sh

    # Cleanup old backups
    log "Cleaning up old backups"
    /opt/nopesight/scripts/cleanup-backups.sh

    log "Backup process completed successfully"
    send_notification "Backup Successful" "All backup tasks completed successfully."
}

# Run main function
main
Backup Verification
#!/usr/bin/env python3
# Backup verification script
import os
import json
import hashlib
import boto3
from datetime import datetime, timedelta
class BackupVerifier:
    """Verify recent NopeSight backups stored in S3.

    Each backup is downloaded and exercised with a trial restore; the
    outcome of every check is accumulated in ``verification_results``.
    """

    def __init__(self):
        self.s3 = boto3.client('s3')
        self.bucket = 'nopesight-backups'
        # One dict per checked backup: type, file, size, date, status.
        self.verification_results = []

    def verify_all_backups(self):
        """Verify all recent backups and produce a report.

        NOTE(review): verify_config_backups, verify_attachment_backups
        and generate_verification_report are called here but not defined
        in this script — presumably supplied elsewhere; confirm before
        running this standalone.
        """
        # Verify database backups
        self.verify_database_backups()
        # Verify configuration backups
        self.verify_config_backups()
        # Verify attachment backups
        self.verify_attachment_backups()
        # Generate report
        self.generate_verification_report()

    def verify_database_backups(self):
        """Verify database backup integrity via trial restores."""
        # List up to ten recent MongoDB backup objects
        response = self.s3.list_objects_v2(
            Bucket=self.bucket,
            Prefix='mongodb/',
            MaxKeys=10
        )
        for obj in response.get('Contents', []):
            # Download the backup to a scratch path
            local_path = f"/tmp/{os.path.basename(obj['Key'])}"
            self.s3.download_file(self.bucket, obj['Key'], local_path)

            # Test restore and record the outcome
            test_result = self.test_database_restore(local_path)
            self.verification_results.append({
                'type': 'database',
                'file': obj['Key'],
                'size': obj['Size'],
                'date': obj['LastModified'],
                'status': 'passed' if test_result else 'failed'
            })

            # Cleanup
            os.remove(local_path)

    def test_database_restore(self, backup_file):
        """Trial-restore *backup_file* into the scratch 'test' namespace.

        Returns True only when mongorestore exits with status 0. The
        original used os.system(), which never raises on command failure
        and whose exit code was ignored, so every restore test reported
        success. An argument list (no shell) also avoids injection via
        the backup filename.
        """
        import subprocess  # local import: script header does not import subprocess
        try:
            result = subprocess.run(
                ["mongorestore", "--gzip", f"--archive={backup_file}",
                 "--nsInclude=test.*"],
                capture_output=True,
            )
            return result.returncode == 0
        except OSError as e:
            # mongorestore missing or not executable
            print(f"Restore test failed: {e}")
            return False
Restore Procedures
Database Restore
#!/bin/bash
# Database restore script
#
# Usage: restore-database.sh <backup_file> [restore_point]
#   backup_file    mongodump gzip archive to restore from
#   restore_point  optional oplog timestamp for point-in-time recovery
#
# Stops the application, snapshots the current database to
# /backup/pre-restore as a safety net, restores, verifies, and restarts.
set -euo pipefail

# Default to empty so the usage check works under `set -u`
RESTORE_FILE="${1:-}"
RESTORE_POINT="${2:-}"

if [ -z "$RESTORE_FILE" ]; then
    echo "Usage: $0 <backup_file> [restore_point]"
    exit 1
fi

# Stop application so no writes race the restore
systemctl stop nopesight

# Backup current database before overwriting anything
mongodump --uri="${MONGODB_URI}" --out="/backup/pre-restore"

# Restore database (--drop replaces existing collections)
if [ -z "$RESTORE_POINT" ]; then
    # Full restore
    mongorestore \
        --uri="${MONGODB_URI}" \
        --drop \
        --gzip \
        --archive="${RESTORE_FILE}"
else
    # Point-in-time restore: replay the oplog up to RESTORE_POINT
    mongorestore \
        --uri="${MONGODB_URI}" \
        --drop \
        --gzip \
        --archive="${RESTORE_FILE}" \
        --oplogReplay \
        --oplogLimit="${RESTORE_POINT}"
fi

# Verify restore
mongo "${MONGODB_URI}" --eval "db.stats()"

# Start application
systemctl start nopesight
Disaster Recovery
# Disaster-recovery objectives and the runbook steps for each failure class.
disaster_recovery:
  rto: 4_hours  # Recovery Time Objective
  rpo: 1_hour   # Recovery Point Objective
  procedures:
    total_failure:
      steps:
        - Activate DR site
        - Restore latest full backup
        - Apply incremental backups
        - Restore transaction logs
        - Update DNS records
        - Verify functionality
        - Switch traffic to DR site
    partial_failure:
      steps:
        - Identify failed components
        - Isolate affected systems
        - Restore from snapshots
        - Verify data integrity
        - Resume operations
    data_corruption:
      steps:
        - Stop all writes
        - Identify corruption scope
        - Restore to last known good state
        - Apply transaction logs
        - Verify data integrity
        - Resume operations
Backup Monitoring
Monitoring Dashboard
# Backup monitoring configuration
# Metric queries with alerting thresholds, plus alert routing rules.
monitoring_config = {
    'backup_metrics': {
        # Percentage of attempted backups that succeeded.
        'backup_success_rate': {
            'query': 'sum(backup_success) / sum(backup_attempts) * 100',
            'threshold': 95,
            'alert': 'critical',
        },
        # Mean wall-clock time per backup run, in seconds.
        'backup_duration': {
            'query': 'avg(backup_duration_seconds)',
            'threshold': 3600,
            'alert': 'warning',
        },
        # Growth rate of backup storage over a trailing week.
        'backup_size_growth': {
            'query': 'rate(backup_size_bytes[7d])',
            'threshold': 0.1,  # 10% weekly growth
            'alert': 'info',
        },
    },
    'alerts': {
        # Any run that did not finish with status "success".
        'backup_failed': {
            'condition': 'backup_status != "success"',
            'severity': 'critical',
            'notification': ['email', 'slack'],
        },
        # No completed backup within the last 24 hours.
        'backup_missing': {
            'condition': 'time() - last_backup_time > 86400',
            'severity': 'high',
            'notification': ['email'],
        },
    },
}
Backup Reports
-- Daily backup report query
-- Summarizes the previous day's backup activity per backup type:
-- how many backups ran, how many succeeded vs. failed, the average
-- runtime in seconds, and the total data volume in GB.
SELECT
backup_type,
COUNT(*) as total_backups,
SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) as successful,
SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
AVG(duration_seconds) as avg_duration,
-- bytes -> GB (three divisions by 1024)
SUM(size_bytes) / 1024 / 1024 / 1024 as total_size_gb
FROM backup_logs
WHERE backup_date >= CURRENT_DATE - INTERVAL '1 day'
GROUP BY backup_type
ORDER BY backup_type;
Best Practices
1. 3-2-1 Rule
- 3 copies of data
- 2 different storage media
- 1 offsite copy
2. Regular Testing
- Monthly restore tests
- Quarterly DR drills
- Annual full recovery test
3. Security
- Encrypt all backups
- Secure backup credentials
- Audit backup access
4. Documentation
- Maintain restore procedures
- Document dependencies
- Update contact information
Troubleshooting
Common Issues
# Common backup/restore failure modes and their remediation steps.
troubleshooting:
  backup_failures:
    insufficient_space:
      symptom: "Backup fails with 'No space left on device'"
      solution:
        - Check disk space
        - Clean up old backups
        - Increase storage allocation
    permission_denied:
      symptom: "Backup fails with 'Permission denied'"
      solution:
        - Check backup user permissions
        - Verify file ownership
        - Review SELinux contexts
    network_timeout:
      symptom: "Cloud upload fails with timeout"
      solution:
        - Check network connectivity
        - Increase timeout values
        - Use multipart uploads
  restore_issues:
    version_mismatch:
      symptom: "Restore fails with version error"
      solution:
        - Check database versions
        - Use compatible restore tools
        - Consider upgrade path
    corruption_detected:
      symptom: "Restore reports data corruption"
      solution:
        - Try alternative backup
        - Use recovery tools
        - Contact support