mirror of
https://github.com/SamyRai/turash.git
synced 2025-12-26 23:01:33 +00:00
- Initialize git repository - Add comprehensive .gitignore for Go projects - Install golangci-lint v2.6.0 (latest v2) globally - Configure .golangci.yml with appropriate linters and formatters - Fix all formatting issues (gofmt) - Fix all errcheck issues (unchecked errors) - Adjust complexity threshold for validation functions - All checks passing: build, test, vet, lint
509 lines
14 KiB
Markdown
509 lines
14 KiB
Markdown
## 16. DevOps & Infrastructure
|
|
|
|
### Deployment Architecture
|
|
|
|
#### Application Architecture
|
|
```
|
|
┌─────────────────────────────────────────────────────────────────┐
|
|
│ Load Balancer │
|
|
│ (AWS ALB/NLB) │
|
|
└─────────────────┬───────────────────────────────────────────────┘
|
|
│
|
|
┌─────────────┼─────────────┐
|
|
│ │ │
|
|
┌───▼───┐ ┌─────▼─────┐ ┌───▼───┐
|
|
│ API │ │ Worker │ │ Web │
|
|
│Gateway│ │ Services │ │ Front │
|
|
│(Kong) │ │(Matching) │ │(Next) │
|
|
└───┬───┘ └─────┬─────┘ └───────┘
|
|
│ │
|
|
┌───▼─────────────▼─────────────────┐
|
|
│ Service Mesh │
|
|
│ (Istio/Linkerd) │
|
|
│ │
|
|
│ ┌─────────────┬─────────────┐ │
|
|
│ │ Neo4j │ PostgreSQL │ │
|
|
│ │ Cluster │ + PostGIS │ │
|
|
│ └─────────────┴─────────────┘ │
|
|
│ │
|
|
│ ┌─────────────────────────────────┐
|
|
│ │ Redis Cluster │
|
|
│ │ (Cache + PubSub + Jobs) │
|
|
│ └─────────────────────────────────┘
|
|
└───────────────────────────────────┘
|
|
```
|
|
|
|
#### Infrastructure Components
|
|
|
|
**Production Stack**:
|
|
- **Cloud Provider**: AWS (EKS) or Google Cloud (GKE)
|
|
- **Kubernetes**: Managed Kubernetes service
|
|
- **Load Balancing**: AWS ALB/NLB or GCP Load Balancer
|
|
- **CDN**: CloudFront or Cloudflare for static assets
|
|
- **Object Storage**: S3 or GCS for backups and assets
|
|
- **Monitoring**: Prometheus + Grafana (managed)
|
|
- **Logging**: Loki or CloudWatch
|
|
|
|
**Development Stack**:
|
|
- **Local Development**: Docker Compose + Kind (Kubernetes in Docker)
|
|
- **CI/CD**: GitHub Actions with self-hosted runners
|
|
- **Preview Environments**: Ephemeral environments per PR
|
|
|
|
### Infrastructure as Code
|
|
|
|
#### Terraform Configuration Structure
|
|
```
|
|
infrastructure/
|
|
├── environments/
|
|
│ ├── dev/
|
|
│ │ ├── main.tf
|
|
│ │ ├── variables.tf
|
|
│ │ └── outputs.tf
|
|
│ ├── staging/
|
|
│ │ └── ...
|
|
│ └── prod/
|
|
│ └── ...
|
|
├── modules/
|
|
│ ├── eks/
|
|
│ ├── rds/
|
|
│ ├── elasticache/
|
|
│ ├── networking/
|
|
│ └── monitoring/
|
|
├── shared/
|
|
│ ├── providers.tf
|
|
│ ├── versions.tf
|
|
│ └── backend.tf
|
|
└── scripts/
|
|
├── init.sh
|
|
└── plan.sh
|
|
```
|
|
|
|
#### Core Infrastructure Module
|
|
```hcl
|
|
# infrastructure/modules/eks/main.tf
|
|
module "eks" {
|
|
source = "terraform-aws-modules/eks/aws"
|
|
version = "~> 19.0"
|
|
|
|
cluster_name = var.cluster_name
|
|
cluster_version = "1.27"
|
|
|
|
vpc_id = var.vpc_id
|
|
subnet_ids = var.private_subnets
|
|
|
|
# Managed node groups
|
|
eks_managed_node_groups = {
|
|
general = {
|
|
instance_types = ["t3.large"]
|
|
min_size = 1
|
|
max_size = 10
|
|
desired_size = 3
|
|
|
|
labels = {
|
|
Environment = var.environment
|
|
NodeGroup = "general"
|
|
}
|
|
}
|
|
|
|
matching = {
|
|
instance_types = ["c6i.xlarge"] # CPU-optimized for matching engine
|
|
min_size = 2
|
|
max_size = 20
|
|
desired_size = 5
|
|
|
|
labels = {
|
|
Environment = var.environment
|
|
NodeGroup = "matching"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
#### Database Infrastructure
|
|
```hcl
|
|
# infrastructure/modules/rds/main.tf
|
|
resource "aws_db_instance" "postgresql" {
|
|
identifier = "${var.environment}-city-resource-graph"
|
|
engine = "postgres"
|
|
engine_version = "15.4"
|
|
instance_class = "db.r6g.large"
|
|
allocated_storage = 100
|
|
max_allocated_storage = 1000
|
|
storage_type = "gp3"
|
|
|
|
# Custom parameter group (PostGIS itself is enabled per database with `CREATE EXTENSION postgis;`)
|
|
parameter_group_name = aws_db_parameter_group.postgis.name
|
|
|
|
# Multi-AZ for production
|
|
multi_az = var.environment == "prod"
|
|
backup_retention_period = 30
|
|
|
|
# Security
|
|
vpc_security_group_ids = [aws_security_group.database.id]
|
|
db_subnet_group_name = aws_db_subnet_group.database.name
|
|
|
|
# Monitoring
|
|
enabled_cloudwatch_logs_exports = ["postgresql", "upgrade"]
|
|
monitoring_interval = 60
|
|
monitoring_role_arn = aws_iam_role.rds_enhanced_monitoring.arn
|
|
}
|
|
|
|
resource "aws_db_parameter_group" "postgis" {
|
|
family = "postgres15"
|
|
name = "${var.environment}-postgis"
|
|
|
|
parameter {
|
|
name = "shared_preload_libraries"
|
|
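value = "pg_stat_statements" # PostGIS is not a preload library; enable it with CREATE EXTENSION postgis
apply_method = "pending-reboot" # shared_preload_libraries is a static parameter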
|
|
}
|
|
}
|
|
```
|
|
|
|
### Kubernetes Configuration
|
|
|
|
#### Application Deployment
|
|
```yaml
|
|
# k8s/base/deployment.yaml
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
metadata:
|
|
name: city-resource-graph-api
|
|
spec:
|
|
replicas: 3
|
|
selector:
|
|
matchLabels:
|
|
app: city-resource-graph-api
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: city-resource-graph-api
|
|
spec:
|
|
containers:
|
|
- name: api
|
|
image: cityresourcegraph/api:latest
|
|
ports:
|
|
- containerPort: 8080
|
|
env:
|
|
- name: DATABASE_URL
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: database-secret
|
|
key: url
|
|
- name: REDIS_URL
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: redis-secret
|
|
key: url
|
|
resources:
|
|
requests:
|
|
memory: "512Mi"
|
|
cpu: "250m"
|
|
limits:
|
|
memory: "1Gi"
|
|
cpu: "500m"
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /health
|
|
port: 8080
|
|
initialDelaySeconds: 30
|
|
periodSeconds: 10
|
|
readinessProbe:
|
|
httpGet:
|
|
path: /ready
|
|
port: 8080
|
|
initialDelaySeconds: 5
|
|
periodSeconds: 5
|
|
```
|
|
|
|
#### Service Mesh Configuration
|
|
```yaml
|
|
# k8s/base/istio.yaml
|
|
apiVersion: networking.istio.io/v1alpha3
|
|
kind: VirtualService
|
|
metadata:
|
|
name: city-resource-graph-api
|
|
spec:
|
|
http:
|
|
- match:
|
|
- uri:
|
|
prefix: "/api/v1"
|
|
route:
|
|
- destination:
|
|
host: city-resource-graph-api
|
|
subset: v1
|
|
- match:
|
|
- uri:
|
|
prefix: "/api/v2"
|
|
route:
|
|
- destination:
|
|
host: city-resource-graph-api
|
|
subset: v2
|
|
---
|
|
apiVersion: networking.istio.io/v1alpha3
|
|
kind: DestinationRule
|
|
metadata:
|
|
name: city-resource-graph-api
|
|
spec:
|
|
host: city-resource-graph-api
|
|
subsets:
|
|
- name: v1
|
|
labels:
|
|
version: v1
|
|
- name: v2
|
|
labels:
|
|
version: v2
|
|
```
|
|
|
|
### CI/CD Pipeline
|
|
|
|
#### GitHub Actions Workflow
|
|
```yaml
|
|
# .github/workflows/deploy.yml
|
|
name: Deploy to Kubernetes
|
|
|
|
on:
|
|
push:
|
|
branches: [main]
|
|
pull_request:
|
|
branches: [main]
|
|
|
|
env:
|
|
REGISTRY: ghcr.io
|
|
IMAGE_NAME: ${{ github.repository }}
|
|
|
|
jobs:
|
|
test:
|
|
runs-on: ubuntu-latest
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
- uses: actions/setup-go@v4
|
|
with:
|
|
go-version: '1.21'
|
|
- name: Test
|
|
run: |
|
|
go test -v -race -coverprofile=coverage.out ./...
|
|
go tool cover -html=coverage.out -o coverage.html
|
|
|
|
build-and-push:
|
|
needs: test
|
|
runs-on: ubuntu-latest
|
|
permissions:
|
|
contents: read
|
|
packages: write
|
|
steps:
|
|
- name: Checkout repository
|
|
uses: actions/checkout@v4
|
|
|
|
- name: Log in to registry
|
|
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
|
|
|
|
- name: Extract metadata
|
|
id: meta
|
|
uses: docker/metadata-action@v5
|
|
with:
|
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
|
|
|
- name: Build and push Docker image
|
|
uses: docker/build-push-action@v5
|
|
with:
|
|
context: .
|
|
push: ${{ github.event_name != 'pull_request' }}
|
|
tags: ${{ steps.meta.outputs.tags }}
|
|
labels: ${{ steps.meta.outputs.labels }}
|
|
|
|
deploy:
|
|
needs: build-and-push
|
|
runs-on: ubuntu-latest
|
|
if: github.ref == 'refs/heads/main'
|
|
environment: production
|
|
steps:
|
|
- name: Deploy to Kubernetes
|
|
uses: azure/k8s-deploy@v4
|
|
with:
|
|
namespace: production
|
|
manifests: |
|
|
k8s/production/deployment.yaml
|
|
k8s/production/service.yaml
|
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
|
|
kubectl-version: latest
|
|
```
|
|
|
|
#### Database Migration Strategy
|
|
```yaml
|
|
# k8s/jobs/migration.yaml
|
|
apiVersion: batch/v1
|
|
kind: Job
|
|
metadata:
|
|
name: database-migration
|
|
spec:
|
|
template:
|
|
spec:
|
|
containers:
|
|
- name: migrate
|
|
image: migrate/migrate:latest
|
|
command: ["migrate", "-path", "/migrations", "-database", "$(DATABASE_URL)", "up"]
|
|
env:
|
|
- name: DATABASE_URL
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: database-secret
|
|
key: url
|
|
volumeMounts:
|
|
- name: migrations
|
|
mountPath: /migrations
|
|
volumes:
|
|
- name: migrations
|
|
configMap:
|
|
name: database-migrations
|
|
restartPolicy: Never
|
|
```
|
|
|
|
### Monitoring & Observability
|
|
|
|
#### Prometheus Configuration
|
|
```yaml
|
|
# k8s/monitoring/prometheus.yaml
|
|
apiVersion: monitoring.coreos.com/v1
|
|
kind: PrometheusRule
|
|
metadata:
|
|
name: city-resource-graph-alerts
|
|
spec:
|
|
groups:
|
|
- name: city-resource-graph
|
|
rules:
|
|
- alert: HighErrorRate
|
|
expr: sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) > 0.05
|
|
for: 5m
|
|
labels:
|
|
severity: warning
|
|
annotations:
|
|
summary: "High error rate detected"
|
|
description: "Error rate is {{ $value | humanizePercentage }}"
|
|
|
|
- alert: MatchingEngineSlow
|
|
expr: histogram_quantile(0.95, rate(matching_duration_seconds_bucket[5m])) > 2
|
|
for: 5m
|
|
labels:
|
|
severity: warning
|
|
annotations:
|
|
summary: "Matching engine is slow"
|
|
description: "95th percentile matching duration is {{ $value | printf \"%.2f\" }}s"
|
|
```
|
|
|
|
#### Grafana Dashboards
|
|
- **Application Metrics**: Response times, error rates, throughput
|
|
- **Business Metrics**: Match conversion rates, user engagement, revenue
|
|
- **Infrastructure Metrics**: CPU/memory usage, database connections, cache hit rates
|
|
- **Domain Metrics**: Matching accuracy, economic value calculations
|
|
|
|
### Security & Compliance
|
|
|
|
#### Infrastructure Security
|
|
```yaml
|
|
# k8s/security/network-policy.yaml
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: NetworkPolicy
|
|
metadata:
|
|
name: api-to-database
|
|
spec:
|
|
podSelector:
|
|
matchLabels:
|
|
app: city-resource-graph-api
|
|
policyTypes:
|
|
- Egress
|
|
egress:
|
|
- to:
|
|
- podSelector:
|
|
matchLabels:
|
|
app: postgresql
|
|
ports:
|
|
- protocol: TCP
|
|
port: 5432
|
|
- to:
|
|
- podSelector:
|
|
matchLabels:
|
|
app: neo4j
|
|
ports:
|
|
- protocol: TCP
|
|
port: 7687
|
|
```
|
|
|
|
#### Secrets Management
|
|
- **AWS Secrets Manager** or **GCP Secret Manager** for production
|
|
- **Sealed Secrets** for Kubernetes-native secret management
|
|
- **External Secrets Operator** for syncing secrets from the cloud secret manager into Kubernetes, so rotated values are picked up automatically
|
|
|
|
### Backup & Disaster Recovery
|
|
|
|
#### Database Backups
|
|
```bash
|
|
# Daily automated backup
|
|
pg_dump --host=$DB_HOST --username=$DB_USER --dbname=$DB_NAME \
|
|
--format=custom --compress=9 --file=/backups/$(date +%Y%m%d_%H%M%S).backup
|
|
|
|
# Point-in-time recovery is provided by RDS automated backups (continuous transaction-log archiving), not by pg_dump
|
|
# Retention: 30 days for daily, 1 year for weekly
|
|
```
|
|
|
|
#### Disaster Recovery
|
|
- **Multi-region deployment** for production
|
|
- **Cross-region backup replication**
|
|
- **Automated failover** with Route 53 health checks
|
|
- **Recovery Time Objective (RTO)**: 4 hours
|
|
- **Recovery Point Objective (RPO)**: 1 hour
|
|
|
|
### Cost Optimization
|
|
|
|
#### Resource Optimization
|
|
```yaml
|
|
# k8s/autoscaling/hpa.yaml
|
|
apiVersion: autoscaling/v2
|
|
kind: HorizontalPodAutoscaler
|
|
metadata:
|
|
name: city-resource-graph-api-hpa
|
|
spec:
|
|
scaleTargetRef:
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
name: city-resource-graph-api
|
|
minReplicas: 3
|
|
maxReplicas: 20
|
|
metrics:
|
|
- type: Resource
|
|
resource:
|
|
name: cpu
|
|
target:
|
|
type: Utilization
|
|
averageUtilization: 70
|
|
- type: Resource
|
|
resource:
|
|
name: memory
|
|
target:
|
|
type: Utilization
|
|
averageUtilization: 80
|
|
```
|
|
|
|
#### Cloud Cost Management
|
|
- **Reserved Instances**: 70% of baseline capacity
|
|
- **Spot Instances**: For batch processing and development
|
|
- **Auto-scaling**: Scale-to-zero for development environments
|
|
- **Cost Allocation Tags**: Track costs by service, environment, team
|
|
|
|
### Documentation
|
|
|
|
**Technical Documentation**:
|
|
1. **API Documentation**: OpenAPI/Swagger specification, interactive API explorer (Swagger UI, ReDoc), code examples
|
|
2. **Architecture Diagrams**: C4 model diagrams (Context, Container, Component, Code), sequence diagrams, data flow diagrams, deployment architecture
|
|
3. **Runbooks**: Operational procedures, troubleshooting guides, incident response procedures
|
|
|
|
**Developer Documentation**:
|
|
- Getting Started Guide: Local setup, development workflow
|
|
- Contributing Guide: Code standards, PR process
|
|
- Architecture Decisions: ADR index
|
|
- API Client Libraries: SDKs for popular languages
|
|
|
|
---
|
|
|
|
|