Exercise: Microservices with AWS EKS - CI/CD

plaintext
microservices-eks/
├── .github/
│   └── workflows/
│       ├── ci-dev.yml
│       ├── ci-stg.yml
│       └── ci-prod.yml

├── apps/
│   ├── api-service/
│   │   ├── src/
│   │   ├── Dockerfile
│   │   └── package.json
│   │
│   └── worker-service/
│       ├── src/
│       ├── Dockerfile
│       └── package.json

├── infrastructure/
│   ├── terraform/
│   │   ├── eks/
│   │   │   ├── main.tf
│   │   │   └── variables.tf
│   │   │
│   │   ├── rds/
│   │   │   ├── main.tf
│   │   │   └── variables.tf
│   │   │
│   │   └── modules/
│   │       ├── networking/
│   │       └── security/
│   │
│   └── kubernetes/
│       ├── base/
│       │   ├── api-service/
│       │   │   ├── deployment.yaml
│       │   │   ├── service.yaml
│       │   │   └── hpa.yaml
│       │   │
│       │   └── worker-service/
│       │       ├── deployment.yaml
│       │       └── service.yaml
│       │
│       └── overlays/
│           ├── dev/
│           │   ├── kustomization.yaml
│           │   └── patches/
│           ├── stg/
│           └── prod/

├── scripts/
│   ├── deploy.sh
│   ├── setup-eks.sh
│   └── setup-secrets.sh

└── docs/
    ├── architecture.md
    └── deployment.md

Part 1: Base Configuration and EKS

In this first part, we set up the base infrastructure with Terraform: the EKS cluster and the networking components it requires.
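
As a quick orientation, here is a minimal sketch of exercising the networking module on its own before wiring it into the full stack; the variable values are only illustrative, and the module path matches the repository layout above.

bash
# Illustrative only: plan the networking module in isolation with example values
cd infrastructure/terraform/modules/networking
terraform init
terraform plan \
  -var 'environment=dev' \
  -var 'region=us-east-1' \
  -var 'vpc_cidr=10.0.0.0/16' \
  -var 'availability_zones=["us-east-1a","us-east-1b","us-east-1c"]' \
  -var 'cluster_name=main-cluster'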

Infrastructure as Code with Terraform

1. Network Configuration

hcl
# infrastructure/terraform/modules/networking/main.tf

provider "aws" {
  region = var.region
}

# VPC
resource "aws_vpc" "main" {
  cidr_block           = var.vpc_cidr
  enable_dns_hostnames = true
  enable_dns_support   = true

  tags = {
    Name        = "${var.environment}-vpc"
    Environment = var.environment
  }
}

# Public Subnets
resource "aws_subnet" "public" {
  count                   = length(var.availability_zones)
  vpc_id                  = aws_vpc.main.id
  cidr_block              = cidrsubnet(var.vpc_cidr, 8, count.index)
  availability_zone       = var.availability_zones[count.index]
  map_public_ip_on_launch = true

  tags = {
    Name                                        = "${var.environment}-public-${count.index + 1}"
    Environment                                 = var.environment
    "kubernetes.io/role/elb"                    = 1
    "kubernetes.io/cluster/${var.cluster_name}" = "shared"
  }
}

# Private Subnets
resource "aws_subnet" "private" {
  count             = length(var.availability_zones)
  vpc_id            = aws_vpc.main.id
  cidr_block        = cidrsubnet(var.vpc_cidr, 8, count.index + length(var.availability_zones))
  availability_zone = var.availability_zones[count.index]

  tags = {
    Name                                        = "${var.environment}-private-${count.index + 1}"
    Environment                                 = var.environment
    "kubernetes.io/role/internal-elb"           = 1
    "kubernetes.io/cluster/${var.cluster_name}" = "shared"
  }
}

# Internet Gateway
resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = {
    Name        = "${var.environment}-igw"
    Environment = var.environment
  }
}

# NAT Gateway
resource "aws_nat_gateway" "main" {
  count         = length(var.availability_zones)
  allocation_id = aws_eip.nat[count.index].id
  subnet_id     = aws_subnet.public[count.index].id

  tags = {
    Name        = "${var.environment}-nat-${count.index + 1}"
    Environment = var.environment
  }
}

# Elastic IPs for NAT
resource "aws_eip" "nat" {
  count = length(var.availability_zones)
  vpc   = true

  tags = {
    Name        = "${var.environment}-eip-${count.index + 1}"
    Environment = var.environment
  }
}

# Route Tables
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }

  tags = {
    Name        = "${var.environment}-public-rt"
    Environment = var.environment
  }
}

resource "aws_route_table" "private" {
  count  = length(var.availability_zones)
  vpc_id = aws_vpc.main.id

  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.main[count.index].id
  }

  tags = {
    Name        = "${var.environment}-private-rt-${count.index + 1}"
    Environment = var.environment
  }
}

# Route Table Associations
resource "aws_route_table_association" "public" {
  count          = length(var.availability_zones)
  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
}

resource "aws_route_table_association" "private" {
  count          = length(var.availability_zones)
  subnet_id      = aws_subnet.private[count.index].id
  route_table_id = aws_route_table.private[count.index].id
}

# Variables
variable "environment" {
  description = "Environment name"
  type        = string
}

variable "region" {
  description = "AWS region"
  type        = string
}

variable "vpc_cidr" {
  description = "CIDR block for VPC"
  type        = string
}

variable "availability_zones" {
  description = "List of availability zones"
  type        = list(string)
}

variable "cluster_name" {
  description = "Name of the EKS cluster"
  type        = string
}

# Outputs
output "vpc_id" {
  value = aws_vpc.main.id
}

output "public_subnet_ids" {
  value = aws_subnet.public[*].id
}

output "private_subnet_ids" {
  value = aws_subnet.private[*].id
}

2. EKS Configuration

hcl
# infrastructure/terraform/eks/main.tf

module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 19.0"

  cluster_name    = "${var.environment}-${var.cluster_name}"
  cluster_version = "1.27"

  vpc_id     = var.vpc_id
  subnet_ids = var.private_subnet_ids

  cluster_endpoint_public_access = true

  eks_managed_node_groups = {
    main = {
      min_size     = var.node_group_min_size
      max_size     = var.node_group_max_size
      desired_size = var.node_group_desired_size

      instance_types = ["t3.medium"]
      capacity_type  = "ON_DEMAND"

      labels = {
        Environment = var.environment
        Role       = "application"
      }

      tags = {
        Environment = var.environment
      }
    }
  }

  # OIDC Provider
  enable_irsa = true

  # Add-ons
  cluster_addons = {
    coredns = {
      most_recent = true
    }
    kube-proxy = {
      most_recent = true
    }
    vpc-cni = {
      most_recent = true
    }
    aws-ebs-csi-driver = {
      most_recent = true
    }
  }

  # Security Groups
  cluster_security_group_additional_rules = {
    ingress_nodes_ephemeral_ports_tcp = {
      description                = "Node to node ephemeral ports"
      protocol                  = "tcp"
      from_port                 = 1025
      to_port                   = 65535
      type                      = "ingress"
      source_node_security_group = true
    }
  }

  node_security_group_additional_rules = {
    ingress_self_all = {
      description = "Node to node all ports/protocols"
      protocol    = "-1"
      from_port   = 0
      to_port     = 0
      type        = "ingress"
      self        = true
    }
  }

  tags = {
    Environment = var.environment
    Terraform   = "true"
  }
}

# AWS Load Balancer Controller IAM Role
module "lb_controller_role" {
  source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"

  role_name                              = "${var.environment}-eks-aws-lb-controller"
  attach_load_balancer_controller_policy = true

  oidc_providers = {
    main = {
      provider_arn               = module.eks.oidc_provider_arn
      namespace_service_accounts = ["kube-system:aws-load-balancer-controller"]
    }
  }

  tags = {
    Environment = var.environment
  }
}

# Variables
variable "environment" {
  description = "Environment name"
  type        = string
}

variable "cluster_name" {
  description = "Name of the EKS cluster"
  type        = string
}

variable "vpc_id" {
  description = "VPC ID"
  type        = string
}

variable "private_subnet_ids" {
  description = "List of private subnet IDs"
  type        = list(string)
}

variable "node_group_min_size" {
  description = "Minimum size of the node group"
  type        = number
  default     = 2
}

variable "node_group_max_size" {
  description = "Maximum size of the node group"
  type        = number
  default     = 5
}

variable "node_group_desired_size" {
  description = "Desired size of the node group"
  type        = number
  default     = 3
}

# Outputs
output "cluster_endpoint" {
  value = module.eks.cluster_endpoint
}

output "cluster_certificate_authority_data" {
  value = module.eks.cluster_certificate_authority_data
}

output "cluster_name" {
  value = module.eks.cluster_name
}

3. Setup Script

bash
#!/bin/bash
# scripts/setup-eks.sh

set -e

# Required variables
ENVIRONMENT=$1
REGION=$2
CLUSTER_NAME="main-cluster"

# Validate parameters
if [[ ! "$ENVIRONMENT" =~ ^(dev|stg|prod)$ ]]; then
    echo "Environment must be dev, stg, or prod"
    exit 1
fi

# Configure the AWS CLI
aws configure set default.region $REGION

# Create a temporary directory for tfvars
mkdir -p tmp

# Generate the tfvars file for the environment
cat > tmp/$ENVIRONMENT.tfvars <<EOF
environment         = "$ENVIRONMENT"
region             = "$REGION"
cluster_name       = "$CLUSTER_NAME"
vpc_cidr           = "10.0.0.0/16"
availability_zones = ["${REGION}a", "${REGION}b", "${REGION}c"]

node_group_min_size     = 2
node_group_max_size     = 5
node_group_desired_size = 3
EOF

# Initialize and apply Terraform
cd infrastructure/terraform

# Initialize Terraform
terraform init

# Create the plan
terraform plan -var-file="../../tmp/$ENVIRONMENT.tfvars" -out="../../tmp/$ENVIRONMENT.plan"

# Apply the changes
terraform apply "../../tmp/$ENVIRONMENT.plan"

# Update kubeconfig
aws eks update-kubeconfig --name "$ENVIRONMENT-$CLUSTER_NAME" --region $REGION

# Install required add-ons
echo "Installing AWS Load Balancer Controller..."
helm repo add eks https://aws.github.io/eks-charts
helm repo update

helm upgrade -i aws-load-balancer-controller eks/aws-load-balancer-controller \
  -n kube-system \
  --set clusterName="$ENVIRONMENT-$CLUSTER_NAME" \
  --set serviceAccount.create=true \
  --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"=$(aws iam get-role --role-name "$ENVIRONMENT-eks-aws-lb-controller" --query 'Role.Arn' --output text)

# Install Metrics Server
echo "Installing Metrics Server..."
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

# Install ExternalDNS
echo "Installing External DNS..."
helm repo add external-dns https://kubernetes-sigs.github.io/external-dns/
helm repo update

helm upgrade -i external-dns external-dns/external-dns \
  -n kube-system \
  --set provider=aws \
  --set aws.region=$REGION \
  --set domainFilters[0]="$ENVIRONMENT.example.com"

echo "EKS cluster setup completed successfully!"

# Clean up temporary files (we are still inside infrastructure/terraform)
rm -rf ../../tmp/

# Show cluster information
echo "Cluster Info:"
kubectl cluster-info

Part 1 Verification

1. Verify the infrastructure:

  • [ ] VPC created correctly
  • [ ] Public and private subnets configured
  • [ ] NAT gateways working
  • [ ] Route tables configured

2. Verify EKS:

  • [ ] EKS cluster created and available
  • [ ] Worker nodes registered
  • [ ] Add-ons installed and working
  • [ ] IAM roles configured correctly

3. Verify networking (see the spot-check commands after this list):

  • [ ] Load Balancer Controller working
  • [ ] DNS configured
  • [ ] Metrics available
  • [ ] Security configured
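
The checklist above can be spot-checked with a few read-only commands; the names below follow the conventions used in this part (a dev environment and the main-cluster cluster name) and may need adjusting for your setup.

bash
# Assumes the dev environment and the naming used in this part
aws ec2 describe-vpcs --filters "Name=tag:Name,Values=dev-vpc" --query "Vpcs[].VpcId"
aws eks describe-cluster --name dev-main-cluster --query "cluster.status"
kubectl get nodes
kubectl -n kube-system get deployment aws-load-balancer-controller
kubectl top nodes   # requires Metrics Server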

Part 2: Secrets and Configuration Management

In this part, we implement secrets and configuration management using AWS Secrets Manager and AWS Systems Manager Parameter Store.
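
Once the module below is applied, configuration and secrets live under predictable paths. As a quick sanity check (assuming a dev environment and the naming convention defined in this module), they can be read back like this:

bash
# Non-sensitive settings in Parameter Store (path convention: /<env>/config/<key>)
aws ssm get-parameter --name "/dev/config/log_level" --query "Parameter.Value" --output text

# Sensitive values in Secrets Manager (name convention: <env>/secrets/<name>)
aws secretsmanager get-secret-value --secret-id "dev/secrets/database" --query "SecretString" --output text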

1. Secrets Manager and Parameter Store Configuration

hcl
# infrastructure/terraform/modules/secrets/main.tf

provider "aws" {
  region = var.region
}

# Parameter Store - non-sensitive settings
resource "aws_ssm_parameter" "app_config" {
  for_each = {
    "app_port"              = var.app_port
    "log_level"            = var.environment == "prod" ? "info" : "debug"
    "max_connections"      = var.environment == "prod" ? "100" : "20"
    "worker_threads"       = var.environment == "prod" ? "10" : "5"
    "cache_ttl"           = var.environment == "prod" ? "3600" : "300"
    "api_rate_limit"      = var.environment == "prod" ? "1000" : "100"
    "metrics_enabled"     = "true"
    "tracing_enabled"     = var.environment == "prod" ? "true" : "false"
  }

  name        = "/${var.environment}/config/${each.key}"
  description = "Configuration parameter for ${each.key}"
  type        = "String"
  value       = each.value
  tier        = var.environment == "prod" ? "Advanced" : "Standard"

  tags = {
    Environment = var.environment
  }
}

# Secrets Manager - sensitive values
resource "aws_secretsmanager_secret" "app_secrets" {
  for_each = toset([
    "database",
    "api_keys",
    "jwt",
    "external_services"
  ])

  name        = "${var.environment}/secrets/${each.key}"
  description = "Secrets for ${each.key}"

  tags = {
    Environment = var.environment
  }
}

# Initial secret values (in production these should be managed outside Terraform)
resource "aws_secretsmanager_secret_version" "database" {
  secret_id = aws_secretsmanager_secret.app_secrets["database"].id
  secret_string = jsonencode({
    username = "app_user"
    password = var.db_password
    host     = var.db_host
    port     = var.db_port
    name     = "${var.environment}_db"
  })
}

resource "aws_secretsmanager_secret_version" "api_keys" {
  secret_id = aws_secretsmanager_secret.app_secrets["api_keys"].id
  secret_string = jsonencode({
    internal_api_key = var.internal_api_key
    external_api_key = var.external_api_key
  })
}

# IAM role for secrets access from EKS
resource "aws_iam_role" "secrets_access" {
  name = "${var.environment}-eks-secrets-access"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRoleWithWebIdentity"
        Effect = "Allow"
        Principal = {
          Federated = var.oidc_provider_arn
        }
        Condition = {
          StringEquals = {
            "${var.oidc_provider}:sub": "system:serviceaccount:${var.k8s_namespace}:secrets-access"
          }
        }
      }
    ]
  })

  tags = {
    Environment = var.environment
  }
}

# IAM policy for secrets access
resource "aws_iam_role_policy" "secrets_access" {
  name = "${var.environment}-eks-secrets-access"
  role = aws_iam_role.secrets_access.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "secretsmanager:GetSecretValue",
          "secretsmanager:DescribeSecret",
          "ssm:GetParameter",
          "ssm:GetParameters",
          "ssm:GetParametersByPath"
        ]
        Resource = [
          "arn:aws:secretsmanager:${var.region}:${var.account_id}:secret:${var.environment}/*",
          "arn:aws:ssm:${var.region}:${var.account_id}:parameter/${var.environment}/*"
        ]
      }
    ]
  })
}

# Variables
variable "environment" {
  description = "Environment name"
  type        = string
}

variable "region" {
  description = "AWS region"
  type        = string
}

variable "account_id" {
  description = "AWS account ID"
  type        = string
}

variable "oidc_provider_arn" {
  description = "EKS OIDC Provider ARN"
  type        = string
}

variable "oidc_provider" {
  description = "EKS OIDC Provider URL"
  type        = string
}

variable "k8s_namespace" {
  description = "Kubernetes namespace"
  type        = string
}

variable "db_password" {
  description = "Database password"
  type        = string
  sensitive   = true
}

variable "db_host" {
  description = "Database host"
  type        = string
}

variable "db_port" {
  description = "Database port"
  type        = string
}

variable "internal_api_key" {
  description = "Internal API key"
  type        = string
  sensitive   = true
}

variable "external_api_key" {
  description = "External API key"
  type        = string
  sensitive   = true
}

variable "app_port" {
  description = "Application port"
  type        = string
  default     = "8080"
}

# Outputs
output "secrets_role_arn" {
  value = aws_iam_role.secrets_access.arn
}

2. Kubernetes ConfigMap and Secrets Integration

yaml
# infrastructure/kubernetes/base/secrets/secrets-store.yaml

apiVersion: v1
kind: ServiceAccount
metadata:
  name: secrets-access
  namespace: default
  annotations:
    eks.amazonaws.com/role-arn: ${SECRETS_ROLE_ARN}

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-service
spec:
  template:
    metadata:
      annotations:
        secrets.k8s.aws/sidecarInjectorWebhook: enabled
    spec:
      serviceAccountName: secrets-access
      containers:
      - name: api-service
        env:
        - name: DB_SECRET_ARN
          value: "arn:aws:secretsmanager:${REGION}:${ACCOUNT_ID}:secret:${ENVIRONMENT}/secrets/database"
        - name: API_KEYS_SECRET_ARN
          value: "arn:aws:secretsmanager:${REGION}:${ACCOUNT_ID}:secret:${ENVIRONMENT}/secrets/api_keys"
        - name: CONFIG_PATH
          value: "/${ENVIRONMENT}/config"
        envFrom:
        - configMapRef:
            name: app-config

---
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-config
data:
  APP_ENV: ${ENVIRONMENT}
  LOG_LEVEL: ${LOG_LEVEL}
  METRICS_ENABLED: "true"
  TRACING_ENABLED: ${TRACING_ENABLED}

---
apiVersion: v1
kind: ConfigMap
metadata:
  name: aws-secrets-manager
data:
  region: ${REGION}
  secrets-prefix: /${ENVIRONMENT}/secrets/
  parameters-prefix: /${ENVIRONMENT}/config/

3. Secrets Setup Script

bash
#!/bin/bash
# scripts/setup-secrets.sh

set -e

ENVIRONMENT=$1
REGION=$2

# Validate the environment
if [[ ! "$ENVIRONMENT" =~ ^(dev|stg|prod)$ ]]; then
    echo "Environment must be dev, stg, or prod"
    exit 1
fi

# Get the account ID
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

# Generate secure passwords and keys
DB_PASSWORD=$(openssl rand -base64 32)
INTERNAL_API_KEY=$(openssl rand -hex 32)
EXTERNAL_API_KEY=$(openssl rand -hex 32)

# Create a temporary Terraform variables file
mkdir -p tmp
cat > tmp/secrets.tfvars <<EOF
environment      = "${ENVIRONMENT}"
region          = "${REGION}"
account_id      = "${ACCOUNT_ID}"
db_password     = "${DB_PASSWORD}"
internal_api_key = "${INTERNAL_API_KEY}"
external_api_key = "${EXTERNAL_API_KEY}"
EOF

# Apply the Terraform configuration
cd infrastructure/terraform/modules/secrets
terraform init
terraform apply -var-file="../../../../tmp/secrets.tfvars"

# Create the ServiceAccount and ConfigMap in Kubernetes
SECRETS_ROLE_ARN=$(terraform output -raw secrets_role_arn)

# Replace variables in the Kubernetes template (path is relative to this module directory)
sed -e "s|\${ENVIRONMENT}|$ENVIRONMENT|g" \
    -e "s|\${REGION}|$REGION|g" \
    -e "s|\${ACCOUNT_ID}|$ACCOUNT_ID|g" \
    -e "s|\${SECRETS_ROLE_ARN}|$SECRETS_ROLE_ARN|g" \
    ../../../kubernetes/base/secrets/secrets-store.yaml | kubectl apply -f -

echo "Secrets configuration completed successfully!"

# Clean up temporary files (path is relative to this module directory)
rm -f ../../../../tmp/secrets.tfvars

# Show important information
echo "Important Information:"
echo "Environment: $ENVIRONMENT"
echo "Region: $REGION"
echo "Secrets Role ARN: $SECRETS_ROLE_ARN"
echo "Remember to store the credentials securely and update them periodically"

Part 2 Verification

1. Verify Secrets Manager:

  • [ ] Secrets created correctly
  • [ ] Initial values configured
  • [ ] Access from EKS working
  • [ ] Secret rotation configured

2. Verify Parameter Store:

  • [ ] Parameters created per environment
  • [ ] Correct values per environment
  • [ ] Access from the applications
  • [ ] Correct parameter tiers

3. Verify the Kubernetes integration (see the spot-check commands after this list):

  • [ ] ServiceAccount created
  • [ ] IRSA configured
  • [ ] ConfigMaps applied
  • [ ] Secrets mounted correctly
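
The checklist above can be spot-checked with a few read-only calls (a dev environment is assumed; names follow the module and manifests in this part):

bash
aws secretsmanager list-secrets --query "SecretList[?starts_with(Name, 'dev/secrets/')].Name"
aws ssm get-parameters-by-path --path "/dev/config/" --query "Parameters[].Name"
kubectl get serviceaccount secrets-access -o yaml   # the eks.amazonaws.com/role-arn annotation confirms IRSA
kubectl get configmap app-config aws-secrets-manager -o yaml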

Part 3: High Availability Service and Redis

1. ElastiCache Redis Configuration

hcl
# infrastructure/terraform/modules/redis/main.tf

resource "aws_elasticache_subnet_group" "redis" {
  name       = "${var.environment}-redis-subnet-group"
  subnet_ids = var.private_subnet_ids

  tags = {
    Environment = var.environment
  }
}

resource "aws_elasticache_replication_group" "redis" {
  replication_group_id          = "${var.environment}-redis-cluster"
  description                  = "Redis cluster for ${var.environment}"
  node_type                    = var.environment == "prod" ? "cache.r6g.large" : "cache.t3.medium"
  port                         = 6379
  parameter_group_name         = "default.redis6.x"
  automatic_failover_enabled   = true
  engine                       = "redis"
  engine_version              = "6.x"
  num_cache_clusters          = var.environment == "prod" ? 3 : 2
  subnet_group_name           = aws_elasticache_subnet_group.redis.name
  security_group_ids          = [aws_security_group.redis.id]
  at_rest_encryption_enabled  = true
  transit_encryption_enabled  = true
  auth_token                 = var.auth_token
  maintenance_window         = "sun:05:00-sun:09:00"
  snapshot_window           = "00:00-04:00"
  snapshot_retention_limit  = var.environment == "prod" ? 7 : 1

  tags = {
    Environment = var.environment
  }
}

resource "aws_security_group" "redis" {
  name        = "${var.environment}-redis-sg"
  description = "Security group for Redis cluster"
  vpc_id      = var.vpc_id

  ingress {
    from_port       = 6379
    to_port         = 6379
    protocol        = "tcp"
    security_groups = [var.eks_security_group_id]
  }

  tags = {
    Environment = var.environment
  }
}

# Variables
variable "environment" {
  type = string
}

variable "vpc_id" {
  type = string
}

variable "private_subnet_ids" {
  type = list(string)
}

variable "eks_security_group_id" {
  type = string
}

variable "auth_token" {
  type      = string
  sensitive = true
}

# Outputs
output "redis_endpoint" {
  value = aws_elasticache_replication_group.redis.primary_endpoint_address
}

output "redis_port" {
  value = aws_elasticache_replication_group.redis.port
}

output "redis_security_group_id" {
  value = aws_security_group.redis.id
}

2. High Availability Service (Session Service)

typescript
// apps/session-service/src/index.ts

import express from 'express';
import { createClient } from 'redis';
import { v4 as uuidv4 } from 'uuid';
import * as winston from 'winston';
import { SecretsManager } from 'aws-sdk';

interface SessionData {
  userId: string;
  data: any;
  expiresAt: number;
}

class SessionService {
  private redisClient: any;
  private logger: winston.Logger;
  private secretsManager: SecretsManager;

  constructor() {
    this.logger = winston.createLogger({
      level: process.env.LOG_LEVEL || 'info',
      format: winston.format.json(),
      transports: [
        new winston.transports.Console()
      ]
    });

    this.secretsManager = new SecretsManager({
      region: process.env.AWS_REGION
    });
  }

  async initialize() {
    try {
      // Fetch Redis credentials from Secrets Manager
      const redisSecret = await this.secretsManager.getSecretValue({
        SecretId: process.env.REDIS_SECRET_ARN as string
      }).promise();

      const redisConfig = JSON.parse(redisSecret.SecretString as string);

      // Connect to Redis
      this.redisClient = createClient({
        url: `redis://:${redisConfig.auth_token}@${redisConfig.host}:${redisConfig.port}`,
        socket: {
          tls: true,
          rejectUnauthorized: true
        }
      });

      // node-redis v4 commands (get/set/del) already return promises, so no promisify wrapper is needed

      await this.redisClient.connect();

      this.logger.info('Successfully connected to Redis');
    } catch (error) {
      this.logger.error('Failed to initialize session service', error);
      throw error;
    }
  }

  async createSession(userId: string, data: any, ttlSeconds: number = 3600): Promise<string> {
    const sessionId = uuidv4();
    const sessionData: SessionData = {
      userId,
      data,
      expiresAt: Date.now() + (ttlSeconds * 1000)
    };

    await this.redisClient.set(
      `session:${sessionId}`,
      JSON.stringify(sessionData),
      { EX: ttlSeconds }
    );

    return sessionId;
  }

  async getSession(sessionId: string): Promise<SessionData | null> {
    const data = await this.redisClient.get(`session:${sessionId}`);
    if (!data) return null;

    const sessionData: SessionData = JSON.parse(data);
    if (sessionData.expiresAt < Date.now()) {
      await this.deleteSession(sessionId);
      return null;
    }

    return sessionData;
  }

  async updateSession(sessionId: string, data: any): Promise<boolean> {
    const session = await this.getSession(sessionId);
    if (!session) return false;

    session.data = { ...session.data, ...data };
    const ttl = Math.floor((session.expiresAt - Date.now()) / 1000);

    await this.redisClient.set(
      `session:${sessionId}`,
      JSON.stringify(session),
      { EX: ttl }
    );

    return true;
  }

  async deleteSession(sessionId: string): Promise<boolean> {
    const result = await this.redisClient.del(`session:${sessionId}`);
    return result === 1;
  }

  async cleanup(): Promise<void> {
    if (this.redisClient) {
      await this.redisClient.quit();
    }
  }
}

// Express API
const app = express();
const sessionService = new SessionService();

app.use(express.json());

// Health check endpoint
app.get('/health', (req, res) => {
  res.json({ status: 'healthy' });
});

// Create session
app.post('/sessions', async (req, res) => {
  try {
    const { userId, data, ttl } = req.body;
    const sessionId = await sessionService.createSession(userId, data, ttl);
    res.status(201).json({ sessionId });
  } catch (error) {
    res.status(500).json({ error: 'Failed to create session' });
  }
});

// Get session
app.get('/sessions/:sessionId', async (req, res) => {
  try {
    const session = await sessionService.getSession(req.params.sessionId);
    if (!session) {
      res.status(404).json({ error: 'Session not found' });
      return;
    }
    res.json(session);
  } catch (error) {
    res.status(500).json({ error: 'Failed to get session' });
  }
});

// Update session
app.put('/sessions/:sessionId', async (req, res) => {
  try {
    const updated = await sessionService.updateSession(req.params.sessionId, req.body.data);
    if (!updated) {
      res.status(404).json({ error: 'Session not found' });
      return;
    }
    res.json({ success: true });
  } catch (error) {
    res.status(500).json({ error: 'Failed to update session' });
  }
});

// Delete session
app.delete('/sessions/:sessionId', async (req, res) => {
  try {
    const deleted = await sessionService.deleteSession(req.params.sessionId);
    if (!deleted) {
      res.status(404).json({ error: 'Session not found' });
      return;
    }
    res.status(204).send();
  } catch (error) {
    res.status(500).json({ error: 'Failed to delete session' });
  }
});

// Initialize service
async function start() {
  try {
    await sessionService.initialize();
    const port = process.env.PORT || 3000;
    app.listen(port, () => {
      console.log(`Session service listening on port ${port}`);
    });
  } catch (error) {
    console.error('Failed to start service:', error);
    process.exit(1);
  }
}

start();

process.on('SIGTERM', async () => {
  await sessionService.cleanup();
  process.exit(0);
});

3. Kubernetes Deployment for the Session Service

yaml
# infrastructure/kubernetes/base/session-service/deployment.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: session-service
  labels:
    app: session-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: session-service
  template:
    metadata:
      labels:
        app: session-service
    spec:
      serviceAccountName: secrets-access
      containers:
      - name: session-service
        image: ${ECR_REGISTRY}/session-service:${IMAGE_TAG}
        ports:
        - containerPort: 3000
        env:
        - name: PORT
          value: "3000"
        - name: NODE_ENV
          value: ${ENVIRONMENT}
        - name: AWS_REGION
          value: ${AWS_REGION}
        - name: REDIS_SECRET_ARN
          value: "arn:aws:secretsmanager:${AWS_REGION}:${ACCOUNT_ID}:secret:${ENVIRONMENT}/redis"
        resources:
          requests:
            cpu: "100m"
            memory: "256Mi"
          limits:
            cpu: "500m"
            memory: "512Mi"
        livenessProbe:
          httpGet:
            path: /health
            port: 3000
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /health
            port: 3000
          initialDelaySeconds: 5
          periodSeconds: 5
        startupProbe:
          httpGet:
            path: /health
            port: 3000
          failureThreshold: 30
          periodSeconds: 10

---
apiVersion: v1
kind: Service
metadata:
  name: session-service
spec:
  selector:
    app: session-service
  ports:
  - port: 80
    targetPort: 3000
  type: ClusterIP

---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: session-service
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: session-service
  minReplicas: 3
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80

---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: session-service
spec:
  podSelector:
    matchLabels:
      app: session-service
  policyTypes:
  - Ingress
  - Egress
  ingress:
  - from:
    - podSelector:
        matchLabels:
          type: api
    ports:
    - protocol: TCP
      port: 3000
  egress:
  - to:
    - ipBlock:
        cidr: ${VPC_CIDR}
    ports:
    - protocol: TCP
      port: 6379

Part 3 Verification

1. Verify Redis:

  • [ ] Cluster created correctly
  • [ ] Replication working
  • [ ] Encryption configured
  • [ ] Connectivity from EKS

2. Verify the Session Service:

  • [ ] Deployment successful
  • [ ] HPA working
  • [ ] Network Policies applied
  • [ ] Probes correct

3. Verify high availability (see the spot-check commands after this list):

  • [ ] Multiple replicas running
  • [ ] Automatic failover
  • [ ] Correct load balancing
  • [ ] Recovery from failures
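
Some commands that can support this checklist (resource names come from the Terraform and manifests above; a dev environment is assumed):

bash
aws elasticache describe-replication-groups --replication-group-id dev-redis-cluster \
  --query "ReplicationGroups[0].{Status:Status,Failover:AutomaticFailover}"
kubectl get deployment session-service
kubectl get hpa session-service
kubectl get networkpolicy session-service
kubectl get pods -l app=session-service -o wide   # replicas should be spread across nodes/AZs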

Part 4: CI/CD Pipeline and Monitoring

1. GitHub Actions Pipeline

yaml
# .github/workflows/ci-cd.yaml

name: CI/CD Pipeline

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]

env:
  AWS_REGION: us-east-1
  ECR_REPOSITORY: session-service
  EKS_CLUSTER_NAME: main-cluster

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    
    steps:
    - uses: actions/checkout@v3
    
    - name: Setup Node.js
      uses: actions/setup-node@v3
      with:
        node-version: '18'
        cache: 'npm'
    
    - name: Install dependencies
      run: npm ci
    
    - name: Run tests
      run: npm test
    
    - name: Run linting
      run: npm run lint
    
    - name: Run security audit
      run: npm audit

  build:
    name: Build
    needs: test
    runs-on: ubuntu-latest
    if: github.event_name == 'push'
    outputs:
      image: ${{ steps.image.outputs.image }}
    
    steps:
    - uses: actions/checkout@v3
    
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ env.AWS_REGION }}
    
    - name: Login to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v1
    
    - name: Build and push image
      env:
        ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
        IMAGE_TAG: ${{ github.sha }}
      run: |
        docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
        docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
    
    - name: Save image info for deployment
      id: image
      run: |
        echo "image=${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY }}:${{ github.sha }}" >> "$GITHUB_OUTPUT"

  deploy-dev:
    name: Deploy to Dev
    needs: build
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/develop'
    
    steps:
    - uses: actions/checkout@v3
    
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ env.AWS_REGION }}
    
    - name: Update kubeconfig
      run: aws eks update-kubeconfig --name dev-$EKS_CLUSTER_NAME   # clusters are named <env>-<cluster> in Part 1
    
    - name: Deploy to Dev
      env:
        IMAGE: ${{ needs.build.outputs.image }}
      run: |
        cd infrastructure/kubernetes/overlays/dev
        kustomize edit set image session-service=$IMAGE
        kustomize build | kubectl apply -f -
        kubectl rollout status deployment/session-service -n default

  deploy-prod:
    name: Deploy to Production
    needs: [build, deploy-dev]
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    environment: production
    
    steps:
    - uses: actions/checkout@v3
    
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ env.AWS_REGION }}
    
    - name: Update kubeconfig
      run: aws eks update-kubeconfig --name prod-$EKS_CLUSTER_NAME   # clusters are named <env>-<cluster> in Part 1
    
    - name: Deploy to Production
      env:
        IMAGE: ${{ needs.build.outputs.image }}
      run: |
        cd infrastructure/kubernetes/overlays/prod
        kustomize edit set image session-service=$IMAGE
        kustomize build | kubectl apply -f -
        kubectl rollout status deployment/session-service -n default

2. Monitoring with CloudWatch Container Insights

yaml
# infrastructure/kubernetes/base/monitoring/container-insights.yaml

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cloudwatch-agent
  namespace: amazon-cloudwatch
spec:
  selector:
    matchLabels:
      name: cloudwatch-agent
  template:
    metadata:
      labels:
        name: cloudwatch-agent
    spec:
      containers:
      - name: cloudwatch-agent
        image: public.ecr.aws/cloudwatch-agent/cloudwatch-agent:latest
        resources:
          limits:
            cpu: 200m
            memory: 200Mi
          requests:
            cpu: 200m
            memory: 200Mi
        env:
          - name: CLUSTER_NAME
            value: ${CLUSTER_NAME}
          - name: HOST_IP
            valueFrom:
              fieldRef:
                fieldPath: status.hostIP
          - name: HOST_NAME
            valueFrom:
              fieldRef:
                fieldPath: spec.nodeName
          - name: K8S_NAMESPACE
            valueFrom:
              fieldRef:
                fieldPath: metadata.namespace
        volumeMounts:
          - name: rootfs
            mountPath: /rootfs
            readOnly: true
          - name: dockersock
            mountPath: /var/run/docker.sock
            readOnly: true
          - name: varlibdocker
            mountPath: /var/lib/docker
            readOnly: true
          - name: sys
            mountPath: /sys
            readOnly: true
          - name: devdisk
            mountPath: /dev/disk
            readOnly: true
      volumes:
        - name: rootfs
          hostPath:
            path: /
        - name: dockersock
          hostPath:
            path: /var/run/docker.sock
        - name: varlibdocker
          hostPath:
            path: /var/lib/docker
        - name: sys
          hostPath:
            path: /sys
        - name: devdisk
          hostPath:
            path: /dev/disk/

---
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: amazon-cloudwatch
data:
  prometheus.yaml: |
    global:
      scrape_interval: 1m
      scrape_timeout: 10s
    scrape_configs:
    - job_name: 'kubernetes-pods'
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__

---
apiVersion: v1
kind: ConfigMap
metadata:
  name: cwagent-config
  namespace: amazon-cloudwatch
data:
  cwagentconfig.json: |
    {
      "logs": {
        "metrics_collected": {
          "kubernetes": {
            "cluster_name": "${CLUSTER_NAME}",
            "metrics_collection_interval": 60
          }
        },
        "force_flush_interval": 5
      }
    }

3. Alarm and Dashboard Implementation

hcl
# infrastructure/terraform/modules/monitoring/main.tf

resource "aws_cloudwatch_dashboard" "session_service" {
  dashboard_name = "${var.environment}-session-service"

  dashboard_body = jsonencode({
    widgets = [
      {
        type = "metric"
        properties = {
          metrics = [
            ["AWS/ECS", "CPUUtilization", "ServiceName", "session-service", "ClusterName", var.cluster_name]
          ]
          period = 300
          stat   = "Average"
          region = var.region
          title  = "CPU Utilization"
        }
      },
      {
        type = "metric"
        properties = {
          metrics = [
            ["AWS/ECS", "MemoryUtilization", "ServiceName", "session-service", "ClusterName", var.cluster_name]
          ]
          period = 300
          stat   = "Average"
          region = var.region
          title  = "Memory Utilization"
        }
      },
      {
        type = "metric"
        properties = {
          metrics = [
            ["AWS/ApplicationELB", "RequestCount", "TargetGroup", var.target_group_arn]
          ]
          period = 300
          stat   = "Sum"
          region = var.region
          title  = "Request Count"
        }
      },
      {
        type = "metric"
        properties = {
          metrics = [
            ["AWS/ApplicationELB", "TargetResponseTime", "TargetGroup", var.target_group_arn]
          ]
          period = 300
          stat   = "Average"
          region = var.region
          title  = "Response Time"
        }
      }
    ]
  })
}

# CPU Utilization Alarm
resource "aws_cloudwatch_metric_alarm" "cpu_utilization_high" {
  alarm_name          = "${var.environment}-session-service-cpu-utilization-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "pod_cpu_utilization"   # EKS Container Insights metric, not AWS/ECS
  namespace           = "ContainerInsights"
  period             = "300"
  statistic          = "Average"
  threshold          = "80"
  alarm_description  = "CPU utilization is too high"
  alarm_actions      = [var.sns_topic_arn]

  dimensions = {
    ClusterName = var.cluster_name
    Namespace   = "default"
    Service     = "session-service"
  }
}

# Memory Utilization Alarm
resource "aws_cloudwatch_metric_alarm" "memory_utilization_high" {
  alarm_name          = "${var.environment}-session-service-memory-utilization-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "pod_memory_utilization"   # EKS Container Insights metric, not AWS/ECS
  namespace           = "ContainerInsights"
  period             = "300"
  statistic          = "Average"
  threshold          = "80"
  alarm_description  = "Memory utilization is too high"
  alarm_actions      = [var.sns_topic_arn]

  dimensions = {
    ClusterName = var.cluster_name
    Namespace   = "default"
    Service     = "session-service"
  }
}

# Redis Connection Error Alarm
resource "aws_cloudwatch_metric_alarm" "redis_connection_errors" {
  alarm_name          = "${var.environment}-session-service-redis-connection-errors"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "RedisConnectionErrors"
  namespace           = "CustomMetrics/SessionService"
  period             = "300"
  statistic          = "Sum"
  threshold          = "5"
  alarm_description  = "Redis connection errors detected"
  alarm_actions      = [var.sns_topic_arn]
}

# API Error Rate Alarm
resource "aws_cloudwatch_metric_alarm" "api_error_rate" {
  alarm_name          = "${var.environment}-session-service-error-rate"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "HTTPCode_Target_5XX_Count"
  namespace           = "AWS/ApplicationELB"
  period             = "300"
  statistic          = "Sum"
  threshold          = "10"
  alarm_description  = "API error rate is too high"
  alarm_actions      = [var.sns_topic_arn]

  dimensions = {
    TargetGroup = var.target_group_arn
  }
}

# SNS Topic for Alarms
resource "aws_sns_topic" "alarms" {
  name = "${var.environment}-session-service-alarms"
}

# Variables
variable "environment" {
  type = string
}

variable "region" {
  type = string
}

variable "cluster_name" {
  type = string
}

variable "target_group_arn" {
  type = string
}

variable "sns_topic_arn" {
  type = string
}

# Outputs
output "dashboard_url" {
  value = "https://${var.region}.console.aws.amazon.com/cloudwatch/home?region=${var.region}#dashboards:name=${aws_cloudwatch_dashboard.session_service.dashboard_name}"
}

output "sns_topic_arn" {
  value = aws_sns_topic.alarms.arn
}

Part 4 Verification

1. Verify CI/CD:

  • [ ] Pipeline running correctly
  • [ ] Tests passing
  • [ ] Successful builds
  • [ ] Automatic deployments

2. Verify monitoring:

  • [ ] Container Insights active
  • [ ] Metrics collected
  • [ ] Dashboards created
  • [ ] Alarms configured

3. Verify notifications (see the spot-check commands after this list):

  • [ ] SNS topics created
  • [ ] Alarms sending alerts
  • [ ] Notifications arriving
  • [ ] Escalation working
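
A handful of read-only checks that map onto this list (resource names come from the Terraform above; a dev environment is assumed):

bash
kubectl -n amazon-cloudwatch get daemonset cloudwatch-agent
aws cloudwatch list-dashboards --query "DashboardEntries[].DashboardName"
aws cloudwatch describe-alarms --alarm-name-prefix "dev-session-service" --query "MetricAlarms[].{Name:AlarmName,State:StateValue}"
aws sns list-topics --query "Topics[?contains(TopicArn, 'session-service-alarms')]"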

Part 5: Load Testing and Documentation

1. Load Testing with k6

javascript
// tests/load/session-service.js

import http from 'k6/http';
import { check, group, sleep } from 'k6';
import { Rate } from 'k6/metrics';

const errorRate = new Rate('errors');

export const options = {
  stages: [
    { duration: '2m', target: 100 }, // Ramp up to 100 users
    { duration: '5m', target: 100 }, // Stay at 100 users
    { duration: '2m', target: 200 }, // Ramp up to 200 users
    { duration: '5m', target: 200 }, // Stay at 200 users
    { duration: '2m', target: 300 }, // Ramp up to 300 users
    { duration: '5m', target: 300 }, // Stay at 300 users
    { duration: '2m', target: 0 },   // Ramp down to 0 users
  ],
  thresholds: {
    http_req_duration: ['p(95)<500'], // 95% of requests must complete below 500ms
    'http_req_duration{type:createSession}': ['p(95)<600'],
    'http_req_duration{type:getSession}': ['p(95)<400'],
    errors: ['rate<0.1'], // Error rate must be less than 10%
  },
};

const BASE_URL = __ENV.API_URL || 'http://localhost:3000';

const generateTestData = () => ({
  userId: `user-${Math.random().toString(36).substr(2, 9)}`,
  data: {
    lastAccess: new Date().toISOString(),
    userAgent: 'k6-test',
    ipAddress: '192.168.1.1'
  }
});

export function setup() {
  // Create initial test sessions
  const sessions = [];
  for (let i = 0; i < 10; i++) {
    const response = http.post(`${BASE_URL}/sessions`, JSON.stringify(generateTestData()), {
      headers: { 'Content-Type': 'application/json' }
    });
    if (response.status === 201) {
      sessions.push(response.json().sessionId);
    }
  }
  return { sessions };
}

export default function(data) {
  const testData = generateTestData();

  group('session_creation', function() {
    const response = http.post(
      `${BASE_URL}/sessions`,
      JSON.stringify(testData),
      {
        headers: { 'Content-Type': 'application/json' },
        tags: { type: 'createSession' }
      }
    );

    check(response, {
      'session created successfully': (r) => r.status === 201,
      'has valid session id': (r) => r.json().sessionId !== undefined,
    }) || errorRate.add(1);

    if (response.status === 201) {
      data.sessions.push(response.json().sessionId);
    }
  });

  group('session_retrieval', function() {
    if (data.sessions.length > 0) {
      const sessionId = data.sessions[Math.floor(Math.random() * data.sessions.length)];
      const response = http.get(
        `${BASE_URL}/sessions/${sessionId}`,
        { tags: { type: 'getSession' } }
      );

      check(response, {
        'session retrieved successfully': (r) => r.status === 200,
        'has valid data': (r) => r.json().data !== undefined,
      }) || errorRate.add(1);
    }
  });

  group('session_update', function() {
    if (data.sessions.length > 0) {
      const sessionId = data.sessions[Math.floor(Math.random() * data.sessions.length)];
      const updateData = {
        data: {
          lastAccess: new Date().toISOString(),
          updated: true
        }
      };

      const response = http.put(
        `${BASE_URL}/sessions/${sessionId}`,
        JSON.stringify(updateData),
        {
          headers: { 'Content-Type': 'application/json' },
          tags: { type: 'updateSession' }
        }
      );

      check(response, {
        'session updated successfully': (r) => r.status === 200,
      }) || errorRate.add(1);
    }
  });

  sleep(1);
}

export function teardown(data) {
  // Cleanup test sessions
  for (const sessionId of data.sessions) {
    http.del(`${BASE_URL}/sessions/${sessionId}`);
  }
}

2. Automated Load Testing Script

bash
#!/bin/bash
# scripts/run-load-tests.sh

set -e

ENVIRONMENT=$1
REGION=$2
TEST_DURATION=${3:-"30m"}

# Validate parameters
if [[ ! "$ENVIRONMENT" =~ ^(dev|stg|prod)$ ]]; then
    echo "Environment must be dev, stg, or prod"
    exit 1
fi

# Get the service URL
SERVICE_URL=$(aws ssm get-parameter \
    --name "/${ENVIRONMENT}/session-service/url" \
    --query "Parameter.Value" \
    --output text)

# Test run metadata for CloudWatch metrics
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
TEST_ID="load-test-${TIMESTAMP}"

# Create a directory for results
mkdir -p test-results/$TEST_ID

# Run the load tests
echo "Starting load test against $SERVICE_URL"
k6 run \
    -o json=test-results/$TEST_ID/raw.json \
    -o cloud \
    --tag environment=$ENVIRONMENT \
    --tag test_id=$TEST_ID \
    tests/load/session-service.js

# Process results
echo "Processing test results..."
python3 scripts/process_test_results.py \
    --input test-results/$TEST_ID/raw.json \
    --output test-results/$TEST_ID/summary.json

# Publish metrics to CloudWatch
aws cloudwatch put-metric-data \
    --namespace "LoadTest/SessionService" \
    --metric-data \
    file://test-results/$TEST_ID/cloudwatch-metrics.json

# Generate the report
echo "Generating test report..."
python3 scripts/generate_report.py \
    --input test-results/$TEST_ID/summary.json \
    --template templates/load-test-report.md \
    --output test-results/$TEST_ID/report.md

# Send result notifications
if [ -f test-results/$TEST_ID/summary.json ]; then
    # Get the SNS topic ARN
    SNS_TOPIC_ARN=$(aws ssm get-parameter \
        --name "/${ENVIRONMENT}/monitoring/sns-topic" \
        --query "Parameter.Value" \
        --output text)

    # Send the notification
    aws sns publish \
        --topic-arn $SNS_TOPIC_ARN \
        --subject "Load Test Results - $ENVIRONMENT - $TIMESTAMP" \
        --message file://test-results/$TEST_ID/summary.json
fi

# Store results in S3
aws s3 cp \
    test-results/$TEST_ID \
    s3://load-test-results-${ENVIRONMENT}/session-service/$TEST_ID \
    --recursive

echo "Load test completed. Results available in:"
echo "- Local: test-results/$TEST_ID"
echo "- S3: s3://load-test-results-${ENVIRONMENT}/session-service/$TEST_ID"

3. Technical Documentation

Session Service - Technical Documentation

Overview

The Session Service is a high-availability solution for managing user sessions across microservices. It provides a distributed session storage system using Redis as the backend and ensures high availability through multiple redundancy mechanisms.
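
To get a concrete feel for the API surface described here, the endpoints can be exercised with curl once the service is reachable locally; the port-forward and the payloads below are illustrative.

bash
# Assumes: kubectl port-forward svc/session-service 8080:80
curl -s -X POST http://localhost:8080/sessions \
  -H 'Content-Type: application/json' \
  -d '{"userId":"user-123","data":{"cart":[]},"ttl":3600}'        # -> {"sessionId":"..."}

curl -s http://localhost:8080/sessions/<sessionId>                 # read a session
curl -s -X PUT http://localhost:8080/sessions/<sessionId> \
  -H 'Content-Type: application/json' \
  -d '{"data":{"cart":["item-1"]}}'                                # merge new data into the session
curl -s -o /dev/null -w '%{http_code}\n' -X DELETE http://localhost:8080/sessions/<sessionId>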

Architecture

Components

  1. Session Service API

    • Node.js/Express application
    • Handles session CRUD operations
    • Implements distributed locking
    • Provides health checks and monitoring
  2. Redis Cluster

    • ElastiCache for Redis
    • Multi-AZ deployment
    • Automatic failover
    • Encryption at rest and in transit
  3. Load Balancer

    • AWS Application Load Balancer
    • Health checks
    • SSL termination
    • Request distribution

High Availability Features

  1. Service Redundancy

    • Multiple pods across availability zones
    • Automatic pod scaling (HPA)
    • Pod anti-affinity rules
    • Rolling updates
  2. Data Redundancy

    • Redis replication
    • Automatic backups
    • Point-in-time recovery
    • Cross-region replication (optional)
  3. Network Resilience

    • Retry mechanisms
    • Circuit breakers
    • Timeout policies
    • Connection pooling

Deployment

Prerequisites

  1. AWS Account with appropriate permissions
  2. EKS cluster
  3. Redis cluster
  4. Required IAM roles and policies

Environment Setup

bash
# Setup infrastructure
./scripts/setup-eks.sh <environment> <region>

# Setup Redis
./scripts/setup-redis.sh <environment> <region>

# Setup monitoring
./scripts/setup-monitoring.sh <environment> <region>

Deployment Process

  1. Infrastructure Deployment

    bash
    cd infrastructure/terraform
    terraform init
    terraform apply -var-file=environments/<env>.tfvars
  2. Application Deployment

    bash
    # Deploy application
    kubectl apply -k kubernetes/overlays/<environment>
    
    # Verify deployment
    kubectl get pods -l app=session-service

Configuration

Environment Variables

Variable           Description                    Default      Required
PORT               Service port                   3000         No
NODE_ENV           Environment                    development  No
REDIS_SECRET_ARN   Redis credentials secret ARN   -            Yes
AWS_REGION         AWS Region                     -            Yes

Redis Configuration

  • Connection Pool

    • Min: 5
    • Max: 20
    • Idle timeout: 30s
  • Timeouts

    • Connect: 1s
    • Operation: 2s
    • Idle: 60s

Security

  1. Network Security

    • VPC isolation
    • Security groups
    • Network policies
    • TLS enforcement
  2. Authentication & Authorization

    • IAM roles
    • Service accounts
    • Secret management
    • Token validation
  3. Encryption

    • TLS in transit
    • At rest encryption
    • Key rotation
    • Certificate management

Monitoring

Metrics

  1. Service Metrics

    • Request rate
    • Error rate
    • Latency
    • Success rate
  2. Redis Metrics

    • Connection count
    • Memory usage
    • Cache hits/misses
    • Replication lag
  3. Custom Metrics

    • Session count
    • Session creation rate
    • Session expiration rate
    • Cache efficiency

Alerts

  1. Critical Alerts

    • High error rate (>1%)
    • High latency (p95 > 500ms)
    • Redis connection failures
    • Pod crashes
  2. Warning Alerts

    • Increased latency (p95 > 200ms)
    • High memory usage (>80%)
    • Cache miss rate (>20%)
    • Replica lag (>10s)

Troubleshooting

Common Issues (sample diagnostic commands follow these lists)

  1. Connection Issues

    • Check security groups
    • Verify network policies
    • Validate credentials
    • Check Redis status
  2. Performance Issues

    • Monitor Redis metrics
    • Check pod resources
    • Verify connection pools
    • Analyze slow queries
  3. Scaling Issues

    • Check HPA status
    • Verify node resources
    • Monitor pod metrics
    • Check cluster autoscaler
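
A few generic diagnostics that cover the checks above (resource names follow the rest of this document; <env> is a placeholder):

bash
kubectl describe pod -l app=session-service       # events, probe failures, restarts
kubectl logs deployment/session-service --tail=100
kubectl get hpa session-service                   # current vs. target utilization
kubectl top pods -l app=session-service           # requires Metrics Server
aws elasticache describe-replication-groups --replication-group-id <env>-redis-cluster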

Logging

Logs are centralized in CloudWatch Logs:

bash
# View service logs
aws logs get-log-events \
    --log-group-name "/aws/containerinsights/{cluster}/application" \
    --log-stream-name "session-service-{pod}"

# View Redis logs
aws logs get-log-events \
    --log-group-name "/aws/elasticache/{cluster}"

Performance

Capacity Planning

  1. Pod Resources

    • CPU: 100m-500m
    • Memory: 256Mi-512Mi
    • Storage: N/A (stateless)
  2. Redis Sizing

    • Instance type: cache.r6g.large
    • Memory: 16GB
    • Connections: 1000
    • IOPS: 10000

Scaling Guidelines

  1. Horizontal Scaling

    • Min pods: 3
    • Max pods: 10
    • Scale up: CPU > 70%
    • Scale down: CPU < 50%
  2. Redis Scaling

    • Monitor memory usage
    • Monitor connection count
    • Monitor IOPS
    • Consider read replicas

Best Practices

  1. Development

    • Use feature flags
    • Follow GitOps
    • Implement CI/CD
    • Maintain test coverage
  2. Operations

    • Regular backups
    • Disaster recovery tests
    • Security scanning
    • Performance testing
  3. Monitoring

    • Use tracing
    • Implement logging
    • Monitor metrics
    • Set up alerts

Support

  • Documentation: /docs
  • Runbooks: /runbooks
  • Issue tracker: JIRA
  • Chat: Slack #session-service

Final System Verification

1. Verify the load tests:

  • [ ] k6 scripts running correctly
  • [ ] Metrics being collected
  • [ ] Reports being generated
  • [ ] Notifications sent

2. Verify the documentation:

  • [ ] Technical documentation complete
  • [ ] Diagrams up to date
  • [ ] Procedures documented
  • [ ] Troubleshooting guide available

3. Verify the complete system:

  • [ ] High availability working
  • [ ] Performance meets requirements
  • [ ] Monitoring active
  • [ ] CI/CD operational

Key Points of the System

  1. High Availability

    • Multi-AZ deployment
    • Automatic failover
    • Load balancing
    • Auto-scaling
  2. Security

    • Encryption at rest and in transit
    • IAM roles and policies
    • Network policies
    • Secrets management
  3. Monitoring

    • CloudWatch metrics
    • Container insights
    • Custom dashboards
    • Alerting
  4. DevOps

    • CI/CD pipeline
    • Infrastructure as Code
    • Automated testing
    • Documentation

Part 2: CI/CD Configuration

1. CodeCommit Repository

hcl
# infrastructure/terraform/cicd/codecommit.tf

resource "aws_codecommit_repository" "pedidos_app" {
  repository_name = "${var.environment}-pedidos-app"
  description     = "Repository for the orders application - ${var.environment}"

  tags = {
    Environment = var.environment
  }
}

# Trigger to detect repository changes
resource "aws_cloudwatch_event_rule" "codecommit_changes" {
  name        = "${var.environment}-codecommit-changes"
  description = "Detectar cambios en CodeCommit"

  event_pattern = jsonencode({
    source      = ["aws.codecommit"]
    detail-type = ["CodeCommit Repository State Change"]
    resources   = [aws_codecommit_repository.pedidos_app.arn]
    detail = {
      referenceType = ["branch"]
      referenceName = [var.environment]
    }
  })
}

resource "aws_cloudwatch_event_target" "start_pipeline" {
  rule      = aws_cloudwatch_event_rule.codecommit_changes.name
  target_id = "StartPipeline"
  arn       = aws_codepipeline.main_pipeline.arn
  role_arn  = aws_iam_role.events_role.arn
}

# IAM role for events
resource "aws_iam_role" "events_role" {
  name = "${var.environment}-events-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "events.amazonaws.com"
        }
      }
    ]
  })
}

# Policy to start the pipeline
resource "aws_iam_role_policy" "start_pipeline" {
  name = "start-pipeline"
  role = aws_iam_role.events_role.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "codepipeline:StartPipelineExecution"
        ]
        Resource = aws_codepipeline.main_pipeline.arn
      }
    ]
  })
}

2. CodeBuild Project

hcl
# infrastructure/terraform/cicd/codebuild.tf

resource "aws_codebuild_project" "build_project" {
  name          = "${var.environment}-pedidos-build"
  description   = "Build project for the orders application"
  build_timeout = "30"
  service_role  = aws_iam_role.codebuild_role.arn

  source {
    type      = "CODEPIPELINE"
    buildspec = "buildspec.yml"
  }

  environment {
    compute_type                = "BUILD_GENERAL1_SMALL"
    image                      = "aws/codebuild/amazonlinux2-x86_64-standard:4.0"
    type                       = "LINUX_CONTAINER"
    image_pull_credentials_type = "CODEBUILD"
    privileged_mode            = true

    environment_variable {
      name  = "ENVIRONMENT"
      value = var.environment
    }

    environment_variable {
      name  = "AWS_REGION"
      value = var.region
    }

    environment_variable {
      name  = "ECR_REPOSITORY_URL"
      value = aws_ecr_repository.pedidos_app.repository_url
    }
  }

  artifacts {
    type = "CODEPIPELINE"
  }

  cache {
    type  = "LOCAL"
    modes = ["LOCAL_DOCKER_LAYER_CACHE", "LOCAL_SOURCE_CACHE"]
  }

  logs_config {
    cloudwatch_logs {
      group_name  = "/aws/codebuild/${var.environment}-pedidos-build"
      stream_name = "build-log"
    }
  }

  vpc_config {
    vpc_id             = var.vpc_id
    subnets           = var.private_subnet_ids
    security_group_ids = [aws_security_group.codebuild.id]
  }

  tags = {
    Environment = var.environment
  }
}

# Security group for CodeBuild
resource "aws_security_group" "codebuild" {
  name        = "${var.environment}-codebuild-sg"
  description = "Security group for CodeBuild"
  vpc_id      = var.vpc_id

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Environment = var.environment
  }
}

# IAM role for CodeBuild
resource "aws_iam_role" "codebuild_role" {
  name = "${var.environment}-codebuild-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "codebuild.amazonaws.com"
        }
      }
    ]
  })
}

# IAM policies for CodeBuild
resource "aws_iam_role_policy" "codebuild_policy" {
  name = "${var.environment}-codebuild-policy"
  role = aws_iam_role.codebuild_role.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Resource = ["*"]
        Action = [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:PutLogEvents",
          "ecr:GetAuthorizationToken",
          "ecr:BatchCheckLayerAvailability",
          "ecr:GetDownloadUrlForLayer",
          "ecr:GetRepositoryPolicy",
          "ecr:DescribeRepositories",
          "ecr:ListImages",
          "ecr:BatchGetImage",
          "ecr:InitiateLayerUpload",
          "ecr:UploadLayerPart",
          "ecr:CompleteLayerUpload",
          "ecr:PutImage",
          "ssm:GetParameter",
          "ssm:GetParameters",
          "ssm:GetParametersByPath"
        ]
      }
    ]
  })
}

3. BuildSpec Configuration

yaml
# buildspec.yml

version: 0.2

phases:
  install:
    runtime-versions:
      nodejs: 18
      docker: 20
    commands:
      - npm install -g npm@latest
      - npm install -g typescript
      - npm install -g jest

  pre_build:
    commands:
      # Fetch environment configuration
      - export APP_CONFIG=$(aws ssm get-parameter --name "/${ENVIRONMENT}/app/common" --query "Parameter.Value" --output text)
      - export COMMIT_HASH=$(echo $CODEBUILD_RESOLVED_SOURCE_VERSION | cut -c 1-7)
      - export IMAGE_TAG=${ENVIRONMENT}-${COMMIT_HASH}
      
      # Log in to ECR
      - aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${ECR_REPOSITORY_URL}
      
      # Install dependencies
      - cd apps/pedidos-api
      - npm ci
      
      # Run tests
      - npm run test
      - npm run lint
      
      # Security analysis
      - npm audit
      - npm run security-scan

  build:
    commands:
      # Build images
      - docker build -t ${ECR_REPOSITORY_URL}:${IMAGE_TAG} .
      - docker tag ${ECR_REPOSITORY_URL}:${IMAGE_TAG} ${ECR_REPOSITORY_URL}:latest
      
      # Generate Kubernetes manifests
      - cd ../../infrastructure/kubernetes/overlays/${ENVIRONMENT}
      - kustomize edit set image pedidos-api=${ECR_REPOSITORY_URL}:${IMAGE_TAG}
      - kustomize build > ../../../k8s-manifests.yaml

  post_build:
    commands:
      # Push images
      - docker push ${ECR_REPOSITORY_URL}:${IMAGE_TAG}
      - docker push ${ECR_REPOSITORY_URL}:latest
      
      # Update deployment parameters
      - aws ssm put-parameter --name "/${ENVIRONMENT}/deploy/image-tag" --value "${IMAGE_TAG}" --type "String" --overwrite
      
      # Generate the test report
      - cd ../../../
      - npm run generate-test-report
      
artifacts:
  files:
    - infrastructure/k8s-manifests.yaml
    - scripts/deploy.sh
    - test-reports/**/*
  discard-paths: no

cache:
  paths:
    - 'node_modules/**/*'
    - '/root/.npm/**/*'

4. CI/CD Setup Script

bash
#!/bin/bash
# scripts/setup-cicd.sh

set -e

ENVIRONMENT=$1
REGION=$2
REPOSITORY_NAME="pedidos-app"

# Validate parameters
if [[ ! "$ENVIRONMENT" =~ ^(dev|stg|prod)$ ]]; then
    echo "Environment must be dev, stg, or prod"
    exit 1
fi

echo "Configurando CI/CD para ambiente $ENVIRONMENT..."

# Create a temporary configuration file
cat > /tmp/cicd-config.json <<EOF
{
  "environment": "${ENVIRONMENT}",
  "region": "${REGION}",
  "repository_name": "${REPOSITORY_NAME}",
  "branch_name": "${ENVIRONMENT}"
}
EOF

# Apply the Terraform configuration
cd infrastructure/terraform/cicd
terraform init
terraform apply -var-file=/tmp/cicd-config.json

# Configure the CodeCommit remote
REPO_URL=$(terraform output -raw repository_url)
git remote add aws $REPO_URL

# Configure Git credentials
git config --local credential.helper '!aws codecommit credential-helper $@'
git config --local credential.UseHttpPath true

# Create and push the environment branch
git checkout -b $ENVIRONMENT
git push aws $ENVIRONMENT

# Pipeline triggering on pushes to this branch is handled by the EventBridge rule
# created in codecommit.tf, so no extra repository trigger needs to be configured here.

echo "Configuración de CI/CD completada exitosamente"
echo "URL del repositorio: $REPO_URL"
echo "Pipeline configurado para rama: $ENVIRONMENT"

# Clean up temporary files
rm /tmp/cicd-config.json

Part 2 Verification:

1. Verify CodeCommit:

  • [ ] Repository created
  • [ ] Branches configured
  • [ ] Triggers configured
  • [ ] Access configured

2. Verify CodeBuild:

  • [ ] Project created
  • [ ] BuildSpec working
  • [ ] Environment variables available
  • [ ] Cache configured

3. Verify IAM (see the spot-check commands after this list):

  • [ ] Roles created
  • [ ] Policies attached
  • [ ] Least-privilege permissions
  • [ ] Cross-service access
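
These read-only calls can confirm most of the items above (a dev environment is assumed; names follow the Terraform in this part):

bash
aws codecommit get-repository --repository-name dev-pedidos-app --query "repositoryMetadata.cloneUrlHttp"
aws codebuild batch-get-projects --names dev-pedidos-build --query "projects[0].{Name:name,Cache:cache.type}"
aws iam get-role --role-name dev-codebuild-role --query "Role.Arn"
aws iam list-role-policies --role-name dev-codebuild-role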