Diferències
Ací es mostren les diferències entre la revisió seleccionada i la versió actual de la pàgina.
| Ambdós costats versió prèvia Revisió prèvia Següent revisió | Revisió prèvia | ||
| info:cursos:pue:devops:sesion7 [08/03/2019 10:03] – [otros] mate | info:cursos:pue:devops:sesion7 [04/06/2019 08:36] (actual) – [prometheus] mate | ||
|---|---|---|---|
| Línia 48: | Línia 48: | ||
| * [[https:// | * [[https:// | ||
| + | == prometheus | ||
| + | <code yaml; swarm-prometheus.yml> | ||
| + | version: " | ||
| + | |||
| + | networks: | ||
| + | net: | ||
| + | driver: " | ||
| + | proxy: | ||
| + | external: true | ||
| + | |||
| + | volumes: | ||
| + | prometheus: | ||
| + | driver_opts: | ||
| + | type: " | ||
| + | o: " | ||
| + | device: ":/ | ||
| + | | ||
| + | grafana: | ||
| + | driver_opts: | ||
| + | type: " | ||
| + | o: " | ||
| + | device: ":/ | ||
| + | | ||
| + | alertmanager: | ||
| + | driver_opts: | ||
| + | type: " | ||
| + | o: " | ||
| + | device: ":/ | ||
| + | |||
| + | configs: | ||
| + | # dockerd_config: | ||
| + | # file: / | ||
| + | node_rules: | ||
| + | file: / | ||
| + | task_rules: | ||
| + | file: / | ||
| + | |||
| + | services: | ||
| + | # dockerd-exporter: | ||
| + | # image: stefanprodan/ | ||
| + | # networks: | ||
| + | # - net | ||
| + | # environment: | ||
| + | # - DOCKER_GWBRIDGE_IP=172.18.0.1 | ||
| + | # configs: | ||
| + | # - source: dockerd_config | ||
| + | # target: / | ||
| + | # deploy: | ||
| + | # mode: global | ||
| + | # resources: | ||
| + | # limits: | ||
| + | # memory: 128M | ||
| + | # reservations: | ||
| + | # memory: 64M | ||
| + | |||
| + | cadvisor: | ||
| + | image: google/ | ||
| + | networks: | ||
| + | - net | ||
| + | command: -logtostderr -docker_only | ||
| + | volumes: | ||
| + | - / | ||
| + | - /:/ | ||
| + | - / | ||
| + | - / | ||
| + | - / | ||
| + | deploy: | ||
| + | mode: global | ||
| + | resources: | ||
| + | limits: | ||
| + | memory: 128M | ||
| + | reservations: | ||
| + | memory: 64M | ||
| + | |||
| + | grafana: | ||
| + | image: stefanprodan/ | ||
| + | networks: | ||
| + | - net | ||
| + | environment: | ||
| + | - GF_SECURITY_ADMIN_USER=${ADMIN_USER: | ||
| + | - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD: | ||
| + | - GF_USERS_ALLOW_SIGN_UP=false | ||
| + | #- GF_SERVER_ROOT_URL=${GF_SERVER_ROOT_URL: | ||
| + | #- GF_SMTP_ENABLED=${GF_SMTP_ENABLED: | ||
| + | #- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS: | ||
| + | #- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME: | ||
| + | #- GF_SMTP_HOST=${GF_SMTP_HOST: | ||
| + | #- GF_SMTP_USER=${GF_SMTP_USER} | ||
| + | #- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD} | ||
| + | volumes: | ||
| + | - grafana:/ | ||
| + | deploy: | ||
| + | mode: replicated | ||
| + | replicas: 1 | ||
| + | placement: | ||
| + | constraints: | ||
| + | - node.role == manager | ||
| + | resources: | ||
| + | limits: | ||
| + | memory: 128M | ||
| + | reservations: | ||
| + | memory: 64M | ||
| + | labels: | ||
| + | - traefik.frontend.rule=Host: | ||
| + | - traefik.port=3000 | ||
| + | - traefik.docker.network=proxy | ||
| + | networks: | ||
| + | - default | ||
| + | - net | ||
| + | - proxy | ||
| + | |||
| + | alertmanager: | ||
| + | image: stefanprodan/ | ||
| + | networks: | ||
| + | - net | ||
| + | environment: | ||
| + | - SLACK_URL=${SLACK_URL: | ||
| + | - SLACK_CHANNEL=${SLACK_CHANNEL: | ||
| + | - SLACK_USER=${SLACK_USER: | ||
| + | command: | ||
| + | - ' | ||
| + | - ' | ||
| + | volumes: | ||
| + | - alertmanager:/ | ||
| + | deploy: | ||
| + | mode: replicated | ||
| + | replicas: 1 | ||
| + | placement: | ||
| + | constraints: | ||
| + | - node.role == manager | ||
| + | resources: | ||
| + | limits: | ||
| + | memory: 128M | ||
| + | reservations: | ||
| + | memory: 64M | ||
| + | labels: | ||
| + | - traefik.frontend.rule=Host: | ||
| + | - traefik.port=9093 | ||
| + | - traefik.docker.network=proxy | ||
| + | - traefik.frontend.auth.basic.users=${ADMIN_USER}: | ||
| + | networks: | ||
| + | - default | ||
| + | - net | ||
| + | - proxy | ||
| + | |||
| + | unsee: | ||
| + | image: cloudflare/ | ||
| + | networks: | ||
| + | - net | ||
| + | environment: | ||
| + | - " | ||
| + | deploy: | ||
| + | mode: replicated | ||
| + | replicas: 1 | ||
| + | labels: | ||
| + | - traefik.frontend.rule=Host: | ||
| + | - traefik.enable=true | ||
| + | - traefik.port=8080 | ||
| + | - traefik.tags=${TRAEFIK_PUBLIC_TAG: | ||
| + | - traefik.docker.network=proxy | ||
| + | # Traefik service that listens to HTTP | ||
| + | - traefik.redirectorservice.frontend.entryPoints=http | ||
| + | - traefik.redirectorservice.frontend.redirect.entryPoint=https | ||
| + | # Traefik service that listens to HTTPS | ||
| + | - traefik.webservice.frontend.entryPoints=https | ||
| + | - traefik.frontend.auth.basic.users=admin: | ||
| + | networks: | ||
| + | - default | ||
| + | - net | ||
| + | - proxy | ||
| + | |||
| + | node-exporter: | ||
| + | image: stefanprodan/ | ||
| + | networks: | ||
| + | - net | ||
| + | environment: | ||
| + | - NODE_ID={{.Node.ID}} | ||
| + | volumes: | ||
| + | - / | ||
| + | - / | ||
| + | - /:/ | ||
| + | - / | ||
| + | command: | ||
| + | - ' | ||
| + | - ' | ||
| + | - ' | ||
| + | - ' | ||
| + | - ' | ||
| + | deploy: | ||
| + | mode: global | ||
| + | resources: | ||
| + | limits: | ||
| + | memory: 128M | ||
| + | reservations: | ||
| + | memory: 64M | ||
| + | |||
| + | prometheus: | ||
| + | image: stefanprodan/ | ||
| + | networks: | ||
| + | - net | ||
| + | command: | ||
| + | - ' | ||
| + | - ' | ||
| + | - ' | ||
| + | volumes: | ||
| + | - prometheus:/ | ||
| + | configs: | ||
| + | - source: node_rules | ||
| + | target: / | ||
| + | - source: task_rules | ||
| + | target: / | ||
| + | deploy: | ||
| + | mode: replicated | ||
| + | replicas: 1 | ||
| + | placement: | ||
| + | constraints: | ||
| + | - node.role == manager | ||
| + | resources: | ||
| + | limits: | ||
| + | memory: 2048M | ||
| + | reservations: | ||
| + | memory: 128M | ||
| + | labels: | ||
| + | - traefik.frontend.rule=Host: | ||
| + | # - traefik.enable=true | ||
| + | - traefik.port=9090 | ||
| + | - traefik.tags=traefik-public | ||
| + | - traefik.docker.network=proxy | ||
| + | # Traefik service that listens to HTTP | ||
| + | # - traefik.redirectorservice.frontend.entryPoints=http | ||
| + | # - traefik.redirectorservice.frontend.redirect.entryPoint=https | ||
| + | # Traefik service that listens to HTTPS | ||
| + | # - traefik.webservice.frontend.entryPoints=https | ||
| + | # - traefik.frontend.auth.basic.users=admin: | ||
| + | |||
| + | networks: | ||
| + | - default | ||
| + | - net | ||
| + | - proxy | ||
| + | </ | ||
| + | |||
| + | <code yaml; swarm_node.rules.yaml> | ||
| + | groups: | ||
| + | - name: / | ||
| + | rules: | ||
| + | - alert: node_cpu_usage | ||
| + | expr: 100 - (avg(irate(node_cpu_seconds_total{mode=" | ||
| + | node_meta * 100) BY (node_name)) > 50 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: warning | ||
| + | annotations: | ||
| + | description: | ||
| + | $value}}%. | ||
| + | summary: CPU alert for Swarm node '{{ $labels.node_name }}' | ||
| + | - alert: node_memory_usage | ||
| + | expr: sum(((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) | ||
| + | * ON(instance) GROUP_LEFT(node_name) node_meta * 100) BY (node_name) > 80 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: warning | ||
| + | annotations: | ||
| + | description: | ||
| + | $value}}%. | ||
| + | summary: Memory alert for Swarm node '{{ $labels.node_name }}' | ||
| + | - alert: node_disk_usage | ||
| + | expr: ((node_filesystem_size_bytes{mountpoint="/ | ||
| + | * 100 / node_filesystem_size_bytes{mountpoint="/ | ||
| + | node_meta > 85 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: warning | ||
| + | annotations: | ||
| + | description: | ||
| + | $value}}%. | ||
| + | summary: Disk alert for Swarm node '{{ $labels.node_name }}' | ||
| + | - alert: node_disk_fill_rate_6h | ||
| + | expr: predict_linear(node_filesystem_free_bytes{mountpoint="/ | ||
| + | GROUP_LEFT(node_name) node_meta < 0 | ||
| + | for: 1h | ||
| + | labels: | ||
| + | severity: critical | ||
| + | annotations: | ||
| + | description: | ||
| + | 6h. | ||
| + | summary: Disk fill alert for Swarm node '{{ $labels.node_name }}' | ||
| + | |||
| + | </ | ||
| + | <code yaml; swarm_tasks.rules.yaml> | ||
| + | groups: | ||
| + | - name: / | ||
| + | rules: | ||
| + | - alert: task_high_cpu_usage_50 | ||
| + | expr: sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~" | ||
| + | BY (container_label_com_docker_swarm_task_name, | ||
| + | * 100 > 50 | ||
| + | for: 1m | ||
| + | annotations: | ||
| + | description: | ||
| + | $labels.container_label_com_docker_swarm_node_id }}'' | ||
| + | $value}}%.' | ||
| + | summary: CPU alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name | ||
| + | }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' | ||
| + | - alert: task_high_memory_usage_1g | ||
| + | expr: sum(container_memory_rss{container_label_com_docker_swarm_task_name=~" | ||
| + | BY (container_label_com_docker_swarm_task_name, | ||
| + | for: 1m | ||
| + | annotations: | ||
| + | description: | ||
| + | $labels.container_label_com_docker_swarm_node_id }}'' | ||
| + | $value}}.' | ||
| + | summary: Memory alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name | ||
| + | }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' | ||
| + | </ | ||
| == otros | == otros | ||
| * [[http:// | * [[http:// | ||