gpt4 book ai didi

使用docker部署grafana+prometheus配置

转载 作者:qq735679552 更新时间:2022-09-28 22:32:09 32 4
gpt4 key购买 nike

CFSDN坚持开源创造价值,我们致力于搭建一个资源共享平台,让每一个IT人在这里找到属于你的精彩世界.

这篇CFSDN的博客文章《使用docker部署grafana+prometheus配置》由作者收集整理,如果你对这篇文章有兴趣,记得点赞哟。

docker-compose-monitor.yml 。

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Monitoring stack: InfluxDB + Telegraf alongside Prometheus, Alertmanager,
# Grafana, node-exporter and cAdvisor.
version: '2'

networks:
  # User-defined bridge network joined by the Prometheus-side services.
  # influxdb and telegraf declare no `networks:` key, so they are not
  # attached to it.
  monitor:
    driver: bridge

services:
  # NOTE(review): host port 8083 and the /var/lib/influxdb data path look like
  # InfluxDB 1.x conventions, while `latest` is a floating tag -- confirm the
  # intended major version and consider pinning it.
  influxdb:
    image: influxdb:latest
    container_name: tig-influxdb
    ports:
      - "18083:8083"
      - "18086:8086"
      - "18090:8090"
    env_file:
      - 'env.influxdb'
    volumes:
      # Data persistence; create the host directory first:
      #   sudo mkdir -p ./influxdb/data
      - ./influxdb/data:/var/lib/influxdb
      # Read-only time zone files so the container clock uses UTC+8
      # (the files themselves are supplied next to this compose file).
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    restart: unless-stopped  # restart automatically unless explicitly stopped

  telegraf:
    image: telegraf:latest
    container_name: tig-telegraf
    # Legacy link so telegraf can reach the influxdb service by name.
    links:
      - influxdb
    volumes:
      - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    restart: unless-stopped

  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # Main configuration plus the alert rule file it references.
      - /home/qa/docker/grafana/prometheus.yml:/etc/prometheus/prometheus.yml
      - /home/qa/docker/grafana/node_down.yml:/etc/prometheus/node_down.yml
    ports:
      - '9090:9090'
    networks:
      - monitor

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    volumes:
      - /home/qa/docker/grafana/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - '9093:9093'
    networks:
      - monitor

  grafana:
    image: grafana/grafana:6.7.4
    container_name: grafana
    hostname: grafana
    restart: always
    ports:
      # Grafana's port 3000 is published on host port 13000.
      - '13000:3000'
    networks:
      - monitor

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    # Give the exporter the host's root filesystem and PID namespace so that
    # it reports host metrics (notably node_filesystem_*) instead of the
    # container's own view.
    command:
      - '--path.rootfs=/host'
    pid: host
    volumes:
      - '/:/host:ro,rslave'
    ports:
      - '9100:9100'
    networks:
      - monitor

  # NOTE(review): the google/cadvisor image on Docker Hub appears to be no
  # longer updated (upstream now publishes gcr.io/cadvisor/cadvisor) --
  # confirm and consider switching to a pinned tag.
  cadvisor:
    image: google/cadvisor:latest
    container_name: cadvisor
    hostname: cadvisor
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      # cAdvisor's port 8080 is published on host port 18080.
      - '18080:8080'
    networks:
      - monitor

alertmanager.yml 。

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Alertmanager configuration: every alert is routed to a single e-mail
# receiver. The Chinese string values below are placeholders to be replaced
# with the real sender address, password and recipient address.
global:
  resolve_timeout: 5m
  smtp_from: '邮箱'
  smtp_smarthost: 'smtp.exmail.qq.com:25'
  smtp_auth_username: '邮箱'
  smtp_auth_password: '密码'
  # NOTE(review): with TLS disabled the SMTP credentials travel unencrypted;
  # confirm this is acceptable for the target network.
  smtp_require_tls: false
  smtp_hello: 'qq.com'
route:
  # Alerts sharing the same alertname are batched into one notification.
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: 'email'
receivers:
  # `email_configs` must be a sibling of `name` inside the same list item;
  # any other indentation does not parse.
  - name: 'email'
    email_configs:
      - to: '收件邮箱'
        # Also send a notification when the alert resolves.
        send_resolved: true
inhibit_rules:
  # Suppress 'warning' alerts while a 'critical' alert with the same
  # alertname, dev and instance labels is firing.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

prometheus.yml 。

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
global:
  # Scrape targets every 15 seconds (the Prometheus default is 1 minute).
  scrape_interval: 15s
  # Evaluate rules every 15 seconds (the default is 1 minute).
  evaluation_interval: 15s
  # scrape_timeout is left at the global default (10s).

# Where firing alerts are sent.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '192.168.32.117:9093'
            # - alertmanager:9093

# Rule files are loaded once and then evaluated periodically according to the
# global 'evaluation_interval'.
rule_files:
  - 'node_down.yml'
  # - "node-exporter-alert-rules.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape jobs. The job name is added as a label `job=<job_name>` to every
# time series scraped from that job.
scrape_configs:
  # IO storage node group; the ports are the ones node-exporter listens on.
  - job_name: 'io'
    scrape_interval: 8s
    static_configs:
      - targets:
          - '192.168.32.117:9100'
      - targets:
          - '192.168.32.196:9100'
      - targets:
          - '192.168.32.136:9100'
      - targets:
          - '192.168.32.193:9100'
      - targets:
          - '192.168.32.153:9100'
      - targets:
          - '192.168.32.185:9100'
      - targets:
          - '192.168.32.190:19100'
      - targets:
          - '192.168.32.192:9100'

  # Container metrics; the ports are the ones cAdvisor is published on.
  - job_name: 'cadvisor'
    static_configs:
      - targets:
          - '192.168.32.117:18080'
      - targets:
          - '192.168.32.193:8080'
      - targets:
          - '192.168.32.153:8080'
      - targets:
          - '192.168.32.185:8080'
      - targets:
          - '192.168.32.190:18080'
      - targets:
          - '192.168.32.192:18080'

node_down.yml 。

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Alerting rules: instance availability plus memory, disk and CPU thresholds.
groups:
  - name: node_down
    rules:
      # A scrape target has reported up == 0 for one minute.
      - alert: InstanceDown
        expr: 'up == 0'
        for: 1m
        labels:
          user: test
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."

      # Less than 10% of memory is still available.
      - alert: '剩余内存小于10%'
        expr: 'node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Host out of memory (instance {{ $labels.instance }})'
          description: |-
            Node memory is filling up (< 10% left)
              VALUE = {{ $value }}
              LABELS = {{ $labels }}

      # Less than 10% of disk space is left on a writable filesystem.
      - alert: '剩余磁盘小于10%'
        expr: '(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Host out of disk space (instance {{ $labels.instance }})'
          description: |-
            Disk is almost full (< 10% left)
              VALUE = {{ $value }}
              LABELS = {{ $labels }}

      # Non-idle CPU share above 80%, averaged per instance over 2 minutes.
      - alert: 'CPU负载 > 80%'
        expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Host high CPU load (instance {{ $labels.instance }})'
          description: |-
            CPU load is > 80%
              VALUE = {{ $value }}
              LABELS = {{ $labels }}

告警:https://awesome-prometheus-alerts.grep.to/rules#prometheus-self-monitoring 。

官网仪表盘:https://grafana.com/grafana/dashboards/ 。

到此,这篇关于docker部署grafana+prometheus配置的文章就介绍到这了。更多相关docker部署grafana+prometheus的内容,请搜索我以前的文章或继续浏览下面的相关文章,希望大家以后多多支持我!

原文链接:https://www.cnblogs.com/runzhao/p/15716274.html 。

最后,此篇关于使用docker部署grafana+prometheus配置的文章就讲到这里了。如果你想了解更多关于使用docker部署grafana+prometheus配置的内容,请搜索CFSDN的文章或继续浏览相关文章,希望大家以后支持我的博客!

32 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com