monitoring - increase scrapeInterval to 120s, use stricter selectors on the Agent, and reduce agent memory usage from 360MB to 228MB
All checks were successful
Monitoring services CI/CD Pipeline / deploy (push) Successful in 18s
All checks were successful
Monitoring services CI/CD Pipeline / deploy (push) Successful in 18s
This commit is contained in:
@@ -29,6 +29,16 @@ helm upgrade --install prometheus-stack prometheus-community/kube-prometheus-sta
|
|||||||
|
|
||||||
## deploy prometheus agent
|
## deploy prometheus agent
|
||||||
|
|
||||||
|
**requirements:**
|
||||||
|
|
||||||
|
On namespaces running Prometheus monitors, add the label `prometheus-monitoring=enabled`.
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl label namespace monitoring prometheus-monitoring=enabled --overwrite
|
||||||
|
```
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
kubectl apply -f ./prometheus-agent.yaml
|
kubectl apply -f ./prometheus-agent.yaml
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ kube-state-metrics: # ok tested!
|
|||||||
role: worker-node
|
role: worker-node
|
||||||
prometheus:
|
prometheus:
|
||||||
monitor:
|
monitor:
|
||||||
|
interval: "60s"
|
||||||
relabelings:
|
relabelings:
|
||||||
- targetLabel: cluster
|
- targetLabel: cluster
|
||||||
replacement: casa
|
replacement: casa
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ prometheus-node-exporter:
|
|||||||
prometheus:
|
prometheus:
|
||||||
monitor:
|
monitor:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
interval: "60s"
|
||||||
relabelings:
|
relabelings:
|
||||||
# https://github.com/dotdc/grafana-dashboards-kubernetes
|
# https://github.com/dotdc/grafana-dashboards-kubernetes
|
||||||
- action: replace
|
- action: replace
|
||||||
@@ -17,3 +18,8 @@ prometheus-node-exporter:
|
|||||||
# identification of cluster
|
# identification of cluster
|
||||||
- targetLabel: cluster
|
- targetLabel: cluster
|
||||||
replacement: casa
|
replacement: casa
|
||||||
|
|
||||||
|
# it seems to be a timestamp, which cannot be used as a label!
|
||||||
|
- action: labeldrop
|
||||||
|
regex: __meta_kubernetes_endpoints_annotation_endpoints_kubernetes_io_last_change_trigger_time
|
||||||
|
|
||||||
|
|||||||
@@ -4,10 +4,11 @@ kubelet:
|
|||||||
enabled: true
|
enabled: true
|
||||||
namespace: kube-system
|
namespace: kube-system
|
||||||
serviceMonitor:
|
serviceMonitor:
|
||||||
interval: 30s #WARN: Error on ingesting out-of-order samples. https://github.com/prometheus-community/helm-charts/issues/5483
|
interval: 60s #WARN: Error on ingesting out-of-order samples. https://github.com/prometheus-community/helm-charts/issues/5483
|
||||||
enabled: true
|
enabled: true
|
||||||
## Enable scraping /metrics from kubelet's service
|
## Enable scraping /metrics from kubelet's service
|
||||||
kubelet: true
|
kubelet: true
|
||||||
|
cAdvisor: true
|
||||||
additionalLabels:
|
additionalLabels:
|
||||||
app.kubernetes.io/name: prometheus-kubelet # !important: selector used by agent
|
app.kubernetes.io/name: prometheus-kubelet # !important: selector used by agent
|
||||||
|
|
||||||
|
|||||||
@@ -5,16 +5,17 @@ metadata:
|
|||||||
name: prometheus-agent
|
name: prometheus-agent
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
serviceMonitorNamespaceSelector: {}
|
|
||||||
podMonitorNamespaceSelector: {}
|
podMonitorSelector: null
|
||||||
|
podMonitorNamespaceSelector: null
|
||||||
|
serviceMonitorNamespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
prometheus-monitoring: enabled
|
||||||
|
|
||||||
serviceMonitorSelector:
|
serviceMonitorSelector:
|
||||||
matchExpressions:
|
matchLabels:
|
||||||
- key: app.kubernetes.io/name
|
release: prometheus-stack
|
||||||
operator: Exists
|
|
||||||
podMonitorSelector:
|
|
||||||
matchExpressions:
|
|
||||||
- key: app.kubernetes.io/name
|
|
||||||
operator: Exists
|
|
||||||
replicas: 1
|
replicas: 1
|
||||||
remoteWrite:
|
remoteWrite:
|
||||||
- url: https://prometheus.monitoring.limbosolutions.com/api/v1/write
|
- url: https://prometheus.monitoring.limbosolutions.com/api/v1/write
|
||||||
@@ -25,7 +26,7 @@ spec:
|
|||||||
memory: 128Mi
|
memory: 128Mi
|
||||||
limits:
|
limits:
|
||||||
cpu: 200m
|
cpu: 200m
|
||||||
memory: 512Mi
|
memory: 300Mi
|
||||||
serviceAccountName: prometheus-agent
|
serviceAccountName: prometheus-agent
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
role: worker-node
|
role: worker-node
|
||||||
|
|||||||
Reference in New Issue
Block a user