monitoring - increase scrapeInterval to 120s, better selectors on Agent and memory decrease of agent from 360MB to 228MB
All checks were successful
Monitoring services CI/CD Pipeline / deploy (push) Successful in 18s

This commit is contained in:
Márcio Fernandes
2026-03-18 12:17:38 +00:00
parent fc12ab2dd9
commit a56f573a6f
5 changed files with 30 additions and 11 deletions

View File

@@ -29,6 +29,16 @@ helm upgrade --install prometheus-stack prometheus-community/kube-prometheus-sta
## deploy prometheus agent
**requirements:**
Add the label `prometheus-monitoring=enabled` to every namespace that runs Prometheus monitors.
**Example:**
```bash
kubectl label namespace monitoring prometheus-monitoring=enabled --overwrite
```
```bash
kubectl apply -f ./prometheus-agent.yaml
```

View File

@@ -8,6 +8,7 @@ kube-state-metrics: # ok tested!
role: worker-node
prometheus:
monitor:
interval: "60s"
relabelings:
- targetLabel: cluster
replacement: casa

View File

@@ -8,6 +8,7 @@ prometheus-node-exporter:
prometheus:
monitor:
enabled: true
interval: "60s"
relabelings:
# https://github.com/dotdc/grafana-dashboards-kubernetes
- action: replace
@@ -17,3 +18,8 @@ prometheus-node-exporter:
# identification of cluster
- targetLabel: cluster
replacement: casa
# this seems to be a timestamp, which cannot be a label!
- action: labeldrop
regex: __meta_kubernetes_endpoints_annotation_endpoints_kubernetes_io_last_change_trigger_time

View File

@@ -4,10 +4,11 @@ kubelet:
enabled: true
namespace: kube-system
serviceMonitor:
interval: 30s #WARN: Error on ingesting out-of-order samples. https://github.com/prometheus-community/helm-charts/issues/5483
interval: 60s #WARN: Error on ingesting out-of-order samples. https://github.com/prometheus-community/helm-charts/issues/5483
enabled: true
## Enable scraping /metrics from kubelet's service
kubelet: true
cAdvisor: true
additionalLabels:
app.kubernetes.io/name: prometheus-kubelet # !important: selector used by agent

View File

@@ -5,16 +5,17 @@ metadata:
name: prometheus-agent
namespace: monitoring
spec:
serviceMonitorNamespaceSelector: {}
podMonitorNamespaceSelector: {}
podMonitorSelector: null
podMonitorNamespaceSelector: null
serviceMonitorNamespaceSelector:
matchLabels:
prometheus-monitoring: enabled
serviceMonitorSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: Exists
podMonitorSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: Exists
matchLabels:
release: prometheus-stack
replicas: 1
remoteWrite:
- url: https://prometheus.monitoring.limbosolutions.com/api/v1/write
@@ -25,7 +26,7 @@ spec:
memory: 128Mi
limits:
cpu: 200m
memory: 512Mi
memory: 300Mi
serviceAccountName: prometheus-agent
nodeSelector:
role: worker-node