monitoring - increase scrapeInterval to 120s, better selectors on Agent and memory decrease of agent from 360MB to 228MB
All checks were successful
Monitoring services CI/CD Pipeline / deploy (push) Successful in 18s

This commit is contained in:
Márcio Fernandes
2026-03-18 12:17:38 +00:00
parent fc12ab2dd9
commit a56f573a6f
5 changed files with 30 additions and 11 deletions

View File

@@ -29,6 +29,16 @@ helm upgrade --install prometheus-stack prometheus-community/kube-prometheus-sta
## deploy prometheus agent ## deploy prometheus agent
**requirements:**
Add the label `prometheus-monitoring=enabled` to every namespace that runs Prometheus monitors.
**Example:**
```bash
kubectl label namespace monitoring prometheus-monitoring=enabled --overwrite
```
```bash ```bash
kubectl apply -f ./prometheus-agent.yaml kubectl apply -f ./prometheus-agent.yaml
``` ```

View File

@@ -8,6 +8,7 @@ kube-state-metrics: # ok tested!
role: worker-node role: worker-node
prometheus: prometheus:
monitor: monitor:
interval: "60s"
relabelings: relabelings:
- targetLabel: cluster - targetLabel: cluster
replacement: casa replacement: casa

View File

@@ -8,6 +8,7 @@ prometheus-node-exporter:
prometheus: prometheus:
monitor: monitor:
enabled: true enabled: true
interval: "60s"
relabelings: relabelings:
# https://github.com/dotdc/grafana-dashboards-kubernetes # https://github.com/dotdc/grafana-dashboards-kubernetes
- action: replace - action: replace
@@ -17,3 +18,8 @@ prometheus-node-exporter:
# identification of cluster # identification of cluster
- targetLabel: cluster - targetLabel: cluster
replacement: casa replacement: casa
# it seems to be a timestamp, which cannot be used as a label!
- action: labeldrop
regex: __meta_kubernetes_endpoints_annotation_endpoints_kubernetes_io_last_change_trigger_time

View File

@@ -4,10 +4,11 @@ kubelet:
enabled: true enabled: true
namespace: kube-system namespace: kube-system
serviceMonitor: serviceMonitor:
interval: 30s #WARN: Error on ingesting out-of-order samples. https://github.com/prometheus-community/helm-charts/issues/5483 interval: 60s #WARN: Error on ingesting out-of-order samples. https://github.com/prometheus-community/helm-charts/issues/5483
enabled: true enabled: true
## Enable scraping /metrics from kubelet's service ## Enable scraping /metrics from kubelet's service
kubelet: true kubelet: true
cAdvisor: true
additionalLabels: additionalLabels:
app.kubernetes.io/name: prometheus-kubelet # !important: selector used by agent app.kubernetes.io/name: prometheus-kubelet # !important: selector used by agent

View File

@@ -5,16 +5,17 @@ metadata:
name: prometheus-agent name: prometheus-agent
namespace: monitoring namespace: monitoring
spec: spec:
serviceMonitorNamespaceSelector: {}
podMonitorNamespaceSelector: {} podMonitorSelector: null
podMonitorNamespaceSelector: null
serviceMonitorNamespaceSelector:
matchLabels:
prometheus-monitoring: enabled
serviceMonitorSelector: serviceMonitorSelector:
matchExpressions: matchLabels:
- key: app.kubernetes.io/name release: prometheus-stack
operator: Exists
podMonitorSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: Exists
replicas: 1 replicas: 1
remoteWrite: remoteWrite:
- url: https://prometheus.monitoring.limbosolutions.com/api/v1/write - url: https://prometheus.monitoring.limbosolutions.com/api/v1/write
@@ -25,7 +26,7 @@ spec:
memory: 128Mi memory: 128Mi
limits: limits:
cpu: 200m cpu: 200m
memory: 512Mi memory: 300Mi
serviceAccountName: prometheus-agent serviceAccountName: prometheus-agent
nodeSelector: nodeSelector:
role: worker-node role: worker-node