feat: migrated to k3s

2025-11-15 11:54:06 +00:00
parent a595747c2c
commit 709d6de566
34 changed files with 5903 additions and 802 deletions


@@ -0,0 +1,34 @@
# Prometheus Setup
- <https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack>
- <https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml>
## Helm chart
```bash
# add repo
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
```
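Running `helm repo update` after adding the repo keeps the local chart index current, so the install below picks up the latest chart version:
```bash
# refresh the local chart index before installing/upgrading
helm repo update
```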
**This Helm chart installs:**
- the CRDs
- the Prometheus Operator
- the ServiceMonitors for the Kubernetes components
```bash
kubectl get namespace monitoring || kubectl create namespace monitoring
helm upgrade --install prometheus-stack prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --values=./helm/01-only-crd-and-operator.yaml \
  --values=./helm/02-kube-metrics.yaml \
  --values=./helm/03-node-exporter.yaml \
  --values=./helm/04-kubelet.yaml \
  --values=./helm/10-testing-values.yaml
```
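After the install it is worth confirming that the CRDs and the operator are actually in place, since the rest of the setup (ServiceMonitors, the agent below) depends on them; a quick check:
```bash
# CRDs registered by the chart
kubectl get crd | grep monitoring.coreos.com
# at this stage the operator should be the main workload in the namespace
kubectl -n monitoring get deployments,pods
```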
## Deploy the Prometheus agent
```bash
kubectl apply -f ./prometheus-agent.yaml
```
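Once applied, the operator should reconcile the `PrometheusAgent` into a running workload (a StatefulSet by default) that remote-writes to the configured endpoint. A quick sanity check; the pod label used below is an assumption about what the operator sets, so adjust it if it does not match:
```bash
# the PrometheusAgent resource and the workload generated from it
kubectl -n monitoring get prometheusagents
kubectl -n monitoring get statefulsets,pods
# tail the agent logs for remote_write errors (label selector assumed)
kubectl -n monitoring logs -l app.kubernetes.io/name=prometheus-agent --tail=50
```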


@@ -0,0 +1,48 @@
# values.yaml to install only the Prometheus Operator and CRDs
# Disable all components except the operator
defaultRules:
  create: false
alertmanager:
  enabled: false
grafana:
  enabled: false
kubeStateMetrics:
  enabled: false
nodeExporter:
  enabled: false
prometheus:
  enabled: false
coreDns:
  enabled: false
kubeControllerManager:
  enabled: false
kubeEtcd:
  enabled: false
kubeProxy:
  enabled: false
kubeScheduler:
  enabled: false
prometheusOperator:
  enabled: true
  createCustomResource: true
  tls:
    enabled: false
  admissionWebhooks:
    enabled: false
  cleanupCustomResource: false
  serviceMonitor:
    selfMonitor: false
  kubeletService:
    enabled: false


@@ -0,0 +1,29 @@
kubeStateMetrics:
  enabled: true
kube-state-metrics: # ok, tested
  prometheus:
    monitor:
      relabelings:
        - targetLabel: cluster
          replacement: casa
      additionalLabels:
        app.kubernetes.io/name: prometheus-kube-state-metrics # !important: selector used by the agent
coreDns: # ok, tested
  enabled: true
  serviceMonitor:
    relabelings:
      - targetLabel: cluster
        replacement: casa
    additionalLabels:
      app.kubernetes.io/name: prometheus-stack-coredns # !important: selector used by the agent
kubeApiServer: # ok, tested
  enabled: true
  serviceMonitor:
    relabelings:
      - targetLabel: cluster
        replacement: casa
    additionalLabels:
      app.kubernetes.io/name: prometheus-stack-apiserver # !important: selector used by the agent
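
The `!important` labels above are what the agent's `serviceMonitorSelector` (see `prometheus-agent.yaml` later in this commit) matches on: it only picks up monitors where the `app.kubernetes.io/name` key exists. A quick way to list which monitors currently qualify:
```bash
# ServiceMonitors the agent will select: any carrying the app.kubernetes.io/name label key
kubectl get servicemonitors --all-namespaces -l app.kubernetes.io/name
```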


@@ -0,0 +1,19 @@
# Deploy node-exporter as a DaemonSet on all nodes
nodeExporter:
  enabled: true
# job: node-exporter
prometheus-node-exporter:
  prometheus:
    monitor:
      enabled: true
      relabelings:
        # https://github.com/dotdc/grafana-dashboards-kubernetes
        - action: replace
          sourceLabels: [__meta_kubernetes_pod_node_name]
          targetLabel: nodename
        # cluster identification
        - targetLabel: cluster
          replacement: casa


@@ -0,0 +1,83 @@
prometheusOperator:
  kubeletService:
    enabled: true
    # requires manual creation of the service, see #prom-kublet-service
# File used to test new options and configurations
# Should be the last file to be loaded
kubelet:
  enabled: true
  namespace: kube-system
  serviceMonitor:
    interval: 30s # WARN: errors ingesting out-of-order samples, see https://github.com/prometheus-community/helm-charts/issues/5483
    enabled: true
    ## Enable scraping /metrics from the kubelet's service
    kubelet: true
    additionalLabels:
      app.kubernetes.io/name: prometheus-kubelet # !important: selector used by the agent
    probesMetricRelabelings:
      - targetLabel: cluster
        replacement: casa
      - sourceLabels: [__name__, image]
        separator: ;
        regex: container_([a-z_]+);
        replacement: $1
        action: drop
      - sourceLabels: [__name__]
        separator: ;
        regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
        replacement: $1
        action: drop
    # RelabelConfigs to apply to samples before scraping
    # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    #
    # metrics_path is required to match upstream rules and charts
    cAdvisorRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
      - sourceLabels: [__meta_kubernetes_pod_node_name]
        separator: ;
        regex: ^(.*)$
        targetLabel: nodename
        replacement: $1
        action: replace
    # RelabelConfigs to apply to samples before scraping
    # ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
    #
    probesRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
      - sourceLabels: [__meta_kubernetes_pod_node_name]
        separator: ;
        regex: ^(.*)$
        targetLabel: nodename
        replacement: $1
        action: replace
    resourceRelabelings:
      - targetLabel: cluster
        replacement: casa
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
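
Since the agent only selects labelled monitors, it is worth confirming that `app.kubernetes.io/name: prometheus-kubelet` actually landed on the kubelet ServiceMonitor after the upgrade; a quick check:
```bash
# the kubelet ServiceMonitor should carry the label the agent selects on
kubectl get servicemonitors --all-namespaces -l app.kubernetes.io/name=prometheus-kubelet --show-labels
```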


@@ -0,0 +1 @@
# use for testing

File diff suppressed because it is too large


@@ -0,0 +1,69 @@
apiVersion: monitoring.coreos.com/v1alpha1
kind: PrometheusAgent
metadata:
  name: prometheus-agent
  namespace: monitoring
spec:
  serviceMonitorNamespaceSelector: {}
  podMonitorNamespaceSelector: {}
  serviceMonitorSelector:
    matchExpressions:
      - key: app.kubernetes.io/name
        operator: Exists
  podMonitorSelector:
    matchExpressions:
      - key: app.kubernetes.io/name
        operator: Exists
  replicas: 1
  remoteWrite:
    - url: https://prometheus.monitoring.limbosolutions.com/api/v1/write
  scrapeInterval: 60s
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 200m
      memory: 180Mi
  serviceAccountName: prometheus-agent
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-agent
rules:
  - apiGroups: [""]
    resources: ["nodes", "nodes/metrics", "nodes/proxy", "services", "endpoints", "pods"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["monitoring.coreos.com"]
    resources: ["servicemonitors", "podmonitors"]
    verbs: ["get", "list", "watch"]
  - nonResourceURLs:
      - /metrics
      - /metrics/cadvisor
      - /metrics/probes
    verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-agent-monitoring
roleRef:
  kind: ClusterRole
  name: prometheus-agent
  apiGroup: rbac.authorization.k8s.io
subjects:
  - kind: ServiceAccount
    name: prometheus-agent
    namespace: monitoring
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-agent
  namespace: monitoring
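
The ClusterRole and ClusterRoleBinding above are what allow the agent's ServiceAccount to discover targets and scrape the kubelet endpoints; impersonation makes it easy to confirm the binding took effect:
```bash
# both should print "yes" once the ClusterRoleBinding is applied
kubectl auth can-i get nodes/metrics --as=system:serviceaccount:monitoring:prometheus-agent
kubectl auth can-i list servicemonitors.monitoring.coreos.com --as=system:serviceaccount:monitoring:prometheus-agent
```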

monitoring/promtail/.gitignore

@@ -0,0 +1 @@
**.local.**


@@ -0,0 +1,12 @@
# Promtail
```bash
kubectl get namespace monitoring || kubectl create namespace monitoring
# add repo
helm repo add grafana https://grafana.github.io/helm-charts
# install & upgrade
helm upgrade --install promtail grafana/promtail --namespace monitoring \
  --values=./values.yaml \
  --values=./values.local.yaml
```
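The Loki endpoint itself is not committed: it lives in `values.local.yaml`, which the `**.local.**` rule in `.gitignore` keeps out of the repo. A minimal sketch of that file, using the example push URL from the comment in `values.yaml` (the real URL is deployment-specific):
```bash
# values.local.yaml stays untracked; the URL below is only the documented example
cat > values.local.yaml <<'EOF'
config:
  clients:
    - url: https://lokiserver/loki/api/v1/push
EOF
```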


@@ -0,0 +1,46 @@
config:
  clients:
    - url: "????" # replaced in values.local.yaml, e.g. https://lokiserver/loki/api/v1/push
  # by default, all scrape configs have node_name
  snippets:
    extraScrapeConfigs: |
      # scrape config for the host journal (syslog/journald)
      - job_name: host-journald
        journal:
          json: true
          max_age: 24h
          path: /var/log/host/journal
          labels:
            job: journald
        relabel_configs:
          - source_labels: ['__journal__systemd_unit']
            target_label: 'journal_systemd_unit'
          - source_labels: ['__journal_syslog_identifier']
            target_label: 'journal_syslog_identifier'
          - source_labels: ['__journal__hostname']
            target_label: 'journal_hostname'
          - target_label: 'host'
            replacement: '${HOSTNAME}'
          - target_label: 'cluster'
            replacement: 'casa'
extraArgs:
  - --config.expand-env=true
extraVolumes:
  - name: node-logs
    hostPath:
      path: /var/log
extraVolumeMounts:
  - name: node-logs
    mountPath: /var/log/host
    readOnly: true
resources:
  limits:
    cpu: 200m
    memory: 100Mi
  requests:
    cpu: 100m
    memory: 50Mi
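
After deploying, it helps to confirm that the host journal is actually visible inside the Promtail pods at the path the journald scrape config expects; the DaemonSet and label names below assume the release name `promtail` from the README and the chart's standard labels:
```bash
# Promtail pods created by the DaemonSet (label assumed)
kubectl -n monitoring get pods -l app.kubernetes.io/name=promtail
# the mounted host journal should be readable at /var/log/host/journal
kubectl -n monitoring exec daemonset/promtail -- ls /var/log/host/journal
```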