
Soak Test

Source: hello-pepr-soak

Pepr watches for Pods with the labels api and bug, and for Secrets with the label deletedeletedelete, in the pepr-demo namespace.

A successful soak should result in:

  1. No pods in the pepr-demo namespace
  2. No secrets in the pepr-demo namespace

The watcher deployment runs at LOG_LEVEL debug, while the admission deployment stays at info to keep irrelevant noise down.
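
To confirm the configured levels, you can read the LOG_LEVEL environment variable off each controller deployment (a minimal check, assuming the controllers expose LOG_LEVEL as a plain env var):

kubectl get deploy -n pepr-system -o jsonpath='{range .items[*]}{.metadata.name}{": "}{.spec.template.spec.containers[0].env[?(@.name=="LOG_LEVEL")].value}{"\n"}{end}'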

Create a k3d cluster with audit logging enabled

cat <<EOF > audit-policy.yaml
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
- level: Metadata
EOF
k3d cluster create auditer \
  --k3s-arg '--kube-apiserver-arg=audit-policy-file=/etc/kubernetes/policies/audit-policy.yaml@server:*' \
  --k3s-arg '--kube-apiserver-arg=audit-log-path=/var/log/kubernetes/audit.log@server:*' \
  --k3s-arg '--kube-apiserver-arg=audit-log-format=json@server:*' \
  --volume $(pwd)/audit-policy.yaml:/etc/kubernetes/policies/audit-policy.yaml
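
Once the cluster is up, the server node should report Ready:

kubectl get nodes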

View audit logs

docker exec -it k3d-auditer-server-0 cat /var/log/kubernetes/audit.log
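
To cut the log down to soak-relevant entries, filter with jq (a sketch against the standard Kubernetes audit event schema, where objectRef.namespace names the target namespace):

docker exec k3d-auditer-server-0 tail -n 50 /var/log/kubernetes/audit.log | jq 'select(.objectRef.namespace == "pepr-demo")'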

Create a kind cluster with audit logging enabled

cat <<EOF > audit-policy.yaml
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
- level: Metadata
EOF
cat <<EOF > kind-config.yaml
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
  kubeadmConfigPatches:
  - |
    kind: ClusterConfiguration
    apiServer:
      # enable auditing flags on the API server
      extraArgs:
        audit-log-path: /var/log/kubernetes/kube-apiserver-audit.log
        audit-policy-file: /etc/kubernetes/policies/audit-policy.yaml
      # mount new files / directories on the control plane
      extraVolumes:
      - name: audit-policies
        hostPath: /etc/kubernetes/policies
        mountPath: /etc/kubernetes/policies
        readOnly: true
        pathType: DirectoryOrCreate
      - name: audit-logs
        hostPath: /var/log/kubernetes
        mountPath: /var/log/kubernetes
        readOnly: false
        pathType: DirectoryOrCreate
  # mount the local file on the control plane
  extraMounts:
  - hostPath: ./audit-policy.yaml
    containerPath: /etc/kubernetes/policies/audit-policy.yaml
    readOnly: true
EOF
kind create cluster --config kind-config.yaml

Make sure audit logs are being written

docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log
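
If auditing is working, the file grows steadily; a quick line count makes that easy to spot:

docker exec kind-control-plane wc -l /var/log/kubernetes/kube-apiserver-audit.log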

Troubleshoot

docker exec kind-control-plane ls /etc/kubernetes/policies

Expected output:

audit-policy.yaml

Does the API server manifest contain the audit mounts and arguments?

docker exec kind-control-plane cat /etc/kubernetes/manifests/kube-apiserver.yaml | grep audit

Expected output:

    - --audit-log-path=/var/log/kubernetes/kube-apiserver-audit.log
    - --audit-policy-file=/etc/kubernetes/policies/audit-policy.yaml
      name: audit-logs
      name: audit-policies
    name: audit-logs
    name: audit-policies

Download istioctl

curl -L https://istio.io/downloadIstio | sh -
sudo mv istio-*/bin/istioctl /usr/local/bin/
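
Confirm the binary is on your PATH:

istioctl version --remote=false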
Install Istio with the demo profile

istioctl install --set profile=demo -y
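
After the install finishes, istiod and the gateways should be running:

kubectl get pods -n istio-system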

Deploy the module and watch logs in one terminal

kubectl apply -f dist
kubectl apply -f - <<EOF
apiVersion: v1
kind: Namespace
metadata:
  creationTimestamp: null
  name: pepr-demo
  labels:
    istio-injection: enabled
spec: {}
status: {}
---
apiVersion: batch/v1
kind: CronJob
metadata:
  creationTimestamp: null
  name: podgen0
  namespace: pepr-demo
spec:
  jobTemplate:
    metadata:
      creationTimestamp: null
      name: podgen
    spec:
      ttlSecondsAfterFinished: 5
      template:
        metadata:
          creationTimestamp: null
          labels:
            "zarf.dev/agent": "ignore"
            bug: "reproduce"
            api: "call"
        spec:
          containers:
          - image: ubuntu
            command: ["sh","-c","sleep 10"]
            name: sleepanddie
            resources: {}
          restartPolicy: Never
  schedule: "0/1 * * * *"
status: {}
---
apiVersion: batch/v1
kind: CronJob
metadata:
  creationTimestamp: null
  name: podgen1
  namespace: pepr-demo
spec:
  jobTemplate:
    metadata:
      creationTimestamp: null
      name: podgen
    spec:
      ttlSecondsAfterFinished: 5
      template:
        metadata:
          creationTimestamp: null
          labels:
            bug: "reproduce"
            api: "call"
            "zarf.dev/agent": "ignore"
        spec:
          containers:
          - image: ubuntu
            command: ["sh","-c","sleep 10"]
            name: sleepanddie
            resources: {}
          restartPolicy: Never
  schedule: "0/1 * * * *"
status: {}
EOF
kubectl label ns pepr-system istio-injection=enabled
kubectl delete po -n pepr-system --all --force
kubectl run curler -n pepr-system --image=nginx
sleep 10
kubectl exec -it curler -n pepr-system -- curl -k https://pepr-6233c672-7fca-5603-8e90-771828dd30fa-watcher/metrics
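
The service name embeds this module's UUID, so substitute your own watcher service if it differs. To see only the Pepr counters in the scrape, grep the output (assuming Pepr's metric names carry a pepr_ prefix):

kubectl exec -it curler -n pepr-system -- curl -sk https://pepr-6233c672-7fca-5603-8e90-771828dd30fa-watcher/metrics | grep pepr_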

Logs

kubectl logs -n pepr-system -l pepr.dev/controller=watcher -f | jq 'select(.url != "/healthz")'

Create 10 CronJobs that together produce 10 sidecar-injected pods every 60 seconds. The ten manifests are identical apart from the name, so a short loop generates podgen0 through podgen9:

kubectl apply -f - <<EOF
apiVersion: v1
kind: Namespace
metadata:
  creationTimestamp: null
  name: pepr-demo
  labels:
    istio-injection: enabled
spec: {}
status: {}
EOF
for i in $(seq 0 9); do
  kubectl apply -f - <<EOF
apiVersion: batch/v1
kind: CronJob
metadata:
  creationTimestamp: null
  name: podgen$i
  namespace: pepr-demo
spec:
  jobTemplate:
    metadata:
      creationTimestamp: null
      name: podgen
    spec:
      ttlSecondsAfterFinished: 5
      template:
        metadata:
          creationTimestamp: null
          labels:
            "zarf.dev/agent": "ignore"
            bug: "reproduce"
            api: "call"
        spec:
          containers:
          - image: ubuntu
            command: ["sh","-c","sleep 10"]
            name: sleepanddie
            resources: {}
          restartPolicy: Never
  schedule: "0/1 * * * *"
status: {}
EOF
done
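
All ten CronJobs should now exist in the namespace:

kubectl get cronjobs -n pepr-demo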

Watch for Secrets and Pods; if any stick around, you have reproduced the issue:

while true; do
  kubectl get secret,po -n pepr-demo --no-headers
  sleep 5
  clear
done
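
If you have watch installed, a one-liner does the same job:

watch -n 5 kubectl get secret,po -n pepr-demo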

Watch the Watcher controller logs:

kubectl logs -n pepr-system -l pepr.dev/controller=watcher -f | jq 'select(.url != "/healthz")'

Watch the audit logs

while true; do
  docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log | tail | jq
  sleep 5
done
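
To see only delete events against the demo namespace, add a jq filter (again assuming the standard audit event schema):

docker exec kind-control-plane tail -n 200 /var/log/kubernetes/kube-apiserver-audit.log | jq 'select(.verb == "delete" and .objectRef.namespace == "pepr-demo")'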
Install Kubeshark to inspect traffic

helm repo add kubeshark https://helm.kubeshark.co
helm install kubeshark kubeshark/kubeshark

Port-forward to the UI

kubectl port-forward service/kubeshark-front 8899:80
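
Then open http://localhost:8899 in a browser to inspect traffic between the Pepr controllers and the API server.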