文章

k8s - 卸载与安装

k8s - 卸载与安装

k8s 卸载与安装

个人建议先看上一篇 《k8s - 安装》,里面包含一些失败教训。

主机信息-系统、硬件

内核版本:6.1.0-35-amd64(由 uname -r 查看)

1
2
3
4
5
6
7
8
9
PRETTY_NAME="Debian GNU/Linux 12 (bookworm)"
NAME="Debian GNU/Linux"
VERSION_ID="12"
VERSION="12 (bookworm)"
VERSION_CODENAME=bookworm
ID=debian
HOME_URL="https://www.debian.org/"
SUPPORT_URL="https://www.debian.org/support"
BUG_REPORT_URL="https://bugs.debian.org/"
1
2
3
4
5
Tasks:  90 total,   1 running,  89 sleeping,   0 stopped,   0 zombie
%Cpu0  :  0.0 us,  0.0 sy,  0.0 ni,100.0 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st 
%Cpu1  :  0.0 us,  0.0 sy,  0.0 ni,100.0 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st 
MiB Mem :   3722.8 total,   2740.4 free,    453.2 used,    758.3 buff/cache     
MiB Swap:      0.0 total,      0.0 free,      0.0 used.   3269.6 avail Mem 

docker安装

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

## Docker Engine install on Debian (already-run steps kept commented for reference)
## https://docs.docker.com/engine/install/debian/
#  apt update
#  apt install ca-certificates curl
#  install -m 0755 -d /etc/apt/keyrings
#  curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc
#  chmod a+r /etc/apt/keyrings/docker.asc

# Add the repository to Apt sources (deb822 .sources format):
#  tee /etc/apt/sources.list.d/docker.sources <<EOF
# Types: deb
# URIs: https://download.docker.com/linux/debian
# Suites: $(. /etc/os-release && echo "$VERSION_CODENAME")
# Components: stable
# Signed-By: /etc/apt/keyrings/docker.asc
# EOF

#  apt update

# Install Docker Engine, CLI, containerd runtime, and the buildx/compose plugins.
apt install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

## Installed version (from `docker version`):
##Client: Docker Engine - Community
# Version:           29.3.1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Prerequisites: enable IPv4 forwarding and make iptables see bridged traffic.
# Persist the required kernel modules so they load on every boot.
sudo tee /etc/modules-load.d/k8s.conf <<EOF
overlay
br_netfilter
EOF

# Load them immediately for the current boot.
sudo modprobe overlay
sudo modprobe br_netfilter

# Required sysctl parameters; the file keeps them across reboots.
sudo tee /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF

# Apply the sysctl parameters now, without rebooting.
sudo sysctl --system

卸载

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Stop every Kubernetes-related service before wiping state (order matters:
# kubelet first, then the container runtimes).
systemctl stop kubelet
systemctl stop docker
systemctl stop containerd
#systemctl stop etcd
kubeadm reset -f

# Remove the packages and anything only they depended on.
apt-get purge -y kubelet kubeadm kubectl kubernetes-cni cri-tools cri-o
apt-get autoremove -y

# Wipe cluster configuration and kubelet state.
rm -rf /etc/kubernetes/ /var/lib/kubelet/ /var/lib/kubernetes/ ~/.kube/

# Delete etcd data (master nodes only).
rm -rf /var/lib/etcd/

# Delete CNI network config — this fixes many post-reinstall network problems.
rm -rf /etc/cni/net.d/ /var/lib/cni/ /opt/cni/bin/

# Delete K8s runtime temp files.
rm -rf /var/run/kubernetes/

# Disable swap: sudo swapoff -a, and remove the swap line from /etc/fstab.

将 kubelet 从 v1.35 降级为 v1.34,然后用 --kubernetes-version v1.34 重新初始化集群

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Kubernetes / CRI-O minor versions to install (pkgs.k8s.io per-minor repos).
KUBERNETES_VERSION=v1.34
CRIO_VERSION=v1.34

# Ensure the keyring directory exists (missing on minimal Debian installs,
# which would make the curl -o below fail).
mkdir -p /etc/apt/keyrings

# Kubernetes apt repository key and source.
# --yes lets gpg overwrite an existing keyring file instead of aborting on rerun.
curl -fsSL "https://pkgs.k8s.io/core:/stable:/$KUBERNETES_VERSION/deb/Release.key" |
    gpg --dearmor --yes -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg

echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/$KUBERNETES_VERSION/deb/ /" |
    tee /etc/apt/sources.list.d/kubernetes.list
# CRI-O apt repository key and source.
curl -fsSL "https://download.opensuse.org/repositories/isv:/cri-o:/stable:/$CRIO_VERSION/deb/Release.key" |
    gpg --dearmor --yes -o /etc/apt/keyrings/cri-o-apt-keyring.gpg

echo "deb [signed-by=/etc/apt/keyrings/cri-o-apt-keyring.gpg] https://download.opensuse.org/repositories/isv:/cri-o:/stable:/$CRIO_VERSION/deb/ /" |
    tee /etc/apt/sources.list.d/cri-o.list
apt-get update
apt-get install -y kubelet kubeadm kubectl
# Hold the packages so a routine apt upgrade can't drift back to a newer minor
# version (this is the point of the v1.35 -> v1.34 downgrade).
apt-mark hold kubelet kubeadm kubectl
# Packages land in /usr/bin/; symlink into /usr/local/bin/ if your PATH needs it.

# containerd is used as the container runtime here.
#apt install -y  containerd.io
# Regenerate the kubelet config file.
kubeadm init phase kubelet-start

# Check that the file was generated.
ls -l /var/lib/kubelet/config.yaml
kubeadm reset -f

# Back up the packaged containerd config before regenerating a full one.
sudo mv /etc/containerd/config.toml /etc/containerd/config.toml.bak

containerd 默认提供的 config.toml 内容比较不全,需要用 containerd 重新生成一个完整的配置。

1
2
3
4
5
6
 # Generate a complete default config (the packaged file is too minimal).
 containerd config default | tee /etc/containerd/config.toml
 
  # kubelet expects the systemd cgroup driver; flip SystemdCgroup to true.
  sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml
  systemctl daemon-reload
  systemctl restart containerd
  # Confirm containerd came back up healthy.
  systemctl status containerd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.0.58:6443 --token gdv0bn.iexx3g2fq7zhe06c \
        --discovery-token-ca-cert-hash sha256:68defa33354d0c4703b1d64a2fadf268287bb4942f6c866c9cff5903b9f504dc 

管理

1
2
3
# Set up kubeconfig for the current (non-root) user.
# Quote every expansion so paths containing spaces don't break the commands.
mkdir -p "$HOME/.kube"
# -i prompts before clobbering an existing config.
sudo cp -i /etc/kubernetes/admin.conf "$HOME/.kube/config"
sudo chown "$(id -u):$(id -g)" "$HOME/.kube/config"
1
2
3
4
5
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster
1
2
3
4
root@raha-0001:~# kubectl get nodes
NAME          STATUS     ROLES           AGE   VERSION
master-node   NotReady   control-plane   52m   v1.34.6
raha-0002     NotReady   <none>          11m   v1.34.6

安装网络

1
2
3
4
5
6
# Install the Calico CNI plugin (pinned to v3.30.2).
kubectl apply -f https://raw.githubusercontent.com/projectcalico/calico/v3.30.2/manifests/calico.yaml

# Verify the CNI pods are running.
kubectl get pods -n kube-system | grep -E 'flannel|calico|cilium'
# CNI config files should now appear here.
ls /etc/cni/net.d/
1
2
3
4
5
#30s后,已正常
root@raha-0001:~# kubectl get nodes
NAME          STATUS   ROLES           AGE    VERSION
master-node   Ready    control-plane   100m   v1.34.6
raha-0002     Ready    <none>          59m    v1.34.6
1
2
# Ingress resources need a controller to take effect. Pin the manifest to a
# released controller tag instead of "main": the main-branch manifest can
# change or break at any time, making this step non-reproducible.
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.11.1/deploy/static/provider/baremetal/deploy.yaml

管理面板

安装metrics-server

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# 未安装
root@raha-0001:~# kubectl top nodes
error: Metrics API not available
#进行安装
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

#
root@raha-0001:~# kubectl get apiservices | grep metrics
v1beta1.metrics.k8s.io              kube-system/metrics-server   False (MissingEndpoints)   57s

#Pod 根本没启动,或者一直在重启(CrashLoopBackOff)。
root@raha-0001:~# kubectl get pods -n kube-system | grep metrics-server
metrics-server-b4c746d8b-njp99             0/1     Running   0          10m

# 查看问题
root@raha-0001:~# kubectl logs -n kube-system deploy/metrics-server
E0328 03:52:30.421281       1 scraper.go:149] "Failed to scrape node" err="Get \"https://192.168.0.58:10250/metrics/resource\": tls: failed to verify certificate: x509: cannot validate certificate for 192.168.0.58 because it doesn't contain any IP SANs" node="master-node"
E0328 03:52:30.428324       1 scraper.go:149] "Failed to scrape node" err="Get \"https://192.168.0.151:10250/metrics/resource\": tls: failed to verify certificate: x509: cannot validate certificate for 192.168.0.151 because it doesn't contain any IP SANs" node="raha-0002"

# 解决问题:在打开的编辑器中,找到 spec.template.spec.containers[0].args 部分。在列表末尾添加 --kubelet-insecure-tls 参数。
kubectl edit deployment metrics-server -n kube-system
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-insecure-tls  # <--- 添加这一行
...

# 验证ok
root@raha-0001:~# kubectl get pods -n kube-system | grep metrics-server
metrics-server-5f54fb74d9-pbcgz            1/1     Running   0          34s

Kubernetes Dashboard

1
https://kubernetes.io/zh-cn/docs/tasks/access-application-cluster/web-ui-dashboard/
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
##helm 官方那个挂了 --不用了

kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml

#修改 Service 为 NodePort
#默认安装只能在集群内部访问,我们需要把它暴露出来。
kubectl edit svc kubernetes-dashboard -n kubernetes-dashboard

#在打开的编辑器中,找到 spec: 部分,修改 type 并指定端口:
spec:
  type: NodePort  # 将 ClusterIP 改为 NodePort
  ports:
    - port: 443
      targetPort: 8443
      nodePort: 30000 # 添加这一行,指定访问端口 (范围 30000-32767)
  ...
 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Create an admin-user ServiceAccount and bind it to the cluster-admin role
# (grants full cluster access — for dashboard login).
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kubernetes-dashboard
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kubernetes-dashboard
EOF
1
2
3
4
5
# Create a login token for the dashboard (8760h = 1 year validity).
# NOTE: this token carries cluster-admin rights — treat it as a secret.
kubectl -n kubernetes-dashboard create token admin-user --duration=8760h

#eyJhbGciOiJSUzI1NiIsImtpZCI6IlVsdklURE0wOHAzeVNqNW9JX01TUFRDRUZ6X01Ec1h1TzFDbVJvcjAzUVkifQ.eyJhdWQiOlsiaHR0cHM6Ly9rdWJlcm5ldGVzLmRlZmF1bHQuc3ZjLmNsdXN0ZXIubG9jYWwiXSwiZXhwIjoxODA2MjE0NzY4LCJpYXQiOjE3NzQ2Nzg3NjgsImlzcyI6Imh0dHBzOi8va3ViZXJuZXRlcy5kZWZhdWx0LnN2Yy5jbHVzdGVyLmxvY2FsIiwianRpIjoiYzFlOGY1Y2MtMWM2Yi00ODVhLTg5N2UtYTk1ODY5NTYyZGMxIiwia3ViZXJuZXRlcy5pbyI6eyJuYW1lc3BhY2UiOiJrdWJlcm5ldGVzLWRhc2hib2FyZCIsInNlcnZpY2VhY2NvdW50Ijp7Im5hbWUiOiJhZG1pbi11c2VyIiwidWlkIjoiYzJjOTZkNjUtNGZiZS00ZDQxLTk2M2YtNWVkOGU3MzI4YWI1In19LCJuYmYiOjE3NzQ2Nzg3NjgsInN1YiI6InN5c3RlbTpzZXJ2aWNlYWNjb3VudDprdWJlcm5ldGVzLWRhc2hib2FyZDphZG1pbi11c2VyIn0.SFJMod8Am4J5fvg3jyvvWkarPwjkFtr8quU3TYBFn0AXOtXIKucQlS3RSuDArpIns-UR5_rRzGOSaTh37ddQuy6yP4CCfvjaDZd92b01nzLn_mFn4fspaV9Lr1qowR-0f9YMrlSJH1EYrRGuAtjiz3pP2nOw2TyOW8GoYccANJiKgoH34G0BUauEF9PHcysRnUOZZygLSvWGMsJfr4iVTa1_1iyh9zyMxD2vfrz0V6n9ZKnaKFr6swl2Ofm3wZFryM35Rw6NYvgTwVXxGQ1kJN9hKUR3SkgSnGbx0MP5izvKouCCKBM8tUrqoWIPXEVUjomRDSonqriF3FkuUV7-Pg

本文由作者按照 CC BY 4.0 进行授权