k8s - 卸载与安装
k8s - 卸载与安装
k8s 卸载与安装
个人建议先看上一篇 《k8s - 安装》,里面包含一些失败教训。
主机信息-系统、硬件
内核:6.1.0-35-amd64
uname -r
1
2
3
4
5
6
7
8
9
PRETTY_NAME="Debian GNU/Linux 12 (bookworm)"
NAME="Debian GNU/Linux"
VERSION_ID="12"
VERSION="12 (bookworm)"
VERSION_CODENAME=bookworm
ID=debian
HOME_URL="https://www.debian.org/"
SUPPORT_URL="https://www.debian.org/support"
BUG_REPORT_URL="https://bugs.debian.org/"
1
2
3
4
5
Tasks: 90 total, 1 running, 89 sleeping, 0 stopped, 0 zombie
%Cpu0 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu1 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 3722.8 total, 2740.4 free, 453.2 used, 758.3 buff/cache
MiB Swap: 0.0 total, 0.0 free, 0.0 used. 3269.6 avail Mem
docker安装
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
## Docker Engine install (Debian)
## https://docs.docker.com/engine/install/debian/
apt update
apt install -y ca-certificates curl
# Create the keyring directory and fetch Docker's signing key
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc
chmod a+r /etc/apt/keyrings/docker.asc
# Add the repository to Apt sources (deb822 format; Suites expands to "bookworm"):
tee /etc/apt/sources.list.d/docker.sources <<EOF
Types: deb
URIs: https://download.docker.com/linux/debian
Suites: $(. /etc/os-release && echo "$VERSION_CODENAME")
Components: stable
Signed-By: /etc/apt/keyrings/docker.asc
EOF
apt update
apt install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# Installed version at time of writing:
## Client: Docker Engine - Community
##   Version: 29.3.1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Prerequisites: forward IPv4 and let iptables see bridged traffic
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
# Load the modules now (the file above only takes effect on the next boot)
sudo modprobe overlay
sudo modprobe br_netfilter
# Required sysctl parameters; the file persists them across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
# Apply the sysctl parameters without rebooting
sudo sysctl --system
卸载
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Stop Kubernetes-related services before wiping state
systemctl stop kubelet
systemctl stop docker
systemctl stop containerd
#systemctl stop etcd
# Undo everything `kubeadm init`/`kubeadm join` set up on this node
kubeadm reset -f
# Remove the packages and their now-unused dependencies
apt-get purge -y kubelet kubeadm kubectl kubernetes-cni cri-tools cri-o
apt-get autoremove -y
# Remove leftover configuration and state directories
rm -rf /etc/kubernetes/
rm -rf /var/lib/kubelet/
rm -rf /var/lib/kubernetes/
rm -rf ~/.kube/
# Remove etcd data (only relevant on a master node)
rm -rf /var/lib/etcd/
# Remove CNI network config (fixes many post-reinstall network problems)
rm -rf /etc/cni/net.d/
rm -rf /var/lib/cni/
rm -rf /opt/cni/bin/
# Remove K8s runtime/temporary files
rm -rf /var/run/kubernetes/
# Disable swap: `sudo swapoff -a`, and delete the swap line from /etc/fstab.
kubelet 从 v1.35 降级为 v1.34,然后 kubeadm init 时指定 --kubernetes-version v1.34.x
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Kubernetes and CRI-O apt repositories (pkgs.k8s.io)
KUBERNETES_VERSION=v1.34
CRIO_VERSION=v1.34
# Make sure the keyring directory exists before writing keys into it
sudo install -m 0755 -d /etc/apt/keyrings
# Kubernetes repo key + source (`--yes` lets gpg overwrite the file on re-runs)
curl -fsSL https://pkgs.k8s.io/core:/stable:/$KUBERNETES_VERSION/deb/Release.key |
    gpg --dearmor --yes -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/$KUBERNETES_VERSION/deb/ /" |
    tee /etc/apt/sources.list.d/kubernetes.list
# CRI-O repo key + source
curl -fsSL https://download.opensuse.org/repositories/isv:/cri-o:/stable:/$CRIO_VERSION/deb/Release.key |
    gpg --dearmor --yes -o /etc/apt/keyrings/cri-o-apt-keyring.gpg
echo "deb [signed-by=/etc/apt/keyrings/cri-o-apt-keyring.gpg] https://download.opensuse.org/repositories/isv:/cri-o:/stable:/$CRIO_VERSION/deb/ /" |
    tee /etc/apt/sources.list.d/cri-o.list
apt-get update
apt-get install -y kubelet kubeadm kubectl
# Pin the versions so routine apt upgrades don't move the cluster unexpectedly
apt-mark hold kubelet kubeadm kubectl
# Binaries are installed to /usr/bin/; symlink into /usr/local/bin if your PATH needs it
# containerd is used uniformly as the container runtime here
#apt install -y containerd.io
# Regenerate the kubelet config file
kubeadm init phase kubelet-start
# Check that the file was generated
ls -l /var/lib/kubelet/config.yaml
# Reset so a clean `kubeadm init` can follow
kubeadm reset -f
# Back up the stock containerd config (regenerated in the next step)
sudo mv /etc/containerd/config.toml /etc/containerd/config.toml.bak
这个文件默认给的比较不全,需要用containerd重新生成一个。
containerd config default | tee /etc/containerd/config.toml
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml
systemctl daemon-reload
systemctl restart containerd
systemctl status containerd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.0.58:6443 --token gdv0bn.iexx3g2fq7zhe06c \
--discovery-token-ca-cert-hash sha256:68defa33354d0c4703b1d64a2fadf268287bb4942f6c866c9cff5903b9f504dc
管理
1
2
3
# Put the admin kubeconfig where kubectl expects it (run as a regular user)
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
1
2
3
4
5
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster
1
2
3
4
root@raha-0001:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master-node NotReady control-plane 52m v1.34.6
raha-0002 NotReady <none> 11m v1.34.6
安装网络
1
2
3
4
5
6
# Install the Calico CNI network plugin
kubectl apply -f https://raw.githubusercontent.com/projectcalico/calico/v3.30.2/manifests/calico.yaml
# Verify the network pods are present
kubectl get pods -n kube-system | grep -E 'flannel|calico|cilium'
# CNI config files should now exist here
ls /etc/cni/net.d/
1
2
3
4
5
#30s后,已正常
root@raha-0001:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master-node Ready control-plane 100m v1.34.6
raha-0002 Ready <none> 59m v1.34.6
1
2
# Ingress resources need a controller to take effect. Install Nginx Ingress Controller if missing.
# NOTE(review): this URL tracks the `main` branch, which can change at any time — consider pinning a tagged release
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/baremetal/deploy.yaml
管理面板
安装metrics-server
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# 未安装
root@raha-0001:~# kubectl top nodes
error: Metrics API not available
#进行安装
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
#
root@raha-0001:~# kubectl get apiservices | grep metrics
v1beta1.metrics.k8s.io kube-system/metrics-server False (MissingEndpoints) 57s
#Pod 根本没启动,或者一直在重启(CrashLoopBackOff)。
root@raha-0001:~# kubectl get pods -n kube-system | grep metrics-server
metrics-server-b4c746d8b-njp99 0/1 Running 0 10m
# 查看问题
root@raha-0001:~# kubectl logs -n kube-system metrics-server-b4c746d8b-njp99
E0328 03:52:30.421281 1 scraper.go:149] "Failed to scrape node" err="Get \"https://192.168.0.58:10250/metrics/resource\": tls: failed to verify certificate: x509: cannot validate certificate for 192.168.0.58 because it doesn't contain any IP SANs" node="master-node"
E0328 03:52:30.428324 1 scraper.go:149] "Failed to scrape node" err="Get \"https://192.168.0.151:10250/metrics/resource\": tls: failed to verify certificate: x509: cannot validate certificate for 192.168.0.151 because it doesn't contain any IP SANs" node="raha-0002"
# 解决问题:在打开的编辑器中,找到 spec.template.spec.containers[0].args 部分。在列表末尾添加 --kubelet-insecure-tls 参数。
kubectl edit deployment metrics-server -n kube-system
spec:
  containers:
  - args:
    - --cert-dir=/tmp
    - --secure-port=4443
    - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
    - --kubelet-insecure-tls # <--- 添加这一行
...
# 验证ok
root@raha-0001:~# kubectl get pods -n kube-system | grep metrics-server
metrics-server-5f54fb74d9-pbcgz 1/1 Running 0 34s
Kubernetes Dashboard
1
https://kubernetes.io/zh-cn/docs/tasks/access-application-cluster/web-ui-dashboard/
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
## The official Helm repo was down -- not using Helm
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml
# Change the Service type to NodePort
# The default install is only reachable inside the cluster; expose it externally.
kubectl edit svc kubernetes-dashboard -n kubernetes-dashboard
# In the editor, find the spec: section, change the type and set the port:
spec:
  type: NodePort # 将 ClusterIP 改为 NodePort
  ports:
  - port: 443
    targetPort: 8443
    nodePort: 30000 # 添加这一行,指定访问端口 (范围 30000-32767)
。。。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Create an admin-user ServiceAccount bound to cluster-admin for Dashboard login.
# (Indentation matters: name/namespace must sit under metadata:, and the
#  ServiceAccount subject entries under subjects:.)
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kubernetes-dashboard
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kubernetes-dashboard
EOF
1
2
3
4
5
# Create a login token for the Dashboard (8760h = valid for one year)
kubectl -n kubernetes-dashboard create token admin-user --duration=8760h
#eyJhbGciOiJSUzI1NiIsImtpZCI6IlVsdklURE0wOHAzeVNqNW9JX01TUFRDRUZ6X01Ec1h1TzFDbVJvcjAzUVkifQ.eyJhdWQiOlsiaHR0cHM6Ly9rdWJlcm5ldGVzLmRlZmF1bHQuc3ZjLmNsdXN0ZXIubG9jYWwiXSwiZXhwIjoxODA2MjE0NzY4LCJpYXQiOjE3NzQ2Nzg3NjgsImlzcyI6Imh0dHBzOi8va3ViZXJuZXRlcy5kZWZhdWx0LnN2Yy5jbHVzdGVyLmxvY2FsIiwianRpIjoiYzFlOGY1Y2MtMWM2Yi00ODVhLTg5N2UtYTk1ODY5NTYyZGMxIiwia3ViZXJuZXRlcy5pbyI6eyJuYW1lc3BhY2UiOiJrdWJlcm5ldGVzLWRhc2hib2FyZCIsInNlcnZpY2VhY2NvdW50Ijp7Im5hbWUiOiJhZG1pbi11c2VyIiwidWlkIjoiYzJjOTZkNjUtNGZiZS00ZDQxLTk2M2YtNWVkOGU3MzI4YWI1In19LCJuYmYiOjE3NzQ2Nzg3NjgsInN1YiI6InN5c3RlbTpzZXJ2aWNlYWNjb3VudDprdWJlcm5ldGVzLWRhc2hib2FyZDphZG1pbi11c2VyIn0.SFJMod8Am4J5fvg3jyvvWkarPwjkFtr8quU3TYBFn0AXOtXIKucQlS3RSuDArpIns-UR5_rRzGOSaTh37ddQuy6yP4CCfvjaDZd92b01nzLn_mFn4fspaV9Lr1qowR-0f9YMrlSJH1EYrRGuAtjiz3pP2nOw2TyOW8GoYccANJiKgoH34G0BUauEF9PHcysRnUOZZygLSvWGMsJfr4iVTa1_1iyh9zyMxD2vfrz0V6n9ZKnaKFr6swl2Ofm3wZFryM35Rw6NYvgTwVXxGQ1kJN9hKUR3SkgSnGbx0MP5izvKouCCKBM8tUrqoWIPXEVUjomRDSonqriF3FkuUV7-Pg
本文由作者按照 CC BY 4.0 进行授权