Installing a Highly Available Kubernetes Cluster with kubeadm
- Kubernetes version: 1.28
- OS: CentOS Stream 9
Official documentation:
https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/high-availability/
Runtime architecture
Machine configuration
- Control plane: 3 nodes
- Worker nodes: 3 nodes
- API server VIP (for an on-premises deployment you can follow an haproxy + keepalived scheme; companies usually offer a VIP service you can simply request)
Environment preparation
Before installing the cluster, prepare the Kubernetes runtime environment on every node as follows.
Create required directories
mkdir -p /data/containerd \
/data/etcd \
~/deploy/kubeadm \
~/deploy/ingress-nginx \
~/deploy/metrics-server \
~/deploy/dashboard
Disable swap
vi /etc/fstab
# /etc/fstab
# Created by anaconda on Wed Aug 12 13:47:21 2015
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
UUID=96eb1a6e-6708-4159-9394-8734c9b35c79 / xfs defaults,noatime,nodev,nobarrier,inode64 0 0
UUID=555aef93-c347-475c-a81d-418e5899ba46 /home1 ext4 defaults,noatime,nodev,nobarrier 0 0
#UUID=6d17e220-d048-471b-b858-3ea95c2c1cae swap swap defaults 0 0
/home1 /home none defaults,bind 0 0
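Commenting out the swap line in /etc/fstab only takes effect at the next boot; to turn swap off immediately on the running system as well:
swapoff -a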
Disable firewalld, SELinux, and dnsmasq
systemctl disable --now firewalld
setenforce 0
# /etc/sysconfig/selinux is a symlink to /etc/selinux/config, so editing either file is enough
vi /etc/selinux/config
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - No SELinux policy is loaded.
SELINUX=disabled
# SELINUXTYPE= can take one of these three values:
# targeted - Targeted processes are protected,
# minimum - Modification of targeted policy. Only selected processes are protected.
# mls - Multi Level Security protection.
SELINUXTYPE=targeted
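The heading above also calls for disabling dnsmasq; if it happens to be installed on your hosts (it can conflict with cluster DNS on port 53), disable it the same way as firewalld:
systemctl disable --now dnsmasq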
Raise the maximum number of open file handles above 65535
vim /etc/security/limits.conf
* soft nofile 655350
* hard nofile 655350
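The new limits only apply to new login sessions; after logging in again, verify them with:
ulimit -Sn   # soft limit, should print 655350
ulimit -Hn   # hard limit, should print 655350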
Configure IPVS
yum install ipvsadm ipset sysstat conntrack libseccomp -y
# On kernels < 4.18, replace nf_conntrack with nf_conntrack_ipv4
cat <<EOF |sudo tee /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
br_netfilter
ipt_rpfilter
ipt_REJECT
ipip
EOF
systemctl daemon-reload && systemctl enable --now systemd-modules-load.service
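To confirm that systemd-modules-load actually loaded the modules:
lsmod | grep -e ip_vs -e nf_conntrack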
Kubernetes kernel parameters
modprobe br_netfilter
cat <<EOF | sudo tee /etc/sysctl.d/62-k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
sysctl -p /etc/sysctl.d/62-k8s.conf
Install the containerd runtime
https://github.com/containerd/containerd/blob/main/docs/man/containerd-config.toml.5.md
# Install (the config-manager subcommand requires the dnf-plugins-core package)
yum config-manager \
--add-repo \
https://download.docker.com/linux/centos/docker-ce.repo
yum install -y containerd.io
containerd config default > /etc/containerd/config.toml
vi /etc/containerd/config.toml
# Configure containerd
## 1. On xfs with ftype=0 (an older mkfs.xfs default), containers cannot use the overlayfs driver.
##    Either recreate the xfs filesystem with ftype=1 (i.e. d_type=true) or use ext4 (/home1 here).
## 2. Private registries must be configured here; this differs from docker.
root = "/data/containerd"
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "registry.k8s.io/pause:3.9"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/etc/containerd/certs.d"
# Start (the restart ensures the new config takes effect if containerd was already running)
systemctl daemon-reload && systemctl enable --now containerd
systemctl restart containerd
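A quick check that containerd is running and using the relocated root directory configured above:
systemctl is-active containerd
ls /data/containerd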
[Optional] Configure and test registry mirrors for containerd. Testing requires crictl, which is installed automatically with kubeadm (see the kubeadm installation section below).
# Configure registry mirrors
mkdir -p /etc/containerd/certs.d/docker.io \
/etc/containerd/certs.d/gcr.io \
/etc/containerd/certs.d/registry.k8s.io \
/etc/containerd/certs.d/quay.io
cat <<EOF | sudo tee /etc/containerd/certs.d/docker.io/hosts.toml
server = "https://docker.io"
[host."https://jy22hui5.mirror.aliyuncs.com"]
capabilities = ["pull", "resolve"]
[host."https://hub-mirror.c.163.com"]
capabilities = ["pull", "resolve"]
EOF
cat <<EOF | sudo tee /etc/containerd/certs.d/gcr.io/hosts.toml
server = "https://gcr.io"
[host."https://gcr.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
EOF
cat <<EOF | sudo tee /etc/containerd/certs.d/registry.k8s.io/hosts.toml
server = "https://registry.k8s.io"
[host."https://k8s.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
EOF
cat <<EOF | sudo tee /etc/containerd/certs.d/quay.io/hosts.toml
server = "https://quay.io"
[host."https://quay.mirrors.ustc.edu.cn"]
EOF
# Test: pull an image with debug logging to confirm the mirror is used (--debug is a global flag and goes before the subcommand)
crictl --debug pull <image>
Reboot
reboot
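After the reboot, it is worth confirming that the settings above persisted; a minimal check:
free -h | grep -i swap       # swap should show 0B
lsmod | grep ip_vs           # IPVS modules should still be loaded
sysctl net.ipv4.ip_forward   # should print 1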
Install the Kubernetes cluster
kubeadm is the official Kubernetes cluster management tool; it handles cluster installation and upgrades, certificate management, and more. The following steps install the cluster with kubeadm.
Install kubeadm
# Set up the yum repository
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
yum list kubeadm.x86_64 --showduplicates | sort -r
# Install (kubeadm pulls in kubelet, kubectl, and cri-tools as dependencies)
yum install -y kubeadm
# Configure the kubelet cgroup driver
cat <<EOF | sudo tee /etc/sysconfig/kubelet
KUBELET_EXTRA_ARGS="--cgroup-driver=systemd"
EOF
# Enable kubelet ahead of time
## kubelet will fail to start until kubeadm generates its config; that failure can be ignored for now
systemctl daemon-reload && systemctl enable --now kubelet
# Configure crictl endpoints
crictl config runtime-endpoint unix:///run/containerd/containerd.sock
crictl config image-endpoint unix:///run/containerd/containerd.sock
# Verify that crictl works
crictl images
# Shell completion
vi $HOME/.bashrc
...
source <(crictl completion bash)
source $HOME/.bashrc
Initialize the first control plane node
First, create the node configuration file kubeadm-config.yaml.
IMPORTANT
Replace the placeholder parameters in the file (e.g. {local_ip}, {master_vip}) with values for your servers.
# ~/deploy/kubeadm/kubeadm-config.yaml
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
bootstrapTokens:
  - groups:
      - system:bootstrappers:kubeadm:default-node-token
    token: 7t2weq.bjbawausm0jaxury
    ttl: 24h0m0s
    usages:
      - signing
      - authentication
localAPIEndpoint:
  # IP of this node
  advertiseAddress: {local_ip}
  bindPort: 6443
nodeRegistration:
  # containerd socket
  criSocket: unix:///run/containerd/containerd.sock
  # keep workloads off the control plane
  taints:
    - effect: NoSchedule
      key: node-role.kubernetes.io/control-plane
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: v1.28.1
etcd:
  local:
    dataDir: /data/etcd
networking:
  dnsDomain: cluster.local
  # pod CIDR (note: 172.168.0.0/16 lies outside the RFC 1918 private ranges)
  podSubnet: 172.168.0.0/16
  # service CIDR
  serviceSubnet: 10.96.0.0/12
scheduler: {}
apiServer:
  certSANs:
    # IP of this node
    - {local_ip}
  timeoutForControlPlane: 4m0s
certificatesDir: /etc/kubernetes/pki
clusterName: my-k8s-cluster
controlPlaneEndpoint: {master_vip}:6443
controllerManager: {}
dns: {}
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd
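Optionally, the file can be sanity-checked before running init; recent kubeadm releases (including 1.28) ship a validate subcommand:
cd ~/deploy/kubeadm
kubeadm config validate --config kubeadm-config.yaml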
Then initialize the control plane node with the following commands:
cd ~/deploy/kubeadm
# Migrate the config to the latest schema version
kubeadm config migrate --old-config kubeadm-config.yaml \
    --new-config kubeadm-config-release.yaml
# Pre-pull images (this also verifies that image pulls work)
kubeadm config images pull --config kubeadm-config-release.yaml
# Initialize
kubeadm init --config kubeadm-config-release.yaml --upload-certs
On success, output like the following is printed:
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join 10.113.228.179:6443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:e4e32f69e38947fea3cd84e292da186d45c0ecdfffed06fd634926ef6db39087 \
--control-plane --certificate-key 99bdaeb61f474f5864cf29f662fe17c727f09846fb915de7e282f87ee54a4445
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 10.113.228.179:6443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:e4e32f69e38947fea3cd84e292da186d45c0ecdfffed06fd634926ef6db39087
Configure kubectl on a node
Here we configure kubectl on the first control plane node.
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Check
kubectl get pod -n kube-system
# Set an alias and shell completion
vi ~/.bashrc
alias k=kubectl
source <(kubectl completion bash | sed s/kubectl/k/g)
source <(kubeadm completion bash)
source ~/.bashrc
# check
k get pod -n kube-system
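As a further sanity check, kubectl cluster-info should report the API server at the VIP configured in controlPlaneEndpoint:
kubectl cluster-info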
Join the remaining nodes to the cluster
Control plane:
kubeadm join 10.113.228.179:6443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:e4e32f69e38947fea3cd84e292da186d45c0ecdfffed06fd634926ef6db39087 \
--control-plane --certificate-key 99bdaeb61f474f5864cf29f662fe17c727f09846fb915de7e282f87ee54a4445
Worker nodes:
kubeadm join 10.113.228.179:6443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:e4e32f69e38947fea3cd84e292da186d45c0ecdfffed06fd634926ef6db39087
Note: if the token has expired, generate a new one:
kubeadm token create --print-join-command
kubeadm init phase upload-certs --upload-certs
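The first command prints a ready-to-use worker join command; the second re-uploads the control plane certificates and prints a fresh certificate key. To join another control plane node, combine the two outputs along these lines (the placeholders stand for the values printed by those commands):
kubeadm join {master_vip}:6443 --token <new-token> \
    --discovery-token-ca-cert-hash sha256:<hash> \
    --control-plane --certificate-key <new-certificate-key>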
Once all nodes have joined, list them with the command below (the nodes stay NotReady until a CNI plugin is installed; see the Antrea section later in this guide):
kubectl get node
NAME STATUS ROLES AGE VERSION
dev-vm-001 NotReady control-plane 84m v1.28.1
dev-vm-002 NotReady control-plane 32m v1.28.1
dev-vm-003 NotReady control-plane 19m v1.28.1
dev-vm-004 NotReady <none> 16m v1.28.1
dev-vm-005 NotReady <none> 16m v1.28.1
dev-vm-006 NotReady <none> 9m43s v1.28.1
[Optional] Switch kube-proxy to IPVS mode
Edit the kube-proxy configuration:
# edit kube-proxy config
kubectl -n kube-system edit cm kube-proxy
Change the mode field in the config map to "ipvs":
apiVersion: v1
data:
config.conf: |-
#...
mode: "ipvs"
After saving the change, restart kube-proxy to apply it:
# restart kube-proxy pods
kubectl -n kube-system rollout restart daemonset kube-proxy
# check: should print "ipvs"
curl 127.0.0.1:10249/proxyMode
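Since ipvsadm was installed during environment preparation, the generated IPVS virtual servers can also be inspected directly:
ipvsadm -Ln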
Install required add-ons
With the cluster installed, a few additional add-ons provide essential functionality such as networking and performance monitoring.
Install helm
Helm is a package manager for Kubernetes that makes deploying applications to a cluster more convenient.
Before installing the other components, install helm on the node where kubectl was configured.
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
chmod 700 get_helm.sh
./get_helm.sh
# Optional: move helm if /usr/local/bin is not on the PATH
mv /usr/local/bin/helm /bin/
# Shell completion
vi ~/.bashrc
...
source <(helm completion bash)
source ~/.bashrc
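A quick check that the install and shell completion are working:
helm version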
Install the Antrea CNI plugin
helm repo add antrea https://charts.antrea.io
helm upgrade --install antrea antrea/antrea \
--version 1.13.1 \
--create-namespace \
-n addons
kubectl get po -n kube-system -owide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
antrea-agent-4dxln 2/2 Running 0 35s 10.174.64.127 dev-vm-005 <none> <none>
antrea-agent-4s9vn 2/2 Running 0 35s 10.174.64.135 dev-vm-002 <none> <none>
antrea-agent-c4shq 2/2 Running 0 35s 10.174.71.107 dev-vm-001 <none> <none>
antrea-agent-czp9z 2/2 Running 0 35s 10.174.76.166 dev-vm-004 <none> <none>
antrea-agent-g5lq8 2/2 Running 0 35s 10.174.75.54 dev-vm-003 <none> <none>
antrea-agent-mtsm9 2/2 Running 0 35s 10.174.71.99 dev-vm-006 <none> <none>
antrea-controller-65f9d9fcfb-wjlb4 1/1 Running 0 35s 10.174.64.127 dev-vm-005 <none> <none>
antrea-ui-5dcc7dfb48-krz7v 2/2 Running 0 21s 172.168.5.2 dev-vm-006 <none> <none>
...
kubectl get node
NAME STATUS ROLES AGE VERSION
dev-vm-001 Ready control-plane 21h v1.28.1
dev-vm-002 Ready control-plane 20h v1.28.1
dev-vm-003 Ready control-plane 20h v1.28.1
dev-vm-004 Ready <none> 20h v1.28.1
dev-vm-005 Ready <none> 20h v1.28.1
dev-vm-006 Ready <none> 20h v1.28.1
Install metrics-server
# Write the chart values (--kubelet-insecure-tls skips verification of the kubelets' self-signed serving certificates)
cd ~/deploy/metrics-server
cat <<EOF | sudo tee values.yaml
args:
- --kubelet-preferred-address-types=InternalIP
- --kubelet-insecure-tls
EOF
# Install
helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/
helm upgrade --install metrics-server metrics-server/metrics-server \
--create-namespace \
-n addons \
-f values.yaml
# Check
kubectl top node
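To confirm the metrics pipeline works end to end, pod-level metrics should also be available:
kubectl top pod -n kube-system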