Cluster Installation

1. Overview

In production, use a Kubernetes release whose patch version is greater than 5; for example, only 1.20.5 or later should be used in production.

When deploying a highly available cluster with kubeadm, the etcd cluster can be placed on the master nodes, which saves resources and simplifies management; an external etcd cluster can also be used for even higher availability. Note that clusters installed this way have certificates that are valid for only one year and must be rotated periodically.
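Certificate expiry on a kubeadm-managed cluster can be checked and renewed with kubeadm itself; a minimal sketch (these subcommands exist in kubeadm 1.20 and later, output format varies by version):

# Check how long each kubeadm-managed certificate is still valid
kubeadm certs check-expiration
# Renew all kubeadm-managed certificates, then restart the control-plane components
kubeadm certs renew all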

2. Basic Environment Configuration

Environment checks (official documentation)

  • A compatible Linux host.
  • At least 2 CPU cores and 2 GB of RAM per machine.
  • Full network connectivity between all machines in the cluster (public or private network is fine).
  • No duplicate hostnames, MAC addresses, or product_uuid values among the nodes.
  • Certain ports open on the machines (a quick check sketch follows this list).
  • Swap configured appropriately (disabling it is recommended).
  • A Linux operating system that ships glibc.
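A quick way to confirm the CPU/RAM minimums and that the usual control-plane and kubelet ports are still free; the port list is an assumption based on the upstream defaults (6443, 2379-2380, 10250-10252) and should be adjusted to your environment:

# CPU cores and memory on this machine
nproc
free -h
# None of the assumed default ports should already be in use
ss -lntp | grep -E ':(6443|2379|2380|1025[0-2])\b' || echo "ports are free"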
| Hostname | IP | OS | Kernel | VM specifications | Desc |
| --- | --- | --- | --- | --- | --- |
| Master-121-123 | 192.168.111.121-123 | CentOS 7.9 | 5.4 | 2c4g 100GB | |
| Worker-124-126 | 192.168.111.124-126 | CentOS 7.9 | 5.4 | 4c8g 100GB | |
| LB-127 | 192.168.111.127 | CentOS 7.9 | 5.4 | 2c4g 20 GB | VIP: 192.168.111.100 |
| Middleware-128 | 192.168.111.128 | CentOS 7.9 | 5.4 | 4c8g 500GB | MySQL/Redis… |

K8s Service and Pod CIDR allocation

| Name | Desc |
| --- | --- |
| Pod CIDR | 172.16.0.0/12 |
| Service CIDR | 172.20.0.0/16 |

Preparing the automation tool

  • Set up the runsh automation tool on a Linux machine that can reach every master/worker node, for example Master-121 (a usage example follows the script below)
The runsh automation script
#!/bin/bash
# Batch processing tools
# -->> User modification <<--
host_port=22
host_user="root"
# -->> User modification <<--
user_option=$1
host_ips=$2

case ${user_option} in
cmd)
exec_command=$(cat <<EOF
$3
EOF
)
for host_ip in $(cat ${host_ips});do
echo -e "--->>> ${host_ip} <<<---"
#result=$(ssh -p $host_port -T ${host_user}@${host_ip} -C ${exec_command})
ssh -p $host_port -T ${host_user}@${host_ip} -C "${exec_command}"
echo -e "\n"
#if [ $? -ne 0 ];then
# echo -e "Failed.\n"
#fi
done
;;

scp)
source_file_dir=$3
target_file_dir=$4
for host_ip in $(cat ${host_ips});do
echo -e "--->>> ${host_ip} <<<---"
scp -P ${host_port} ${source_file_dir} "${host_user}@${host_ip}:${target_file_dir}"
if [ $? -ne 0 ];then
echo -e "Failed.\n"
fi
done
;;

help)
echo 'Usage: runsh [ OPTIONS ] OBJECT COMMAND
where OPTIONS := { cmd | scp }
OBJECT := /path/to/ip_file_directory
COMMAND := { "bash command" |
/path/to/source_file_directory[Using "" to include multiple files] /path/to/target_file_directory
}'
;;

*)
echo -e "Object \"${user_option}\" is unknown, try 'runsh help'"
#echo -e "-runsh: ${user_option} Unknown command, try 'runsh help'"
exit 1
;;
esac
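A usage example (runsh.sh and the /usr/local/bin path are placeholder assumptions; save the script above under any name on PATH):

# Install the script above as the runsh command
install -m 0755 runsh.sh /usr/local/bin/runsh
# Run a command on every host listed in an IP file
runsh cmd ips/node.ips "uptime"
# Copy a file to every host listed in an IP file
runsh scp ips/node.ips /etc/hosts /tmp/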
  • Configure passwordless SSH (Master-121)
# deploy-node = master-121
# Generate the SSH key pair (deploy-node)
ssh-keygen -t rsa -f /root/.ssh/id_rsa -N ''
# cat /opt/deploy-k8s/ips/node.ips
192.168.111.121
192.168.111.122
192.168.111.123
192.168.111.124
192.168.111.125
192.168.111.127

# Enter the password when prompted
for host_ip in $(cat ips/node.ips);do
ssh-copy-id -i ~/.ssh/id_rsa.pub root@${host_ip}
done

# Add hostname resolution for the Master/Worker nodes (deploy-node)
# The first run may be a little slow
runsh cmd ips/node.ips "echo '192.168.111.121 master-121
192.168.111.122 master-122
192.168.111.123 master-123
192.168.111.124 worker-124
192.168.111.125 worker-125
192.168.111.127 lb-127' >> /etc/hosts"

Installing common packages

  • Install the required tools (all nodes)
# deploy-node
# Configure the yum repository
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo
runsh scp ips/all.ips /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/
runsh cmd ips/node.ips "yum install -y vim wget net-tools yum-utils tree git psmisc device-mapper-persistent-data lvm2"

Disabling the swap partition

  • Make sure swap is disabled (Master/Worker nodes)
# deploy-node
# Turn off swap
runsh cmd ips/node.ips "swapoff -a"
runsh cmd ips/node.ips "sed -i '/^[^#].*swap/s/^/#/' /etc/fstab"

# Verify
runsh cmd ips/node.ips "free -h | grep Swap"

Checking MAC addresses and product_uuid

  • Make sure the MAC addresses are unique (Master/Worker nodes)
  • Make sure product_uuid is unique (Master/Worker nodes)
# deploy-node
runsh cmd ips/node.ips "ip addr show ens33 | awk '/ether/{print \$2}'"
runsh cmd ips/node.ips "cat /sys/class/dmi/id/product_uuid"

Disabling the firewalld, SELinux, and dnsmasq services

  • CentOS 7 requires disabling NetworkManager; CentOS 8 does not (Master/Worker nodes)
# deploy-node
runsh cmd ips/node.ips "systemctl disable --now firewalld; systemctl disable --now dnsmasq; systemctl disable --now NetworkManager"
runsh cmd ips/node.ips "setenforce 0; sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config"

# Verify
runsh cmd ips/node.ips "systemctl is-active firewalld; systemctl is-enabled firewalld"
runsh cmd ips/node.ips "systemctl is-active dnsmasq; systemctl is-enabled dnsmasq"
runsh cmd ips/node.ips "systemctl is-active NetworkManager; systemctl is-enabled NetworkManager"
runsh cmd ips/node.ips "getenforce; grep '^SELINUX=' /etc/selinux/config"

Checking the time zone and time synchronization

  • Make sure the time zone is correct and time synchronization is enabled (all nodes)
# deploy-node
runsh cmd ips/all.ips "ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime"
runsh cmd ips/all.ips "echo 'Asia/Shanghai' > /etc/timezone"

# Install the time synchronization service
runsh cmd ips/all.ips "yum install -y chrony && systemctl enable chronyd --now"

# Check time synchronization
runsh cmd ips/node.ips "systemctl is-active chronyd; systemctl is-enabled chronyd; chronyc sources stats"

# Check the time zone
runsh cmd ips/node.ips "date '+%F %T %z'"

# Manually synchronize the time
runsh cmd ips/node.ips "chronyc makestep"
runsh cmd ips/node.ips "systemctl restart chronyd"

Configuring limits.conf

  • Configure the limits (all nodes)
# deploy-node
runsh cmd ips/all.ips "ulimit -SHn 65535"
runsh cmd ips/all.ips "echo '* soft nofile 65536
* hard nofile 131072
* soft nproc 65535
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited' >> /etc/security/limits.conf"

Upgrading the kernel

  • Upgrade the kernel to 5.4 (all nodes)
# deploy-node
mkdir -p /opt/deploy-k8s/repos
wget -P /opt/deploy-k8s/repos https://elrepo.org/linux/kernel/el7/x86_64/RPMS/kernel-lt-5.4.275-1.el7.elrepo.x86_64.rpm
wget -P /opt/deploy-k8s/repos https://elrepo.org/linux/kernel/el7/x86_64/RPMS/kernel-lt-devel-5.4.275-1.el7.elrepo.x86_64.rpm

# Install the kernel
runsh scp ips/all.ips "repos/kernel-lt.*.rpm" /usr/local/src/
runsh cmd ips/all.ips "cd /usr/local/src && yum localinstall -y kernel-lt*"

# Change the default boot kernel
# grub2-mkconfig regenerates the GRUB menu from the kernels currently installed on the system
runsh cmd ips/node.ips "grub2-set-default 0 && grub2-mkconfig -o /etc/grub2.cfg"
runsh cmd ips/node.ips "grubby --args=\"user_namespace.enable=1\" --update-kernel=\"\$(grubby --default-kernel)\""

# Check the default boot kernel
runsh cmd ips/node.ips "grubby --default-kernel"

# Reboot the servers; after they come back, verify the running kernel (see the check below)
runsh cmd ips/node.ips "reboot"
  • Alternatively, upgrade via yum (pick one of the two methods; all nodes)
    rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org &&\
    yum install -y https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm &&\
    kernel_version=$(yum --disablerepo="*" --enablerepo="elrepo-kernel" list available | awk '/lt.x86/{print $2}') &&\
    yum --enablerepo=elrepo-kernel install -y kernel-lt-devel-${kernel_version}.x86_64 kernel-lt-${kernel_version}.x86_64 &&\
    grub2-set-default 0
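Whichever upgrade method was used, confirm after the reboot that every node is actually running the new kernel; a minimal check:

# Every node should now report a 5.4.x kernel
runsh cmd ips/all.ips "uname -r"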
  • Required kernel parameters for K8s (Master/Worker nodes)
# cat conf/k8s-sysctl.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
fs.may_detach_mounts = 1
vm.overcommit_memory = 1
net.ipv4.conf.all.route_localnet = 1

vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720

net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl =15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.ip_conntrack_max = 65536
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384

# Push the file to the master/worker nodes
runsh scp ips/node.ips conf/k8s-sysctl.conf /etc/sysctl.d/k8s-sysctl.conf
runsh cmd ips/node.ips "sysctl --system"

# Additional kernel parameters (to be confirmed)
net.core.netdev_max_backlog = 65535
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_syn_retries = 2
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_max_syn_backlog = 65535
net.core.somaxconn = 65535
net.ipv4.tcp_max_tw_buckets = 8192
net.ipv4.ip_local_port_range = 10240 65000
vm.swappiness = 0

Installing the ipvsadm client tools

  • Install on all nodes
# deploy-node
# runsh cmd ips/node.ips "yum install -y ipset ipvsadm sysstat conntrack libseccomp"
runsh cmd ips/node.ips "yum install -y ipset ipvsadm"
runsh cmd ips/node.ips "which ipset ipvsadm"

Enabling the ipvs and br_netfilter kernel modules

# deploy-node
# mkdir -p /opt/deploy-k8s/conf
# cat conf/ipvs-modules.conf
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
ip_vs_sh
nf_conntrack
br_netfilter
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip

# Load the modules
runsh scp ips/node.ips conf/ipvs-modules.conf /etc/modules-load.d/ipvs-modules.conf
runsh cmd ips/node.ips "systemctl enable systemd-modules-load.service --now"
# runsh cmd ips/node.ips "awk '{print \"modprobe --\",\$1}' /etc/modules-load.d/ipvs-modules.conf | bash"

# Verify that the modules loaded successfully
runsh cmd ips/node.ips "lsmod | awk '/^ip_vs|^nf_conntrack|^br_netfilter/{print \$1}'"

3. Installing Base Components

Installing the runtime components

Install containerd
  • There are two deployment options:
    • 1. Install from the docker-ce repository
    • 2. Install from the binary release

Installing containerd from the binary release (Master/Worker nodes)

# deploy-node
mkdir -p /opt/deploy-k8s/containerd
wget https://github.com/containerd/containerd/releases/download/v1.7.16/containerd-1.7.16-linux-amd64.tar.gz -O /opt/deploy-k8s/containerd/containerd.tar.gz
wget https://github.com/opencontainers/runc/releases/download/v1.1.12/runc.amd64 -O /opt/deploy-k8s/containerd/runc

runsh scp ips/node.ips containerd/containerd.tar.gz /tmp/
runsh cmd ips/node.ips "mkdir -p /usr/local/containerd/etc"
runsh cmd ips/node.ips "tar xf /tmp/containerd.tar.gz -C /usr/local/containerd/"
runsh cmd ips/node.ips "ln -s /usr/local/containerd/bin/* /usr/local/bin/"
runsh scp ips/node.ips containerd/containerd.service /usr/lib/systemd/system/
runsh scp ips/node.ips containerd/config.toml /usr/local/containerd/etc/
chmod +x containerd/runc
runsh scp ips/node.ips containerd/runc /usr/local/bin/
  • containerd.service
    # you may not use this file except in compliance with the License.
    # You may obtain a copy of the License at
    #
    # http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.

    [Unit]
    Description=containerd container runtime
    Documentation=https://containerd.io
    After=network.target local-fs.target

    [Service]
    ExecStartPre=-/sbin/modprobe overlay
    ExecStart=/usr/local/bin/containerd

    Type=notify
    Delegate=yes
    KillMode=process
    Restart=always
    RestartSec=5
    # Having non-zero Limit*s causes performance problems due to accounting overhead
    # in the kernel. We recommend using cgroups to do container-local accounting.
    LimitNPROC=infinity
    LimitCORE=infinity
    LimitNOFILE=infinity
    # Comment TasksMax if your systemd version does not supports it.
    # Only systemd 226 and above support this version.
    TasksMax=infinity
    OOMScoreAdjust=-999

    [Install]
    WantedBy=multi-user.target

In the containerd config file below, a docker.io registry mirror, SystemdCgroup = true, and a custom sandbox_image have already been set.
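A common way to produce such a file is to start from containerd's default configuration and patch the relevant keys; a minimal sketch (the sed patterns assume the default v2 config layout, and the docker.io mirror entry still has to be added by hand):

# Generate the default config, then switch the runc cgroup driver and the pause image
mkdir -p /usr/local/containerd/etc
containerd config default > /usr/local/containerd/etc/config.toml
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /usr/local/containerd/etc/config.toml
sed -i 's#sandbox_image = ".*"#sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.7"#' /usr/local/containerd/etc/config.toml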

  • config.toml
    disabled_plugins = []
    imports = []
    oom_score = 0
    plugin_dir = ""
    required_plugins = []
    root = "/var/lib/containerd"
    state = "/run/containerd"
    temp = ""
    version = 2

    [cgroup]
    path = ""

    [debug]
    address = ""
    format = ""
    gid = 0
    level = ""
    uid = 0

    [grpc]
    address = "/run/containerd/containerd.sock"
    gid = 0
    max_recv_message_size = 16777216
    max_send_message_size = 16777216
    tcp_address = ""
    tcp_tls_ca = ""
    tcp_tls_cert = ""
    tcp_tls_key = ""
    uid = 0

    [metrics]
    address = ""
    grpc_histogram = false

    [plugins]

    [plugins."io.containerd.gc.v1.scheduler"]
    deletion_threshold = 0
    mutation_threshold = 100
    pause_threshold = 0.02
    schedule_delay = "0s"
    startup_delay = "100ms"

    [plugins."io.containerd.grpc.v1.cri"]
    device_ownership_from_security_context = false
    disable_apparmor = false
    disable_cgroup = false
    disable_hugetlb_controller = true
    disable_proc_mount = false
    disable_tcp_service = true
    enable_selinux = false
    enable_tls_streaming = false
    enable_unprivileged_icmp = false
    enable_unprivileged_ports = false
    ignore_image_defined_volumes = false
    max_concurrent_downloads = 3
    max_container_log_line_size = 16384
    netns_mounts_under_state_dir = false
    restrict_oom_score_adj = false
    sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.7"
    selinux_category_range = 1024
    stats_collect_period = 10
    stream_idle_timeout = "4h0m0s"
    stream_server_address = "127.0.0.1"
    stream_server_port = "0"
    systemd_cgroup = false
    tolerate_missing_hugetlb_controller = true
    unset_seccomp_profile = ""

    [plugins."io.containerd.grpc.v1.cri".cni]
    bin_dir = "/opt/cni/bin"
    conf_dir = "/etc/cni/net.d"
    conf_template = ""
    ip_pref = ""
    max_conf_num = 1

    [plugins."io.containerd.grpc.v1.cri".containerd]
    default_runtime_name = "runc"
    disable_snapshot_annotations = true
    discard_unpacked_layers = false
    ignore_rdt_not_enabled_errors = false
    no_pivot = false
    snapshotter = "overlayfs"

    [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
    base_runtime_spec = ""
    cni_conf_dir = ""
    cni_max_conf_num = 0
    container_annotations = []
    pod_annotations = []
    privileged_without_host_devices = false
    runtime_engine = ""
    runtime_path = ""
    runtime_root = ""
    runtime_type = ""

    [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]

    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]

    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
    base_runtime_spec = ""
    cni_conf_dir = ""
    cni_max_conf_num = 0
    container_annotations = []
    pod_annotations = []
    privileged_without_host_devices = false
    runtime_engine = ""
    runtime_path = ""
    runtime_root = ""
    runtime_type = "io.containerd.runc.v2"

    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
    BinaryName = ""
    CriuImagePath = ""
    CriuPath = ""
    CriuWorkPath = ""
    IoGid = 0
    IoUid = 0
    NoNewKeyring = false
    NoPivotRoot = false
    Root = ""
    ShimCgroup = ""
    SystemdCgroup = true

    [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
    base_runtime_spec = ""
    cni_conf_dir = ""
    cni_max_conf_num = 0
    container_annotations = []
    pod_annotations = []
    privileged_without_host_devices = false
    runtime_engine = ""
    runtime_path = ""
    runtime_root = ""
    runtime_type = ""

    [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime.options]

    [plugins."io.containerd.grpc.v1.cri".image_decryption]
    key_model = "node"

    [plugins."io.containerd.grpc.v1.cri".registry]
    config_path = ""

    [plugins."io.containerd.grpc.v1.cri".registry.auths]

    [plugins."io.containerd.grpc.v1.cri".registry.configs]

    [plugins."io.containerd.grpc.v1.cri".registry.headers]

    [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
    [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
    endpoint = ["https://q2gr04ke.mirror.aliyuncs.com"]

    [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
    tls_cert_file = ""
    tls_key_file = ""

    [plugins."io.containerd.internal.v1.opt"]
    path = "/opt/containerd"

    [plugins."io.containerd.internal.v1.restart"]
    interval = "10s"

    [plugins."io.containerd.internal.v1.tracing"]
    sampling_ratio = 1.0
    service_name = "containerd"

    [plugins."io.containerd.metadata.v1.bolt"]
    content_sharing_policy = "shared"

    [plugins."io.containerd.monitor.v1.cgroups"]
    no_prometheus = false

    [plugins."io.containerd.runtime.v1.linux"]
    no_shim = false
    runtime = "runc"
    runtime_root = ""
    shim = "containerd-shim"
    shim_debug = false

    [plugins."io.containerd.runtime.v2.task"]
    platforms = ["linux/amd64"]
    sched_core = false

    [plugins."io.containerd.service.v1.diff-service"]
    default = ["walking"]

    [plugins."io.containerd.service.v1.tasks-service"]
    rdt_config_file = ""

    [plugins."io.containerd.snapshotter.v1.aufs"]
    root_path = ""

    [plugins."io.containerd.snapshotter.v1.btrfs"]
    root_path = ""

    [plugins."io.containerd.snapshotter.v1.devmapper"]
    async_remove = false
    base_image_size = ""
    discard_blocks = false
    fs_options = ""
    fs_type = ""
    pool_name = ""
    root_path = ""

    [plugins."io.containerd.snapshotter.v1.native"]
    root_path = ""

    [plugins."io.containerd.snapshotter.v1.overlayfs"]
    root_path = ""
    upperdir_label = false

    [plugins."io.containerd.snapshotter.v1.zfs"]
    root_path = ""

    [plugins."io.containerd.tracing.processor.v1.otlp"]
    endpoint = ""
    insecure = false
    protocol = ""

    [proxy_plugins]

    [stream_processors]

    [stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]
    accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]
    args = ["--decryption-keys-path", "/usr/local/containerd/ocicrypt/keys"]
    env = ["OCICRYPT_KEYPROVIDER_CONFIG=/usr/local/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
    path = "ctd-decoder"
    returns = "application/vnd.oci.image.layer.v1.tar"

    [stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]
    accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]
    args = ["--decryption-keys-path", "/usr/local/containerd/ocicrypt/keys"]
    env = ["OCICRYPT_KEYPROVIDER_CONFIG=/usr/local/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
    path = "ctd-decoder"
    returns = "application/vnd.oci.image.layer.v1.tar+gzip"

    [timeouts]
    "io.containerd.timeout.bolt.open" = "0s"
    "io.containerd.timeout.shim.cleanup" = "5s"
    "io.containerd.timeout.shim.load" = "5s"
    "io.containerd.timeout.shim.shutdown" = "3s"
    "io.containerd.timeout.task.state" = "2s"

    [ttrpc]
    address = ""
    gid = 0
    uid = 0
  • Reload systemd and start containerd
runsh cmd ips/node.ips "systemctl daemon-reload; systemctl enable containerd --now"
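A quick sanity check that the daemon came up on every node; ctr is shipped in the containerd tarball installed above:

# containerd should be active and report its version on every node
runsh cmd ips/node.ips "systemctl is-active containerd; ctr version"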
Install crictl

crictl is a CLI for CRI-compatible container runtimes and is useful for debugging them.
Reference: the official guide on debugging Kubernetes nodes with crictl.

Installing crictl from the binary release (Master/Worker nodes)

# deploy-node
mkdir -p /opt/deploy-k8s/crictl
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.30.0/crictl-v1.30.0-linux-amd64.tar.gz -O /opt/deploy-k8s/crictl/crictl.tar.gz
runsh scp ips/node.ips crictl/crictl.tar.gz /tmp/
runsh scp ips/node.ips crictl/crictl.yaml /usr/local/containerd/etc/
runsh cmd ips/node.ips "tar xf /tmp/crictl.tar.gz -C /usr/local/bin"
runsh cmd ips/node.ips "chmod +x /usr/local/bin/crictl"
runsh cmd ips/node.ips "mkdir -p /usr/local/containerd/etc/bash_completion.d/"
runsh cmd ips/node.ips "crictl completion bash > /usr/local/containerd/etc/bash_completion.d/crictl"
  • crictl.yaml
    runtime-endpoint: unix:///run/containerd/containerd.sock
    image-endpoint: unix:///run/containerd/containerd.sock
    timeout: 10
    debug: false # set to true when debugging
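With that file in place crictl can talk to containerd; note that the path above is not crictl's default (/etc/crictl.yaml), so either export CRI_CONFIG_FILE or pass --config explicitly, as in this small usage sketch:

# Query the runtime and list all containers through the CRI socket
runsh cmd ips/node.ips "crictl --config /usr/local/containerd/etc/crictl.yaml info | head -n 20"
runsh cmd ips/node.ips "crictl --config /usr/local/containerd/etc/crictl.yaml ps -a"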

Installing Kubernetes and etcd

Install Kubernetes
  • Master/Worker nodes

    • Install via yum

      CentOS / RHEL / Fedora (this repository currently covers v1.24 - v1.29; newer versions will be added over time)
      Aliyun Kubernetes Source
      Because upstream does not provide a sync mechanism, the GPG check on the repository index may fail.
      In that case, install with yum install -y --nogpgcheck kubelet kubeadm kubectl.

      # deploy-node
      cd /opt/deploy-k8s
      mkdir /opt/deploy-k8s/repos
      runsh scp ips/node.ips repos/kubernetes.repo /etc/yum.repos.d/
      # runsh cmd ips/node.ips "yum search kubelet kubeadm kubectl --disableexcludes=kubernetes --showduplicates|grep 1.28"
      runsh cmd ips/node.ips "yum install -y --disableexcludes=kubernetes kubelet-1.28.8 kubeadm-1.28.8 kubectl-1.28.8"
      runsh cmd ips/node.ips "systemctl enable kubelet --now"
      # Installing kubelet via yum pulls in CNI plugins automatically; the containerd docs recommend a newer CNI release, so the two may conflict
      runsh cmd ips/node.ips "ls /opt/cni"
      # cat repos/kubernetes.repo
      [kubernetes]
      name=Kubernetes
      baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/
      enabled=1
      gpgcheck=1
      gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/repodata/repomd.xml.key
  • Install from binaries
# deploy-node
cd /opt/deploy-k8s
mkdir -p /opt/deploy-k8s/repos
wget -P /opt/deploy-k8s/repos https://dl.k8s.io/v1.28.8/kubernetes-server-linux-amd64.tar.gz
tar xf repos/kubernetes-server-linux-amd64.tar.gz --strip-components=3 -C /usr/local/bin kubernetes/server/bin/kube{let,ctl,-apiserver,-controller-manager,-scheduler,-proxy}

runsh scp ips/master.ips "/usr/local/bin/kube*" /usr/local/bin/
runsh scp ips/worker.ips /usr/local/bin/kubelet /usr/local/bin/
runsh scp ips/worker.ips /usr/local/bin/kube-proxy /usr/local/bin/
# runsh cmd ips/node.ips "systemctl enable kubelet --now"
# runsh cmd ips/node.ips "kubelet --version"
# Install the CNI plugins (see the sketch after this block)
runsh cmd ips/node.ips "mkdir -p /opt/cni/bin"
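The binary path above only creates /opt/cni/bin; the CNI plugin binaries themselves still have to be installed. A minimal sketch (the plugins release v1.4.0 and its file naming are assumptions; pick the version you need):

# Download the CNI plugins release and unpack it on every node
wget -P /opt/deploy-k8s/repos https://github.com/containernetworking/plugins/releases/download/v1.4.0/cni-plugins-linux-amd64-v1.4.0.tgz
runsh scp ips/node.ips /opt/deploy-k8s/repos/cni-plugins-linux-amd64-v1.4.0.tgz /tmp/
runsh cmd ips/node.ips "mkdir -p /opt/cni/bin && tar xf /tmp/cni-plugins-linux-amd64-v1.4.0.tgz -C /opt/cni/bin"
# Whichever install path was used, confirm the kubelet version on every node
runsh cmd ips/node.ips "kubelet --version"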

Install etcd
  • Install etcd on the Master nodes (or as a dedicated etcd cluster)
# deploy-node
cat ips/etcd.ips
192.168.111.121
192.168.111.122
192.168.111.123

etcd_version=v3.5.13
mkdir -p /opt/deploy-k8s/etcd
wget -P /opt/deploy-k8s/etcd/ https://github.com/etcd-io/etcd/releases/download/v3.5.13/etcd-v3.5.13-linux-amd64.tar.gz
tar xf /opt/deploy-k8s/etcd/etcd-${etcd_version}-linux-amd64.tar.gz -C /usr/local/bin/ --strip-components=1 etcd-${etcd_version}-linux-amd64/etcd{,ctl}

runsh scp ips/etcd.ips "/usr/local/bin/etcd*" /usr/local/bin/
runsh cmd ips/etcd.ips "etcdctl version"

Downloading the cluster installation helper

  • Run on Master-121
# deploy-node
# Install the v1.28.8 version
cd /opt/deploy-k8s/
git clone https://github.com/dotbalo/k8s-ha-install.git
cd k8s-ha-install && git checkout manual-installation-v1.28.x

4. Generating Certificates

Generating the k8s component certificates

Download the certificate generation tools
# deploy-node = master-121
wget "https://pkg.cfssl.org/R1.2/cfssl_linux-amd64" -O /usr/local/bin/cfssl
wget "https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64" -O /usr/local/bin/cfssljson
chmod +x /usr/local/bin/cfssl /usr/local/bin/cfssljson
which cfssl cfssljson
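pkg.cfssl.org is frequently unreachable; the same binaries can also be fetched from the cfssl GitHub releases. A sketch, assuming version v1.6.4 and the release file naming used by that project:

# Alternative download from the GitHub releases page (version is an assumption)
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.4/cfssl_1.6.4_linux_amd64 -O /usr/local/bin/cfssl
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.4/cfssljson_1.6.4_linux_amd64 -O /usr/local/bin/cfssljson
chmod +x /usr/local/bin/cfssl /usr/local/bin/cfssljson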
Create the certificate directories
# deploy-node
cd /opt/deploy-k8s

# All nodes: create the kubernetes directories
runsh cmd ips/node.ips "mkdir -p /etc/kubernetes/pki"

# Master nodes: create the etcd certificate directory
runsh cmd ips/etcd.ips "mkdir -p /etc/etcd/ssl"
Generate the Kubernetes certificates
# deploy-node
# Master nodes: generate the Kubernetes certificates
cd /opt/deploy-k8s/k8s-ha-install/pki
cfssl gencert -initca ca-csr.json | cfssljson -bare /etc/kubernetes/pki/ca

# 172.20.0.1 is the first IP of the k8s Service CIDR; change it to match your environment
# 192.168.111.100 is the VIP; for a non-HA cluster use the Master01 IP instead; adjust as needed
# 192.168.111.121,192.168.111.122,192.168.111.123 are the master node IPs; adjust as needed
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-hostname=172.20.0.1,192.168.111.100,127.0.0.1,kubernetes,kubernetes.default,kubernetes.default.svc,kubernetes.default.svc.cluster,kubernetes.default.svc.cluster.local,192.168.111.121,192.168.111.122,192.168.111.123 \
-profile=kubernetes apiserver-csr.json | cfssljson -bare /etc/kubernetes/pki/apiserver

# Generate the apiserver aggregation-layer (front-proxy) certificates
# requestheader-client-xxx / requestheader-allowed-xxx: aggregator
cfssl gencert -initca front-proxy-ca-csr.json | cfssljson -bare /etc/kubernetes/pki/front-proxy-ca
cfssl gencert \
-ca=/etc/kubernetes/pki/front-proxy-ca.pem \
-ca-key=/etc/kubernetes/pki/front-proxy-ca-key.pem \
-config=ca-config.json \
-profile=kubernetes front-proxy-client-csr.json | cfssljson -bare /etc/kubernetes/pki/front-proxy-client

# The following warning in the output can be ignored
> [WARNING] This certificate lacks a "hosts" field. This makes it unsuitable for
websites. For more information see the Baseline Requirements for the Issuance and Management
of Publicly-Trusted Certificates, v.1.1.6, from the CA/Browser Forum (https://cabforum.org);
specifically, section 10.2.3 ("Information Requirements").

# Generate the controller-manager certificate
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
manager-csr.json | cfssljson -bare /etc/kubernetes/pki/controller-manager

# Note: 192.168.111.100 is the VIP; for a non-HA cluster use the master01 address and change 8443 to the apiserver port (default 6443)
# set-cluster: define a cluster entry
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.111.100:8443 \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig

# Define a context
kubectl config set-context system:kube-controller-manager@kubernetes \
--cluster=kubernetes \
--user=system:kube-controller-manager \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig

# set-credentials: define a user entry
kubectl config set-credentials system:kube-controller-manager \
--client-certificate=/etc/kubernetes/pki/controller-manager.pem \
--client-key=/etc/kubernetes/pki/controller-manager-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig

# Use this context as the default
kubectl config use-context system:kube-controller-manager@kubernetes \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig

cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
scheduler-csr.json | cfssljson -bare /etc/kubernetes/pki/scheduler

# Note: for a non-HA cluster, change 192.168.111.100:8443 to the master01 address and 8443 to the apiserver port (default 6443)
# set-cluster: define a cluster entry
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.111.100:8443 \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig

# set-credentials: define a user entry
kubectl config set-credentials system:kube-scheduler \
--client-certificate=/etc/kubernetes/pki/scheduler.pem \
--client-key=/etc/kubernetes/pki/scheduler-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig

# Define a context
kubectl config set-context system:kube-scheduler@kubernetes \
--cluster=kubernetes \
--user=system:kube-scheduler \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig

# Use this context as the default
kubectl config use-context system:kube-scheduler@kubernetes \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig

cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
admin-csr.json | cfssljson -bare /etc/kubernetes/pki/admin

# Note: for a non-HA cluster, change 192.168.111.100:8443 to the master01 address and 8443 to the apiserver port (default 6443)
# set-cluster: define a cluster entry
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.111.100:8443 \
--kubeconfig=/etc/kubernetes/admin.kubeconfig

# set-credentials: define a user entry
kubectl config set-credentials kubernetes-admin \
--client-certificate=/etc/kubernetes/pki/admin.pem \
--client-key=/etc/kubernetes/pki/admin-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/admin.kubeconfig

# Define a context
kubectl config set-context kubernetes-admin@kubernetes \
--cluster=kubernetes \
--user=kubernetes-admin \
--kubeconfig=/etc/kubernetes/admin.kubeconfig

# Use this context as the default
kubectl config use-context kubernetes-admin@kubernetes \
--kubeconfig=/etc/kubernetes/admin.kubeconfig

# Create the ServiceAccount key pair (sa.key/sa.pub)
openssl genrsa -out /etc/kubernetes/pki/sa.key 2048
openssl rsa -in /etc/kubernetes/pki/sa.key -pubout -out /etc/kubernetes/pki/sa.pub

# Copy the certificates to the other Master nodes
runsh scp /opt/deploy-k8s/ips/master.ips "/etc/kubernetes/pki/*" /etc/kubernetes/pki/
runsh scp /opt/deploy-k8s/ips/master.ips "/etc/kubernetes/*.kubeconfig" /etc/kubernetes/

# Verify
runsh cmd /opt/deploy-k8s/ips/master.ips "ls /etc/kubernetes/pki"
admin.csr apiserver.csr ca.csr controller-manager.csr front-proxy-ca.csr front-proxy-client.csr sa.key scheduler-key.pem
admin-key.pem apiserver-key.pem ca-key.pem controller-manager-key.pem front-proxy-ca-key.pem front-proxy-client-key.pem sa.pub scheduler.pem
admin.pem apiserver.pem ca.pem controller-manager.pem front-proxy-ca.pem front-proxy-client.pem scheduler.csr

runsh cmd /opt/deploy-k8s/ips/master.ips "ls /etc/kubernetes/pki |wc -l"
23
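Before moving on it is worth confirming that the apiserver certificate carries the expected SANs and validity; a minimal check with openssl:

# Inspect the expiry date and the SANs of the freshly generated apiserver certificate
openssl x509 -in /etc/kubernetes/pki/apiserver.pem -noout -enddate
openssl x509 -in /etc/kubernetes/pki/apiserver.pem -noout -text | grep -A1 'Subject Alternative Name'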
Generate the etcd certificates
# deploy-node
# Master nodes: generate the etcd certificates
# master-121,master-122,master-123,192.168.111.121,192.168.111.122,192.168.111.123 are the etcd node hostnames and IPs; adjust as needed
cd /opt/deploy-k8s/k8s-ha-install/pki
cfssl gencert -initca etcd-ca-csr.json | cfssljson -bare /etc/etcd/ssl/etcd-ca
cfssl gencert \
-ca=/etc/etcd/ssl/etcd-ca.pem \
-ca-key=/etc/etcd/ssl/etcd-ca-key.pem \
-config=ca-config.json \
-hostname=127.0.0.1,master-121,master-122,master-123,192.168.111.121,192.168.111.122,192.168.111.123 \
-profile=kubernetes \
etcd-csr.json | cfssljson -bare /etc/etcd/ssl/etcd

# Master nodes: copy the certificates to the other etcd nodes
runsh scp /opt/deploy-k8s/ips/etcd.ips "/etc/etcd/ssl/etcd*" /etc/etcd/ssl/

5. Installing etcd

Configuring etcd

The etcd configuration is largely identical on every node; adjust the hostname and IP address in each Master node's etcd config to match the node.

  • master-121, etcd config
    # cat /opt/deploy-k8s/etcd/master-121/etcd.config.yml
    name: 'master-121'
    data-dir: /var/lib/etcd
    wal-dir: /var/lib/etcd/wal
    snapshot-count: 5000
    heartbeat-interval: 100
    election-timeout: 1000
    quota-backend-bytes: 0
    listen-peer-urls: 'https://192.168.111.121:2380'
    listen-client-urls: 'https://192.168.111.121:2379,http://127.0.0.1:2379'
    max-snapshots: 3
    max-wals: 5
    cors:
    initial-advertise-peer-urls: 'https://192.168.111.121:2380'
    advertise-client-urls: 'https://192.168.111.121:2379'
    discovery:
    discovery-fallback: 'proxy'
    discovery-proxy:
    discovery-srv:
    initial-cluster: 'master-121=https://192.168.111.121:2380,master-122=https://192.168.111.122:2380,master-123=https://192.168.111.123:2380'
    initial-cluster-token: 'etcd-k8s-cluster'
    initial-cluster-state: 'new'
    strict-reconfig-check: false
    enable-v2: true
    enable-pprof: true
    proxy: 'off'
    proxy-failure-wait: 5000
    proxy-refresh-interval: 30000
    proxy-dial-timeout: 1000
    proxy-write-timeout: 5000
    proxy-read-timeout: 0
    client-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    peer-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    peer-client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    debug: false
    log-package-levels:
    log-outputs: [default]
    force-new-cluster: false
  • master-122, etcd config
    # cat /opt/deploy-k8s/etcd/master-122/etcd.config.yml
    name: 'master-122'
    data-dir: /var/lib/etcd
    wal-dir: /var/lib/etcd/wal
    snapshot-count: 5000
    heartbeat-interval: 100
    election-timeout: 1000
    quota-backend-bytes: 0
    listen-peer-urls: 'https://192.168.111.122:2380'
    listen-client-urls: 'https://192.168.111.122:2379,http://127.0.0.1:2379'
    max-snapshots: 3
    max-wals: 5
    cors:
    initial-advertise-peer-urls: 'https://192.168.111.122:2380'
    advertise-client-urls: 'https://192.168.111.122:2379'
    discovery:
    discovery-fallback: 'proxy'
    discovery-proxy:
    discovery-srv:
    initial-cluster: 'master-121=https://192.168.111.121:2380,master-122=https://192.168.111.122:2380,master-123=https://192.168.111.123:2380'
    initial-cluster-token: 'etcd-k8s-cluster'
    initial-cluster-state: 'new'
    strict-reconfig-check: false
    enable-v2: true
    enable-pprof: true
    proxy: 'off'
    proxy-failure-wait: 5000
    proxy-refresh-interval: 30000
    proxy-dial-timeout: 1000
    proxy-write-timeout: 5000
    proxy-read-timeout: 0
    client-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    peer-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    peer-client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    debug: false
    log-package-levels:
    log-outputs: [default]
    force-new-cluster: false
  • master-123, etcd config
    # cat /opt/deploy-k8s/etcd/master-123/etcd.config.yml
    name: 'master-123'
    data-dir: /var/lib/etcd
    wal-dir: /var/lib/etcd/wal
    snapshot-count: 5000
    heartbeat-interval: 100
    election-timeout: 1000
    quota-backend-bytes: 0
    listen-peer-urls: 'https://192.168.111.123:2380'
    listen-client-urls: 'https://192.168.111.123:2379,http://127.0.0.1:2379'
    max-snapshots: 3
    max-wals: 5
    cors:
    initial-advertise-peer-urls: 'https://192.168.111.123:2380'
    advertise-client-urls: 'https://192.168.111.123:2379'
    discovery:
    discovery-fallback: 'proxy'
    discovery-proxy:
    discovery-srv:
    initial-cluster: 'master-121=https://192.168.111.121:2380,master-122=https://192.168.111.122:2380,master-123=https://192.168.111.123:2380'
    initial-cluster-token: 'etcd-k8s-cluster'
    initial-cluster-state: 'new'
    strict-reconfig-check: false
    enable-v2: true
    enable-pprof: true
    proxy: 'off'
    proxy-failure-wait: 5000
    proxy-refresh-interval: 30000
    proxy-dial-timeout: 1000
    proxy-write-timeout: 5000
    proxy-read-timeout: 0
    client-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    peer-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    peer-client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    debug: false
    log-package-levels:
    log-outputs: [default]
    force-new-cluster: false
  • Copy the config files to the etcd nodes
# deploy-node
cd /opt/deploy-k8s
mkdir -p /opt/deploy-k8s/etcd/master-12{1,2,3}
scp /opt/deploy-k8s/etcd/master-121/etcd.config.yml root@192.168.111.121:/etc/etcd/
scp /opt/deploy-k8s/etcd/master-122/etcd.config.yml root@192.168.111.122:/etc/etcd/
scp /opt/deploy-k8s/etcd/master-123/etcd.config.yml root@192.168.111.123:/etc/etcd/

Configuring the etcd service

  • All Master nodes: create the etcd service unit and start it
  • etcd.service
    # deploy-node
    # cat /opt/deploy-k8s/etcd/etcd.service
    [Unit]
    Description=Etcd Service
    Documentation=https://coreos.com/etcd/docs/latest/
    After=network.target

    [Service]
    Type=notify
    ExecStart=/usr/local/bin/etcd --config-file=/etc/etcd/etcd.config.yml
    Restart=on-failure
    RestartSec=10
    LimitNOFILE=65536

    [Install]
    WantedBy=multi-user.target
    Alias=etcd3.service

    # Copy etcd.service to all master nodes
    runsh scp ips/master.ips /opt/deploy-k8s/etcd/etcd.service /usr/lib/systemd/system/
  • All Master nodes: create the etcd certificate directory
# deploy-node
runsh cmd ips/master.ips "mkdir -p /etc/kubernetes/pki/etcd"
runsh cmd ips/master.ips "ln -s /etc/etcd/ssl/* /etc/kubernetes/pki/etcd/"
runsh cmd ips/master.ips "systemctl daemon-reload"
runsh cmd ips/master.ips "systemctl enable etcd --now"
  • Check the etcd status
# etcd-node
export ETCDCTL_API=3
etcdctl \
--endpoints="192.168.111.123:2379,192.168.111.122:2379,192.168.111.121:2379" \
--cacert=/etc/kubernetes/pki/etcd/etcd-ca.pem \
--cert=/etc/kubernetes/pki/etcd/etcd.pem \
--key=/etc/kubernetes/pki/etcd/etcd-key.pem endpoint status \
--write-out=table

etcd cluster status

6. High Availability Configuration

Installing HAProxy and Keepalived

SLB --> HAProxy --> apiserver

Install keepalived and haproxy via yum
  • All Master nodes
# deploy-node
runsh cmd ips/master.ips "yum install -y keepalived haproxy"
Configure HAProxy
  • The configuration is identical on all Master nodes

  • haproxy.cfg
    global
    maxconn 2000
    ulimit-n 16384
    log 127.0.0.1 local0 err
    stats timeout 30s

    defaults
    log global
    mode http
    option httplog
    timeout connect 5000
    timeout client 50000
    timeout server 50000
    timeout http-request 15s
    timeout http-keep-alive 15s

    frontend k8s-master
    bind 0.0.0.0:8443
    bind 127.0.0.1:8443
    mode tcp
    option tcplog
    tcp-request inspect-delay 5s
    default_backend k8s-master

    backend k8s-master
    mode tcp
    option tcplog
    option tcp-check
    balance roundrobin
    default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    server master-121 192.168.111.121:6443 check
    server master-122 192.168.111.122:6443 check
    server master-123 192.168.111.123:6443 check

  • Copy the configuration file to all Master nodes
    # deploy-node
    mkdir -p /opt/deploy-k8s/lb/haproxy
    runsh scp ips/master.ips /opt/deploy-k8s/lb/haproxy/haproxy.cfg /etc/haproxy/
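Before wiring keepalived to it, the copied haproxy.cfg can be syntax-checked on each master; haproxy's -c flag only validates the configuration without starting the proxy:

# Validate the haproxy configuration on every master node
runsh cmd ips/master.ips "haproxy -c -f /etc/haproxy/haproxy.cfg"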
Configure Keepalived
  • All Master nodes; the configuration differs per node (mind each node's IP and the interface parameter)

  • master-121, keepalived.conf
    ! Configuration File for keepalived
    global_defs {
    router_id LVS_DEVEL
    }

    vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
    }

    vrrp_instance VI_1 {
    state MASTER
    interface ens33
    mcast_src_ip 192.168.111.121
    virtual_router_id 51
    priority 101
    nopreempt
    advert_int 2

    authentication {
    auth_type PASS
    auth_pass K8SHA_AUTH123
    }

    virtual_ipaddress {
    192.168.111.100
    }

    track_script {
    chk_apiserver
    }
    }
  • master-122, keepalived.conf
    ! Configuration File for keepalived
    global_defs {
    router_id LVS_DEVEL
    }

    vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1

    }

    vrrp_instance VI_1 {
    state BACKUP
    interface ens33
    mcast_src_ip 192.168.111.122
    virtual_router_id 51
    priority 100
    nopreempt
    advert_int 2

    authentication {
    auth_type PASS
    auth_pass K8SHA_AUTH123
    }

    virtual_ipaddress {
    192.168.111.100
    }

    track_script {
    chk_apiserver
    }
    }
  • master-123, keepalived.conf
    ! Configuration File for keepalived
    global_defs {
    router_id LVS_DEVEL
    }

    vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
    }

    vrrp_instance VI_1 {
    state BACKUP
    interface ens33
    mcast_src_ip 192.168.111.123
    virtual_router_id 51
    priority 100
    nopreempt
    advert_int 2

    authentication {
    auth_type PASS
    auth_pass K8SHA_AUTH123
    }

    virtual_ipaddress {
    192.168.111.100
    }

    track_script {
    chk_apiserver
    }
    }
  • check_apiserver.sh
    #!/bin/bash

    err=0
    for k in $(seq 1 3)
    do
    check_code=$(pgrep haproxy)
    if [[ $check_code == "" ]]; then
    err=$(expr $err + 1)
    sleep 1
    continue
    else
    err=0
    break
    fi
    done

    if [[ $err != "0" ]]; then
    echo "systemctl stop keepalived"
    /usr/bin/systemctl stop keepalived
    exit 1
    else
    exit 0
    fi
  • Copy the config files to the Master nodes
# deploy-node
mkdir -p /opt/deploy-k8s/lb/keepalived/master-12{1,2,3}
chmod +x /opt/deploy-k8s/lb/keepalived/check_apiserver.sh
scp /opt/deploy-k8s/lb/keepalived/master-121/keepalived.conf root@192.168.111.121:/etc/keepalived/
scp /opt/deploy-k8s/lb/keepalived/master-122/keepalived.conf root@192.168.111.122:/etc/keepalived/
scp /opt/deploy-k8s/lb/keepalived/master-123/keepalived.conf root@192.168.111.123:/etc/keepalived/
runsh scp ips/master.ips /opt/deploy-k8s/lb/keepalived/check_apiserver.sh /etc/keepalived/
Start and test
# deploy-node
# All master nodes: start haproxy and keepalived
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl enable haproxy --now; systemctl enable keepalived --now"

# VIP test. Important: with keepalived and haproxy installed, verify that keepalived is actually working
ping 192.168.111.100
telnet 192.168.111.100 8443

# If the VIP is unreachable, troubleshoot keepalived:
# check the firewall and SELinux, the haproxy/keepalived service state, the listening ports, and so on.
# On all nodes firewalld must be disabled and inactive: systemctl status firewalld
# On all nodes SELinux must be disabled: getenforce
# On the master nodes check haproxy and keepalived: systemctl status keepalived haproxy
# On the master nodes check the listening ports: netstat -lntp

7. K8s Component Configuration

Apiserver

  • Create the per-master directories for the unit files (deploy node)
# deploy-node
mkdir -p /opt/deploy-k8s/apiserver/master-12{1,2,3}
  • master-121, kube-apiserver.service
    [Unit]
    Description=Kubernetes API Server
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-apiserver \
    --v=2 \
    --allow-privileged=true \
    --bind-address=0.0.0.0 \
    --secure-port=6443 \
    --advertise-address=192.168.111.121 \
    --service-cluster-ip-range=172.20.0.0/16 \
    --service-node-port-range=30000-32767 \
    --etcd-servers=https://192.168.111.121:2379,https://192.168.111.122:2379,https://192.168.111.123:2379 \
    --etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \
    --etcd-certfile=/etc/etcd/ssl/etcd.pem \
    --etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \
    --client-ca-file=/etc/kubernetes/pki/ca.pem \
    --tls-cert-file=/etc/kubernetes/pki/apiserver.pem \
    --tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \
    --kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \
    --kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \
    --service-account-key-file=/etc/kubernetes/pki/sa.pub \
    --service-account-signing-key-file=/etc/kubernetes/pki/sa.key \
    --service-account-issuer=https://kubernetes.default.svc.cluster.local \
    --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \
    --enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \
    --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false \
    --authorization-mode=Node,RBAC \
    --enable-bootstrap-token-auth=true \
    --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
    --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \
    --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \
    --requestheader-allowed-names=aggregator \
    --requestheader-group-headers=X-Remote-Group \
    --requestheader-extra-headers-prefix=X-Remote-Extra- \
    --requestheader-username-headers=X-Remote-User
    # --token-auth-file=/etc/kubernetes/token.csv

    Restart=on-failure
    RestartSec=10s
    LimitNOFILE=65535

    [Install]
    WantedBy=multi-user.target
  • master-122, kube-apiserver.service
    [Unit]
    Description=Kubernetes API Server
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-apiserver \
    --v=2 \
    --allow-privileged=true \
    --bind-address=0.0.0.0 \
    --secure-port=6443 \
    --advertise-address=192.168.111.122 \
    --service-cluster-ip-range=172.20.0.0/16 \
    --service-node-port-range=30000-32767 \
    --etcd-servers=https://192.168.111.121:2379,https://192.168.111.122:2379,https://192.168.111.123:2379 \
    --etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \
    --etcd-certfile=/etc/etcd/ssl/etcd.pem \
    --etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \
    --client-ca-file=/etc/kubernetes/pki/ca.pem \
    --tls-cert-file=/etc/kubernetes/pki/apiserver.pem \
    --tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \
    --kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \
    --kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \
    --service-account-key-file=/etc/kubernetes/pki/sa.pub \
    --service-account-signing-key-file=/etc/kubernetes/pki/sa.key \
    --service-account-issuer=https://kubernetes.default.svc.cluster.local \
    --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \
    --enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \
    --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false \
    --authorization-mode=Node,RBAC \
    --enable-bootstrap-token-auth=true \
    --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
    --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \
    --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \
    --requestheader-allowed-names=aggregator \
    --requestheader-group-headers=X-Remote-Group \
    --requestheader-extra-headers-prefix=X-Remote-Extra- \
    --requestheader-username-headers=X-Remote-User
    # --token-auth-file=/etc/kubernetes/token.csv

    Restart=on-failure
    RestartSec=10s
    LimitNOFILE=65535

    [Install]
    WantedBy=multi-user.target
  • master-123, kube-apiserver.service
    [Unit]
    Description=Kubernetes API Server
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-apiserver \
    --v=2 \
    --allow-privileged=true \
    --bind-address=0.0.0.0 \
    --secure-port=6443 \
    --advertise-address=192.168.111.123 \
    --service-cluster-ip-range=172.20.0.0/16 \
    --service-node-port-range=30000-32767 \
    --etcd-servers=https://192.168.111.121:2379,https://192.168.111.122:2379,https://192.168.111.123:2379 \
    --etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \
    --etcd-certfile=/etc/etcd/ssl/etcd.pem \
    --etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \
    --client-ca-file=/etc/kubernetes/pki/ca.pem \
    --tls-cert-file=/etc/kubernetes/pki/apiserver.pem \
    --tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \
    --kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \
    --kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \
    --service-account-key-file=/etc/kubernetes/pki/sa.pub \
    --service-account-signing-key-file=/etc/kubernetes/pki/sa.key \
    --service-account-issuer=https://kubernetes.default.svc.cluster.local \
    --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \
    --enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \
    --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false \
    --authorization-mode=Node,RBAC \
    --enable-bootstrap-token-auth=true \
    --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
    --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \
    --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \
    --requestheader-allowed-names=aggregator \
    --requestheader-group-headers=X-Remote-Group \
    --requestheader-extra-headers-prefix=X-Remote-Extra- \
    --requestheader-username-headers=X-Remote-User
    # --token-auth-file=/etc/kubernetes/token.csv

    Restart=on-failure
    RestartSec=10s
    LimitNOFILE=65535

    [Install]
    WantedBy=multi-user.target
  • Copy the unit files to the Master nodes and start the service
# deploy-node
scp /opt/deploy-k8s/apiserver/master-121/kube-apiserver.service root@192.168.111.121:/usr/lib/systemd/system/
scp /opt/deploy-k8s/apiserver/master-122/kube-apiserver.service root@192.168.111.122:/usr/lib/systemd/system/
scp /opt/deploy-k8s/apiserver/master-123/kube-apiserver.service root@192.168.111.123:/usr/lib/systemd/system/

# Start the apiserver
# All Master nodes: enable and start kube-apiserver
runsh cmd ips/master.ips "systemctl daemon-reload && systemctl enable kube-apiserver --now"

# Check the kube-apiserver status
runsh cmd ips/master.ips "systemctl status kube-apiserver"
  • Troubleshooting
    kube-apiserver[62957]: Error: invalid argument "LegacyServiceAccountTokenNoAutoGeneration=false" for "--feature-gates" flag: cannot set feature gate LegacyServiceAccountTokenNoAutoGeneration to false, feature is locked to true

    Since Kubernetes 1.24 this feature gate defaults to true and has been locked, so it can no longer be set to false.

  • Fix
    Comment out (or simply delete) the --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false flag and reload the service; a sketch follows.
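A minimal sketch of that fix (deleting the whole flag line keeps the backslash continuation in ExecStart valid, which an inline comment would not):

# Remove the locked feature-gate flag from kube-apiserver.service on all masters
runsh cmd ips/master.ips "sed -i '/LegacyServiceAccountTokenNoAutoGeneration/d' /usr/lib/systemd/system/kube-apiserver.service"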

# deploy-node
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl restart kube-apiserver; systemctl status kube-apiserver"

Controller Manager

  • Configure the kube-controller-manager service; the configuration is identical on all master nodes

    Note the k8s Pod CIDR: it must not overlap with the host network or the k8s Service CIDR; adjust it as needed.

  • kube-controller-manager.service
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    [Unit]
    Description=Kubernetes Controller Manager
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-controller-manager \
    --v=2 \
    --root-ca-file=/etc/kubernetes/pki/ca.pem \
    --cluster-signing-cert-file=/etc/kubernetes/pki/ca.pem \
    --cluster-signing-key-file=/etc/kubernetes/pki/ca-key.pem \
    --service-account-private-key-file=/etc/kubernetes/pki/sa.key \
    --kubeconfig=/etc/kubernetes/controller-manager.kubeconfig \
    --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false \
    --leader-elect=true \
    --use-service-account-credentials=true \
    --node-monitor-grace-period=40s \
    --node-monitor-period=5s \
    --pod-eviction-timeout=2m0s \
    --controllers=*,bootstrapsigner,tokencleaner \
    --allocate-node-cidrs=true \
    --cluster-cidr=172.16.0.0/12 \
    --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
    --node-cidr-mask-size=24

    Restart=always
    RestartSec=10s

    [Install]
    WantedBy=multi-user.target
  • Copy the unit file to the master nodes
# deploy-node
mkdir -p /opt/deploy-k8s/controller-manager
runsh scp ips/master.ips /opt/deploy-k8s/controller-manager/kube-controller-manager.service /usr/lib/systemd/system/

# Start kube-controller-manager
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl enable kube-controller-manager --now; systemctl status kube-controller-manager"
  • Troubleshooting
    On k8s v1.28.8: kube-controller-manager[70733]: Error: unknown flag: --pod-eviction-timeout

    --pod-eviction-timeout=2m0s controls how long to wait before evicting Pods from a node in the Unhealthy state (the default is 5m).
    Setting it directly in the unit file now fails, and an alternative way to set it has not been identified yet.

  • Fix
    Comment out (or delete) the --pod-eviction-timeout=2m0s flag in kube-controller-manager.service; a sketch follows.
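A minimal sketch of that fix, again deleting the whole flag line so the backslash continuation in ExecStart stays intact:

# Remove the dropped flag from kube-controller-manager.service on all masters, then restart
runsh cmd ips/master.ips "sed -i '/--pod-eviction-timeout/d' /usr/lib/systemd/system/kube-controller-manager.service"
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl restart kube-controller-manager"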

Scheduler

  • Configure the kube-scheduler service; the configuration is identical on all master nodes

  • kube-scheduler.service
    [Unit]
    Description=Kubernetes Scheduler
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-scheduler \
    --v=2 \
    --leader-elect=true \
    --authentication-kubeconfig=/etc/kubernetes/scheduler.kubeconfig \
    --authorization-kubeconfig=/etc/kubernetes/scheduler.kubeconfig \
    --kubeconfig=/etc/kubernetes/scheduler.kubeconfig

    Restart=always
    RestartSec=10s

    [Install]
    WantedBy=multi-user.target
  • 将配置文件复制到master节点
# deploy-node
mkdir -p /opt/deploy-k8s/scheduler
runsh scp ips/master.ips /opt/deploy-k8s/scheduler/kube-scheduler.service /usr/lib/systemd/system/

# 启动kube-scheduler
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl enable kube-scheduler --now; systemctl status kube-scheduler"

八、TLS Bootstrapping配置

  • 只需要在Master-121创建bootstrap
# deploy-node
cd /opt/deploy-k8s/k8s-ha-install/bootstrap

# 192.168.111.100 为VIP,如果不是高可用集群,改为 master-121 的地址,8443改为 apiserver 的端口,默认是6443
# 如果要修改 bootstrap.secret.yaml 的 token-id 和 token-secret,需要保证 name: bootstrap-token-c8ad9c 和 token-id: c8ad9c 字段的'c8ad9c'字符串一致的,并且位数是一样的。
# 还要保证 kubectl config set-credentials tls-bootstrap-token-user 命令的 --token=c8ad9c.2e4d610cf3e7426e 与修改的字符串要一致。

kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.111.100:8443 \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig

kubectl config set-credentials tls-bootstrap-token-user \
--token=c8ad9c.2e4d610cf3e7426e \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig

kubectl config set-context tls-bootstrap-token-user@kubernetes \
--cluster=kubernetes \
--user=tls-bootstrap-token-user \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig

kubectl config use-context tls-bootstrap-token-user@kubernetes \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig

mkdir -p /root/.kube ; cp /etc/kubernetes/admin.kubeconfig /root/.kube/config
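如确需自定义 token,可参考以下示意先生成符合格式的随机字符串再统一替换(token_id、token_secret 为此处引入的示例变量):
# 示意:token-id 为 6 位、token-secret 为 16 位的小写字母/数字字符串
token_id=$(openssl rand -hex 3)
token_secret=$(openssl rand -hex 8)
sed -i "s/c8ad9c/${token_id}/g; s/2e4d610cf3e7426e/${token_secret}/g" bootstrap.secret.yaml
# 同时将上面 set-credentials 命令中的 --token 改为 ${token_id}.${token_secret}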
  • bootstrap.secret.yaml
    apiVersion: v1
    kind: Secret
    metadata:
      name: bootstrap-token-c8ad9c
      namespace: kube-system
    type: bootstrap.kubernetes.io/token
    stringData:
      description: "The default bootstrap token generated by 'kubelet '."
      token-id: c8ad9c
      token-secret: 2e4d610cf3e7426e
      usage-bootstrap-authentication: "true"
      usage-bootstrap-signing: "true"
      auth-extra-groups: system:bootstrappers:default-node-token,system:bootstrappers:worker,system:bootstrappers:ingress

    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
      name: kubelet-bootstrap
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: system:node-bootstrapper
    subjects:
    - apiGroup: rbac.authorization.k8s.io
      kind: Group
      name: system:bootstrappers:default-node-token
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
      name: node-autoapprove-bootstrap
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: system:certificates.k8s.io:certificatesigningrequests:nodeclient
    subjects:
    - apiGroup: rbac.authorization.k8s.io
      kind: Group
      name: system:bootstrappers:default-node-token
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
      name: node-autoapprove-certificate-rotation
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: system:certificates.k8s.io:certificatesigningrequests:selfnodeclient
    subjects:
    - apiGroup: rbac.authorization.k8s.io
      kind: Group
      name: system:nodes
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
      annotations:
        rbac.authorization.kubernetes.io/autoupdate: "true"
      labels:
        kubernetes.io/bootstrapping: rbac-defaults
      name: system:kube-apiserver-to-kubelet
    rules:
      - apiGroups:
          - ""
        resources:
          - nodes/proxy
          - nodes/stats
          - nodes/log
          - nodes/spec
          - nodes/metrics
        verbs:
          - "*"
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
      name: system:kube-apiserver
      namespace: ""
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: system:kube-apiserver-to-kubelet
    subjects:
      - apiGroup: rbac.authorization.k8s.io
        kind: User
        name: kube-apiserver
  • 确保可以正常查询集群状态后再继续;若状态异常,需先排查各 k8s 组件是否有故障
# master-121
kubectl get cs

集群状态
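输出大致如下(示意,不同版本的 MESSAGE 列可能略有差异):
NAME                 STATUS    MESSAGE   ERROR
scheduler            Healthy   ok
controller-manager   Healthy   ok
etcd-0               Healthy   ok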

# master-121
kubectl create -f bootstrap.secret.yaml
secret/bootstrap-token-c8ad9c created
clusterrolebinding.rbac.authorization.k8s.io/kubelet-bootstrap created
clusterrolebinding.rbac.authorization.k8s.io/node-autoapprove-bootstrap created
clusterrolebinding.rbac.authorization.k8s.io/node-autoapprove-certificate-rotation created
clusterrole.rbac.authorization.k8s.io/system:kube-apiserver-to-kubelet created
clusterrolebinding.rbac.authorization.k8s.io/system:kube-apiserver created

九、Worker 节点配置

  • 复制 Master节点 证书至 Worker节点
# deploy-node
# Worker 节点先确保目录存在(/etc/kubernetes/pki 已在前文创建,/etc/etcd/ssl 此前仅在 Master 节点创建过)
runsh cmd ips/worker.ips "mkdir -p /etc/kubernetes/pki /etc/etcd/ssl"

runsh scp ips/worker.ips "/etc/kubernetes/pki/ca.pem" /etc/kubernetes/pki/
runsh scp ips/worker.ips "/etc/kubernetes/pki/ca-key.pem" /etc/kubernetes/pki/
runsh scp ips/worker.ips "/etc/kubernetes/pki/front-proxy-ca.pem" /etc/kubernetes/pki/
runsh scp ips/worker.ips "/etc/kubernetes/bootstrap-kubelet.kubeconfig" /etc/kubernetes/

runsh scp ips/worker.ips /etc/etcd/ssl/etcd-ca.pem /etc/etcd/ssl/
runsh scp ips/worker.ips /etc/etcd/ssl/etcd-key.pem /etc/etcd/ssl/
runsh scp ips/worker.ips /etc/etcd/ssl/etcd.pem /etc/etcd/ssl/
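分发完成后可参考以下示意确认文件已就位:
# 示意:检查 Worker 节点上的证书与 bootstrap kubeconfig
runsh cmd ips/worker.ips "ls /etc/kubernetes/pki /etc/etcd/ssl /etc/kubernetes/bootstrap-kubelet.kubeconfig"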

十、Kubelet 配置

  • 所有节点,创建相关目录
# deploy-node
runsh cmd ips/node.ips "mkdir -p /var/lib/kubelet /var/log/kubernetes /etc/systemd/system/kubelet.service.d /etc/kubernetes/manifests/"
  • 所有节点,配置kubelet service

    • kubelet.service
      [Unit]
      Description=Kubernetes Kubelet
      Documentation=https://github.com/kubernetes/kubernetes

      [Service]
      ExecStart=/usr/local/bin/kubelet

      Restart=always
      StartLimitInterval=0
      RestartSec=10

      [Install]
      WantedBy=multi-user.target
  • 如果Runtime为Containerd,请使用如下Kubelet的配置,也可以写到kubelet.service

    • 10-kubelet.conf
      [Service]
      Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig --kubeconfig=/etc/kubernetes/kubelet.kubeconfig"
      Environment="KUBELET_SYSTEM_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
      Environment="KUBELET_CONFIG_ARGS=--config=/etc/kubernetes/kubelet-conf.yml"
      Environment="KUBELET_EXTRA_ARGS=--node-labels=node.kubernetes.io/node='' "
      ExecStart=
      ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_SYSTEM_ARGS $KUBELET_EXTRA_ARGS
    • 10-kubelet.conf(Docker)
      [Service]
      Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig --kubeconfig=/etc/kubernetes/kubelet.kubeconfig"
      Environment="KUBELET_SYSTEM_ARGS=--network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin"
      Environment="KUBELET_CONFIG_ARGS=--config=/etc/kubernetes/kubelet-conf.yml --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.5"
      Environment="KUBELET_EXTRA_ARGS=--node-labels=node.kubernetes.io/node='' "
      ExecStart=
      ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_SYSTEM_ARGS $KUBELET_EXTRA_ARGS
  • 创建 kubelet-conf.yml

    注意:如果更改了k8s的 service 网段,需要更改 kubelet-conf.yml 的clusterDNS:配置,改成k8s Service网段的第十个地址,比如172.20.0.10。

    • kubelet-conf.yml
      apiVersion: kubelet.config.k8s.io/v1beta1
      kind: KubeletConfiguration
      address: 0.0.0.0
      port: 10250
      readOnlyPort: 10255
      authentication:
        anonymous:
          enabled: false
        webhook:
          cacheTTL: 2m0s
          enabled: true
        x509:
          clientCAFile: /etc/kubernetes/pki/ca.pem
      authorization:
        mode: Webhook
        webhook:
          cacheAuthorizedTTL: 5m0s
          cacheUnauthorizedTTL: 30s
      cgroupDriver: systemd
      cgroupsPerQOS: true
      clusterDNS:
      - 172.20.0.10
      clusterDomain: cluster.local
      containerLogMaxFiles: 5
      containerLogMaxSize: 10Mi
      contentType: application/vnd.kubernetes.protobuf
      cpuCFSQuota: true
      cpuManagerPolicy: none
      cpuManagerReconcilePeriod: 10s
      enableControllerAttachDetach: true
      enableDebuggingHandlers: true
      enforceNodeAllocatable:
      - pods
      eventBurst: 10
      eventRecordQPS: 5
      evictionHard:
        imagefs.available: 15%
        memory.available: 100Mi
        nodefs.available: 10%
        nodefs.inodesFree: 5%
      evictionPressureTransitionPeriod: 5m0s
      failSwapOn: true
      fileCheckFrequency: 20s
      hairpinMode: promiscuous-bridge
      healthzBindAddress: 127.0.0.1
      healthzPort: 10248
      httpCheckFrequency: 20s
      imageGCHighThresholdPercent: 85
      imageGCLowThresholdPercent: 80
      imageMinimumGCAge: 2m0s
      iptablesDropBit: 15
      iptablesMasqueradeBit: 14
      kubeAPIBurst: 10
      kubeAPIQPS: 5
      makeIPTablesUtilChains: true
      maxOpenFiles: 1000000
      maxPods: 110
      nodeStatusUpdateFrequency: 10s
      oomScoreAdj: -999
      podPidsLimit: -1
      registryBurst: 10
      registryPullQPS: 5
      resolvConf: /etc/resolv.conf
      rotateCertificates: true
      runtimeRequestTimeout: 2m0s
      serializeImagePulls: true
      staticPodPath: /etc/kubernetes/manifests
      streamingConnectionIdleTimeout: 4h0m0s
      syncFrequency: 1m0s
      volumeStatsAggPeriod: 1m0s
  • 复制配置文件,并启动所有节点kubelet
# deploy-node
mkdir -p /opt/deploy-k8s/kubelet
runsh scp ips/node.ips /opt/deploy-k8s/kubelet/kubelet.service /usr/lib/systemd/system/
runsh scp ips/node.ips /opt/deploy-k8s/kubelet/10-kubelet.conf /etc/systemd/system/kubelet.service.d/
runsh scp ips/node.ips /opt/deploy-k8s/kubelet/kubelet-conf.yml /etc/kubernetes/

runsh cmd ips/node.ips "systemctl daemon-reload; systemctl enable kubelet --now"
runsh cmd ips/node.ips "kubectl get node"

配置kube-proxy为ipvs模式

Worker节点

mkdir -p ~/.kube
scp 192.168.111.121:/etc/kubernetes/admin.kubeconfig ~/.kube/config
scp 192.168.111.121:/usr/bin/kubectl /usr/local/bin/

# 将 config.conf 中的 mode 设置为 ipvs
kubectl edit -n kube-system configmaps kube-proxy

# 重建pod
kubectl get pod -n kube-system | awk '$1 ~ /^kube-proxy/{print "kubectl delete pod -n kube-system",$1}'|bash

# 任意节点执行,验证是否存在ipvs规则
ipvsadm -ln
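ipvsadm、ipset 并非默认安装,若命令不存在,可参考以下示意先安装(包名以实际发行版为准):
yum install -y ipvsadm ipset
ipvsadm -ln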

4.1.6 验证集群状态

由于尚未部署 CNI 插件,此时节点状态为 NotReady、部分 Pod 处于 Pending,属于预期现象

# kubectl get node -o wide
# kubectl get pod -A

部署flannel插件

所有的节点都会在同一个VPC中,各个节点之间的通信不需要跨路由,可以选择flannel插件的host-gw模式

# https://github.com/flannel-io/flannel
wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml

# 修改配置
1. Network 配置为本集群的 Pod 网段(本文为 172.16.0.0/12,即 kube-controller-manager 中 --cluster-cidr 指定的网段)
2. Type 是指的flannel backend,所有的节点都会在同一个VPC中,各个节点之间的通信不需要跨路由,推荐使用host-gw
3. flannel 的镜像,当前版本默认使用的是 rancher 仓库

# vim kube-flannel.yml
......
"Network": "10.200.0.0/16",
"Type": "host-gw"
image: rancher/mirrored-flannelcni-flannel-cni-plugin:v1.0.1
#image: flannelcni/flannel-cni-plugin:v1.0.1 for ppc64le and mips64le (dockerhub limitations may apply)
image: rancher/mirrored-flannelcni-flannel:v0.17.0
#image: flannelcni/flannel:v0.17.0 for ppc64le and mips64le (dockerhub limitations may apply)
image: rancher/mirrored-flannelcni-flannel:v0.17.0
#image: flannelcni/flannel:v0.17.0 for ppc64le and mips64le (dockerhub limitations may apply)
......
kubectl apply -f kube-flannel.yml

# 检查集群状态
kubectl get node -o wide
kubectl get pod -n kube-system | grep coredns
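host-gw 模式下,每个节点会为其他节点的 Pod 子网下发一条经由对端节点 IP 的直连路由,可参考以下示意验证(网段以实际分配为准):
# 示意:确认 flannel Pod 正常运行,并检查下发的 Pod 子网路由
kubectl get pod -A | grep flannel
# 预期能看到形如 "172.16.x.0/24 via <其他节点IP> dev eth0" 的路由条目
ip route show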

部署ingress controller

Ingress Controller 有很多种,如 Istio Gateway、ingress-nginx、Traefik 等,可根据实际需求选择其一部署。

部署metrics-server

Kubernetes 中 HPA 自动伸缩的指标依据、kubectl top 命令显示的资源使用率,都可以通过 metrics-server 获取,但它不适合作为精确的监控数据来源。
根据官方主页说明,大部分情况下使用 Deployment 部署一个副本即可,最多支持 5000 个 node,每个 node 约消耗 3m CPU 和 3MiB 内存。

wget  https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

# 修改以下参数
# 1. 镜像仓库推荐替换
# 2. 增加--kubelet-insecure-tls启动参数,跳过证书验证

grep -w -E "image|kubelet-insecure-tls" components.yaml
- --kubelet-insecure-tls
# image: k8s.gcr.io/metrics-server/metrics-server:v0.6.1
image: registry.aliyuncs.com/google_containers/metrics-server:v0.6.1

kubectl apply -f components.yaml
kubectl top node
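可进一步确认 metrics API 已通过聚合层注册成功(示意):
# 示意:APIService 的 AVAILABLE 应为 True
kubectl get apiservices v1beta1.metrics.k8s.io
kubectl top pod -A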

十五、安装kubernetes-dashboard

dashboard 默认的 yaml 采用 https 接口,详细说明可参考 dashboard 的 github 主页。

  • 修改kubernetes-dashboard.yaml
    增加管理员账户,ingress资源
kubernetes-dashboard.yaml
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: v1
kind: Namespace
metadata:
name: kubernetes-dashboard

---

apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard

---

kind: Service
apiVersion: v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
ports:
- port: 443
targetPort: 8443
selector:
k8s-app: kubernetes-dashboard

---

apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-certs
namespace: kubernetes-dashboard
type: Opaque

---

apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-csrf
namespace: kubernetes-dashboard
type: Opaque
data:
csrf: ""

---

apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-key-holder
namespace: kubernetes-dashboard
type: Opaque

---

kind: ConfigMap
apiVersion: v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-settings
namespace: kubernetes-dashboard

---

kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
rules:
# Allow Dashboard to get, update and delete Dashboard exclusive secrets.
- apiGroups: [""]
resources: ["secrets"]
resourceNames: ["kubernetes-dashboard-key-holder", "kubernetes-dashboard-certs", "kubernetes-dashboard-csrf"]
verbs: ["get", "update", "delete"]
# Allow Dashboard to get and update 'kubernetes-dashboard-settings' config map.
- apiGroups: [""]
resources: ["configmaps"]
resourceNames: ["kubernetes-dashboard-settings"]
verbs: ["get", "update"]
# Allow Dashboard to get metrics.
- apiGroups: [""]
resources: ["services"]
resourceNames: ["heapster", "dashboard-metrics-scraper"]
verbs: ["proxy"]
- apiGroups: [""]
resources: ["services/proxy"]
resourceNames: ["heapster", "http:heapster:", "https:heapster:", "dashboard-metrics-scraper", "http:dashboard-metrics-scraper"]
verbs: ["get"]

---

kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
rules:
# Allow Metrics Scraper to get metrics from the Metrics server
- apiGroups: ["metrics.k8s.io"]
resources: ["pods", "nodes"]
verbs: ["get", "list", "watch"]

---

apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kubernetes-dashboard
subjects:
- kind: ServiceAccount
name: kubernetes-dashboard
namespace: kubernetes-dashboard

---

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kubernetes-dashboard
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kubernetes-dashboard
subjects:
- kind: ServiceAccount
name: kubernetes-dashboard
namespace: kubernetes-dashboard

---

kind: Deployment
apiVersion: apps/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
k8s-app: kubernetes-dashboard
template:
metadata:
labels:
k8s-app: kubernetes-dashboard
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- name: kubernetes-dashboard
image: kubernetesui/dashboard:v2.5.1
imagePullPolicy: Always
ports:
- containerPort: 8443
protocol: TCP
args:
- --auto-generate-certificates
- --namespace=kubernetes-dashboard
# Uncomment the following line to manually specify Kubernetes API server Host
# If not specified, Dashboard will attempt to auto discover the API server and connect
# to it. Uncomment only if the default does not work.
# - --apiserver-host=http://my-address:port
volumeMounts:
- name: kubernetes-dashboard-certs
mountPath: /certs
# Create on-disk volume to store exec logs
- mountPath: /tmp
name: tmp-volume
livenessProbe:
httpGet:
scheme: HTTPS
path: /
port: 8443
initialDelaySeconds: 30
timeoutSeconds: 30
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsUser: 1001
runAsGroup: 2001
volumes:
- name: kubernetes-dashboard-certs
secret:
secretName: kubernetes-dashboard-certs
- name: tmp-volume
emptyDir: {}
serviceAccountName: kubernetes-dashboard
nodeSelector:
"kubernetes.io/os": linux
# Comment the following tolerations if Dashboard must not be deployed on master
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule

---

kind: Service
apiVersion: v1
metadata:
labels:
k8s-app: dashboard-metrics-scraper
name: dashboard-metrics-scraper
namespace: kubernetes-dashboard
spec:
ports:
- port: 8000
targetPort: 8000
selector:
k8s-app: dashboard-metrics-scraper

---

kind: Deployment
apiVersion: apps/v1
metadata:
labels:
k8s-app: dashboard-metrics-scraper
name: dashboard-metrics-scraper
namespace: kubernetes-dashboard
spec:
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
k8s-app: dashboard-metrics-scraper
template:
metadata:
labels:
k8s-app: dashboard-metrics-scraper
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- name: dashboard-metrics-scraper
image: kubernetesui/metrics-scraper:v1.0.7
ports:
- containerPort: 8000
protocol: TCP
livenessProbe:
httpGet:
scheme: HTTP
path: /
port: 8000
initialDelaySeconds: 30
timeoutSeconds: 30
volumeMounts:
- mountPath: /tmp
name: tmp-volume
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsUser: 1001
runAsGroup: 2001
serviceAccountName: kubernetes-dashboard
nodeSelector:
"kubernetes.io/os": linux
# Comment the following tolerations if Dashboard must not be deployed on master
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
volumes:
- name: tmp-volume
emptyDir: {}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: kubernetes-dashboard
namespace: kubernetes-dashboard
annotations:
# nginx.ingress.kubernetes.io/secure-backends:
nginx.org/ssl-services: "kubernetes-dashboard"
spec:
ingressClassName: nginx
rules:
- host: dashboard-local-01.huanle.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: kubernetes-dashboard
port:
number: 443
tls:
- hosts: ["dashboard-local-01.huanle.com"]
---
# admin.yaml, 管理员账号
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: kubernetes-dashboard
addonmanager.kubernetes.io/mode: Reconcile
name: kubernetes-dashboard-admin
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kubernetes-dashboard-admin
namespace: kube-system
labels:
k8s-app: kubernetes-dashboard
addonmanager.kubernetes.io/mode: Reconcile
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: ServiceAccount
name: kubernetes-dashboard-admin
namespace: kube-system
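应用上述 yaml 后,可参考以下示意获取管理员登录 token(k8s v1.24 起 ServiceAccount 不再自动生成长期 Secret,可用 kubectl create token 签发临时 token,有效期可通过 --duration 调整):
kubectl apply -f kubernetes-dashboard.yaml
# 为上面定义的 kubernetes-dashboard-admin 账户签发登录 token
kubectl -n kube-system create token kubernetes-dashboard-admin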
4.2.5 安装nfs-storage-class

通常情况下,都需要为 Kubernetes 配置 storage class,测试环境中使用 NFS 居多,下面以 NFS 为例(相关部署清单可参考对应的 Gitlab 地址)。

# middleware-128(192.168.111.128)作为 NFS 服务端
yum install -y nfs-utils
mkdir -p /data/nfs
echo '/data/nfs 192.168.111.0/24(rw,sync,no_wdelay,no_root_squash)' > /etc/exports
systemctl enable nfs --now
showmount -e

# 各个 k8s 节点作为 NFS 客户端,同样需要安装 nfs 相关工具
# yum install -y nfs-utils
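部署 provisioner 之前,可先在任意 k8s 节点手动验证 NFS 可挂载(示意,192.168.111.128 为本文的 middleware 节点,按实际环境修改):
showmount -e 192.168.111.128
mount -t nfs 192.168.111.128:/data/nfs /mnt && touch /mnt/.nfs-test && rm -f /mnt/.nfs-test
umount /mnt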
deployment.yaml
---
apiVersion: v1
kind: Namespace
metadata:
  name: infra-storage
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-client-provisioner
  labels:
    app: nfs-client-provisioner
  namespace: infra-storage
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          image: linuxduduniao/nfs-subdir-external-provisioner:v4.0.1
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: default-nfs-provisioner
            - name: NFS_SERVER
              value: 192.168.111.128   # NFS 服务器地址(middleware-128),按实际环境修改
            - name: NFS_PATH
              value: /data/nfs
            - name: TZ
              value: Asia/Shanghai
      volumes:
        - name: nfs-client-root
          nfs:
            server: 192.168.111.128   # 与 NFS_SERVER 保持一致
            path: /data/nfs
rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: infra-storage
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: nfs-client-provisioner-runner
rules:
- apiGroups: [""]
resources: ["persistentvolumes"]
verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "list", "watch", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "update", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: run-nfs-client-provisioner
subjects:
- kind: ServiceAccount
name: nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: infra-storage
roleRef:
kind: ClusterRole
name: nfs-client-provisioner-runner
apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: leader-locking-nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: infra-storage
rules:
- apiGroups: [""]
resources: ["endpoints"]
verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: leader-locking-nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: infra-storage
subjects:
- kind: ServiceAccount
name: nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: infra-storage
roleRef:
kind: Role
name: leader-locking-nfs-client-provisioner
apiGroup: rbac.authorization.k8s.io
class.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: managed-nfs-storage
provisioner: default-nfs-provisioner
parameters:
  archiveOnDelete: "false"
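如需将其设为集群默认 StorageClass,可参考以下示意(可选):
kubectl apply -f class.yaml
kubectl patch storageclass managed-nfs-storage \
  -p '{"metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
kubectl get storageclass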
  • 验证NFS存储
test-claim.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: test-claim
spec:
  storageClassName: managed-nfs-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Mi
test-pod.yaml
kind: Pod
apiVersion: v1
metadata:
  name: test-pod
spec:
  containers:
  - name: test-pod
    image: gcr.io/google_containers/busybox:1.24
    command:
      - "/bin/sh"
    args:
      - "-c"
      - "touch /mnt/SUCCESS && exit 0 || exit 1"
    volumeMounts:
      - name: nfs-pvc
        mountPath: "/mnt"
  restartPolicy: "Never"
  volumes:
    - name: nfs-pvc
      persistentVolumeClaim:
        claimName: test-claim
kubectl apply -f test-claim.yaml -f test-pod.yaml

kubectl get pod
NAME READY STATUS RESTARTS AGE
test-pod 0/1 Completed 0 17s

kubectl get pvc
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
test-claim Bound pvc-baf4603c-fea5-4ea6-93ac-b3387a1f150c 1Mi RWX managed-nfs-storage 34s

部署 nginx LB

主要用来为 Ingress Controller 以及多个 Master 节点的 APIServer 负载流量。
当前 upstream 中仅启用了一个 Master 节点,为方便后续扩展,提前配置好 APIServer 的负载均衡,其余 Master 可在 upstream 中按需开启。

  • 添加主机名解析
runsh cmd ips/lb.ips "echo '192.168.111.121 master-121
192.168.111.122 master-122
192.168.111.123 master-123
192.168.111.124 worker-124
192.168.111.125 worker-125
192.168.111.127 lb-127' >> /etc/hosts"
  • LB安装nginx
    #!/bin/bash
    # Close selinux
    setenforce 0 &> /dev/null

    # Install the compilation environment in advance
    yum install -y wget gcc gcc-c++ pcre pcre-devel openssl openssl-devel zlib zlib-devel gd gd-devel

    # Download nginx source code package
    #--->>> Only modify the following parameters <<<---

    nginx_version=1.20.1
    nginx_prefix_dir=/usr/local/nginx
    install_package_dir=/usr/local/src
    install_log_dir="./install_nginx.log"

    #--->>> Only modify the above parameters <<<---

    cd ${install_package_dir} && \
    ls nginx-${nginx_version}.tar.gz || wget http://nginx.org/download/nginx-${nginx_version}.tar.gz &&\
    tar xf nginx-${nginx_version}.tar.gz -C ${install_package_dir} && cd nginx-${nginx_version} && \
    useradd -s /sbin/nologin nginx

    # Compile and install
    ./configure --prefix=${nginx_prefix_dir} \
    --user=nginx \
    --group=nginx \
    --with-pcre \
    --with-http_ssl_module \
    --with-http_v2_module \
    --with-http_realip_module \
    --with-http_addition_module \
    --with-http_sub_module \
    --with-http_dav_module \
    --with-http_flv_module \
    --with-http_mp4_module \
    --with-http_gunzip_module \
    --with-http_gzip_static_module \
    --with-http_random_index_module \
    --with-http_secure_link_module \
    --with-http_stub_status_module \
    --with-http_auth_request_module \
    --with-http_image_filter_module \
    --with-http_slice_module \
    --with-mail \
    --with-threads \
    --with-file-aio \
    --with-stream \
    --with-mail_ssl_module \
    --with-stream_ssl_module

    if [[ $? -ne 0 ]];then
    echo "Compilation failed. Please check!";exit 2
    fi
    make && make install

    # Configure nginx daemons
    cat >/usr/lib/systemd/system/nginx.service <<EOF
    [Unit]
    Description=nginx - high performance web server
    Documentation=https://nginx.org/en/docs/
    After=network.target remote-fs.target nss-lookup.target

    [Service]
    Type=forking
    PIDFile=${nginx_prefix_dir}/logs/nginx.pid
    ExecStartPre=${nginx_prefix_dir}/sbin/nginx -t -c ${nginx_prefix_dir}/conf/nginx.conf
    ExecStart=${nginx_prefix_dir}/sbin/nginx -c ${nginx_prefix_dir}/conf/nginx.conf
    ExecReload=${nginx_prefix_dir}/sbin/nginx -s reload
    ExecStop=${nginx_prefix_dir}/sbin/nginx -s stop
    PrivateTmp=true

    [Install]
    WantedBy=multi-user.target
    EOF
    chmod +x /usr/lib/systemd/system/nginx.service
    systemctl daemon-reload

    # Set Nginx to boot automatically
    systemctl enable nginx --now
    # systemctl restart nginx && systemctl enable nginx

    # Add to PATH
    # Add Soft Connection
    ln -s ${nginx_prefix_dir}/sbin/nginx /usr/sbin/nginx

    # Add environment variables
    #cat >/etc/profile.d/nginx.sh<<EOF
    #export NGINX_HOME=${nginx_prefix_dir}
    #export PATH=\$PATH:\$NGINX_HOME/sbin
    #EOF
    #source /etc/profile

    # Print installation information
    cat > ${install_log_dir} <<EOF
    [Info]
    Binary: ln -s ${nginx_prefix_dir}/sbin/nginx /usr/sbin/nginx
    Daemon: /usr/lib/systemd/system/nginx.service
    [Detail]
    EOF
    nginx -V &>> ./${install_log_dir}
    clear; cat ./${install_log_dir}
  • 配置nginx.conf
    mkdir -p /usr/local/nginx/conf/{vhost,stream}
    cat > /usr/local/nginx/conf/nginx.conf <<'EOF'   # 用引号包裹 EOF,避免 $remote_addr 等 nginx 变量被 shell 展开
    user nginx;
    worker_processes auto;

    #error_log logs/error.log;
    #error_log logs/error.log notice;
    #error_log logs/error.log info;

    #pid logs/nginx.pid;

    events {
    worker_connections 4096;
    }

    http {
    include mime.types;
    default_type application/octet-stream;

    log_format access '$time_local - $remote_addr - $upstream_addr - $status '
    '$upstream_connect_time - $bytes_sent '
    '$upstream_bytes_sent - $upstream_bytes_received';

    # log_format main '$remote_addr - $remote_user [$time_local] "$request" '
    # '$status $body_bytes_sent "$http_referer" '
    # '"$http_user_agent" "$http_x_forwarded_for"';
    access_log logs/access.log access;
    error_log logs/error.log;

    ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE
    ssl_prefer_server_ciphers on;

    gzip on;
    sendfile on;
    tcp_nopush on;
    keepalive_timeout 65;
    types_hash_max_size 2048;
    server_tokens off;

    map $http_upgrade $connection_upgrade {
    default upgrade;
    '' close;
    }
    include vhost/*.conf;
    }

    stream {
    log_format proxy '$time_local - $remote_addr - $upstream_addr - $protocol - $status '
    '$session_time - $upstream_connect_time - $bytes_sent - $bytes_received '
    '$upstream_bytes_sent - $upstream_bytes_received' ;
    access_log logs/stream_access.log proxy;
    error_log logs/stream_error.log;

    include stream/*.conf;
    }
    EOF
  • 配置APIServer.conf
    # APIServer 
    cat > /usr/local/nginx/conf/stream/apiserver.conf <<EOF
    upstream kube-apiserver {
    server 192.168.111.121:6443 max_fails=1 fail_timeout=60s;
    # server 192.168.111.122:6443 max_fails=1 fail_timeout=60s;
    # server 192.168.111.123:6443 max_fails=1 fail_timeout=60s;
    }

    server {
    listen 0.0.0.0:8443;   # 与 kubeconfig 中的 https://192.168.111.100:8443 保持一致
    allow 192.168.0.0/16;
    allow 10.0.0.0/8;
    deny all;

    proxy_connect_timeout 2s;
    proxy_next_upstream on;
    proxy_next_upstream_timeout 5;
    proxy_next_upstream_tries 1;

    proxy_pass kube-apiserver;
    access_log logs/kube-apiserver.log proxy;
    }
    EOF
  • 重载 Nginx,使配置文件生效
# nginx -t
# systemctl reload nginx
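重载后可从内网任意机器验证四层代理是否生效(示意,假设 VIP 192.168.111.100 已绑定到 LB 上,否则可改用 LB 实际地址 192.168.111.127;/healthz 默认允许匿名访问):
# 示意:通过负载均衡访问 apiserver 健康检查接口,预期返回 ok
curl -k https://192.168.111.100:8443/healthz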

安装 crictl

crictl是兼容CRI接口的容器调试工具
crictl调试Kubernetes节点参考文档

二进制包安装 crictl(Master/Worker节点)

# deploy-node
mkdir -p /opt/deploy-k8s/crictl
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.30.0/crictl-v1.30.0-linux-amd64.tar.gz -O /opt/deploy-k8s/crictl/crictl.tar.gz
runsh scp ips/node.ips crictl/crictl.tar.gz /tmp/
runsh scp ips/node.ips crictl/crictl.yaml /usr/local/containerd/etc/
runsh cmd ips/node.ips "tar xf /tmp/crictl.tar.gz -C /usr/local/bin"
runsh cmd ips/node.ips "chmod +x /usr/local/bin/crictl"
runsh cmd ips/node.ips "mkdir -p /usr/local/containerd/etc/bash_completion.d/"
runsh cmd ips/node.ips "crictl completion bash > /usr/local/containerd/etc/bash_completion.d/crictl"
  • crictl.yaml
    runtime-endpoint: unix:///run/containerd/containerd.sock
    image-endpoint: unix:///run/containerd/containerd.sock
    timeout: 10
    debug: false # debug调试的时候设置为true
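crictl 默认读取 /etc/crictl.yaml;由于上面把配置放在了自定义路径,可参考以下示意建立软链接并做基本验证(假设 containerd 已在各节点运行):
runsh cmd ips/node.ips "ln -sf /usr/local/containerd/etc/crictl.yaml /etc/crictl.yaml"
runsh cmd ips/node.ips "crictl info >/dev/null && crictl ps"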

安装 k8s 及 etcd

安装 k8s
  • Master/Worker节点

    • Yum方式安装

      CentOS / RHEL / Fedora(目前该源支持 v1.24 - v1.29 版本,后续版本会持续更新)
      Aliyun Kubernetes Source
      由于官网未开放同步方式, 可能会有索引gpg检查失败的情况
      这时请用 yum install -y --nogpgcheck kubelet kubeadm kubectl 安装

      # deploy-node
      cd /opt/deploy-k8s
      mkdir /opt/deploy-k8s/repos
      runsh scp ips/node.ips repos/kubernetes.repo /etc/yum.repos.d/
      # runsh cmd ips/node.ips "yum search kubelet kubeadm kubectl --disableexcludes=kubernetes --showduplicates|grep 1.28"
      runsh cmd ips/node.ips "yum install -y --disableexcludes=kubernetes kubelet-1.28.8 kubeadm-1.28.8 kubectl-1.28.8"
      runsh cmd ips/node.ips "systemctl enable kubelet --now"
      # yum安装kubelet时会自动安装CNI插件,安装containerd时官方文档推荐安装新版本的CNI,可能存在冲突
      runsh cmd ips/node.ips "ls /opt/cni"
      # cat repos/kubernetes.repo
      [kubernetes]
      name=Kubernetes
      baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/
      enabled=1
      gpgcheck=1
      gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/repodata/repomd.xml.key
  • 二进制方式安装
# deploy-node
cd /opt/deploy-k8s
mkdir -p /opt/deploy-k8s/repos
wget -P /opt/deploy-k8s/repos https://dl.k8s.io/v1.28.8/kubernetes-server-linux-amd64.tar.gz
tar xf repos/kubernetes-server-linux-amd64.tar.gz --strip-components=3 -C /usr/local/bin kubernetes/server/bin/kube{let,ctl,-apiserver,-controller-manager,-scheduler,-proxy}

runsh scp ips/master.ips "/usr/local/bin/kube*" /usr/local/bin/
runsh scp ips/worker.ips /usr/local/bin/kubelet /usr/local/bin/
runsh scp ips/worker.ips /usr/local/bin/kube-proxy /usr/local/bin/
# runsh cmd ips/node.ips "systemctl enable kubelet --now"
# runsh cmd ips/node.ips "kubelet --version"
# 安装cni
runsh cmd ips/node.ips "mkdir -p /opt/cni/bin"

安装 etcd
  • Master节点安装 etcd(或 etcd 集群)
# deploy-node
cat ips/etcd.ips
192.168.111.121
192.168.111.122
192.168.111.123

etcd_version=v3.5.13
mkdir -p /opt/deploy-k8s/etcd
wget -P /opt/deploy-k8s/etcd/ https://github.com/etcd-io/etcd/releases/download/${etcd_version}/etcd-${etcd_version}-linux-amd64.tar.gz
tar xf /opt/deploy-k8s/etcd/etcd-${etcd_version}-linux-amd64.tar.gz -C /usr/local/bin/ --strip-components=1 etcd-${etcd_version}-linux-amd64/etcd{,ctl}

runsh scp ips/etcd.ips "/usr/local/bin/etcd*" /usr/local/bin/
runsh cmd ips/etcd.ips "etcdctl version"

下载集群安装工具

  • Master-121节点操作
# deploy-node
# 安装 v1.28.8 版本
cd /opt/deploy-k8s/
git clone https://github.com/dotbalo/k8s-ha-install.git
cd k8s-ha-install && git checkout manual-installation-v1.28.x

四、生成证书

生成 k8s 组件证书

下载生成证书工具
# deploy-node = master-121
# 注:若 pkg.cfssl.org 已无法访问,可改从 cloudflare/cfssl 的 GitHub Releases 页面下载对应版本的二进制
wget "https://pkg.cfssl.org/R1.2/cfssl_linux-amd64" -O /usr/local/bin/cfssl
wget "https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64" -O /usr/local/bin/cfssljson
chmod +x /usr/local/bin/cfssl /usr/local/bin/cfssljson
which cfssl cfssljson
创建证书存放目录
# deploy-node
cd /opt/deploy-k8s

# 所有节点,创建kubernetes相关目录
runsh cmd ips/node.ips "mkdir -p /etc/kubernetes/pki"

# Master节点,创建etcd证书目录
runsh cmd ips/etcd.ips "mkdir -p /etc/etcd/ssl"

生成 kubernetes 证书
# deploy-node
# Master节点,生成kubernetes证书
cd /opt/deploy-k8s/k8s-ha-install/pki
cfssl gencert -initca ca-csr.json | cfssljson -bare /etc/kubernetes/pki/ca

# 172.20.0.1 为 k8s service 网段,根据实际情况更改
# 192.168.111.100 为 VIP,如果不是高可用集群,则为 Master01 的IP,根据实际情况更改
# 192.168.111.121,192.168.111.122,192.168.111.123 为 master节点IP,根据实际情况更改
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-hostname=172.20.0.1,192.168.111.100,127.0.0.1,kubernetes,kubernetes.default,kubernetes.default.svc,kubernetes.default.svc.cluster,kubernetes.default.svc.cluster.local,192.168.111.121,192.168.111.122,192.168.111.123 \
-profile=kubernetes apiserver-csr.json | cfssljson -bare /etc/kubernetes/pki/apiserver

# 生成 apiserver 的聚合证书
# Requestheader-client-xxx requestheader-allowwd-xxx:aggerator
cfssl gencert -initca front-proxy-ca-csr.json | cfssljson -bare /etc/kubernetes/pki/front-proxy-ca
cfssl gencert \
-ca=/etc/kubernetes/pki/front-proxy-ca.pem \
-ca-key=/etc/kubernetes/pki/front-proxy-ca-key.pem \
-config=ca-config.json \
-profile=kubernetes front-proxy-client-csr.json | cfssljson -bare /etc/kubernetes/pki/front-proxy-client

# 忽略返回结果的警告
> [WARNING] This certificate lacks a "hosts" field. This makes it unsuitable for
websites. For more information see the Baseline Requirements for the Issuance and Management
of Publicly-Trusted Certificates, v.1.1.6, from the CA/Browser Forum (https://cabforum.org);
specifically, section 10.2.3 ("Information Requirements").

# 生成 controller-manage 的证书
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
manager-csr.json | cfssljson -bare /etc/kubernetes/pki/controller-manager

# 注意,192.168.111.100 为VIP,如果不是高可用集群,改为 master01 的地址,8443改为apiserver的端口,默认是6443
# set-cluster:设置一个集群项
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.111.100:8443 \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig

# 设置一个环境项,一个上下文
kubectl config set-context system:kube-controller-manager@kubernetes \
--cluster=kubernetes \
--user=system:kube-controller-manager \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig

# set-credentials 设置一个用户项
kubectl config set-credentials system:kube-controller-manager \
--client-certificate=/etc/kubernetes/pki/controller-manager.pem \
--client-key=/etc/kubernetes/pki/controller-manager-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig

# 使用某个环境当做默认环境
kubectl config use-context system:kube-controller-manager@kubernetes \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig

cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
scheduler-csr.json | cfssljson -bare /etc/kubernetes/pki/scheduler

# 注意,如果不是高可用集群,192.168.111.100:8443改为master01的地址,8443改为apiserver的端口,默认是6443
# set-cluster:设置一个集群项
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.111.100:8443 \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig

# set-credentials 设置一个用户项
kubectl config set-credentials system:kube-scheduler \
--client-certificate=/etc/kubernetes/pki/scheduler.pem \
--client-key=/etc/kubernetes/pki/scheduler-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig

# 设置一个环境项,一个上下文
kubectl config set-context system:kube-scheduler@kubernetes \
--cluster=kubernetes \
--user=system:kube-scheduler \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig

# 使用某个环境当做默认环境
kubectl config use-context system:kube-scheduler@kubernetes \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig

cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
admin-csr.json | cfssljson -bare /etc/kubernetes/pki/admin

# 注意,如果不是高可用集群,192.168.111.100:8443改为master01的地址,8443改为apiserver的端口,默认是6443
# set-cluster:设置一个集群项
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.111.100:8443 \
--kubeconfig=/etc/kubernetes/admin.kubeconfig

# set-credentials 设置一个用户项
kubectl config set-credentials kubernetes-admin \
--client-certificate=/etc/kubernetes/pki/admin.pem \
--client-key=/etc/kubernetes/pki/admin-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/admin.kubeconfig

# 设置一个环境项,一个上下文
kubectl config set-context kubernetes-admin@kubernetes \
--cluster=kubernetes \
--user=kubernetes-admin \
--kubeconfig=/etc/kubernetes/admin.kubeconfig

# 使用某个环境当做默认环境
kubectl config use-context kubernetes-admin@kubernetes \
--kubeconfig=/etc/kubernetes/admin.kubeconfig

# 创建ServiceAccount Key --> secret
openssl genrsa -out /etc/kubernetes/pki/sa.key 2048
openssl rsa -in /etc/kubernetes/pki/sa.key -pubout -out /etc/kubernetes/pki/sa.pub

# 发送证书到其他Master节点
runsh scp /opt/deploy-k8s/ips/master.ips "/etc/kubernetes/pki/*" /etc/kubernetes/pki/
runsh scp /opt/deploy-k8s/ips/master.ips "/etc/kubernetes/*.kubeconfig" /etc/kubernetes/

# 验证
runsh cmd /opt/deploy-k8s/ips/master.ips "ls /etc/kubernetes/pki"
admin.csr apiserver.csr ca.csr controller-manager.csr front-proxy-ca.csr front-proxy-client.csr sa.key scheduler-key.pem
admin-key.pem apiserver-key.pem ca-key.pem controller-manager-key.pem front-proxy-ca-key.pem front-proxy-client-key.pem sa.pub scheduler.pem
admin.pem apiserver.pem ca.pem controller-manager.pem front-proxy-ca.pem front-proxy-client.pem scheduler.csr

runsh cmd /opt/deploy-k8s/ips/master.ips "ls /etc/kubernetes/pki |wc -l"
23
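可顺带检查关键证书的有效期(示意):
# 示意:查看 CA 与 apiserver 证书的起止时间
openssl x509 -in /etc/kubernetes/pki/ca.pem -noout -dates
openssl x509 -in /etc/kubernetes/pki/apiserver.pem -noout -dates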
生成 etcd 证书
# deploy-node
# Master节点,生成etcd证书
# master-121,master-122,master-123,192.168.111.121,192.168.111.122,192.168.111.123 为 etcd 节点的主机名和ip,根据实际情况更改
cd /opt/deploy-k8s/k8s-ha-install/pki
cfssl gencert -initca etcd-ca-csr.json | cfssljson -bare /etc/etcd/ssl/etcd-ca
cfssl gencert \
-ca=/etc/etcd/ssl/etcd-ca.pem \
-ca-key=/etc/etcd/ssl/etcd-ca-key.pem \
-config=ca-config.json \
-hostname=127.0.0.1,master-121,master-122,master-123,192.168.111.121,192.168.111.122,192.168.111.123 \
-profile=kubernetes \
etcd-csr.json | cfssljson -bare /etc/etcd/ssl/etcd

# Master节点,将证书复制到其他节点
runsh scp /opt/deploy-k8s/ips/etcd.ips "/etc/etcd/ssl/etcd*" /etc/etcd/ssl/

五、安装 etcd

配置 etcd

etcd配置大致相同,根据实际情况修改每个Master节点etcd配置的主机名和IP地址

  • master-121,配置etcd
    # cat /opt/deploy-k8s/etcd/master-121/etcd.config.yml
    name: 'master-121'
    data-dir: /var/lib/etcd
    wal-dir: /var/lib/etcd/wal
    snapshot-count: 5000
    heartbeat-interval: 100
    election-timeout: 1000
    quota-backend-bytes: 0
    listen-peer-urls: 'https://192.168.111.121:2380'
    listen-client-urls: 'https://192.168.111.121:2379,http://127.0.0.1:2379'
    max-snapshots: 3
    max-wals: 5
    cors:
    initial-advertise-peer-urls: 'https://192.168.111.121:2380'
    advertise-client-urls: 'https://192.168.111.121:2379'
    discovery:
    discovery-fallback: 'proxy'
    discovery-proxy:
    discovery-srv:
    initial-cluster: 'master-121=https://192.168.111.121:2380,master-122=https://192.168.111.122:2380,master-123=https://192.168.111.123:2380'
    initial-cluster-token: 'etcd-k8s-cluster'
    initial-cluster-state: 'new'
    strict-reconfig-check: false
    enable-v2: true
    enable-pprof: true
    proxy: 'off'
    proxy-failure-wait: 5000
    proxy-refresh-interval: 30000
    proxy-dial-timeout: 1000
    proxy-write-timeout: 5000
    proxy-read-timeout: 0
    client-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    peer-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    peer-client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    debug: false
    log-package-levels:
    log-outputs: [default]
    force-new-cluster: false
  • master-122,配置etcd
    # cat /opt/deploy-k8s/etcd/master-122/etcd.config.yml
    name: 'master-122'
    data-dir: /var/lib/etcd
    wal-dir: /var/lib/etcd/wal
    snapshot-count: 5000
    heartbeat-interval: 100
    election-timeout: 1000
    quota-backend-bytes: 0
    listen-peer-urls: 'https://192.168.111.122:2380'
    listen-client-urls: 'https://192.168.111.122:2379,http://127.0.0.1:2379'
    max-snapshots: 3
    max-wals: 5
    cors:
    initial-advertise-peer-urls: 'https://192.168.111.122:2380'
    advertise-client-urls: 'https://192.168.111.122:2379'
    discovery:
    discovery-fallback: 'proxy'
    discovery-proxy:
    discovery-srv:
    initial-cluster: 'master-121=https://192.168.111.121:2380,master-122=https://192.168.111.122:2380,master-123=https://192.168.111.123:2380'
    initial-cluster-token: 'etcd-k8s-cluster'
    initial-cluster-state: 'new'
    strict-reconfig-check: false
    enable-v2: true
    enable-pprof: true
    proxy: 'off'
    proxy-failure-wait: 5000
    proxy-refresh-interval: 30000
    proxy-dial-timeout: 1000
    proxy-write-timeout: 5000
    proxy-read-timeout: 0
    client-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    peer-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    peer-client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    debug: false
    log-package-levels:
    log-outputs: [default]
    force-new-cluster: false
  • master-123,配置etcd
    # cat /opt/deploy-k8s/etcd/master-123/etcd.config.yml
    name: 'master-123'
    data-dir: /var/lib/etcd
    wal-dir: /var/lib/etcd/wal
    snapshot-count: 5000
    heartbeat-interval: 100
    election-timeout: 1000
    quota-backend-bytes: 0
    listen-peer-urls: 'https://192.168.111.123:2380'
    listen-client-urls: 'https://192.168.111.123:2379,http://127.0.0.1:2379'
    max-snapshots: 3
    max-wals: 5
    cors:
    initial-advertise-peer-urls: 'https://192.168.111.123:2380'
    advertise-client-urls: 'https://192.168.111.123:2379'
    discovery:
    discovery-fallback: 'proxy'
    discovery-proxy:
    discovery-srv:
    initial-cluster: 'master-121=https://192.168.111.121:2380,master-122=https://192.168.111.122:2380,master-123=https://192.168.111.123:2380'
    initial-cluster-token: 'etcd-k8s-cluster'
    initial-cluster-state: 'new'
    strict-reconfig-check: false
    enable-v2: true
    enable-pprof: true
    proxy: 'off'
    proxy-failure-wait: 5000
    proxy-refresh-interval: 30000
    proxy-dial-timeout: 1000
    proxy-write-timeout: 5000
    proxy-read-timeout: 0
    client-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    peer-transport-security:
    cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
    key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
    peer-client-cert-auth: true
    trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
    auto-tls: true
    debug: false
    log-package-levels:
    log-outputs: [default]
    force-new-cluster: false
  • 将配置文件复制到 etcd节点
# deploy-node
cd /opt/deploy-k8s
mkdir /opt/deploy-k8s/etcd/master-12{1,2,3}
scp /opt/deploy-k8s/etcd/master-121/etcd.config.yml root@192.168.111.121:/etc/etcd/
scp /opt/deploy-k8s/etcd/master-122/etcd.config.yml root@192.168.111.122:/etc/etcd/
scp /opt/deploy-k8s/etcd/master-123/etcd.config.yml root@192.168.111.123:/etc/etcd/

配置 Service

  • 所有 Master节点,创建 etcd service 并启动
  • etcd.service
    # deploy-node
    # cat /opt/deploy-k8s/etcd/etcd.service
    [Unit]
    Description=Etcd Service
    Documentation=https://coreos.com/etcd/docs/latest/
    After=network.target

    [Service]
    Type=notify
    ExecStart=/usr/local/bin/etcd --config-file=/etc/etcd/etcd.config.yml
    Restart=on-failure
    RestartSec=10
    LimitNOFILE=65536

    [Install]
    WantedBy=multi-user.target
    Alias=etcd3.service

    # 复制 etcd.service 到所有 master节点
    runsh scp ips/master.ips /opt/deploy-k8s/etcd/etcd.service /usr/lib/systemd/system/
  • 所有 Master节点 创建 etcd 的证书目录
# deploy-node
runsh cmd ips/master.ips "mkdir -p /etc/kubernetes/pki/etcd"
runsh cmd ips/master.ips "ln -s /etc/etcd/ssl/* /etc/kubernetes/pki/etcd/"
runsh cmd ips/master.ips "systemctl daemon-reload"
runsh cmd ips/master.ips "systemctl enable etcd --now"
  • 查看 etcd 状态
# etcd-node
export ETCDCTL_API=3
etcdctl \
--endpoints="192.168.111.123:2379,192.168.111.122:2379,192.168.111.121:2379" \
--cacert=/etc/kubernetes/pki/etcd/etcd-ca.pem \
--cert=/etc/kubernetes/pki/etcd/etcd.pem \
--key=/etc/kubernetes/pki/etcd/etcd-key.pem endpoint status \
--write-out=table

etcd集群状态
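除 endpoint status 外,也可用 endpoint health 快速确认各成员健康状态(示意):
etcdctl \
--endpoints="192.168.111.123:2379,192.168.111.122:2379,192.168.111.121:2379" \
--cacert=/etc/kubernetes/pki/etcd/etcd-ca.pem \
--cert=/etc/kubernetes/pki/etcd/etcd.pem \
--key=/etc/kubernetes/pki/etcd/etcd-key.pem endpoint health --write-out=table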

七、K8s 组件配置

Apiserver

  • deploy-node 创建各 Master 节点 apiserver 配置的存放目录
# deploy-node
mkdir -p /opt/deploy-k8s/apiserver/master-12{1,2,3}
  • master-121,kube-apiserver.service
    [Unit]
    Description=Kubernetes API Server
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-apiserver \
    --v=2 \
    --allow-privileged=true \
    --bind-address=0.0.0.0 \
    --secure-port=6443 \
    --advertise-address=192.168.111.121 \
    --service-cluster-ip-range=172.20.0.0/16 \
    --service-node-port-range=30000-32767 \
    --etcd-servers=https://192.168.111.121:2379,https://192.168.111.122:2379,https://192.168.111.123:2379 \
    --etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \
    --etcd-certfile=/etc/etcd/ssl/etcd.pem \
    --etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \
    --client-ca-file=/etc/kubernetes/pki/ca.pem \
    --tls-cert-file=/etc/kubernetes/pki/apiserver.pem \
    --tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \
    --kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \
    --kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \
    --service-account-key-file=/etc/kubernetes/pki/sa.pub \
    --service-account-signing-key-file=/etc/kubernetes/pki/sa.key \
    --service-account-issuer=https://kubernetes.default.svc.cluster.local \
    --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \
    --enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \
    --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false \
    --authorization-mode=Node,RBAC \
    --enable-bootstrap-token-auth=true \
    --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
    --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \
    --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \
    --requestheader-allowed-names=aggregator \
    --requestheader-group-headers=X-Remote-Group \
    --requestheader-extra-headers-prefix=X-Remote-Extra- \
    --requestheader-username-headers=X-Remote-User
    # --token-auth-file=/etc/kubernetes/token.csv

    Restart=on-failure
    RestartSec=10s
    LimitNOFILE=65535

    [Install]
    WantedBy=multi-user.target
  • master-122,kube-apiserver.service
    [Unit]
    Description=Kubernetes API Server
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-apiserver \
    --v=2 \
    --allow-privileged=true \
    --bind-address=0.0.0.0 \
    --secure-port=6443 \
    --advertise-address=192.168.111.122 \
    --service-cluster-ip-range=172.20.0.0/16 \
    --service-node-port-range=30000-32767 \
    --etcd-servers=https://192.168.111.121:2379,https://192.168.111.122:2379,https://192.168.111.123:2379 \
    --etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \
    --etcd-certfile=/etc/etcd/ssl/etcd.pem \
    --etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \
    --client-ca-file=/etc/kubernetes/pki/ca.pem \
    --tls-cert-file=/etc/kubernetes/pki/apiserver.pem \
    --tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \
    --kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \
    --kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \
    --service-account-key-file=/etc/kubernetes/pki/sa.pub \
    --service-account-signing-key-file=/etc/kubernetes/pki/sa.key \
    --service-account-issuer=https://kubernetes.default.svc.cluster.local \
    --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \
    --enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \
    --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false \
    --authorization-mode=Node,RBAC \
    --enable-bootstrap-token-auth=true \
    --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
    --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \
    --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \
    --requestheader-allowed-names=aggregator \
    --requestheader-group-headers=X-Remote-Group \
    --requestheader-extra-headers-prefix=X-Remote-Extra- \
    --requestheader-username-headers=X-Remote-User
    # --token-auth-file=/etc/kubernetes/token.csv

    Restart=on-failure
    RestartSec=10s
    LimitNOFILE=65535

    [Install]
    WantedBy=multi-user.target
  • master-123,kube-apiserver.service
    [Unit]
    Description=Kubernetes API Server
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-apiserver \
    --v=2 \
    --allow-privileged=true \
    --bind-address=0.0.0.0 \
    --secure-port=6443 \
    --advertise-address=192.168.111.123 \
    --service-cluster-ip-range=172.20.0.0/16 \
    --service-node-port-range=30000-32767 \
    --etcd-servers=https://192.168.111.121:2379,https://192.168.111.122:2379,https://192.168.111.123:2379 \
    --etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \
    --etcd-certfile=/etc/etcd/ssl/etcd.pem \
    --etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \
    --client-ca-file=/etc/kubernetes/pki/ca.pem \
    --tls-cert-file=/etc/kubernetes/pki/apiserver.pem \
    --tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \
    --kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \
    --kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \
    --service-account-key-file=/etc/kubernetes/pki/sa.pub \
    --service-account-signing-key-file=/etc/kubernetes/pki/sa.key \
    --service-account-issuer=https://kubernetes.default.svc.cluster.local \
    --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \
    --enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \
    --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false \
    --authorization-mode=Node,RBAC \
    --enable-bootstrap-token-auth=true \
    --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
    --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \
    --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \
    --requestheader-allowed-names=aggregator \
    --requestheader-group-headers=X-Remote-Group \
    --requestheader-extra-headers-prefix=X-Remote-Extra- \
    --requestheader-username-headers=X-Remote-User
    # --token-auth-file=/etc/kubernetes/token.csv

    Restart=on-failure
    RestartSec=10s
    LimitNOFILE=65535

    [Install]
    WantedBy=multi-user.target
  • 复制配置文件到Master节点,并启动
# deploy-node
scp /opt/deploy-k8s/apiserver/master-121/kube-apiserver.service root@192.168.111.121:/usr/lib/systemd/system/
scp /opt/deploy-k8s/apiserver/master-122/kube-apiserver.service root@192.168.111.122:/usr/lib/systemd/system/
scp /opt/deploy-k8s/apiserver/master-123/kube-apiserver.service root@192.168.111.123:/usr/lib/systemd/system/

# 启动apiserver
# 所有Master节点,开启kube-apiserver
runsh cmd ips/master.ips "systemctl daemon-reload && systemctl enable kube-apiserver --now"

# 检测kube-server状态
runsh cmd ips/master.ips "systemctl status kube-apiserver"
  • 问题定位
    kube-apiserver[62957]: Error: invalid argument "LegacyServiceAccountTokenNoAutoGeneration=false" for "--feature-gates" flag: cannot set feature gate LegacyServiceAccountTokenNoAutoGeneration to false, feature is locked to true

    从 Kubernetes 1.24 开始,该 feature gate 默认开启(true);在后续版本中它已进入 GA 并被锁定为 true,不能再设置为 false。

  • 解决方案
    注释--feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false,重载服务
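    下面给出一个批量处理的参考写法(直接删除该行,效果等同于注释;假设 service 文件路径为 /usr/lib/systemd/system/kube-apiserver.service):

# deploy-node
runsh cmd ips/master.ips "sed -i '/LegacyServiceAccountTokenNoAutoGeneration/d' /usr/lib/systemd/system/kube-apiserver.service"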

# deploy-node
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl restart kube-apiserver; systemctl status kube-apiserver"

Controller Manager

  • 配置kube-controller-manager service,所有master节点配置一样

    注意 k8s Pod 网段不能和宿主机网段、k8s Service 网段重复,请根据实际情况修改

  • kube-controller-manager.service
    [Unit]
    Description=Kubernetes Controller Manager
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-controller-manager \
    --v=2 \
    --root-ca-file=/etc/kubernetes/pki/ca.pem \
    --cluster-signing-cert-file=/etc/kubernetes/pki/ca.pem \
    --cluster-signing-key-file=/etc/kubernetes/pki/ca-key.pem \
    --service-account-private-key-file=/etc/kubernetes/pki/sa.key \
    --kubeconfig=/etc/kubernetes/controller-manager.kubeconfig \
    --feature-gates=LegacyServiceAccountTokenNoAutoGeneration=false \
    --leader-elect=true \
    --use-service-account-credentials=true \
    --node-monitor-grace-period=40s \
    --node-monitor-period=5s \
    --pod-eviction-timeout=2m0s \
    --controllers=*,bootstrapsigner,tokencleaner \
    --allocate-node-cidrs=true \
    --cluster-cidr=172.16.0.0/12 \
    --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
    --node-cidr-mask-size=24

    Restart=always
    RestartSec=10s

    [Install]
    WantedBy=multi-user.target
  • 将配置文件复制到master节点
# deploy-node
mkdir -p /opt/deploy-k8s/controller-manager
runsh scp ips/master.ips /opt/deploy-k8s/controller-manager/kube-controller-manager.service /usr/lib/systemd/system/

# 启动kube-controller-manager
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl enable kube-controller-manager --now; systemctl status kube-controller-manager"
  • 问题定位
    k8s v1.28.8 启动 kube-controller-manager 时报错:kube-controller-manager[70733]: Error: unknown flag: --pod-eviction-timeout

    --pod-eviction-timeout=2m0s 的作用是:当 Node 处于 Unhealthy 状态时,等待该时长后驱逐其上的 Pod,默认值为 5m。
    该参数在较新版本中已被移除,Pod 驱逐改由基于污点的驱逐机制控制(可通过 Pod 的 tolerationSeconds 调整),因此在 service 中直接设置会报错。

  • 解决方案
    将 kube-controller-manager.service 中的 --pod-eviction-timeout=2m0s 注释或删除后重载服务,批量处理示例见下
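    批量处理的参考写法(直接删除该行;假设 service 文件路径为 /usr/lib/systemd/system/kube-controller-manager.service):

# deploy-node
runsh cmd ips/master.ips "sed -i '/--pod-eviction-timeout/d' /usr/lib/systemd/system/kube-controller-manager.service"
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl restart kube-controller-manager; systemctl status kube-controller-manager"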

Scheduler

  • 配置kube-scheduler service,所有master节点配置一样

  • kube-scheduler.service
    [Unit]
    Description=Kubernetes Scheduler
    Documentation=https://github.com/kubernetes/kubernetes
    After=network.target

    [Service]
    ExecStart=/usr/local/bin/kube-scheduler \
    --v=2 \
    --leader-elect=true \
    --authentication-kubeconfig=/etc/kubernetes/scheduler.kubeconfig \
    --authorization-kubeconfig=/etc/kubernetes/scheduler.kubeconfig \
    --kubeconfig=/etc/kubernetes/scheduler.kubeconfig

    Restart=always
    RestartSec=10s

    [Install]
    WantedBy=multi-user.target
  • 将配置文件复制到master节点
# deploy-node
mkdir -p /opt/deploy-k8s/scheduler
runsh scp ips/master.ips /opt/deploy-k8s/scheduler/kube-scheduler.service /usr/lib/systemd/system/

# 启动kube-scheduler
runsh cmd ips/master.ips "systemctl daemon-reload; systemctl enable kube-scheduler --now; systemctl status kube-scheduler"

八、TLS Bootstrapping配置

  • 只需要在Master-121创建bootstrap
# deploy-node
cd /opt/deploy-k8s/k8s-ha-install/bootstrap

# 192.168.111.100 为VIP,如果不是高可用集群,改为 master-121 的地址,8443改为 apiserver 的端口,默认是6443
# 如果要修改 bootstrap.secret.yaml 的 token-id 和 token-secret,需要保证 name: bootstrap-token-c8ad9c 和 token-id: c8ad9c 字段的'c8ad9c'字符串一致的,并且位数是一样的。
# 还要保证 kubectl config set-credentials tls-bootstrap-token-user 命令的 --token=c8ad9c.2e4d610cf3e7426e 与修改的字符串要一致。

kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.111.100:8443 \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig

kubectl config set-credentials tls-bootstrap-token-user \
--token=c8ad9c.2e4d610cf3e7426e \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig

kubectl config set-context tls-bootstrap-token-user@kubernetes \
--cluster=kubernetes \
--user=tls-bootstrap-token-user \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig

kubectl config use-context tls-bootstrap-token-user@kubernetes \
--kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig

mkdir -p /root/.kube ; cp /etc/kubernetes/admin.kubeconfig /root/.kube/config
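  • 可以用下面的命令确认生成的 kubeconfig 指向了期望的 APIServer 地址(示例):

# master-121
kubectl config view --kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig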
  • bootstrap.secret.yaml
    apiVersion: v1
    kind: Secret
    metadata:
      name: bootstrap-token-c8ad9c
      namespace: kube-system
    type: bootstrap.kubernetes.io/token
    stringData:
      description: "The default bootstrap token generated by 'kubelet '."
      token-id: c8ad9c
      token-secret: 2e4d610cf3e7426e
      usage-bootstrap-authentication: "true"
      usage-bootstrap-signing: "true"
      auth-extra-groups: system:bootstrappers:default-node-token,system:bootstrappers:worker,system:bootstrappers:ingress

    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
      name: kubelet-bootstrap
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: system:node-bootstrapper
    subjects:
    - apiGroup: rbac.authorization.k8s.io
      kind: Group
      name: system:bootstrappers:default-node-token
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
      name: node-autoapprove-bootstrap
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: system:certificates.k8s.io:certificatesigningrequests:nodeclient
    subjects:
    - apiGroup: rbac.authorization.k8s.io
      kind: Group
      name: system:bootstrappers:default-node-token
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
      name: node-autoapprove-certificate-rotation
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: system:certificates.k8s.io:certificatesigningrequests:selfnodeclient
    subjects:
    - apiGroup: rbac.authorization.k8s.io
      kind: Group
      name: system:nodes
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
      annotations:
        rbac.authorization.kubernetes.io/autoupdate: "true"
      labels:
        kubernetes.io/bootstrapping: rbac-defaults
      name: system:kube-apiserver-to-kubelet
    rules:
      - apiGroups:
          - ""
        resources:
          - nodes/proxy
          - nodes/stats
          - nodes/log
          - nodes/spec
          - nodes/metrics
        verbs:
          - "*"
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
      name: system:kube-apiserver
      namespace: ""
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: system:kube-apiserver-to-kubelet
    subjects:
      - apiGroup: rbac.authorization.k8s.io
        kind: User
        name: kube-apiserver
  • 确保可以正常查询集群状态后,才能继续后面的步骤;如果查询异常,需要先排查 k8s 各组件是否有故障
# master-121
kubectl get cs

集群状态
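正常情况下各组件均为 Healthy,输出类似如下(示例):

NAME                 STATUS    MESSAGE                         ERROR
scheduler            Healthy   ok
controller-manager   Healthy   ok
etcd-0               Healthy   {"health":"true","reason":""}
etcd-1               Healthy   {"health":"true","reason":""}
etcd-2               Healthy   {"health":"true","reason":""}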

# master-121
kubectl create -f bootstrap.secret.yaml
secret/bootstrap-token-c8ad9c created
clusterrolebinding.rbac.authorization.k8s.io/kubelet-bootstrap created
clusterrolebinding.rbac.authorization.k8s.io/node-autoapprove-bootstrap created
clusterrolebinding.rbac.authorization.k8s.io/node-autoapprove-certificate-rotation created
clusterrole.rbac.authorization.k8s.io/system:kube-apiserver-to-kubelet created
clusterrolebinding.rbac.authorization.k8s.io/system:kube-apiserver created

九、Worker 节点配置

  • 复制 Master节点 证书至 Worker节点
# deploy-node
# 先确保 Worker 节点上目标目录已存在
runsh cmd ips/worker.ips "mkdir -p /etc/kubernetes/pki /etc/etcd/ssl"

runsh scp ips/worker.ips "/etc/kubernetes/pki/ca.pem" /etc/kubernetes/pki/
runsh scp ips/worker.ips "/etc/kubernetes/pki/ca-key.pem" /etc/kubernetes/pki/
runsh scp ips/worker.ips "/etc/kubernetes/pki/front-proxy-ca.pem" /etc/kubernetes/pki/
runsh scp ips/worker.ips "/etc/kubernetes/bootstrap-kubelet.kubeconfig" /etc/kubernetes/

runsh scp ips/worker.ips /etc/etcd/ssl/etcd-ca.pem /etc/etcd/ssl/
runsh scp ips/worker.ips /etc/etcd/ssl/etcd-key.pem /etc/etcd/ssl/
runsh scp ips/worker.ips /etc/etcd/ssl/etcd.pem /etc/etcd/ssl/

十、Kubelet 配置

  • 所有节点,创建相关目录
# deploy-node
runsh cmd ips/node.ips "mkdir -p /var/lib/kubelet /var/log/kubernetes /etc/systemd/system/kubelet.service.d /etc/kubernetes/manifests/"
  • 所有节点,配置kubelet service

    • kubelet.service
      [Unit]
      Description=Kubernetes Kubelet
      Documentation=https://github.com/kubernetes/kubernetes

      [Service]
      ExecStart=/usr/local/bin/kubelet

      Restart=always
      StartLimitInterval=0
      RestartSec=10

      [Install]
      WantedBy=multi-user.target
  • 如果Runtime为Containerd,请使用如下Kubelet的配置,也可以写到kubelet.service

    • 10-kubelet.conf
      [Service]
      Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig --kubeconfig=/etc/kubernetes/kubelet.kubeconfig"
      Environment="KUBELET_SYSTEM_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
      Environment="KUBELET_CONFIG_ARGS=--config=/etc/kubernetes/kubelet-conf.yml"
      Environment="KUBELET_EXTRA_ARGS=--node-labels=node.kubernetes.io/node='' "
      ExecStart=
      ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_SYSTEM_ARGS $KUBELET_EXTRA_ARGS
    • 10-kubelet.conf(Docker)
      [Service]
      Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig --kubeconfig=/etc/kubernetes/kubelet.kubeconfig"
      Environment="KUBELET_SYSTEM_ARGS=--network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin"
      Environment="KUBELET_CONFIG_ARGS=--config=/etc/kubernetes/kubelet-conf.yml --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.5"
      Environment="KUBELET_EXTRA_ARGS=--node-labels=node.kubernetes.io/node='' "
      ExecStart=
      ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_SYSTEM_ARGS $KUBELET_EXTRA_ARGS
  • 创建 kubelet-conf.yml

    注意:如果更改了k8s的 service 网段,需要更改 kubelet-conf.yml 的clusterDNS:配置,改成k8s Service网段的第十个地址,比如172.20.0.10。

    • kubelet-conf.yml
      apiVersion: kubelet.config.k8s.io/v1beta1
      kind: KubeletConfiguration
      address: 0.0.0.0
      port: 10250
      readOnlyPort: 10255
      authentication:
        anonymous:
          enabled: false
        webhook:
          cacheTTL: 2m0s
          enabled: true
        x509:
          clientCAFile: /etc/kubernetes/pki/ca.pem
      authorization:
        mode: Webhook
        webhook:
          cacheAuthorizedTTL: 5m0s
          cacheUnauthorizedTTL: 30s
      cgroupDriver: systemd
      cgroupsPerQOS: true
      clusterDNS:
      - 172.20.0.10
      clusterDomain: cluster.local
      containerLogMaxFiles: 5
      containerLogMaxSize: 10Mi
      contentType: application/vnd.kubernetes.protobuf
      cpuCFSQuota: true
      cpuManagerPolicy: none
      cpuManagerReconcilePeriod: 10s
      enableControllerAttachDetach: true
      enableDebuggingHandlers: true
      enforceNodeAllocatable:
      - pods
      eventBurst: 10
      eventRecordQPS: 5
      evictionHard:
        imagefs.available: 15%
        memory.available: 100Mi
        nodefs.available: 10%
        nodefs.inodesFree: 5%
      evictionPressureTransitionPeriod: 5m0s
      failSwapOn: true
      fileCheckFrequency: 20s
      hairpinMode: promiscuous-bridge
      healthzBindAddress: 127.0.0.1
      healthzPort: 10248
      httpCheckFrequency: 20s
      imageGCHighThresholdPercent: 85
      imageGCLowThresholdPercent: 80
      imageMinimumGCAge: 2m0s
      iptablesDropBit: 15
      iptablesMasqueradeBit: 14
      kubeAPIBurst: 10
      kubeAPIQPS: 5
      makeIPTablesUtilChains: true
      maxOpenFiles: 1000000
      maxPods: 110
      nodeStatusUpdateFrequency: 10s
      oomScoreAdj: -999
      podPidsLimit: -1
      registryBurst: 10
      registryPullQPS: 5
      resolvConf: /etc/resolv.conf
      rotateCertificates: true
      runtimeRequestTimeout: 2m0s
      serializeImagePulls: true
      staticPodPath: /etc/kubernetes/manifests
      streamingConnectionIdleTimeout: 4h0m0s
      syncFrequency: 1m0s
      volumeStatsAggPeriod: 1m0s
  • 复制配置文件,并启动所有节点kubelet
# deploy-node
mkdir -p /opt/deploy-k8s/kubelet
runsh scp ips/node.ips /opt/deploy-k8s/kubelet/kubelet.service /usr/lib/systemd/system/
runsh scp ips/node.ips /opt/deploy-k8s/kubelet/10-kubelet.conf /etc/systemd/system/kubelet.service.d/
runsh scp ips/node.ips /opt/deploy-k8s/kubelet/kubelet-conf.yml /etc/kubernetes/

runsh cmd ips/node.ips "systemctl daemon-reload; systemctl enable kubelet --now"
# 在 master-121 上确认节点是否已注册(未部署 CNI 前节点处于 NotReady 属正常现象)
kubectl get node
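如果某个节点长时间未注册,可在该节点上查看 kubelet 状态与日志排查(示例):

systemctl status kubelet
journalctl -u kubelet --no-pager -n 50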

配置kube-proxy为ipvs模式

Worker节点

mkdir -p ~/.kube
scp 192.168.111.121:/etc/kubernetes/admin.kubeconfig ~/.kube/config
scp 192.168.111.121:/usr/local/bin/kubectl /usr/local/bin/

# 将 config.conf 中的 mode 设置为 ipvs
kubectl edit -n kube-system configmaps kube-proxy

# 重建pod
kubectl get pod -n kube-system | awk '$1 ~ /^kube-proxy/{print "kubectl delete pod -n kube-system",$1}'|bash

# 任意节点执行,验证是否存在ipvs规则
ipvsadm -ln
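也可以通过 kube-proxy 的 metrics 端口确认当前代理模式(假设使用默认的 10249 端口):

curl 127.0.0.1:10249/proxyMode
# 预期输出: ipvs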

验证集群状态

由于尚未部署 CNI 插件,此时节点处于 NotReady、部分 Pod(如 CoreDNS)无法正常运行,属于预期现象

# kubectl get node -o wide
# kubectl get pod -A

部署flannel插件

所有的节点都会在同一个VPC中,各个节点之间的通信不需要跨路由,可以选择flannel插件的host-gw模式

# https://github.com/flannel-io/flannel
wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml

# 修改配置
1. Network 需配置为本集群的 Pod CIDR,即 kube-controller-manager 中 --cluster-cidr 指定的网段(本文为 172.16.0.0/12)
2. Type 指 flannel 的 backend;所有节点都在同一个 VPC 中、节点间通信不需要跨路由,推荐使用 host-gw
3. flannel 的镜像,当前版本默认使用 rancher 仓库,如拉取受限可自行替换镜像地址

# vim kube-flannel.yml
......
"Network": "10.200.0.0/16",
"Type": "host-gw"
image: rancher/mirrored-flannelcni-flannel-cni-plugin:v1.0.1
#image: flannelcni/flannel-cni-plugin:v1.0.1 for ppc64le and mips64le (dockerhub limitations may apply)
image: rancher/mirrored-flannelcni-flannel:v0.17.0
#image: flannelcni/flannel:v0.17.0 for ppc64le and mips64le (dockerhub limitations may apply)
image: rancher/mirrored-flannelcni-flannel:v0.17.0
#image: flannelcni/flannel:v0.17.0 for ppc64le and mips64le (dockerhub limitations may apply)
......
kubectl apply -f kube-flannel.yml

# 检查集群状态
kubectl get node -o wide
kubectl get pod -n kube-system | grep coredns
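host-gw 模式下,每个节点会生成指向其他节点 Pod 子网的静态路由,可以据此做个简单验证(示例):

# 任意节点执行
ip route | grep -v default
# 预期可以看到类似 172.16.x.0/24 via 192.168.111.x 的路由条目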

部署ingress controller

Ingress Controller 有很多种实现,例如 ingress-nginx、Traefik、Istio Gateway 等,可根据需要选择,下面给出一个部署示例。
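以 ingress-nginx 为例,下面是一个最简的 Helm 安装示例(需要能访问对应的 chart 与镜像仓库,参数请按实际环境调整):

helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo update
helm install ingress-nginx ingress-nginx/ingress-nginx \
  --namespace ingress-nginx --create-namespace
kubectl -n ingress-nginx get pod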

部署metrics-server

Kubernetes 中 HPA 自动伸缩的指标、kubectl top 命令展示的资源使用率,都可以通过 metrics-server 获取,但它不适合作为精确监控数据的来源。
按照官方说明,大多数场景下使用 Deployment 部署单副本即可,最多可支持 5000 个节点,每个节点约消耗 3m CPU 和 3Mi 内存。

wget  https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

# 修改以下参数
# 1. 镜像仓库推荐替换
# 2. 增加--kubelet-insecure-tls启动参数,跳过证书验证

grep -w -E "image|kubelet-insecure-tls" components.yaml
- --kubelet-insecure-tls
# image: k8s.gcr.io/metrics-server/metrics-server:v0.6.1
image: registry.aliyuncs.com/google_containers/metrics-server:v0.6.1

kubectl apply -f components.yaml
kubectl top node
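部署完成后,可先确认聚合 API 已就绪(AVAILABLE 为 True),再验证 kubectl top(示例):

kubectl get apiservice v1beta1.metrics.k8s.io
kubectl top pod -A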

十五、安装kubernetes-dashboard

dashboard 默认的 yaml 采用 HTTPS 接口对外提供服务,部署清单可参考 dashboard 的 GitHub 主页。

  • 修改kubernetes-dashboard.yaml
    在官方清单的基础上,增加管理员账号和 Ingress 资源
kubernetes-dashboard.yaml
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: v1
kind: Namespace
metadata:
name: kubernetes-dashboard

---

apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard

---

kind: Service
apiVersion: v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
ports:
- port: 443
targetPort: 8443
selector:
k8s-app: kubernetes-dashboard

---

apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-certs
namespace: kubernetes-dashboard
type: Opaque

---

apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-csrf
namespace: kubernetes-dashboard
type: Opaque
data:
csrf: ""

---

apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-key-holder
namespace: kubernetes-dashboard
type: Opaque

---

kind: ConfigMap
apiVersion: v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-settings
namespace: kubernetes-dashboard

---

kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
rules:
# Allow Dashboard to get, update and delete Dashboard exclusive secrets.
- apiGroups: [""]
resources: ["secrets"]
resourceNames: ["kubernetes-dashboard-key-holder", "kubernetes-dashboard-certs", "kubernetes-dashboard-csrf"]
verbs: ["get", "update", "delete"]
# Allow Dashboard to get and update 'kubernetes-dashboard-settings' config map.
- apiGroups: [""]
resources: ["configmaps"]
resourceNames: ["kubernetes-dashboard-settings"]
verbs: ["get", "update"]
# Allow Dashboard to get metrics.
- apiGroups: [""]
resources: ["services"]
resourceNames: ["heapster", "dashboard-metrics-scraper"]
verbs: ["proxy"]
- apiGroups: [""]
resources: ["services/proxy"]
resourceNames: ["heapster", "http:heapster:", "https:heapster:", "dashboard-metrics-scraper", "http:dashboard-metrics-scraper"]
verbs: ["get"]

---

kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
rules:
# Allow Metrics Scraper to get metrics from the Metrics server
- apiGroups: ["metrics.k8s.io"]
resources: ["pods", "nodes"]
verbs: ["get", "list", "watch"]

---

apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kubernetes-dashboard
subjects:
- kind: ServiceAccount
name: kubernetes-dashboard
namespace: kubernetes-dashboard

---

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kubernetes-dashboard
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kubernetes-dashboard
subjects:
- kind: ServiceAccount
name: kubernetes-dashboard
namespace: kubernetes-dashboard

---

kind: Deployment
apiVersion: apps/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
k8s-app: kubernetes-dashboard
template:
metadata:
labels:
k8s-app: kubernetes-dashboard
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- name: kubernetes-dashboard
image: kubernetesui/dashboard:v2.5.1
imagePullPolicy: Always
ports:
- containerPort: 8443
protocol: TCP
args:
- --auto-generate-certificates
- --namespace=kubernetes-dashboard
# Uncomment the following line to manually specify Kubernetes API server Host
# If not specified, Dashboard will attempt to auto discover the API server and connect
# to it. Uncomment only if the default does not work.
# - --apiserver-host=http://my-address:port
volumeMounts:
- name: kubernetes-dashboard-certs
mountPath: /certs
# Create on-disk volume to store exec logs
- mountPath: /tmp
name: tmp-volume
livenessProbe:
httpGet:
scheme: HTTPS
path: /
port: 8443
initialDelaySeconds: 30
timeoutSeconds: 30
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsUser: 1001
runAsGroup: 2001
volumes:
- name: kubernetes-dashboard-certs
secret:
secretName: kubernetes-dashboard-certs
- name: tmp-volume
emptyDir: {}
serviceAccountName: kubernetes-dashboard
nodeSelector:
"kubernetes.io/os": linux
# Comment the following tolerations if Dashboard must not be deployed on master
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule

---

kind: Service
apiVersion: v1
metadata:
labels:
k8s-app: dashboard-metrics-scraper
name: dashboard-metrics-scraper
namespace: kubernetes-dashboard
spec:
ports:
- port: 8000
targetPort: 8000
selector:
k8s-app: dashboard-metrics-scraper

---

kind: Deployment
apiVersion: apps/v1
metadata:
labels:
k8s-app: dashboard-metrics-scraper
name: dashboard-metrics-scraper
namespace: kubernetes-dashboard
spec:
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
k8s-app: dashboard-metrics-scraper
template:
metadata:
labels:
k8s-app: dashboard-metrics-scraper
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- name: dashboard-metrics-scraper
image: kubernetesui/metrics-scraper:v1.0.7
ports:
- containerPort: 8000
protocol: TCP
livenessProbe:
httpGet:
scheme: HTTP
path: /
port: 8000
initialDelaySeconds: 30
timeoutSeconds: 30
volumeMounts:
- mountPath: /tmp
name: tmp-volume
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsUser: 1001
runAsGroup: 2001
serviceAccountName: kubernetes-dashboard
nodeSelector:
"kubernetes.io/os": linux
# Comment the following tolerations if Dashboard must not be deployed on master
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
volumes:
- name: tmp-volume
emptyDir: {}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: kubernetes-dashboard
namespace: kubernetes-dashboard
annotations:
# nginx.ingress.kubernetes.io/secure-backends:
nginx.org/ssl-services: "kubernetes-dashboard"
spec:
ingressClassName: nginx
rules:
- host: dashboard-local-01.huanle.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: kubernetes-dashboard
port:
number: 443
tls:
- hosts: ["dashboard-local-01.huanle.com"]
---
# admin.yaml, 管理员账号
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: kubernetes-dashboard
addonmanager.kubernetes.io/mode: Reconcile
name: kubernetes-dashboard-admin
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kubernetes-dashboard-admin
namespace: kube-system
labels:
k8s-app: kubernetes-dashboard
addonmanager.kubernetes.io/mode: Reconcile
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: ServiceAccount
name: kubernetes-dashboard-admin
namespace: kube-system
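应用上述清单后,可以为 kubernetes-dashboard-admin 账号签发登录 Token(Kubernetes 1.24 及以上可直接使用 kubectl create token,示例):

kubectl apply -f kubernetes-dashboard.yaml
kubectl -n kube-system create token kubernetes-dashboard-admin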
安装nfs-storage-class

通常都需要为 Kubernetes 配置 StorageClass;测试环境中以 NFS 居多,下面以 NFS 配合 nfs-subdir-external-provisioner 为例。

# middleware-128(192.168.111.128)作为 NFS 服务端提供存储
yum install -y nfs-utils
mkdir -p /data/nfs
echo '/data/nfs 192.168.111.0/24(rw,sync,no_wdelay,no_root_squash)' > /etc/exports
systemctl enable nfs --now
showmount -e

# K8s 各节点作为 NFS 客户端,也需要安装 nfs 相关工具
# runsh cmd ips/node.ips "yum install -y nfs-utils"
deployment.yaml
---
apiVersion: v1
kind: Namespace
metadata:
  name: infra-storage
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-client-provisioner
  labels:
    app: nfs-client-provisioner
  namespace: infra-storage
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
      - name: nfs-client-provisioner
        image: linuxduduniao/nfs-subdir-external-provisioner:v4.0.1
        volumeMounts:
        - name: nfs-client-root
          mountPath: /persistentvolumes
        env:
        - name: PROVISIONER_NAME
          value: default-nfs-provisioner
        - name: NFS_SERVER
          value: 192.168.111.128    # NFS 服务端地址,本文为 middleware-128
        - name: NFS_PATH
          value: /data/nfs
        - name: TZ
          value: Asia/Shanghai
      volumes:
      - name: nfs-client-root
        nfs:
          server: 192.168.111.128   # 与上面的 NFS_SERVER 保持一致
          path: /data/nfs
rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: infra-storage
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: infra-storage
roleRef:
  kind: ClusterRole
  name: nfs-client-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: infra-storage
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: infra-storage
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: infra-storage
roleRef:
  kind: Role
  name: leader-locking-nfs-client-provisioner
  apiGroup: rbac.authorization.k8s.io
class.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: managed-nfs-storage
provisioner: default-nfs-provisioner
parameters:
  archiveOnDelete: "false"
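创建上述资源并确认 provisioner 运行正常(示例):

kubectl apply -f rbac.yaml -f deployment.yaml -f class.yaml
kubectl -n infra-storage get pod
kubectl get storageclass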
  • 验证NFS存储
test-claim.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: test-claim
spec:
  storageClassName: managed-nfs-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Mi
test-pod.yaml
kind: Pod
apiVersion: v1
metadata:
  name: test-pod
spec:
  containers:
  - name: test-pod
    image: gcr.io/google_containers/busybox:1.24
    command:
      - "/bin/sh"
    args:
      - "-c"
      - "touch /mnt/SUCCESS && exit 0 || exit 1"
    volumeMounts:
      - name: nfs-pvc
        mountPath: "/mnt"
  restartPolicy: "Never"
  volumes:
    - name: nfs-pvc
      persistentVolumeClaim:
        claimName: test-claim
kubectl apply -f test-claim.yaml -f test-pod.yaml

kubectl get pod
NAME READY STATUS RESTARTS AGE
test-pod 0/1 Completed 0 17s

kubectl get pvc
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
test-claim Bound pvc-baf4603c-fea5-4ea6-93ac-b3387a1f150c 1Mi RWX managed-nfs-storage 34s
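如需将该 StorageClass 设为集群默认(可选,示例):

kubectl patch storageclass managed-nfs-storage \
  -p '{"metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'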

部署 nginx LB

主要用来为 Ingress Controller 以及多个 Master 节点的 APIServer 做流量负载。
下面的 upstream 暂时只启用了一个 APIServer 后端,为方便后续扩展,提前把 APIServer 的负载均衡配置好。

  • 添加主机名解析
runsh cmd ips/lb.ips "echo '192.168.111.121 master-121
192.168.111.122 master-122
192.168.111.123 master-123
192.168.111.124 worker-124
192.168.111.125 worker-125
192.168.111.127 lb-127' >> /etc/hosts"
  • LB安装nginx
    #!/bin/bash
    # Close selinux
    setenforce 0 &> /dev/null

    # Install the compilation environment in advance
    yum install -y wget gcc gcc-c++ pcre pcre-devel openssl openssl-devel zlib zlib-devel gd gd-devel

    # Download nginx source code package
    #--->>> Only modify the following parameters <<<---

    nginx_version=1.20.1
    nginx_prefix_dir=/usr/local/nginx
    install_package_dir=/usr/local/src
    install_log_dir="./install_nginx.log"

    #--->>> Only modify the above parameters <<<---

    cd ${install_package_dir} && \
    ls nginx-${nginx_version}.tar.gz || wget http://nginx.org/download/nginx-${nginx_version}.tar.gz &&\
    tar xf nginx-${nginx_version}.tar.gz -C ${install_package_dir} && cd nginx-${nginx_version} && \
    useradd -s /sbin/nologin nginx

    # Compile and install
    ./configure --prefix=${nginx_prefix_dir} \
    --user=nginx \
    --group=nginx \
    --with-pcre \
    --with-http_ssl_module \
    --with-http_v2_module \
    --with-http_realip_module \
    --with-http_addition_module \
    --with-http_sub_module \
    --with-http_dav_module \
    --with-http_flv_module \
    --with-http_mp4_module \
    --with-http_gunzip_module \
    --with-http_gzip_static_module \
    --with-http_random_index_module \
    --with-http_secure_link_module \
    --with-http_stub_status_module \
    --with-http_auth_request_module \
    --with-http_image_filter_module \
    --with-http_slice_module \
    --with-mail \
    --with-threads \
    --with-file-aio \
    --with-stream \
    --with-mail_ssl_module \
    --with-stream_ssl_module

    if [[ $? -ne 0 ]];then
    echo "Compilation failed. Please check!";exit 2
    fi
    make && make install

    # Configure nginx daemons
    cat >/usr/lib/systemd/system/nginx.service <<EOF
    [Unit]
    Description=nginx - high performance web server
    Documentation=https://nginx.org/en/docs/
    After=network.target remote-fs.target nss-lookup.target

    [Service]
    Type=forking
    PIDFile=${nginx_prefix_dir}/logs/nginx.pid
    ExecStartPre=${nginx_prefix_dir}/sbin/nginx -t -c ${nginx_prefix_dir}/conf/nginx.conf
    ExecStart=${nginx_prefix_dir}/sbin/nginx -c ${nginx_prefix_dir}/conf/nginx.conf
    ExecReload=${nginx_prefix_dir}/sbin/nginx -s reload
    ExecStop=${nginx_prefix_dir}/sbin/nginx -s stop
    PrivateTmp=true

    [Install]
    WantedBy=multi-user.target
    EOF
    chmod +x /usr/lib/systemd/system/nginx.service
    systemctl daemon-reload

    # Set Nginx to boot automatically
    systemctl enable nginx --now
    # systemctl restart nginx && systemctl enable nginx

    # Add to PATH
    # Add Soft Connection
    ln -s ${nginx_prefix_dir}/sbin/nginx /usr/sbin/nginx

    # Add environment variables
    #cat >/etc/profile.d/nginx.sh<<EOF
    #export NGINX_HOME=${nginx_prefix_dir}
    #export PATH=\$PATH:\$NGINX_HOME/sbin
    #EOF
    #source /etc/profile

    # Print installation information
    cat > ${install_log_dir} <<EOF
    [Info]
    Binary: ln -s ${nginx_prefix_dir}/sbin/nginx /usr/sbin/nginx
    Daemon: /usr/lib/systemd/system/nginx.service
    [Detail]
    EOF
    nginx -V &>> ./${install_log_dir}
    clear; cat ./${install_log_dir}
  • 配置nginx.conf
    mkdir -p /usr/local/nginx/conf/{vhost,stream}
    # 注意:使用 <<'EOF' 防止 shell 在写入时展开配置中的 $ 变量
    cat > /usr/local/nginx/conf/nginx.conf <<'EOF'
    user nginx;
    worker_processes auto;

    #error_log  logs/error.log;
    #error_log  logs/error.log  notice;
    #error_log  logs/error.log  info;

    #pid        logs/nginx.pid;

    events {
        worker_connections 4096;
    }

    http {
        include       mime.types;
        default_type  application/octet-stream;

        log_format access '$time_local - $remote_addr - $upstream_addr - $status '
                          '$upstream_connect_time - $bytes_sent '
                          '$upstream_bytes_sent - $upstream_bytes_received';

        # log_format main '$remote_addr - $remote_user [$time_local] "$request" '
        #                 '$status $body_bytes_sent "$http_referer" '
        #                 '"$http_user_agent" "$http_x_forwarded_for"';
        access_log logs/access.log access;
        error_log  logs/error.log;

        ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE
        ssl_prefer_server_ciphers on;

        gzip on;
        sendfile on;
        tcp_nopush on;
        keepalive_timeout 65;
        types_hash_max_size 2048;
        server_tokens off;

        map $http_upgrade $connection_upgrade {
            default upgrade;
            ''      close;
        }
        include vhost/*.conf;
    }

    stream {
        log_format proxy '$time_local - $remote_addr - $upstream_addr - $protocol - $status '
                         '$session_time - $upstream_connect_time - $bytes_sent - $bytes_received '
                         '$upstream_bytes_sent - $upstream_bytes_received';
        access_log logs/stream_access.log proxy;
        error_log  logs/stream_error.log;

        include stream/*.conf;
    }
    EOF
  • 配置APIServer.conf
    # APIServer
    # 监听端口与前面 bootstrap/kubeconfig 中的 https://192.168.111.100:8443 保持一致
    cat > /usr/local/nginx/conf/stream/apiserver.conf <<EOF
    upstream kube-apiserver {
        server 192.168.111.121:6443 max_fails=1 fail_timeout=60s;
        # server 192.168.111.122:6443 max_fails=1 fail_timeout=60s;
        # server 192.168.111.123:6443 max_fails=1 fail_timeout=60s;
    }

    server {
        listen 0.0.0.0:8443;
        allow 192.168.0.0/16;
        allow 10.0.0.0/8;
        deny all;

        proxy_connect_timeout 2s;
        proxy_next_upstream on;
        proxy_next_upstream_timeout 5;
        proxy_next_upstream_tries 1;

        proxy_pass kube-apiserver;
        access_log logs/kube-apiserver.log proxy;
    }
    EOF
  • 重载 Nginx,使配置文件生效
# nginx -t
# systemctl reload nginx
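可以通过 LB 的监听地址验证到 APIServer 的转发是否正常(假设 VIP 已指向该 LB,且端口与上面的配置一致;-k 仅用于跳过证书校验做连通性测试):

curl -k https://192.168.111.100:8443/healthz
# 预期输出: ok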