写在最前

1. 下载与安装

可以从 Docker 官方下载页面根据自身的系统架构选择对应版本的安装包。这里以 Docker 28.2.2 为例进行演示。

https://download.docker.com/linux/static/stable/

# 可以先上传离线安装包(例如 docker-28.2.2_aarch64.tgz),也可以直接使用 wget 命令在线下载,方式任选。
# 1. 解压docker-28.2.2_aarch64.tgz
tar -xf docker-28.2.2_aarch64.tgz

# 2. 将docker中的二进制文件复制到/usr/local/bin/
cp docker/* /usr/local/bin/

# 3. 配置daemon.json,这里我选择将docker的镜像数据放在/app/docker中,故需要创建此目录
mkdir -p /app/docker
mkdir -p /etc/docker
cat > /etc/docker/daemon.json<<'EOF'
{
  "insecure-registries": ["0.0.0.0/0"],
  "data-root": "/app/docker",
  "log-driver": "json-file",
  "log-opts": {"max-size":"200m", "max-file":"3"},
  "exec-opts":["native.cgroupdriver=systemd"],
  "default-ulimits": {
    "nofile": {
       "Name": "nofile",
       "Hard": 819200,
       "Soft": 819200
      }
   }
}
EOF

# 4. 生成docker.service服务文件
cat > /etc/systemd/system/docker.service <<'EOF'
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network-online.target firewalld.service
Wants=network-online.target

[Service]
# 代理
# Environment="HTTP_PROXY=http://172.31.0.1:7890/"
# Environment="HTTPS_PROXY=http://172.31.0.1:7890/"
# Environment="NO_PROXY=localhost,10.*,172.*,192.*"
Type=notify
ExecStart=/usr/local/bin/dockerd
ExecReload=/bin/kill -s HUP $MAINPID
LimitNOFILE=infinity
LimitNPROC=infinity
TimeoutStartSec=0
Delegate=yes
KillMode=process
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s

[Install]
WantedBy=multi-user.target
EOF

# 5. 加载并启动
systemctl daemon-reload
systemctl enable --now docker

# 6. 检查docker服务
[root@tanqidi ~]# systemctl status docker
● docker.service - Docker Application Container Engine
   Loaded: loaded (/etc/systemd/system/docker.service; enabled; vendor preset: disabled)
   Active: active (running) since Wed 2025-11-12 17:01:17 CST; 14min ago
     Docs: https://docs.docker.com
 Main PID: 51061 (dockerd)
    Tasks: 19
   Memory: 31.7M
   CGroup: /system.slice/docker.service
           ├─51061 /usr/local/bin/dockerd
           └─51069 containerd --config /var/run/docker/containerd/containerd.toml

Nov 12 17:01:16 tanqidi dockerd[51061]: time="2025-11-12T17:01:16.578157949+08:00" level=info msg="Loading containers: start."
Nov 12 17:01:17 tanqidi dockerd[51061]: time="2025-11-12T17:01:17.882864558+08:00" level=info msg="Loading containers: done."
Nov 12 17:01:17 tanqidi dockerd[51061]: time="2025-11-12T17:01:17.898665416+08:00" level=info msg="Docker daemon" commit=45873be containerd-snapshotter=false storage-driver=overl>
Nov 12 17:01:17 tanqidi dockerd[51061]: time="2025-11-12T17:01:17.898928711+08:00" level=info msg="Initializing buildkit"
Nov 12 17:01:17 tanqidi dockerd[51061]: time="2025-11-12T17:01:17.910237104+08:00" level=warning msg="CDI setup error /var/run/cdi: failed to monitor for changes: no such file or>
Nov 12 17:01:17 tanqidi dockerd[51061]: time="2025-11-12T17:01:17.910278475+08:00" level=warning msg="CDI setup error /etc/cdi: failed to monitor for changes: no such file or dir>
Nov 12 17:01:17 tanqidi dockerd[51061]: time="2025-11-12T17:01:17.948664928+08:00" level=info msg="Completed buildkit initialization"
Nov 12 17:01:17 tanqidi dockerd[51061]: time="2025-11-12T17:01:17.960320178+08:00" level=info msg="Daemon has completed initialization"
Nov 12 17:01:17 tanqidi dockerd[51061]: time="2025-11-12T17:01:17.960384789+08:00" level=info msg="API listen on /var/run/docker.sock"
Nov 12 17:01:17 tanqidi systemd[1]: Started Docker Application Container Engine.

[root@tanqidi ~]# docker ps
CONTAINER ID   IMAGE     COMMAND   CREATED   STATUS    PORTS     NAMES

[root@tanqidi ~]# ll /app/docker/
total 4
drwx--x--x 3 root root 123 Nov 12 17:01 buildkit
drwx--x--x 3 root root  20 Nov 12 17:01 containerd
drwx--x--- 2 root root   6 Nov 12 17:01 containers
-rw------- 1 root root  36 Nov 12 17:01 engine-id
drwx------ 3 root root  22 Nov 12 17:01 image
drwxr-x--- 3 root root  19 Nov 12 17:01 network
drwx--x--- 3 root root  40 Nov 12 17:01 overlay2
drwx------ 3 root root  17 Nov 12 17:01 plugins
drwx------ 2 root root   6 Nov 12 17:01 runtimes
drwx------ 2 root root   6 Nov 12 17:01 swarm
drwx------ 2 root root   6 Nov 12 17:01 tmp
drwx-----x 2 root root  50 Nov 12 17:01 volumes
[root@tanqidi ~]# 

2. 安装 Docker-Compose

这里我选择 v2.33.1 版本来搭配 Docker 28.x 使用。由于我使用的是 ARM64 架构,因此下载文件为 docker-compose-linux-aarch64
如果是 x86_64 架构,则应选择 docker-compose-linux-x86_64

https://github.com/docker/compose/releases/tag/v2.33.1

/usr/bin 主要用于存放由系统或包管理器(如 apt、yum)安装的可执行程序,是操作系统标准路径的一部分。这类程序通常由系统维护和更新,不建议手动修改或覆盖。

/usr/local/bin 则用于存放用户手动安装或自编译的程序,不受系统包管理器管理。它属于本地扩展路径,优先级通常高于 /usr/bin,方便用户在不影响系统环境的情况下,自行部署或替换特定版本的软件。

# 1. 可以根据自身环境选择离线上传安装包,或直接使用 wget 命令在线下载,方式任选。
# 2. 将docker-compose-linux-aarch64复制到/usr/local/bin(若复制后没有可执行权限,需再执行 chmod +x /usr/local/bin/docker-compose)
cp docker-compose-linux-aarch64 /usr/local/bin/docker-compose

# 3. 测试
[root@tanqidi app]# docker-compose -v
Docker Compose version v2.33.1

3. 故障排查

3.1 MTU

在某些云厂商的环境中,主机网卡的 MTU 值可能并非标准的 1500,而是 1450。如果在这种情况下仍使用默认配置安装 Docker,docker0 网卡的 MTU 会保持 1500,而宿主机的 eth 或 ens 网卡为 1450,这种不一致会导致严重的网络问题。

在高并发或大流量场景下,这种 MTU 不匹配可能引发数据包分片异常、网络中断等问题。
因此,在部署 Docker 前务必检查并确认主机网卡的 MTU 值是否符合预期(通常为 1500),并根据实际情况调整 Docker 的 MTU 配置。

[root@tanqidi ~]# ip a s 
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc fq_codel state UP group default qlen 1000
    link/ether fa:16:3e:33:1f:96 brd ff:ff:ff:ff:ff:ff
    inet 192.168.0.109/24 brd 192.168.0.255 scope global dynamic eth0
       valid_lft 31528097sec preferred_lft 31528097sec
    inet6 2406:440:600::2d8f/128 scope global dynamic 
       valid_lft 2147475746sec preferred_lft 2147475686sec
    inet6 fe80::f816:3eff:fe33:1f96/64 scope link 
       valid_lft forever preferred_lft forever
3: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1450 qdisc noqueue state DOWN group default 
    link/ether 92:92:45:10:8b:97 brd ff:ff:ff:ff:ff:ff
    inet 172.17.0.1/16 brd 172.17.255.255 scope global docker0
       valid_lft forever preferred_lft forever

可以在 Docker 的配置文件 /etc/docker/daemon.json 中通过设置 "mtu" 参数来指定 MTU 值(修改后需重启 Docker 服务才能生效,例如 systemctl restart docker)。如果没有特殊的网络场景,请保持默认值,切勿随意更改。

[root@tanqidi ~]# cat /etc/docker/daemon.json 
{
  "insecure-registries": ["0.0.0.0/0"],
  "data-root": "/app/docker",
  "mtu": 1450,
  "log-driver": "json-file",
  "log-opts": {"max-size":"200m", "max-file":"3"},
  "exec-opts":["native.cgroupdriver=systemd"],
  "default-ulimits": {
    "nofile": {
       "Name": "nofile",
       "Hard": 819200,
       "Soft": 819200
      }
   }
}

4. 常用命令

4.1 常用命令

# 按内存占用从小到大排序,查看 Docker 中哪个容器最吃内存
docker stats --no-stream --format "table {{.Name}}\t{{.MemUsage}}" | sort -k2 -h
# 从大到小
docker stats --no-stream --format "table {{.Name}}\t{{.MemUsage}}" | sort -k2 -h -r

# 查看 CPU 排名
docker stats --no-stream --format "{{.Name}} {{.CPUPerc}}" | sort -k2 -h -r

4.2 获取运行参数

docker inspect --format 'docker run \
  --name {{printf "%q" .Name}} \
    {{- with .HostConfig}}
        {{- if .Privileged}}
  --privileged \
        {{- end}}
        {{- if .AutoRemove}}
  --rm \
        {{- end}}
        {{- if .Runtime}}
  --runtime {{printf "%q" .Runtime}} \
        {{- end}}
        {{- range $b := .Binds}}
  --volume {{printf "%q" $b}} \
        {{- end}}
        {{- range $v := .VolumesFrom}}
  --volumes-from {{printf "%q" $v}} \
        {{- end}}
        {{- range $l := .Links}}
  --link {{printf "%q" $l}} \
        {{- end}}
        {{- if index . "Mounts"}}
            {{- range $m := .Mounts}}
  --mount type={{.Type}}
                {{- if $s := index $m "Source"}},source={{$s}}{{- end}}
                {{- if $t := index $m "Target"}},destination={{$t}}{{- end}}
                {{- if index $m "ReadOnly"}},readonly{{- end}}
                {{- if $vo := index $m "VolumeOptions"}}
                    {{- range $i, $v := $vo.Labels}}
                        {{- printf ",volume-label=%s=%s" $i $v}}
                    {{- end}}
                    {{- if $dc := index $vo "DriverConfig" }}
                        {{- if $n := index $dc "Name" }}
                            {{- printf ",volume-driver=%s" $n}}
                        {{- end}}
                        {{- range $i, $v := $dc.Options}}
                            {{- printf ",volume-opt=%s=%s" $i $v}}
                        {{- end}}
                    {{- end}}
                {{- end}}
                {{- if $bo := index $m "BindOptions"}}
                    {{- if $p := index $bo "Propagation" }}
                        {{- printf ",bind-propagation=%s" $p}}
                    {{- end}}
                {{- end}} \
            {{- end}}
        {{- end}}
        {{- if .PublishAllPorts}}
  --publish-all \
        {{- end}}
        {{- if .UTSMode}}
  --uts {{printf "%q" .UTSMode}} \
        {{- end}}
        {{- with .LogConfig}}
  --log-driver {{printf "%q" .Type}} \
            {{- range $o, $v := .Config}}
  --log-opt {{$o}}={{printf "%q" $v}} \
            {{- end}}
        {{- end}}
        {{- with .RestartPolicy}}
  --restart "{{.Name -}}
            {{- if eq .Name "on-failure"}}:{{.MaximumRetryCount}}
            {{- end}}" \
        {{- end}}
        {{- range $e := .ExtraHosts}}
  --add-host {{printf "%q" $e}} \
        {{- end}}
        {{- range $v := .CapAdd}}
  --cap-add {{printf "%q" $v}} \
        {{- end}}
        {{- range $v := .CapDrop}}
  --cap-drop {{printf "%q" $v}} \
        {{- end}}
        {{- range $d := .Devices}}
  --device {{printf "%q" (index $d).PathOnHost}}:{{printf "%q" (index $d).PathInContainer}}:{{(index $d).CgroupPermissions}} \
        {{- end}}
    {{- end}}
    {{- with .NetworkSettings -}}
        {{- range $p, $conf := .Ports}}
            {{- with $conf}}
  --publish "
                {{- if $h := (index $conf 0).HostIp}}{{$h}}:
                {{- end}}
                {{- (index $conf 0).HostPort}}:{{$p}}" \
            {{- end}}
        {{- end}}
        {{- range $n, $conf := .Networks}}
            {{- with $conf}}
  --network {{printf "%q" $n}} \
                {{- range $a := $conf.Aliases}}
  --network-alias {{printf "%q" $a}} \
                {{- end}}
            {{- end}}
        {{- end}}
    {{- end}}
    {{- with .Config}}
        {{- if .Hostname}}
  --hostname {{printf "%q" .Hostname}} \
        {{- end}}
        {{- if .Domainname}}
  --domainname {{printf "%q" .Domainname}} \
        {{- end}}
        {{- if index . "ExposedPorts"}}
        {{- range $p, $conf := .ExposedPorts}}
  --expose {{printf "%q" $p}} \
        {{- end}}
        {{- end}}
        {{- if .User}}
  --user {{printf "%q" .User}} \
        {{- end}}
        {{- range $e := .Env}}
  --env {{printf "%q" $e}} \
        {{- end}}
        {{- range $l, $v := .Labels}}
  --label {{printf "%q" $l}}={{printf "%q" $v}} \
        {{- end}}
    {{- if not (or .AttachStdin  (or .AttachStdout .AttachStderr))}}
  --detach \
    {{- end}}
    {{- if .AttachStdin}}
  --attach stdin \
    {{- end}}
    {{- if .AttachStdout}}
  --attach stdout \
    {{- end}}
    {{- if .AttachStderr}}
  --attach stderr \
    {{- end}}
    {{- if .Tty}}
  --tty \
    {{- end}}
    {{- if .OpenStdin}}
  --interactive \
    {{- end}}
    {{- if .Entrypoint}}
{{- /* Since the entry point cannot be overridden from the command line with an array of size over 1,
       we are fine assuming the default value in such a case. */ -}}
        {{- if eq (len .Entrypoint) 1 }}
  --entrypoint "
            {{- range $i, $v := .Entrypoint}}
                {{- if $i}} {{end}}
                {{- $v}}
            {{- end}}" \
        {{- end}}
    {{- end}}
  {{printf "%q" .Image}} \
  {{range .Cmd}}{{printf "%q " .}}{{- end}}
{{- end}}' \
jenkins1

写在最后