summaryrefslogtreecommitdiff
path: root/fsck/bcachefsck@.service.in
blob: 86c1824c8b82aa8f8d58231dd2bc4d3355ecd56b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2023-2024 Oracle.  All Rights Reserved.
# Author: Darrick J. Wong <djwong@kernel.org>

[Unit]
Description=Online bcachefsck for %f
# When a check fails, start the separate failure-handler unit for the same
# instance.  This must not name bcachefsck@%i.service itself, otherwise a
# failed check would just queue the very same check again.
# NOTE(review): assumes a bcachefsck_fail@.service template is installed
# alongside this one -- confirm.
OnFailure=bcachefsck_fail@%i.service
Documentation=man:bcachefs(8)

# Explicitly require the capabilities that this program needs.  If any of
# them is missing from the service manager's bounding set the unit is skipped
# rather than started.  Keep this list in sync with CapabilityBoundingSet= and
# AmbientCapabilities= in [Service].
ConditionCapability=CAP_SYS_ADMIN
ConditionCapability=CAP_FOWNER
ConditionCapability=CAP_DAC_OVERRIDE
ConditionCapability=CAP_DAC_READ_SEARCH
ConditionCapability=CAP_SYS_RAWIO

# The instance path must be a mountpoint, and the mount units needed to reach
# it must be up before this unit starts.
ConditionPathIsMountPoint=%f
RequiresMountsFor=%f

[Service]
# One-shot job: run the check command below to completion.
Type=oneshot
# NOTE(review): SERVICE_MODE is presumably read by the fsck program to detect
# that it was started from this unit -- confirm.
Environment=SERVICE_MODE=1
# /tmp/scrub is the path at which BindPaths= below exposes the filesystem (%f)
# inside this unit's private /tmp; %f itself is passed as the last argument.
# @bcachefsck_args@ is a placeholder, presumably substituted when this .in
# template is processed at build time -- confirm against the build rules.
ExecStart=bcachefs fsck --real-mountpoint /tmp/scrub/ @bcachefsck_args@ %f
# Log under the full unit name (without the type suffix).
SyslogIdentifier=%N

# Run scrub with minimal CPU and IO priority so that nothing else will starve.
IOSchedulingClass=idle
CPUSchedulingPolicy=idle
CPUAccounting=true
Nice=19

# Create the service underneath the background service slice so that we can
# control resource usage.
Slice=system-bcachefsck.slice

# No realtime CPU scheduling
RestrictRealtime=true

# Dynamically create a user that isn't root
DynamicUser=true

# Make the entire filesystem readonly and /home inaccessible, then bind mount
# the filesystem we're supposed to be checking into our private /tmp dir.
# 'norbind' means that we don't bind anything under that original mount.
# This enables checking filesystems mounted under /tmp in the global mount
# namespace.  The bind target must match the --real-mountpoint argument in
# ExecStart= above.
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=true
BindPaths=%f:/tmp/scrub:norbind

# No network access
PrivateNetwork=true
ProtectHostname=true
RestrictAddressFamilies=none
IPAddressDeny=any

# Don't let the program mess with the kernel configuration at all
ProtectKernelLogs=true
ProtectKernelModules=true
ProtectKernelTunables=true
ProtectControlGroups=true
ProtectProc=invisible
RestrictNamespaces=true

# Hide everything in /proc that is not per-process, even /proc/mounts
ProcSubset=pid

# Lock the execution domain to the default Linux personality
LockPersonality=true

# No memory mappings that are writable and executable at the same time
MemoryDenyWriteExecute=true

# Don't let our mounts leak out to the host
PrivateMounts=true

# Restrict system calls to the native arch and only enough to get things going.
# Order matters: the first line sets up the allow-list, and the '~' lines then
# remove whole groups from it.
SystemCallArchitectures=native
SystemCallFilter=@system-service
SystemCallFilter=~@privileged
SystemCallFilter=~@resources
SystemCallFilter=~@mount

# bcachefsck needs these privileges to run, and no others.  The same set is
# listed as ConditionCapability= entries in [Unit]; keep the three in sync.
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
AmbientCapabilities=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
NoNewPrivileges=true

# bcachefsck doesn't create files, so mask out every permission bit
UMask=7777

# No access to hardware /dev files except for block devices
ProtectClock=true
DevicePolicy=closed
DeviceAllow=block-*