summaryrefslogtreecommitdiff
path: root/fsck/bcachefsck@.service.in
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2023-12-04 18:43:31 -0800
committerKent Overstreet <kent.overstreet@linux.dev>2023-12-07 11:57:36 -0500
commitfbfdd05ac5b5b748cf9c7c0ea72be5268917c5d6 (patch)
tree478e759ed430c56b4a7f02c2ec26457efd3134e0 /fsck/bcachefsck@.service.in
parent5fa7db9806b1f7a082a0bcda8ba7b7beeb03bc61 (diff)
fsck: add systemd service definitions for automatic online service
Add some systemd service files so that bcachefs can automatically fsck mounted filesystems in the background. Hopefully with minimal disruption to frontend operations. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fsck/bcachefsck@.service.in')
-rw-r--r--fsck/bcachefsck@.service.in98
1 files changed, 98 insertions, 0 deletions
diff --git a/fsck/bcachefsck@.service.in b/fsck/bcachefsck@.service.in
new file mode 100644
index 00000000..86c1824c
--- /dev/null
+++ b/fsck/bcachefsck@.service.in
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Online bcachefsck for %f
+OnFailure=bcachefsck_fail@%i.service
+Documentation=man:bcachefs(8)
+
+# Skip this unit unless the service manager holds every capability we need
+ConditionCapability=CAP_SYS_ADMIN
+ConditionCapability=CAP_FOWNER
+ConditionCapability=CAP_DAC_OVERRIDE
+ConditionCapability=CAP_DAC_READ_SEARCH
+ConditionCapability=CAP_SYS_RAWIO
+
+# Only run against a real mountpoint, and only once its mounts are in place
+ConditionPathIsMountPoint=%f
+RequiresMountsFor=%f
+
+[Service]
+Type=oneshot
+Environment=SERVICE_MODE=1
+ExecStart=bcachefs fsck --real-mountpoint /tmp/scrub/ @bcachefsck_args@ %f
+SyslogIdentifier=%N
+
+# Run at idle CPU/IO priority and maximum niceness so nothing else starves.
+IOSchedulingClass=idle
+CPUSchedulingPolicy=idle
+CPUAccounting=true
+Nice=19
+
+# Place every instance in the dedicated bcachefsck slice so that resource
+# usage of the background checks can be controlled in one place.
+Slice=system-bcachefsck.slice
+
+# Forbid acquiring realtime CPU scheduling
+RestrictRealtime=true
+
+# Run as a dynamically allocated user that isn't root
+DynamicUser=true
+
+# Make the whole file hierarchy read-only and /home inaccessible, then bind
+# mount the filesystem being checked at /tmp/scrub inside our private /tmp
+# (the path passed to --real-mountpoint in ExecStart=).  'norbind' keeps
+# mounts nested under the original mountpoint out of the bind.  This also
+# allows checking filesystems mounted under /tmp in the host namespace.
+ProtectSystem=strict
+ProtectHome=yes
+PrivateTmp=true
+BindPaths=%f:/tmp/scrub:norbind
+
+# No network access of any kind
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program alter kernel configuration or see other users' processes
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Hide everything in /proc except per-process entries, even /proc/mounts
+ProcSubset=pid
+
+# Lock the execution domain to the default Linux personality
+LockPersonality=true
+
+# No memory pages that are both writable and executable
+MemoryDenyWriteExecute=true
+
+# Keep mounts made for this service from propagating to the host
+PrivateMounts=true
+
+# Native-arch syscalls only: @system-service minus privileged/resources/mount
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# Grant exactly the capabilities bcachefsck needs and forbid gaining more
+CapabilityBoundingSet=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+AmbientCapabilities=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+NoNewPrivileges=true
+
+# bcachefsck doesn't create files, so mask out every permission bit
+UMask=7777
+
+# No access to hardware /dev files except block devices; no setting the clock
+ProtectClock=true
+DevicePolicy=closed
+DeviceAllow=block-*