###############################################################################
#
# Makefile for cachefilesd, the CacheFiles userspace management daemon.
#
# User-tunable knobs (override on the command line, e.g. `make DESTDIR=/tmp/x`):
#   CFLAGS, LDFLAGS  - compiler / linker flags
#   INSTALL          - install(1) program
#   DESTDIR          - staging root prepended to every install path
#   ETCDIR, SBINDIR, MANDIR - install locations
#   SPECFILE         - RPM spec file the package version is read from
#
###############################################################################
CFLAGS   := -g -O2 -Wall -Wsign-compare
INSTALL  := install
DESTDIR  :=
ETCDIR   := /etc
BINDIR   := /bin
SBINDIR  := /sbin
MANDIR   := /usr/share/man
SPECFILE := redhat/cachefilesd.spec

LNS      := ln -sf

# None of these names is a file produced by the build; declaring them phony
# keeps a stray file called e.g. "clean" or "rpm" from masking the target.
.PHONY: all install clean distclean tarball rpm rpmlint show_vars

# Remove a half-written target (e.g. a truncated .bz2) when its recipe fails.
.DELETE_ON_ERROR:

###############################################################################
#
# Determine the current package version from the specfile
#
###############################################################################
VERSION  := $(word 2,$(shell grep "^Version:" $(SPECFILE)))
TARBALL  := cachefilesd-$(VERSION).tar
ZTARBALL := $(TARBALL).bz2

###############################################################################
#
# Guess at the appropriate word size
#
# The guess is taken from the ELF class of /usr/bin/make.  If file(1) reports
# anything else, BUILDFOR matches neither branch and no -m flag is added.
# NOTE(review): -m32/-m64 are x86-style options; confirm they are accepted by
# the compiler on every architecture this is built for.
#
###############################################################################
BUILDFOR := $(shell file /usr/bin/make | sed -e 's!.*ELF \(32\|64\)-bit.*!\1!')-bit

ifeq ($(BUILDFOR),32-bit)
CFLAGS += -m32
else
ifeq ($(BUILDFOR),64-bit)
CFLAGS += -m64
endif
endif

###############################################################################
#
# Build stuff
#
###############################################################################
all: cachefilesd

# The Makefile is a prerequisite so that flag changes force a rebuild.
cachefilesd: cachefilesd.c Makefile
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<

###############################################################################
#
# Install everything
#
###############################################################################
MAN5 := $(DESTDIR)$(MANDIR)/man5
MAN8 := $(DESTDIR)$(MANDIR)/man8

install: all
	$(INSTALL) -D -m 0755 cachefilesd $(DESTDIR)$(SBINDIR)/cachefilesd
	$(INSTALL) -D -m 0644 cachefilesd.conf $(DESTDIR)$(ETCDIR)/cachefilesd.conf
	$(INSTALL) -D -m 0644 cachefilesd.conf.5 $(MAN5)/cachefilesd.conf.5
	$(INSTALL) -D -m 0644 cachefilesd.8 $(MAN8)/cachefilesd.8

###############################################################################
#
# Clean up
#
###############################################################################
clean:
	$(RM) cachefilesd
	$(RM) *.o *~
	$(RM) debugfiles.list debugsources.list

# Also drop the compressed tarball, which the tarball target leaves next to
# the uncompressed one.
distclean: clean
	$(RM) -r rpmbuild $(TARBALL) $(ZTARBALL)

###############################################################################
#
# Generate a tarball
#
###############################################################################
# The uncompressed $(TARBALL) is left behind as a by-product of this rule.
$(ZTARBALL):
	git archive --prefix=cachefilesd-$(VERSION)/ --format tar -o $(TARBALL) HEAD
	bzip2 -9 <$(TARBALL) >$(ZTARBALL)

tarball: $(ZTARBALL)

###############################################################################
#
# Generate an RPM
#
###############################################################################
SRCBALL  := rpmbuild/SOURCES/$(TARBALL)
ZSRCBALL := rpmbuild/SOURCES/$(ZTARBALL)

# Spelled out with make functions rather than shell brace expansion, which the
# default /bin/sh is not required to support.
RPMDIRS  := $(addprefix rpmbuild/,SPECS SOURCES BUILD BUILDROOT RPMS SRPMS)

BUILDID  := .local
# stderr is discarded so that hosts without the RPM macro directories do not
# get grep errors on every make invocation; dist is simply empty there.
dist     := $(word 2,$(shell grep -r "^%dist" /etc/rpm /usr/lib/rpm 2>/dev/null))
release  := $(word 2,$(shell grep ^Release: $(SPECFILE)))
release  := $(subst %{?dist},$(dist),$(release))
release  := $(subst %{?buildid},$(BUILDID),$(release))
rpmver   := $(VERSION)-$(release)
SRPM     := rpmbuild/SRPMS/cachefilesd-$(rpmver).src.rpm

RPMBUILDDIRS := \
    --define "_srcrpmdir $(CURDIR)/rpmbuild/SRPMS" \
    --define "_rpmdir $(CURDIR)/rpmbuild/RPMS" \
    --define "_sourcedir $(CURDIR)/rpmbuild/SOURCES" \
    --define "_specdir $(CURDIR)/rpmbuild/SPECS" \
    --define "_builddir $(CURDIR)/rpmbuild/BUILD" \
    --define "_buildrootdir $(CURDIR)/rpmbuild/BUILDROOT"

RPMFLAGS := \
    --define "buildid $(BUILDID)"

rpm:
	mkdir -p rpmbuild
	chmod ug-s rpmbuild
	mkdir -p $(RPMDIRS)
	git archive --prefix=cachefilesd-$(VERSION)/ --format tar -o $(SRCBALL) HEAD
	bzip2 -9 <$(SRCBALL) >$(ZSRCBALL)
	rpmbuild -ts $(ZSRCBALL) --define "_srcrpmdir rpmbuild/SRPMS" $(RPMFLAGS)
	rpmbuild --rebuild $(SRPM) $(RPMBUILDDIRS) $(RPMFLAGS)

# Both package globs are listed explicitly instead of using brace expansion.
rpmlint: rpm
	rpmlint $(SRPM) \
		$(CURDIR)/rpmbuild/RPMS/*/cachefilesd-$(rpmver).*.rpm \
		$(CURDIR)/rpmbuild/RPMS/*/cachefilesd-debuginfo-$(rpmver).*.rpm

###############################################################################
#
# Build debugging
#
###############################################################################
show_vars:
	@echo VERSION=$(VERSION)
	@echo TARBALL=$(TARBALL)
	@echo BUILDFOR=$(BUILDFOR)
Only one thing may have this open at once, and +whilst it is open, a cache is at least partially in existence. The daemon +opens this and sends commands down it to control the cache. + +CacheFiles is currently limited to a single cache. + +CacheFiles attempts to maintain at least a certain percentage of free space on +the filesystem, shrinking the cache by culling the objects it contains to make +space if necessary - see the "Cache Culling" section. This means it can be +placed on the same medium as a live set of data, and will expand to make use of +spare space and automatically contract when the set of data requires more +space. + + +============ +REQUIREMENTS +============ + +The use of CacheFiles and its daemon requires the following features to be +available in the system and in the cache filesystem: + + - dnotify. + + - extended attributes (xattrs). + + - openat() and friends. + + - bmap() support on files in the filesystem (FIBMAP ioctl). + + - The use of bmap() to detect a partial page at the end of the file. + +It is strongly recommended that the "dir_index" option is enabled on Ext3 +filesystems being used as a cache. + + +============= +CONFIGURATION +============= + +The cache is configured by a script in /etc/cachefilesd.conf. Its commands +set up the cache ready for use. The following script commands are available: + + (*) brun <N>% + (*) bcull <N>% + (*) bstop <N>% + (*) frun <N>% + (*) fcull <N>% + (*) fstop <N>% + + Configure the culling limits. Optional. See the "Cache Culling" section. + The defaults are 7% (run), 5% (cull) and 1% (stop) respectively. + + The commands beginning with a 'b' are file space (block) limits, those + beginning with an 'f' are file count limits. + + (*) dir <path> + + Specify the directory containing the root of the cache. Mandatory. + + (*) tag <name> + + Specify a tag to FS-Cache to use in distinguishing multiple caches. + Optional. The default is "CacheFiles". 
+ + (*) culltable <log2size> + + Specify the size of the tables holding the lists of cullable objects in + the cache. The bigger the number, the faster and more smoothly + culling can proceed when there are many objects in the cache, but the + more memory will be consumed by cachefilesd. + + The quantity is specified as log2 of the size actually required, for + example 12 indicates a table of 4096 entries and 13 indicates 8192 + entries. The permissible values are 12 to 20 inclusive, the latter + indicating 1048576 entries. The default is 12. + + (*) resume_thresholds <blocks> <files> + + Scanning to refill the cull table is suspended when all the objects in + a cache are pinned by a live network filesystem in the kernel and + there's nothing available to cull. Scanning is resumed when the kernel + releases sufficient objects that either the number of objects released + exceeds the files parameter here or the cumulative i_blocks values + exceed the blocks parameter. Either threshold can be disabled by + specifying it as "-". + + The default is to ignore the block threshold and to resume when five or + more files have been released. + + (*) debug <mask> + + Specify a numeric bitmask to control debugging in the kernel module. + Optional. The default is zero (all off). + + +================== +STARTING THE CACHE +================== + +The cache is started by running the daemon. The daemon opens the cache proc +file, configures the cache and tells it to begin caching. At that point the +cache binds to fscache and the cache becomes live. + +The daemon is run as follows: + + /sbin/cachefilesd [-d]* [-s] [-n] [-N] [-p <pidfile>] [-f <configfile>] + +The flags are: + + (*) -d + + Increase the debugging level. This can be specified multiple times and + is cumulative with itself. + + (*) -s + + Send messages to stderr instead of syslog. + + (*) -n + + Don't daemonise and go into background. + + (*) -N + + Disable culling and scanning to fill the cull table. 
+ + (*) -p <pidfile> + + Write the daemon's PID to the given file rather than to + /var/run/cachefilesd.pid. + + (*) -f <configfile> + + Use an alternative configuration file rather than the default one. + + +=============== +THINGS TO AVOID +=============== + +Do not mount other things within the cache as this will cause problems. The +kernel module contains its own very cut-down path walking facility that ignores +mountpoints, but the daemon can't avoid them. + +Do not create, rename or unlink files and directories in the cache whilst the +cache is active, as this may cause the state to become uncertain. + +Renaming files in the cache might make objects appear to be other objects (the +filename is part of the lookup key). + +Do not change or remove the extended attributes attached to cache files by the +cache as this will cause the cache state management to get confused. + +Do not create files or directories in the cache, lest the cache get confused or +serve incorrect data. + +Do not chmod files in the cache. The module creates things with minimal +permissions to prevent random users being able to access them directly. + + +============= +CACHE CULLING +============= + +The cache may need culling occasionally to make space. This involves +discarding objects from the cache that have been used less recently than +anything else. Culling is based on the access time of data objects. Empty +directories are culled if not in use. + +Cache culling is done on the basis of the percentage of blocks and the +percentage of files available in the underlying filesystem. There are six +"limits": + + (*) brun + (*) frun + + If the amount of free space and the number of available files in the cache + both rise above these limits, then culling is turned off. + + (*) bcull + (*) fcull + + If the amount of available space or the number of available files in the + cache falls below either of these limits, then culling is started. 
+ + (*) bstop + (*) fstop + + If the amount of available space or the number of available files in the + cache falls below either of these limits, then no further allocation of + disk space or files is permitted until culling has raised things above + these limits again. + +These must be configured thusly: + + 0 <= bstop < bcull < brun < 100 + 0 <= fstop < fcull < frun < 100 + +Note that these are percentages of available space and available files, and do +_not_ appear as 100 minus the percentage displayed by the "df" program. + +The userspace daemon scans the cache to build up a table of cullable objects. +These are then culled in least recently used order. A new scan of the cache is +started as soon as space is made in the table. Objects will be skipped if +their atimes have changed or if the kernel module says it is still using them. + + +=============== +CACHE STRUCTURE +=============== + +The CacheFiles module will create two directories in the directory it was +given: + + (*) cache/ + + (*) graveyard/ + +The active cache objects all reside in the first directory. The CacheFiles +kernel module moves any retired or culled objects that it can't simply unlink +to the graveyard from which the daemon will actually delete them. + +The daemon uses dnotify to monitor the graveyard directory, and will delete +anything that appears therein. + + +The module represents index objects as directories with the filename "I..." or +"J...". Note that the "cache/" directory is itself a special index. + +Data objects are represented as files if they have no children, or directories +if they do. Their filenames all begin "D..." or "E...". If represented as a +directory, data objects will have a file in the directory called "data" that +actually holds the data. + +Special objects are similar to data objects, except their filenames begin +"S..." or "T...". + + +If an object has children, then it will be represented as a directory. 
+Immediately in the representative directory are a collection of directories +named for hash values of the child object keys with an '@' prepended. Into +this directory, if possible, will be placed the representations of the child +objects: + + INDEX INDEX INDEX DATA FILES + ========= ========== ================================= ================ + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400 + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry + + +If the key is so long that it exceeds NAME_MAX with the decorations added on to +it, then it will be cut into pieces, the first few of which will be used to +make a nest of directories, and the last one of which will be the objects +inside the last directory. The names of the intermediate directories will have +'+' prepended: + + J1223/@23/+xy...z/+kl...m/Epqr + + +Note that keys are raw data, and not only may they exceed NAME_MAX in size, +they may also contain things like '/' and NUL characters, and so they may not +be suitable for turning directly into a filename. + +To handle this, CacheFiles will use a suitably printable filename directly and +"base-64" encode ones that aren't directly suitable. The two versions of +object filenames indicate the encoding: + + OBJECT TYPE PRINTABLE ENCODED + =============== =============== =============== + Index "I..." "J..." + Data "D..." "E..." + Special "S..." "T..." + +Intermediate directories are always "@" or "+" as appropriate. + + +Each object in the cache has an extended attribute label that holds the object +type ID (required to distinguish special objects) and the auxiliary data from +the netfs. The latter is used to detect stale objects in the cache and update +or retire them. 
+ + +Note that CacheFiles will erase from the cache any file it doesn't recognise or +any file of an incorrect type (such as a FIFO file or a device file). + + +========================== +SECURITY MODEL AND SELINUX +========================== + +CacheFiles is implemented to deal properly with the LSM security features of +the Linux kernel and the SELinux facility. + +One of the problems that CacheFiles faces is that it is generally acting on +behalf of a process that is in a security context that is not appropriate for +accessing the cache - either because the files in the cache are inaccessible to +that process, or because if the process creates a file in the cache, it'll be +inaccessible to other processes. + +The way CacheFiles works is to temporarily change the security context (fsuid, +fsgid and actor security label) that the process acts as - without changing the +security context of the process when it is the target of an operation performed by +some other process (so signalling and suchlike still work correctly). + + +When the CacheFiles module is asked to bind to its cache, it: + + (1) Finds the security label attached to the root cache directory and uses + that as the security label with which it will create files. By default, + this is: + + cachefiles_var_t + + (2) Finds the security label of the process which issued the bind request + (presumed to be the cachefilesd daemon), which by default will be: + + cachefilesd_t + + and asks LSM to supply a security ID as which it should act given the + daemon's label. By default, this will be: + + cachefiles_kernel_t + + SELinux transitions the daemon's security ID to the module's security ID + based on a rule of this form in the policy: 
+ + type_transition <daemon's-ID> kernel_t : process <module's-ID>; + + For instance: + + type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t; + + +The module's security ID gives it permission to create, move and remove files +and directories in the cache, to find and access directories and files in the +cache, to set and access extended attributes on cache objects, and to read and +write files in the cache. + +The daemon's security ID gives it only a very restricted set of permissions: it +may scan directories, stat files and erase files and directories. It may +not read or write files in the cache, and so it is precluded from accessing the +data cached therein; nor is it permitted to create new files in the cache. + + +For reference, the policy source files are installed as: + + /usr/share/doc/cachefilesd/cachefilesd.te + /usr/share/doc/cachefilesd/cachefilesd.fc + /usr/share/doc/cachefilesd/cachefilesd.if + +By default, the cache is located in /var/cache/fscache, but if it is desirable +that it should be elsewhere, then either the above policy files must be +altered, or an auxiliary policy must be installed to label the alternate +location of the cache. + +For instructions on how to add an auxiliary policy to enable the cache to be +located elsewhere when SELinux is in enforcing mode, please see: + + /usr/share/doc/cachefilesd/move-cache.txt + +when the cachefilesd RPM is installed; alternatively, the document can be found +in the sources. diff --git a/cachefilesd.8 b/cachefilesd.8 new file mode 100644 index 0000000..3db9f9d --- /dev/null +++ b/cachefilesd.8 @@ -0,0 +1,47 @@ +.\" -*- nroff -*- +.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. +.\" Written by David Howells (dhowells@redhat.com) +.\" +.\" This program is free software; you can redistribute it and/or +.\" modify it under the terms of the GNU General Public License +.\" as published by the Free Software Foundation; either version +.\" 2 of the License, or (at your option) any later version. 
+.\" +.TH cachefilesd 8 "14 November 2006" +.SH NAME +cachefilesd \- CacheFiles userspace management daemon +.SH SYNOPSIS +.B "cachefilesd [-d]* [-s] [-n] [-N] [-p <pidfile>] [-f <configfile>]" +.SH DESCRIPTION +The \fBcachefilesd\fP daemon manages the cache data store that is used by +network filesystems such as AFS and NFS to cache data locally on disk. +.P +The README file should be read before attempting to configure this facility: +.IP +/usr/share/doc/cachefilesd/README +.SH OPTIONS +.TP +.B -d +Increase the debugging level. This option may be given more than once. +.TP +.B -s +Write messages to stderr instead of syslog. +.TP +.B -n +Don't daemonise. +.TP +.B -N +Disable culling and scanning to fill the cull table. +.TP +.BI "-p " <pidfile> +Use an alternate PID file to /var/run/cachefilesd.pid. +.TP +.BI "-f " <configfile> +Read the specified configuration file instead of /etc/cachefilesd.conf. +.TP +.B -v +Print the version and exit. +.SH FILES +.BR /etc/cachefilesd.conf +.SH SEE ALSO +\fBcachefilesd.conf\fR(5), /usr/share/doc/cachefilesd/README +.SH AUTHORS +.br +David Howells <dhowells@redhat.com> diff --git a/cachefilesd.c b/cachefilesd.c new file mode 100644 index 0000000..d4d236f --- /dev/null +++ b/cachefilesd.c @@ -0,0 +1,1650 @@ +/* CacheFiles userspace management daemon + * + * Copyright (C) 2006-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
/*
 * Kind of thing a cache directory entry represents.
 *
 * NOTE(review): the names presumably correspond to the index, data, special
 * and intermediate objects described in the README's "Cache structure"
 * section - confirm against the code that assigns ->type.
 */
typedef enum objtype {
	OBJTYPE_INDEX,
	OBJTYPE_DATA,
	OBJTYPE_SPECIAL,
	OBJTYPE_INTERMEDIATE,
} objtype_t;

/*
 * In-memory record of one object found in the cache.  Records are linked to
 * their parent and to their siblings through the parent/children/next/prev
 * pointers.
 *
 * NOTE(review): name[] is declared with one element; presumably the
 * allocator (create_object(), not visible here) over-allocates so the full
 * name fits - confirm.
 */
struct object {
	struct object	*parent;	/* parent dir of this object (or NULL) */
	struct object	*children;	/* children of this object */
	struct object	*next;		/* next child of parent */
	struct object	*prev;		/* previous child of parent */
	DIR		*dir;		/* this object's directory (or NULL for data obj) */
	ino_t		ino;		/* inode number of this object */
	int		usage;		/* number of users of this object */
	bool		empty;		/* T if directory empty */
	bool		new;		/* T if object new */
	bool		cullable;	/* T if object now cullable */
	objtype_t	type;		/* type of object */
	time_t		atime;		/* last access time on this object */
	char		name[1];	/* name of this object */
};

/* cache root representation: a parentless index object, statically allocated */
static struct object root = {
	.parent		= NULL,
	.usage		= 2,
	.type		= OBJTYPE_INDEX,
};

/* object count; starts at 1, presumably to account for the static root - TODO confirm */
static int nobjects = 1;
static int nopendir;

/* current scan point */
static struct object *scan_cursor;
/* flags set from the SIGALRM, SIGTERM and SIGIO handlers respectively */
static bool scan_signalled, stop_signalled, reap_signalled;

/* ranked order of cullable objects
 * - we have two tables: one we're building and one that's full of ready to be
 *   culled objects
 * - the default size of 4096 can be changed with the "culltable" config command
 */
static unsigned culltable_size = 4096;
object **cullbuild; +static struct object **cullready; + +static unsigned nr_in_build_table; +static unsigned nr_in_ready_table; +static int ncullable; +static bool kernel_wants_cull; +static bool have_nr_releases; +static unsigned long long f_released_since_last_scan; +static unsigned long long b_released_since_last_scan; + + +static const char *configfile = "/etc/cachefilesd.conf"; +static const char *devfile = "/dev/cachefiles"; +static const char *procfile = "/proc/fs/cachefiles"; +static const char *pidfile = "/var/run/cachefilesd.pid"; +static char *cacheroot, *graveyardpath; + +static bool culling_disabled; +static bool xnolog, xopenedlog; +static int xdebug; +static int graveyardfd; +static unsigned long long brun, bcull, bstop, frun, fcull, fstop; +static unsigned long long b_resume_threshold = ULLONG_MAX; +static unsigned long long f_resume_threshold = 5; + +static const gid_t group_list[0]; + +#define cachefd 3 + +static __attribute__((noreturn)) +void version(void) +{ + printf("cachefilesd version " CACHEFILESD_VERSION "\n"); + exit(0); +} + +static __attribute__((noreturn)) +void help(void) +{ + fprintf(stderr, + "Format:\n" + " /sbin/cachefilesd [-d]* [-s] [-n] [-p ] [-f ]\n" + " /sbin/cachefilesd -v\n" + "\n" + "Options:\n" + " -d\tIncrease debugging level (cumulative)\n" + " -n\tDon't daemonise the process\n" + " -s\tMessage output to stderr instead of syslog\n" + " -p \tWrite the PID into the file\n" + " -f \n" + " -v\tPrint version and exit\n" + "\tRead the specified configuration file instead of" + " /etc/cachefiles.conf\n"); + + exit(2); +} + +static __attribute__((noreturn, format(printf, 2, 3))) +void __error(int excode, const char *fmt, ...) 
+{ + va_list va; + + if (xnolog) { + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); + } + else { + if (!xopenedlog) { + openlog("cachefilesd", LOG_PID, LOG_DAEMON); + xopenedlog = true; + } + + va_start(va, fmt); + vsyslog(LOG_ERR, fmt, va); + va_end(va); + + closelog(); + } + + exit(excode); +} + +#define error(FMT,...) __error(3, "Internal error: "FMT"\n" ,##__VA_ARGS__) +#define oserror(FMT,...) __error(1, FMT": errno %d (%m)\n" ,##__VA_ARGS__ ,errno) +#define cfgerror(FMT,...) __error(2, "%s:%d:"FMT"\n", configfile, lineno ,##__VA_ARGS__) +#define opterror(FMT,...) __error(2, FMT"\n" ,##__VA_ARGS__) + +static __attribute__((format(printf, 3, 4))) +void __message(int dlevel, int level, const char *fmt, ...) +{ + va_list va; + + if (dlevel <= xdebug) { + if (xnolog) { + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); + } + else if (!xnolog) { + if (!xopenedlog) { + openlog("cachefilesd", LOG_PID, LOG_DAEMON); + xopenedlog = true; + } + + va_start(va, fmt); + vsyslog(level, fmt, va); + va_end(va); + + closelog(); + } + } +} + +#define info(FMT,...) __message(0, LOG_INFO, FMT"\n" ,##__VA_ARGS__) +#define debug(DL, FMT,...) __message(DL, LOG_DEBUG, FMT"\n" ,##__VA_ARGS__) +#define notice(FMT,...) 
__message(0, LOG_NOTICE, FMT"\n" ,##__VA_ARGS__) + +static void open_cache(void); +static void cachefilesd(void) __attribute__((noreturn)); +static void reap_graveyard(void); +static void reap_graveyard_aux(const char *dirname); +static void read_cache_state(void); +static int is_object_in_use(const char *filename); +static void cull_file(const char *filename); +static void begin_building_cull_table(void); +static bool build_cull_table(void); +static void decant_cull_table(void); +static void insert_into_cull_table(struct object *object); +static void put_object(struct object *object); +static struct object *create_object(struct object *parent, const char *name, struct stat64 *st); +static void destroy_unexpected_object(struct object *parent, struct dirent *de); +static int get_dir_fd(struct object *dir); +static void cull_object(struct object *object); +static void cull_objects(void); + +/*****************************************************************************/ +/* + * termination request + */ +static void sigterm(int sig) +{ + stop_signalled = true; +} + +/*****************************************************************************/ +/* + * the graveyard was populated + */ +static void sigio(int sig) +{ + reap_signalled = true; +} + +/*****************************************************************************/ +/* + * redo scan after a time since the last scan turned up no results + */ +static void sigalrm(int sig) +{ + scan_signalled = true; +} + +/*****************************************************************************/ +/* + * write the PID file + */ +static void write_pidfile(void) +{ + FILE *pf; + + pf = fopen(pidfile, "w"); + if (!pf) + oserror("Unable to open PID file: %s", pidfile); + + if (fprintf(pf, "%d\n", getpid()) < 0 || + fclose(pf) == EOF) + oserror("Unable to write PID file: %s", pidfile); +} + +/*****************************************************************************/ +/* + * start up the cache and go + */ +int main(int argc, 
char *argv[]) +{ + struct stat st; + unsigned lineno; + ssize_t n; + size_t m; + FILE *config; + char *line, *cp; + long page_size; + int _cachefd, nullfd, opt, loop, open_max; + bool nodaemon = false; + + /* handle help request */ + if (argc == 2 && strcmp(argv[1], "--help") == 0) + help(); + + if (argc == 2 && strcmp(argv[1], "--version") == 0) + version(); + + /* parse the arguments */ + while (opt = getopt(argc, argv, "dsnNf:p:v"), + opt != EOF + ) { + switch (opt) { + case 'd': + /* turn on debugging */ + xdebug++; + break; + + case 's': + /* disable syslog writing */ + xnolog = true; + break; + + case 'n': + /* don't daemonise */ + nodaemon = true; + break; + + case 'N': + /* disable culling */ + culling_disabled = true; + break; + + case 'f': + /* use a specific config file */ + configfile = optarg; + break; + + case 'p': + /* use a specific PID file */ + pidfile = optarg; + break; + + case 'v': + /* print the version and exit */ + version(); + + default: + opterror("Unknown commandline option '%c'", optopt); + } + } + + /* read various parameters */ + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) + oserror("Unable to get page size"); + + open_max = sysconf(_SC_OPEN_MAX); + if (open_max < 0) + oserror("Unable to get max open files"); + + /* become owned by root */ + if (setgroups(sizeof(group_list) / sizeof(gid_t), group_list) < 0) + oserror("Unable to clear the supplementary groups"); + + if (setresuid(0, 0, 0) < 0) + oserror("Unable to set UID to 0"); + + if (setresgid(0, 0, 0) < 0) + oserror("Unable to set GID to 0"); + + /* just in case... 
*/ + sync(); + + /* open the devfile or the procfile on fd 3 */ + _cachefd = open(devfile, O_RDWR); + if (_cachefd < 0) { + if (errno != ENOENT) + oserror("Unable to open %s", devfile); + + _cachefd = open(procfile, O_RDWR); + if (_cachefd < 0) { + if (errno == ENOENT) + oserror("Unable to open %s", devfile); + oserror("Unable to open %s", procfile); + } + } + + if (_cachefd != cachefd) { + if (dup2(_cachefd, cachefd) < 0) + oserror("Unable to transfer cache fd to 3"); + if (close(_cachefd) < 0) + oserror("Close of original cache fd failed"); + } + + /* open /dev/null */ + nullfd = open("/dev/null", O_RDWR); + if (nullfd < 0) + oserror("Unable to open /dev/null"); + + /* open the config file */ + config = fopen(configfile, "r"); + if (!config) + oserror("Unable to open %s", configfile); + + /* read the configuration */ + m = 0; + line = NULL; + lineno = 0; + while (n = getline(&line, &m, config), + n != EOF + ) { + lineno++; + + if (n >= page_size) + cfgerror("Line too long"); + + if (memchr(line, 0, n) != 0) + cfgerror("Line contains a NUL character"); + + /* eat blank lines, leading white space and trailing NL */ + cp = strchr(line, '\n'); + if (!cp) + cfgerror("Unterminated line"); + + if (cp == line) + continue; + *cp = '\0'; + + for (cp = line; isspace(*cp); cp++) {;} + + if (!*cp) + continue; + + /* eat full line comments */ + if (*cp == '#') + continue; + + /* allow culling to be disabled */ + if (memcmp(cp, "nocull", 6) == 0 && + (!cp[6] || isspace(cp[6]))) { + culling_disabled = true; + } + + /* note the cull table size command */ + if (memcmp(cp, "culltable", 9) == 0 && isspace(cp[9])) { + unsigned long cts; + char *sp; + + for (sp = cp + 10; isspace(*sp); sp++) {;} + + cts = strtoul(sp, &sp, 10); + if (*sp) + cfgerror("Invalid cull table size number"); + if (cts < 12 || cts > 20) + cfgerror("Log2 of cull table size must be 12 <= N <= 20"); + culltable_size = 1 << cts; + continue; + } + + /* Note the suspension resume released file count thresholds + * 
("-" to disable a threshold). + */ + if (memcmp(cp, "resume_thresholds", 18) == 0 && isspace(cp[18])) { + unsigned long long b_thresh, f_thresh; + char *sp; + + for (sp = cp + 18; isspace(*sp); sp++) {;} + + if (*sp == '-') { + sp++; + b_thresh = ULLONG_MAX; + } else { + b_thresh = strtoul(sp, &sp, 10); + } + + if (!*sp || !isspace(*sp)) + cfgerror("Error parsing resume threshold (blocks)"); + if (b_thresh == 0) + cfgerror("Invalid resume threshold (blocks)"); + for (; isspace(*sp); sp++) {;} + + if (*sp == '-') { + sp++; + f_thresh = ULLONG_MAX; + } else { + f_thresh = strtoul(sp, &sp, 10); + if (*sp) + cfgerror("Error parsing resume threshold (files)"); + if (f_thresh == 0) + cfgerror("Invalid resume threshold (files)"); + } + + b_resume_threshold = b_thresh; + f_resume_threshold = f_thresh; + continue; + } + + /* note the dir command */ + if (memcmp(cp, "dir", 3) == 0 && isspace(cp[3])) { + char *sp; + + for (sp = cp + 4; isspace(*sp); sp++) {;} + + if (strlen(sp) > PATH_MAX - 10) + cfgerror("Cache pathname is too long"); + + if (stat(sp, &st) < 0) + oserror("Can't confirm cache location"); + + cacheroot = strdup(sp); + if (!cacheroot) + oserror("Can't copy cache name"); + } + + /* object to the bind command */ + if (memcmp(cp, "bind", 4) == 0 && + (!cp[4] || isspace(cp[4]))) + cfgerror("'bind' command not permitted"); + + /* pass the config options over to the kernel module */ + if (write(cachefd, line, strlen(line)) < 0) { + if (errno == -ENOMEM || errno == -EIO) + oserror("CacheFiles"); + cfgerror("CacheFiles gave config error: %m"); + } + } + + if (line) + free(line); + + if (!feof(config)) + oserror("Unable to read %s", configfile); + + if (fclose(config) == EOF) + oserror("Unable to close %s", configfile); + + /* allocate the cull tables */ + if (!culling_disabled) { + cullbuild = calloc(culltable_size, sizeof(cullbuild[0])); + if (!cullbuild) + oserror("calloc"); + + cullready = calloc(culltable_size, sizeof(cullready[0])); + if (!cullready) + 
oserror("calloc"); + } + + /* leave stdin, stdout, stderr and cachefd open only */ + if (nullfd != 0) + dup2(nullfd, 0); + if (nullfd != 1) + dup2(nullfd, 1); + + for (loop = 4; loop < open_max; loop++) + close(loop); + + /* set up a connection to syslog whilst we still can (the bind command + * will give us our own namespace with no /dev/log */ + openlog("cachefilesd", LOG_PID, LOG_DAEMON); + xopenedlog = true; + info("About to bind cache"); + + /* now issue the bind command */ + if (write(cachefd, "bind", 4) < 0) + oserror("CacheFiles bind failed"); + + info("Bound cache"); + + /* we now have a live cache - daemonise the process */ + if (!nodaemon) { + if (!xdebug) + dup2(1, 2); + + switch (fork()) { + case -1: + oserror("fork"); + + case 0: + if (xdebug) + fprintf(stderr, "Daemon PID %d\n", getpid()); + + signal(SIGTTIN, SIG_IGN); + signal(SIGTTOU, SIG_IGN); + signal(SIGTSTP, SIG_IGN); + setsid(); + write_pidfile(); + cachefilesd(); + + default: + break; + } + } + else { + cachefilesd(); + } + + exit(0); +} + +/*****************************************************************************/ +/* + * open the cache directories + */ +static void open_cache(void) +{ + struct statfs sfs; + char buffer[PATH_MAX + 1]; + + /* open the cache directory so we can scan it */ + snprintf(buffer, PATH_MAX, "%s/cache", cacheroot); + + root.dir = opendir(buffer); + if (!root.dir) + oserror("Unable to open cache directory"); + nopendir++; + + /* open the graveyard so we can set a notification on it */ + if (asprintf(&graveyardpath, "%s/graveyard", cacheroot) < 0) + oserror("Unable to copy graveyard name"); + + graveyardfd = open(graveyardpath, O_DIRECTORY); + if (graveyardfd < 0) + oserror("Unable to open graveyard directory"); + + if (fstatfs(graveyardfd, &sfs) < 0) + oserror("Unable to stat cache filesystem"); + + if (sfs.f_bsize + 1 == 0 || + sfs.f_blocks + 1 == 0 || + sfs.f_bfree + 1 == 0 || + sfs.f_bavail + 1 == 0) + error("Backing filesystem returns unusable statistics 
through fstatfs()"); +} + +/*****************************************************************************/ +/* + * manage the cache + */ +static void cachefilesd(void) +{ + sigset_t sigs, osigs; + bool scanning_suspended = false; + bool scan_in_progress = false; + + struct pollfd pollfds[1] = { + [0] = { + .fd = cachefd, + .events = POLLIN, + }, + }; + + notice("Daemon Started"); + + /* open the cache directories */ + open_cache(); + + /* We need to be able to disable signals that we need to check for + * before calling poll so that we don't race and miss something. + */ + sigemptyset(&sigs); + sigaddset(&sigs, SIGIO); + sigaddset(&sigs, SIGINT); + sigaddset(&sigs, SIGTERM); + sigaddset(&sigs, SIGALRM); + + signal(SIGTERM, sigterm); + signal(SIGINT, sigterm); + + /* check the graveyard for graves */ + reap_graveyard(); + + while (!stop_signalled) { + bool do_cull = false; + + debug(3, "Loop %sbuild=%d ready=%d susp=%u scan=%u", + culling_disabled ? "NOCULL " : "", + nr_in_build_table, nr_in_ready_table, + scanning_suspended, scan_in_progress); + + read_cache_state(); + + if (!culling_disabled) { + /* Determine if we're going to need to start a new scan + * to refill the cull table. We want to do this if the + * secondary cull table is less than half full - but + * overriding that, we don't want to do this if we know + * there's insufficient cullables to make it worth + * while. 
+ */ + if (!scan_in_progress) { + bool begin_scan = false; + + debug(1, "Consider scan %d/%d", + nr_in_build_table, culltable_size / 2); + + if (nr_in_build_table < culltable_size / 2) { + debug(1, "Want to scan"); + begin_scan = true; + } + + if (begin_scan && scanning_suspended) { + debug(1, "Scanning suspended"); + if (have_nr_releases) { + if (f_released_since_last_scan < + f_resume_threshold && + b_released_since_last_scan < + b_resume_threshold) + begin_scan = false; + } else { + begin_scan = scan_signalled; + } + } + + if (begin_scan) { + debug(1, "Beginning a scan"); + begin_building_cull_table(); + scan_in_progress = true; + scanning_suspended = false; + scan_signalled = false; + f_released_since_last_scan = 0; + b_released_since_last_scan = 0; + } + } + + /* Determine if there's anything we can actually cull yet if + * the kernel is calling for space. + */ + if (kernel_wants_cull) { + debug(1, "Want to cull"); + if (nr_in_ready_table > 0) + do_cull = true; + } + } + + /* We block the signals across the checks for reap, cull and + * scan initiation before polling so that we sleep without + * racing against the signal handlers. + */ + if (!scan_in_progress && !reap_signalled && !do_cull) { + if (sigprocmask(SIG_BLOCK, &sigs, &osigs) < 0) + oserror("Unable to block signals"); + + if (!reap_signalled && + !stop_signalled && + !scan_signalled) { + debug(1, "Poll"); + if (ppoll(pollfds, 1, NULL, &osigs) < 0 && + errno != EINTR) + oserror("Unable to suspend process"); + } + + if (sigprocmask(SIG_UNBLOCK, &sigs, NULL) < 0) + oserror("Unable to unblock signals"); + continue; + } + + if (!culling_disabled) { + if (do_cull) + cull_objects(); + + if (scan_in_progress) { + scan_in_progress = build_cull_table(); + if (!scan_in_progress) { + /* Scan complete. + * + * If the scan didn't produce a full + * table then don't repeat the scan + * until something gets released by the + * kernel. 
+ */ + if (nr_in_build_table < culltable_size) { + debug(1, "Suspend scanning"); + scanning_suspended = true; + if (!have_nr_releases) { + signal(SIGALRM, sigalrm); + alarm(30); + } + } + } + } + + if (!scan_in_progress) { + if (nr_in_ready_table <= culltable_size / 2 + 2 && + nr_in_build_table > 0) { + debug(1, "Decant"); + decant_cull_table(); + } + } + } + + if (reap_signalled) + reap_graveyard(); + } + + notice("Daemon Terminated"); + exit(0); +} + +/*****************************************************************************/ +/* + * check the graveyard directory for graves to delete + */ +static void reap_graveyard(void) +{ + /* set a one-shot notification to catch more graves appearing */ + reap_signalled = false; + signal(SIGIO, sigio); + if (fcntl(graveyardfd, F_NOTIFY, DN_CREATE) < 0) + oserror("unable to set notification on graveyard"); + + reap_graveyard_aux(graveyardpath); +} + +/*****************************************************************************/ +/* + * recursively remove dead stuff from the graveyard + */ +static void reap_graveyard_aux(const char *dirname) +{ + struct dirent *de; + size_t len; + char name[NAME_MAX + 1]; + bool deleted; + DIR *dir; + + if (chdir(dirname) < 0) + oserror("chdir failed"); + + dir = opendir("."); + if (!dir) + oserror("Unable to open grave dir %s", dirname); + + do { + /* removing directory entries may cause us to skip when reading + * them */ + rewinddir(dir); + deleted = false; + + while (errno = 0, + de = readdir(dir), + de != NULL + ) { + /* ignore "." and ".." */ + if (de->d_name[0] == '.') { + if (de->d_name[1] == '\0') + continue; + if (de->d_name[1] == '.' 
|| + de->d_name[1] == '\0') + continue; + } + + deleted = true; + + /* attempt to unlink non-directory files */ + if (de->d_type != DT_DIR) { + debug(1, "unlink %s", de->d_name); + if (unlink(de->d_name) == 0) + continue; + if (errno != EISDIR) + oserror("Unable to unlink file %s", + de->d_name); + } + + /* recurse into directories */ + len = strlen(de->d_name) + 1; + if (len > sizeof(name)) { + errno = ENAMETOOLONG; + oserror("Name longer than NAME_MAX \"%s\"", + de->d_name); + } + memcpy(name, de->d_name, len); + + reap_graveyard_aux(name); + + /* which we then attempt to remove */ + debug(1, "rmdir %s", name); + if (rmdir(name) < 0) + oserror("Unable to remove dir %s", name); + } + + if (errno != 0) + oserror("Unable to read dir %s", dirname); + } while (deleted); + + closedir(dir); + + if (chdir("..") < 0) + oserror("Unable to chdir to .."); +} + +/*****************************************************************************/ +/* + * read the cache state + */ +static void read_cache_state(void) +{ + char buffer[4096 + 1], *tok, *next, *arg; + int n; + + n = read(cachefd, buffer, sizeof(buffer) - 1); + if (n < 0) + oserror("Unable to read cache state"); + buffer[n] = '\0'; + + debug(3, "KERNEL: %s", buffer); + + tok = buffer; + do { + next = strpbrk(tok, " \t"); + if (next) + *next++ = '\0'; + + arg = strchr(tok, '='); + if (arg) { + *arg++ = '\0'; + } else { + debug(0, "Warning: malformed output from kernel, missing arg to [%s]", tok); + continue; + } + + if (strcmp(tok, "cull") == 0) { + kernel_wants_cull = (strtoul(arg, NULL, 0) != 0); + } else if (strcmp(tok, "brun") == 0) { + brun = strtoull(arg, NULL, 16); + } else if (strcmp(tok, "bcull") == 0) { + bcull = strtoull(arg, NULL, 16); + } else if (strcmp(tok, "bstop") == 0) { + bstop = strtoull(arg, NULL, 16); + } else if (strcmp(tok, "frun") == 0) { + frun = strtoull(arg, NULL, 16); + } else if (strcmp(tok, "fcull") == 0) { + fcull = strtoull(arg, NULL, 16); + } else if (strcmp(tok, "fstop") == 0) { + fstop 
= strtoull(arg, NULL, 16); + } else if (strcmp(tok, "breleased") == 0) { + b_released_since_last_scan += strtoull(arg, NULL, 16); + have_nr_releases = true; + } else if (strcmp(tok, "freleased") == 0) { + f_released_since_last_scan += strtoull(arg, NULL, 16); + have_nr_releases = true; + } + + } while ((tok = next)); +} + +/*****************************************************************************/ +/* + * find out if an object in the current working directory is in use + */ +static int is_object_in_use(const char *filename) +{ + char buffer[NAME_MAX + 30]; + int ret, n; + + n = sprintf(buffer, "inuse %s", filename); + + /* command the module */ + ret = write(cachefd, buffer, n); + if (ret < 0 && errno != ESTALE && errno != ENOENT && errno != EBUSY) + oserror("Failed to check object's in-use state"); + + return ret < 0 && errno == EBUSY ? 1 : 0; +} + +/*****************************************************************************/ +/* + * cull a file representing an object in the current working directory + * - requests CacheFiles rename the object "/filename" to the graveyard + */ +static void cull_file(const char *filename) +{ + char buffer[NAME_MAX + 30]; + int ret, n; + + n = sprintf(buffer, "cull %s", filename); + + /* command the module */ + ret = write(cachefd, buffer, n); + if (ret < 0 && errno != ESTALE && errno != ENOENT && errno != EBUSY) + oserror("Failed to cull object"); +} + +/*****************************************************************************/ +/* + * create an object from a name and stat details and attach to the parent, if + * it doesn't already exist + */ +static struct object *create_object(struct object *parent, + const char *name, + struct stat64 *st) +{ + struct object *object, *p, *pr; + int len; + + /* see if the parent object already holds a representation of this + * one */ + pr = NULL; + for (p = parent->children; p; pr = p, p = p->next) { + if (p->ino <= st->st_ino) { + if (p->ino == st->st_ino) { + /* it does */ + 
p->usage++; + return p; + } + + break; + } + } + + /* allocate the object + * - note that struct object reserves space for NUL directly + */ + len = strlen(name); + + object = calloc(1, sizeof(struct object) + len); + if (!object) + oserror("Unable to alloc object"); + + object->usage = 1; + object->new = true; + + object->ino = st->st_ino; + object->atime = st->st_atime; + memcpy(object->name, name, len + 1); + + switch (object->name[0]) { + case 'I': + case 'J': + object->type = OBJTYPE_INDEX; + break; + case 'D': + case 'E': + object->type = OBJTYPE_DATA; + break; + case 'S': + case 'T': + object->type = OBJTYPE_SPECIAL; + break; + case '+': + case '@': + object->type = OBJTYPE_INTERMEDIATE; + break; + default: + error("Unexpected file type '%c'", object->name[0]); + } + + /* link into the parent's list */ + parent->usage++; + object->parent = parent; + object->prev = pr; + object->next = p; + if (pr) + pr->next = object; + else + parent->children = object; + if (p) + p->prev = object; + + nobjects++; + return object; +} + +/*****************************************************************************/ +/* + * free up an object, unlinking it from its parent + */ +static void put_object(struct object *object) +{ + struct object *parent; + + if (--object->usage > 0) + return; + + nobjects--; + + if (object->cullable) + ncullable--; + + /* destroy the object */ + if (object == &root) + error("Can't destroy root object representation"); + + if (object->children) + error("Destroying object with children: '%s'", object->name); + + if (object->dir) { + closedir(object->dir); + nopendir--; + } + + if (object->prev) + object->prev->next = object->next; + else + object->parent->children = object->next; + + if (object->next) + object->next->prev = object->prev; + + parent = object->parent; + + memset(object, 0x6d, sizeof(struct object)); + free(object); + + if (parent) + put_object(parent); +} + 
+/*****************************************************************************/ +/* + * destroy an unexpected object + */ +static void destroy_unexpected_object(struct object *parent, struct dirent *de) +{ + static unsigned uniquifier; + struct timeval tv; + char namebuf[40]; + int fd; + + fd = dirfd(parent->dir); + + if (de->d_type != DT_DIR) { + if (unlinkat(fd, de->d_name, 0) < 0 && + errno != ENOENT) + oserror("Unable to unlink unexpectedly named file: %s", + de->d_name); + } + else { + gettimeofday(&tv, NULL); + sprintf(namebuf, "x%lxx%xx", tv.tv_sec, uniquifier++); + + if (renameat(fd, de->d_name, graveyardfd, namebuf) < 0 && + errno != ENOENT) + oserror("Unable to rename unexpectedly named file: %s", + de->d_name); + } +} + +/*****************************************************************************/ +/* + * insert an object into the cull table if its old enough + */ +static void insert_into_cull_table(struct object *object) +{ + int y, o, m; + + if (!object) + error("NULL object pointer"); + + /* just insert if table is empty */ + if (nr_in_build_table == 0) { + object->usage++; + cullbuild[0] = object; + nr_in_build_table++; + return; + } + + /* insert somewhere if table is not full */ + if (nr_in_build_table < culltable_size) { + object->usage++; + + /* just insert at end if new oldest object */ + if (object->atime <= cullbuild[nr_in_build_table - 1]->atime) { + cullbuild[nr_in_build_table] = object; + nr_in_build_table++; + return; + } + + /* insert at front if new newest object */ + if (object->atime > cullbuild[0]->atime) { + memmove(&cullbuild[1], + &cullbuild[0], + nr_in_build_table * sizeof(cullbuild[0])); + + cullbuild[0] = object; + nr_in_build_table++; + return; + } + + /* if only two objects in list then insert between them */ + if (nr_in_build_table == 2) { + cullbuild[2] = cullbuild[1]; + cullbuild[1] = object; + nr_in_build_table++; + return; + } + + /* insert somewhere in between front and back elements + * of a three-plus object list + 
* - oldest_build == #objects_currently_in_list + */ + y = 1; + o = nr_in_build_table - 1; + + do { + m = (y + o) / 2; + + if (object->atime > cullbuild[m]->atime) + o = m; + else + y = m + 1; + + } while (y < o); + + memmove(&cullbuild[y + 1], + &cullbuild[y], + (nr_in_build_table - y) * sizeof(cullbuild[0])); + + cullbuild[y] = object; + nr_in_build_table++; + return; + } + + /* if table is full then insert only if older than newest */ + if (nr_in_build_table > culltable_size) + error("Cull table overfull"); + + if (object->atime >= cullbuild[0]->atime) + return; + + /* newest object in table will be displaced by this one */ + put_object(cullbuild[0]); + cullbuild[0] = (void *)(0x6b000000 | __LINE__); + object->usage++; + + /* place directly in first slot if second is older */ + if (object->atime >= cullbuild[1]->atime) { + cullbuild[0] = object; + return; + } + + /* shift everything up one if older than oldest */ + if (object->atime <= cullbuild[culltable_size - 1]->atime) { + memmove(&cullbuild[0], + &cullbuild[1], + (culltable_size - 1) * sizeof(cullbuild[0])); + + cullbuild[culltable_size - 1] = object; + return; + } + + /* search the table to find the insertion point + * - it will be between the first and last the slots + * - we know second is younger + */ + cullbuild[0] = cullbuild[1]; + + y = 2; + o = culltable_size - 1; + + do { + m = (y + o) / 2; + + if (object->atime >= cullbuild[m]->atime) + o = m; + else + y = m + 1; + + } while (y < o); + + if (y == 2) { + cullbuild[1] = object; + return; + } + + memmove(&cullbuild[1], + &cullbuild[2], + (y - 2) * sizeof(cullbuild[0])); + + cullbuild[y - 1] = object; +} + +/*****************************************************************************/ +/* + * Begin a scan to build a cull table. 
+ */ +static void begin_building_cull_table(void) +{ + debug(1, "Refilling cull table"); + root.usage++; + scan_cursor = &root; +} + +/*****************************************************************************/ +/* + * Do the next step in building up the cull table. Returns false upon + * completion of a scan. + */ +static bool build_cull_table(void) +{ + struct dirent *de; + struct object *curr, *child; + struct stat64 st; + unsigned loop; + int fd; + + curr = scan_cursor; + + if (!curr->dir) { + curr->empty = true; + + fd = openat(dirfd(curr->parent->dir), curr->name, O_DIRECTORY); + if (fd < 0) { + if (errno != ENOENT) + oserror("Failed to open directory"); + goto dir_read_complete; + } + + curr->dir = fdopendir(fd); + if (!curr->dir) + oserror("Failed to open directory"); + + nopendir++; + } + + debug(2, "--> build_cull_table({%s})", curr->name); + + if (fchdir(dirfd(curr->dir)) < 0) + oserror("Failed to change current directory"); + +next: + /* read the next directory entry */ + errno = 0; + de = readdir(curr->dir); + if (!de) { + if (errno == 0 || errno == ENOENT) + goto dir_read_complete; + oserror("Unable to read directory"); + } + + if (de->d_name[0] == '.') { + if (!de->d_name[1] || + (de->d_name[1] == '.' 
&& !de->d_name[2])) + goto next; + } + + debug(2, "readdir '%s'", de->d_name); + + switch (de->d_type) { + case DT_UNKNOWN: + case DT_DIR: + case DT_REG: + break; + default: + oserror("readdir returned unsupported type %d", de->d_type); + } + + /* delete any funny looking files */ + if (memchr("IDSJET+@", de->d_name[0], 8) == NULL) + goto found_unexpected_object; + + /* see if this object is already known to us */ + if (fstatat64(dirfd(curr->dir), de->d_name, &st, 0) < 0) { + if (errno == ENOENT) + goto next; + oserror("Failed to stat directory"); + } + + if (!S_ISDIR(st.st_mode) && + (!S_ISREG(st.st_mode) || + de->d_name[0] == 'I' || + de->d_name[0] == 'J' || + de->d_name[0] == '@' || + de->d_name[0] == '+')) + goto found_unexpected_object; + + /* create a representation for this object */ + child = create_object(curr, de->d_name, &st); + if (!child && errno == ENOENT) + goto next; + + curr->empty = false; + + if (!child) + oserror("Unable to create object"); + + /* we consider culling objects at the transition from index object to + * non-index object */ + switch (child->type) { + case OBJTYPE_DATA: + case OBJTYPE_SPECIAL: + if (!child->new) { + /* the child appears to have been retained in the + * culling table already, so we see if it should be + * removed therefrom + */ + debug(2, "- old child"); + + if (st.st_atime <= child->atime) { + /* file on disk hasn't been touched */ + put_object(child); + goto next; + } + + for (loop = 0; loop < nr_in_ready_table; loop++) + if (cullready[loop] == child) + break; + + /* compare against loop + 1 rather than count - 1 so that an + * empty table (count 0) cannot wrap around and send us into + * the memmove below with a bogus length + */ + if (loop + 1 == nr_in_ready_table) { + /* child was oldest object */ + cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__); + put_object(child); + goto removed; + } + else if (loop + 1 < nr_in_ready_table) { + /* child was somewhere in between */ + memmove(&cullready[loop], + &cullready[loop + 1], + (nr_in_ready_table - (loop + 1)) * sizeof(cullready[0])); + cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__); + put_object(child);
+ goto removed; + } + + for (loop = 0; loop < nr_in_build_table; loop++) + if (cullbuild[loop] == child) + break; + + /* compare against loop + 1 rather than count - 1 so that an + * empty table (count 0) cannot wrap around and send us into + * the memmove below with a bogus length + */ + if (loop + 1 == nr_in_build_table) { + /* child was oldest object */ + cullbuild[--nr_in_build_table] = (void *)(0x6b000000 | __LINE__); + put_object(child); + } + else if (loop + 1 < nr_in_build_table) { + /* child was somewhere in between */ + memmove(&cullbuild[loop], + &cullbuild[loop + 1], + (nr_in_build_table - (loop + 1)) * sizeof(cullbuild[0])); + cullbuild[--nr_in_build_table] = (void *)(0x6b000000 | __LINE__); + put_object(child); + } + + removed: + ; + } + + /* add objects that aren't in use to the cull table */ + if (!is_object_in_use(de->d_name)) { + debug(2, "- insert"); + child->new = false; + insert_into_cull_table(child); + } + put_object(child); + goto next; + + /* investigate all index and index-intermediate directories */ + case OBJTYPE_INDEX: + case OBJTYPE_INTERMEDIATE: + debug(2, "- descend"); + + child->new = false; + scan_cursor = child; + + debug(2, "<-- build_cull_table({%s})", curr->name); + return true; + + default: + error("Unexpected type"); + } + + /* we've finished reading a directory - see if we can cull it */ +dir_read_complete: + debug(2, "dir_read_complete: u=%d e=%d %s", + curr->usage, curr->empty, curr->name); + + if (curr->dir) { + if (curr != &root) { + closedir(curr->dir); + curr->dir = NULL; + nopendir--; + } + else { + rewinddir(curr->dir); + } + } + + if (curr->usage == 1 && curr->empty) { + /* attempt to cull unpinned empty intermediate and index + * objects */ + if (fchdir(dirfd(curr->parent->dir)) < 0) + oserror("Failed to change current directory"); + + switch (curr->type) { + case OBJTYPE_INDEX: + cull_file(curr->name); + break; + + case OBJTYPE_INTERMEDIATE: + unlinkat(dirfd(curr->parent->dir), curr->name, + AT_REMOVEDIR); + break; + + default: + break; + } + } + + scan_cursor = curr->parent; + if (!scan_cursor) + debug(1, "Scan complete"); + + debug(2, "<-- build_cull_table({%s})", curr->name); +
put_object(curr); + return scan_cursor != NULL; + + /* delete unexpected objects that we've found */ +found_unexpected_object: + debug(2, "found_unexpected_object"); + + destroy_unexpected_object(curr, de); + goto next; +} + +/*****************************************************************************/ +/* + * decant cull entries from the build table to the ready table and enable them + */ +static void decant_cull_table(void) +{ + unsigned loop, avail, copy, leave, space, n; + + if (scan_cursor) + error("Can't decant cull table whilst scanning"); + + /* mark the new entries cullable */ + for (loop = 0; loop < nr_in_build_table; loop++) { + if (!cullbuild[loop]->cullable) { + cullbuild[loop]->cullable = true; + ncullable++; + } + } + + /* if the ready table is empty, copy the whole lot across */ + if (nr_in_ready_table == 0) { + copy = nr_in_build_table; + + debug(1, "Decant (all %d)", copy); + + n = copy * sizeof(cullready[0]); + memcpy(cullready, cullbuild, n); + memset(cullbuild, 0x6e, n); + nr_in_ready_table = nr_in_build_table; + nr_in_build_table = 0; + goto check; + } + + /* decant some of the build table if there's space */ + if (culltable_size < nr_in_ready_table) + error("Less than zero space in ready table"); + space = culltable_size - nr_in_ready_table; + if (space == 0) + goto check; + + /* work out how much of the build table we can copy */ + copy = avail = nr_in_build_table; + if (copy > space) + copy = space; + leave = avail - copy; + + debug(1, "Decant (%u/%u to %u)", copy, avail, space); + + /* make a hole in the ready table transfer "copy" elements from the end + * of cullbuild (oldest) to the beginning of cullready (youngest) + */ + memmove(&cullready[copy], &cullready[0], nr_in_ready_table * sizeof(cullready[0])); + nr_in_ready_table += copy; + + memcpy(&cullready[0], &cullbuild[leave], copy * sizeof(cullready[0])); + memset(&cullbuild[leave], 0x6b, copy * sizeof(cullbuild[0])); + nr_in_build_table = leave; + + if (copy + leave > 
culltable_size) + error("Scan table exceeded (%d+%d)", copy, leave); + +check: + for (loop = 0; loop < nr_in_ready_table; loop++) + if (((long)cullready[loop] & 0xf0000000) == 0x60000000) + abort(); +} + +/*****************************************************************************/ +/* + * get the directory handle for the given directory + */ +static int get_dir_fd(struct object *dir) +{ + int parentfd, fd; + + debug(1, "get_dir_fd(%s)", dir->name); + + if (dir->dir) { + fd = dup(dirfd(dir->dir)); + if (fd < 0) + oserror("Failed to dup fd"); + debug(1, "cache fd to %d", fd); + return fd; + } + + parentfd = get_dir_fd(dir->parent); + + fd = openat(parentfd, dir->name, O_DIRECTORY); + if (fd < 0 && errno != ENOENT) + oserror("Failed to open directory"); + + /* return parent fd or -1 if ENOENT */ + debug(1, "<%d>/%s to %d", parentfd, dir->name, fd); + close(parentfd); + return fd; +} + +/*****************************************************************************/ +/* + * cull an object + */ +static void cull_object(struct object *object) +{ + struct stat64 st; + int dirfd; + + debug(1, "CULL %s", object->name); + + dirfd = get_dir_fd(object->parent); + if (dirfd >= 0) { + if (fstatat64(dirfd, object->name, &st, 0) < 0) { + if (errno != ENOENT) + oserror("Failed to re-stat object"); + + close(dirfd); + goto object_already_gone; + } + + if (fchdir(dirfd) < 0) + oserror("Failed to change current directory"); + if (object->atime >= st.st_atime) + cull_file(object->name); + + close(dirfd); + } + +object_already_gone: + put_object(object); +} + +/*****************************************************************************/ +/* + * consider starting a cull + */ +static void cull_objects(void) +{ + if (ncullable <= 0) + error("Cullable object count is inconsistent"); + + if (cullready[nr_in_ready_table - 1]->cullable) { + cull_object(cullready[nr_in_ready_table - 1]); + cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__); + } +} diff --git a/cachefilesd.conf 
b/cachefilesd.conf new file mode 100644 index 0000000..6905281 --- /dev/null +++ b/cachefilesd.conf @@ -0,0 +1,24 @@ +############################################################################### +# +# Copyright (C) 2006,2010 Red Hat, Inc. All Rights Reserved. +# Written by David Howells (dhowells@redhat.com) +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. +# +############################################################################### + +dir /var/cache/fscache +tag mycache +brun 10% +bcull 7% +bstop 3% +frun 10% +fcull 7% +fstop 3% + +# Assuming you're using SELinux with the default security policy included in +# this package +secctx system_u:system_r:cachefiles_kernel_t:s0 diff --git a/cachefilesd.conf.5 b/cachefilesd.conf.5 new file mode 100644 index 0000000..b108bdc --- /dev/null +++ b/cachefilesd.conf.5 @@ -0,0 +1,187 @@ +.\" -*- nroff -*- +.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. +.\" Written by David Howells (dhowells@redhat.com) +.\" +.\" This program is free software; you can redistribute it and/or +.\" modify it under the terms of the GNU General Public License +.\" as published by the Free Software Foundation; either version +.\" 2 of the License, or (at your option) any later version. +.\" +.TH CACHEFILESD.CONF 5 "14 November 2005" Linux "Cache Files Utilities" +.SH NAME +/etc/cachefilesd.conf \- Local file caching configuration file +.SH SYNOPSIS +.P +The configuration file for cachefilesd which can manage a persistent cache for +a variety of network filesystems using a set of files on an already mounted +filesystem as the data store. +.SH DESCRIPTION +.P +This configuration file can contain a number of commands. Each one should be +on a separate line. 
Blank lines and lines beginning with a '#' character are +considered to be comments and are discarded. +.P +The only mandatory command is: +.TP +.B dir +This command specifies the directory containing the root of the cache. It may +only be specified once per configuration file. +.P +All the other commands are optional: +.TP +.B secctx