/* CacheFiles userspace management daemon * * Copyright (C) 2006-2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * * Configuration file goes in /etc/cachefiles.conf and is of the form: * * dir /var/cache/fscache * tag mycache * brun 10% * bcull 7% * bstop 3% * frun 10% * fcull 7% * fstop 3% * * Only "dir" is mandatory * Blank lines and lines beginning with a hash are comments * Trailing spaces are significant * There is no character escaping mechanism * NUL characters are cause for error */ #define CACHEFILESD_VERSION "0.10.10" #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef enum objtype { OBJTYPE_INDEX, OBJTYPE_DATA, OBJTYPE_SPECIAL, OBJTYPE_INTERMEDIATE, } objtype_t; struct object { struct object *parent; /* parent dir of this object (or NULL) */ struct object *children; /* children of this object */ struct object *next; /* next child of parent */ struct object *prev; /* previous child of parent */ DIR *dir; /* this object's directory (or NULL for data obj) */ ino_t ino; /* inode number of this object */ int usage; /* number of users of this object */ bool empty; /* T if directory empty */ bool new; /* T if object new */ bool cullable; /* T if object now cullable */ objtype_t type; /* type of object */ time_t atime; /* last access time on this object */ char name[1]; /* name of this object */ }; /* cache root representation */ static struct object root = { .parent = NULL, .usage = 2, .type = OBJTYPE_INDEX, }; static int nobjects = 1; static int nopendir; /* current scan point */ static struct object *scan_cursor; static bool scan_signalled, stop_signalled, reap_signalled; /* ranked order of cullable objects * - we have two tables: one we're building and one that's full of ready to be * culled objects */ static unsigned culltable_size = 4096; static struct object **cullbuild; static struct object **cullready; static unsigned nr_in_build_table; static unsigned nr_in_ready_table; static int ncullable; static bool kernel_wants_cull; static bool have_nr_releases; static unsigned long long f_released_since_last_scan; static unsigned long long b_released_since_last_scan; static const char *configfile = "/etc/cachefilesd.conf"; static const char *devfile = "/dev/cachefiles"; static const char *procfile = "/proc/fs/cachefiles"; static const char *pidfile = "/var/run/cachefilesd.pid"; static char *cacheroot, *graveyardpath; static bool culling_disabled; static bool xnolog, xopenedlog; static int xdebug; static int graveyardfd; static unsigned long long brun, bcull, bstop, frun, fcull, fstop; static unsigned long long b_resume_threshold = ULLONG_MAX; static unsigned long long f_resume_threshold = 5; static const gid_t group_list[0]; #define cachefd 3 static __attribute__((noreturn)) void version(void) { printf("cachefilesd version " CACHEFILESD_VERSION "\n"); exit(0); } static __attribute__((noreturn)) void help(void) { fprintf(stderr, "Format:\n" " /sbin/cachefilesd [-d]* [-s] [-n] [-p ] [-f ]\n" " /sbin/cachefilesd -v\n" "\n" "Options:\n" " -d\tIncrease debugging level (cumulative)\n" " -n\tDon't daemonise the process\n" " -s\tMessage output to stderr instead of syslog\n" " -p \tWrite the PID into the file\n" " -f \n" " -v\tPrint version and exit\n" "\tRead the specified configuration file instead of" " /etc/cachefiles.conf\n"); exit(2); } static __attribute__((noreturn, format(printf, 2, 3))) void __error(int excode, const char *fmt, ...) { va_list va; if (xnolog) { va_start(va, fmt); vfprintf(stderr, fmt, va); va_end(va); } else { if (!xopenedlog) { openlog("cachefilesd", LOG_PID, LOG_DAEMON); xopenedlog = true; } va_start(va, fmt); vsyslog(LOG_ERR, fmt, va); va_end(va); closelog(); } exit(excode); } #define error(FMT,...) __error(3, "Internal error: "FMT"\n" ,##__VA_ARGS__) #define oserror(FMT,...) __error(1, FMT": errno %d (%m)\n" ,##__VA_ARGS__ ,errno) #define cfgerror(FMT,...) __error(2, "%s:%d:"FMT"\n", configfile, lineno ,##__VA_ARGS__) #define opterror(FMT,...) __error(2, FMT"\n" ,##__VA_ARGS__) static __attribute__((format(printf, 3, 4))) void __message(int dlevel, int level, const char *fmt, ...) { va_list va; if (dlevel <= xdebug) { if (xnolog) { va_start(va, fmt); vfprintf(stderr, fmt, va); va_end(va); } else if (!xnolog) { if (!xopenedlog) { openlog("cachefilesd", LOG_PID, LOG_DAEMON); xopenedlog = true; } va_start(va, fmt); vsyslog(level, fmt, va); va_end(va); closelog(); } } } #define info(FMT,...) __message(0, LOG_INFO, FMT"\n" ,##__VA_ARGS__) #define debug(DL, FMT,...) __message(DL, LOG_DEBUG, FMT"\n" ,##__VA_ARGS__) #define notice(FMT,...) __message(0, LOG_NOTICE, FMT"\n" ,##__VA_ARGS__) static void open_cache(void); static void cachefilesd(void) __attribute__((noreturn)); static void reap_graveyard(void); static void reap_graveyard_aux(const char *dirname); static void read_cache_state(void); static int is_object_in_use(const char *filename); static void cull_file(const char *filename); static void begin_building_cull_table(void); static bool build_cull_table(void); static void decant_cull_table(void); static void insert_into_cull_table(struct object *object); static void put_object(struct object *object); static struct object *create_object(struct object *parent, const char *name, struct stat64 *st); static void destroy_unexpected_object(struct object *parent, struct dirent *de); static int get_dir_fd(struct object *dir); static void cull_object(struct object *object); static void cull_objects(void); /*****************************************************************************/ /* * termination request */ static void sigterm(int sig) { stop_signalled = true; } /*****************************************************************************/ /* * the graveyard was populated */ static void sigio(int sig) { reap_signalled = true; } /*****************************************************************************/ /* * redo scan after a time since the last scan turned up no results */ static void sigalrm(int sig) { scan_signalled = true; } /*****************************************************************************/ /* * write the PID file */ static void write_pidfile(void) { FILE *pf; pf = fopen(pidfile, "w"); if (!pf) oserror("Unable to open PID file: %s", pidfile); if (fprintf(pf, "%d\n", getpid()) < 0 || fclose(pf) == EOF) oserror("Unable to write PID file: %s", pidfile); } /*****************************************************************************/ /* * start up the cache and go */ int main(int argc, char *argv[]) { struct stat st; unsigned lineno; ssize_t n; size_t m; FILE *config; char *line, *cp; long page_size; int _cachefd, nullfd, opt, loop, open_max; bool nodaemon = false; /* handle help request */ if (argc == 2 && strcmp(argv[1], "--help") == 0) help(); if (argc == 2 && strcmp(argv[1], "--version") == 0) version(); /* parse the arguments */ while (opt = getopt(argc, argv, "dsnNf:p:v"), opt != EOF ) { switch (opt) { case 'd': /* turn on debugging */ xdebug++; break; case 's': /* disable syslog writing */ xnolog = true; break; case 'n': /* don't daemonise */ nodaemon = true; break; case 'N': /* disable culling */ culling_disabled = true; break; case 'f': /* use a specific config file */ configfile = optarg; break; case 'p': /* use a specific PID file */ pidfile = optarg; break; case 'v': /* print the version and exit */ version(); default: opterror("Unknown commandline option '%c'", optopt); } } /* read various parameters */ page_size = sysconf(_SC_PAGESIZE); if (page_size < 0) oserror("Unable to get page size"); open_max = sysconf(_SC_OPEN_MAX); if (open_max < 0) oserror("Unable to get max open files"); /* become owned by root */ if (setgroups(sizeof(group_list) / sizeof(gid_t), group_list) < 0) oserror("Unable to clear the supplementary groups"); if (setresuid(0, 0, 0) < 0) oserror("Unable to set UID to 0"); if (setresgid(0, 0, 0) < 0) oserror("Unable to set GID to 0"); /* just in case... */ sync(); /* open the devfile or the procfile on fd 3 */ _cachefd = open(devfile, O_RDWR); if (_cachefd < 0) { if (errno != ENOENT) oserror("Unable to open %s", devfile); _cachefd = open(procfile, O_RDWR); if (_cachefd < 0) { if (errno == ENOENT) oserror("Unable to open %s", devfile); oserror("Unable to open %s", procfile); } } if (_cachefd != cachefd) { if (dup2(_cachefd, cachefd) < 0) oserror("Unable to transfer cache fd to 3"); if (close(_cachefd) < 0) oserror("Close of original cache fd failed"); } /* open /dev/null */ nullfd = open("/dev/null", O_RDWR); if (nullfd < 0) oserror("Unable to open /dev/null"); /* open the config file */ config = fopen(configfile, "r"); if (!config) oserror("Unable to open %s", configfile); /* read the configuration */ m = 0; line = NULL; lineno = 0; while (n = getline(&line, &m, config), n != EOF ) { lineno++; if (n >= page_size) cfgerror("Line too long"); if (memchr(line, 0, n) != 0) cfgerror("Line contains a NUL character"); /* eat blank lines, leading white space and trailing NL */ cp = strchr(line, '\n'); if (!cp) cfgerror("Unterminated line"); if (cp == line) continue; *cp = '\0'; for (cp = line; isspace(*cp); cp++) {;} if (!*cp) continue; /* eat full line comments */ if (*cp == '#') continue; /* allow culling to be disabled */ if (memcmp(cp, "nocull", 6) == 0 && (!cp[6] || isspace(cp[6]))) { culling_disabled = true; } /* note the cull table size command */ if (memcmp(cp, "culltable", 9) == 0 && isspace(cp[9])) { unsigned long cts; char *sp; for (sp = cp + 10; isspace(*sp); sp++) {;} cts = strtoul(sp, &sp, 10); if (*sp) cfgerror("Invalid cull table size number"); if (cts < 12 || cts > 20) cfgerror("Log2 of cull table size must be 12 <= N <= 20"); culltable_size = 1 << cts; continue; } /* Note the suspension resume released file count thresholds * ("-" to disable a threshold). */ if (memcmp(cp, "resume_thresholds", 18) == 0 && isspace(cp[18])) { unsigned long long b_thresh, f_thresh; char *sp; for (sp = cp + 18; isspace(*sp); sp++) {;} if (*sp == '-') { sp++; b_thresh = ULLONG_MAX; } else { b_thresh = strtoul(sp, &sp, 10); } if (!*sp || !isspace(*sp)) cfgerror("Error parsing resume threshold (blocks)"); if (b_thresh == 0) cfgerror("Invalid resume threshold (blocks)"); for (; isspace(*sp); sp++) {;} if (*sp == '-') { sp++; f_thresh = ULLONG_MAX; } else { f_thresh = strtoul(sp, &sp, 10); if (*sp) cfgerror("Error parsing resume threshold (files)"); if (f_thresh == 0) cfgerror("Invalid resume threshold (files)"); } b_resume_threshold = b_thresh; f_resume_threshold = f_thresh; continue; } /* note the dir command */ if (memcmp(cp, "dir", 3) == 0 && isspace(cp[3])) { char *sp; for (sp = cp + 4; isspace(*sp); sp++) {;} if (strlen(sp) > PATH_MAX - 10) cfgerror("Cache pathname is too long"); if (stat(sp, &st) < 0) oserror("Can't confirm cache location"); cacheroot = strdup(sp); if (!cacheroot) oserror("Can't copy cache name"); } /* object to the bind command */ if (memcmp(cp, "bind", 4) == 0 && (!cp[4] || isspace(cp[4]))) cfgerror("'bind' command not permitted"); /* pass the config options over to the kernel module */ if (write(cachefd, line, strlen(line)) < 0) { if (errno == -ENOMEM || errno == -EIO) oserror("CacheFiles"); cfgerror("CacheFiles gave config error: %m"); } } if (line) free(line); if (!feof(config)) oserror("Unable to read %s", configfile); if (fclose(config) == EOF) oserror("Unable to close %s", configfile); /* allocate the cull tables */ if (!culling_disabled) { cullbuild = calloc(culltable_size, sizeof(cullbuild[0])); if (!cullbuild) oserror("calloc"); cullready = calloc(culltable_size, sizeof(cullready[0])); if (!cullready) oserror("calloc"); } /* leave stdin, stdout, stderr and cachefd open only */ if (nullfd != 0) dup2(nullfd, 0); if (nullfd != 1) dup2(nullfd, 1); for (loop = 4; loop < open_max; loop++) close(loop); /* set up a connection to syslog whilst we still can (the bind command * will give us our own namespace with no /dev/log */ openlog("cachefilesd", LOG_PID, LOG_DAEMON); xopenedlog = true; info("About to bind cache"); /* now issue the bind command */ if (write(cachefd, "bind", 4) < 0) oserror("CacheFiles bind failed"); info("Bound cache"); /* we now have a live cache - daemonise the process */ if (!nodaemon) { if (!xdebug) dup2(1, 2); switch (fork()) { case -1: oserror("fork"); case 0: if (xdebug) fprintf(stderr, "Daemon PID %d\n", getpid()); signal(SIGTTIN, SIG_IGN); signal(SIGTTOU, SIG_IGN); signal(SIGTSTP, SIG_IGN); setsid(); write_pidfile(); cachefilesd(); default: break; } } else { cachefilesd(); } exit(0); } /*****************************************************************************/ /* * open the cache directories */ static void open_cache(void) { struct statfs sfs; char buffer[PATH_MAX + 1]; /* open the cache directory so we can scan it */ snprintf(buffer, PATH_MAX, "%s/cache", cacheroot); root.dir = opendir(buffer); if (!root.dir) oserror("Unable to open cache directory"); nopendir++; /* open the graveyard so we can set a notification on it */ if (asprintf(&graveyardpath, "%s/graveyard", cacheroot) < 0) oserror("Unable to copy graveyard name"); graveyardfd = open(graveyardpath, O_DIRECTORY); if (graveyardfd < 0) oserror("Unable to open graveyard directory"); if (fstatfs(graveyardfd, &sfs) < 0) oserror("Unable to stat cache filesystem"); if (sfs.f_bsize + 1 == 0 || sfs.f_blocks + 1 == 0 || sfs.f_bfree + 1 == 0 || sfs.f_bavail + 1 == 0) error("Backing filesystem returns unusable statistics through fstatfs()"); } /*****************************************************************************/ /* * manage the cache */ static void cachefilesd(void) { sigset_t sigs, osigs; bool scanning_suspended = false; bool scan_in_progress = false; struct pollfd pollfds[1] = { [0] = { .fd = cachefd, .events = POLLIN, }, }; notice("Daemon Started"); /* open the cache directories */ open_cache(); /* We need to be able to disable signals that we need to check for * before calling poll so that we don't race and miss something. */ sigemptyset(&sigs); sigaddset(&sigs, SIGIO); sigaddset(&sigs, SIGINT); sigaddset(&sigs, SIGTERM); sigaddset(&sigs, SIGALRM); signal(SIGTERM, sigterm); signal(SIGINT, sigterm); /* check the graveyard for graves */ reap_graveyard(); while (!stop_signalled) { bool do_cull = false; debug(3, "Loop %sbuild=%d ready=%d susp=%u scan=%u", culling_disabled ? "NOCULL " : "", nr_in_build_table, nr_in_ready_table, scanning_suspended, scan_in_progress); read_cache_state(); if (!culling_disabled) { /* Determine if we're going to need to start a new scan * to refill the cull table. We want to do this if the * secondary cull table is less than half full - but * overriding that, we don't want to do this if we know * there's insufficient cullables to make it worth * while. */ if (!scan_in_progress) { bool begin_scan = false; debug(1, "Consider scan %d/%d", nr_in_build_table, culltable_size / 2); if (nr_in_build_table < culltable_size / 2) { debug(1, "Want to scan"); begin_scan = true; } if (begin_scan && scanning_suspended) { debug(1, "Scanning suspended"); if (have_nr_releases) { if (f_released_since_last_scan < f_resume_threshold && b_released_since_last_scan < b_resume_threshold) begin_scan = false; } else { begin_scan = scan_signalled; } } if (begin_scan) { debug(1, "Beginning a scan"); begin_building_cull_table(); scan_in_progress = true; scanning_suspended = false; scan_signalled = false; f_released_since_last_scan = 0; b_released_since_last_scan = 0; } } /* Determine if there's anything we can actually cull yet if * the kernel is calling for space. */ if (kernel_wants_cull) { debug(1, "Want to cull"); if (nr_in_ready_table > 0) do_cull = true; } } /* We block the signals across the checks for reap, cull and * scan initiation before polling so that we sleep without * racing against the signal handlers. */ if (!scan_in_progress && !reap_signalled && !do_cull) { if (sigprocmask(SIG_BLOCK, &sigs, &osigs) < 0) oserror("Unable to block signals"); if (!reap_signalled && !stop_signalled && !scan_signalled) { debug(1, "Poll"); if (ppoll(pollfds, 1, NULL, &osigs) < 0 && errno != EINTR) oserror("Unable to suspend process"); } if (sigprocmask(SIG_UNBLOCK, &sigs, NULL) < 0) oserror("Unable to unblock signals"); continue; } if (!culling_disabled) { if (do_cull) cull_objects(); if (scan_in_progress) { scan_in_progress = build_cull_table(); if (!scan_in_progress) { /* Scan complete. * * If the scan didn't produce a full * table then don't repeat the scan * until something gets released by the * kernel. */ if (nr_in_build_table < culltable_size) { debug(1, "Suspend scanning"); scanning_suspended = true; if (!have_nr_releases) { signal(SIGALRM, sigalrm); alarm(30); } } } } if (!scan_in_progress) { if (nr_in_ready_table <= culltable_size / 2 + 2 && nr_in_build_table > 0) { debug(1, "Decant"); decant_cull_table(); } } } if (reap_signalled) reap_graveyard(); } notice("Daemon Terminated"); exit(0); } /*****************************************************************************/ /* * check the graveyard directory for graves to delete */ static void reap_graveyard(void) { /* set a one-shot notification to catch more graves appearing */ reap_signalled = false; signal(SIGIO, sigio); if (fcntl(graveyardfd, F_NOTIFY, DN_CREATE) < 0) oserror("unable to set notification on graveyard"); reap_graveyard_aux(graveyardpath); } /*****************************************************************************/ /* * recursively remove dead stuff from the graveyard */ static void reap_graveyard_aux(const char *dirname) { struct dirent *de; size_t len; char name[NAME_MAX + 1]; bool deleted; DIR *dir; if (chdir(dirname) < 0) oserror("chdir failed"); dir = opendir("."); if (!dir) oserror("Unable to open grave dir %s", dirname); do { /* removing directory entries may cause us to skip when reading * them */ rewinddir(dir); deleted = false; while (errno = 0, de = readdir(dir), de != NULL ) { /* ignore "." and ".." */ if (de->d_name[0] == '.') { if (de->d_name[1] == '\0') continue; if (de->d_name[1] == '.' || de->d_name[1] == '\0') continue; } deleted = true; /* attempt to unlink non-directory files */ if (de->d_type != DT_DIR) { debug(1, "unlink %s", de->d_name); if (unlink(de->d_name) == 0) continue; if (errno != EISDIR) oserror("Unable to unlink file %s", de->d_name); } /* recurse into directories */ len = strlen(de->d_name) + 1; if (len > sizeof(name)) { errno = ENAMETOOLONG; oserror("Name longer than NAME_MAX \"%s\"", de->d_name); } memcpy(name, de->d_name, len); reap_graveyard_aux(name); /* which we then attempt to remove */ debug(1, "rmdir %s", name); if (rmdir(name) < 0) oserror("Unable to remove dir %s", name); } if (errno != 0) oserror("Unable to read dir %s", dirname); } while (deleted); closedir(dir); if (chdir("..") < 0) oserror("Unable to chdir to .."); } /*****************************************************************************/ /* * read the cache state */ static void read_cache_state(void) { char buffer[4096 + 1], *tok, *next, *arg; int n; n = read(cachefd, buffer, sizeof(buffer) - 1); if (n < 0) oserror("Unable to read cache state"); buffer[n] = '\0'; debug(3, "KERNEL: %s", buffer); tok = buffer; do { next = strpbrk(tok, " \t"); if (next) *next++ = '\0'; arg = strchr(tok, '='); if (arg) { *arg++ = '\0'; } else { debug(0, "Warning: malformed output from kernel, missing arg to [%s]", tok); continue; } if (strcmp(tok, "cull") == 0) { kernel_wants_cull = (strtoul(arg, NULL, 0) != 0); } else if (strcmp(tok, "brun") == 0) { brun = strtoull(arg, NULL, 16); } else if (strcmp(tok, "bcull") == 0) { bcull = strtoull(arg, NULL, 16); } else if (strcmp(tok, "bstop") == 0) { bstop = strtoull(arg, NULL, 16); } else if (strcmp(tok, "frun") == 0) { frun = strtoull(arg, NULL, 16); } else if (strcmp(tok, "fcull") == 0) { fcull = strtoull(arg, NULL, 16); } else if (strcmp(tok, "fstop") == 0) { fstop = strtoull(arg, NULL, 16); } else if (strcmp(tok, "breleased") == 0) { b_released_since_last_scan += strtoull(arg, NULL, 16); have_nr_releases = true; } else if (strcmp(tok, "freleased") == 0) { f_released_since_last_scan += strtoull(arg, NULL, 16); have_nr_releases = true; } } while ((tok = next)); } /*****************************************************************************/ /* * find out if an object in the current working directory is in use */ static int is_object_in_use(const char *filename) { char buffer[NAME_MAX + 30]; int ret, n; n = sprintf(buffer, "inuse %s", filename); /* command the module */ ret = write(cachefd, buffer, n); if (ret < 0 && errno != ESTALE && errno != ENOENT && errno != EBUSY) oserror("Failed to check object's in-use state"); return ret < 0 && errno == EBUSY ? 1 : 0; } /*****************************************************************************/ /* * cull a file representing an object in the current working directory * - requests CacheFiles rename the object "/filename" to the graveyard */ static void cull_file(const char *filename) { char buffer[NAME_MAX + 30]; int ret, n; n = sprintf(buffer, "cull %s", filename); /* command the module */ ret = write(cachefd, buffer, n); if (ret < 0 && errno != ESTALE && errno != ENOENT && errno != EBUSY) oserror("Failed to cull object"); } /*****************************************************************************/ /* * create an object from a name and stat details and attach to the parent, if * it doesn't already exist */ static struct object *create_object(struct object *parent, const char *name, struct stat64 *st) { struct object *object, *p, *pr; int len; /* see if the parent object already holds a representation of this * one */ pr = NULL; for (p = parent->children; p; pr = p, p = p->next) { if (p->ino <= st->st_ino) { if (p->ino == st->st_ino) { /* it does */ p->usage++; return p; } break; } } /* allocate the object * - note that struct object reserves space for NUL directly */ len = strlen(name); object = calloc(1, sizeof(struct object) + len); if (!object) oserror("Unable to alloc object"); object->usage = 1; object->new = true; object->ino = st->st_ino; object->atime = st->st_atime; memcpy(object->name, name, len + 1); switch (object->name[0]) { case 'I': case 'J': object->type = OBJTYPE_INDEX; break; case 'D': case 'E': object->type = OBJTYPE_DATA; break; case 'S': case 'T': object->type = OBJTYPE_SPECIAL; break; case '+': case '@': object->type = OBJTYPE_INTERMEDIATE; break; default: error("Unexpected file type '%c'", object->name[0]); } /* link into the parent's list */ parent->usage++; object->parent = parent; object->prev = pr; object->next = p; if (pr) pr->next = object; else parent->children = object; if (p) p->prev = object; nobjects++; return object; } /*****************************************************************************/ /* * free up an object, unlinking it from its parent */ static void put_object(struct object *object) { struct object *parent; if (--object->usage > 0) return; nobjects--; if (object->cullable) ncullable--; /* destroy the object */ if (object == &root) error("Can't destroy root object representation"); if (object->children) error("Destroying object with children: '%s'", object->name); if (object->dir) { closedir(object->dir); nopendir--; } if (object->prev) object->prev->next = object->next; else object->parent->children = object->next; if (object->next) object->next->prev = object->prev; parent = object->parent; memset(object, 0x6d, sizeof(struct object)); free(object); if (parent) put_object(parent); } /*****************************************************************************/ /* * destroy an unexpected object */ static void destroy_unexpected_object(struct object *parent, struct dirent *de) { static unsigned uniquifier; struct timeval tv; char namebuf[40]; int fd; fd = dirfd(parent->dir); if (de->d_type != DT_DIR) { if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) oserror("Unable to unlink unexpectedly named file: %s", de->d_name); } else { gettimeofday(&tv, NULL); sprintf(namebuf, "x%lxx%xx", tv.tv_sec, uniquifier++); if (renameat(fd, de->d_name, graveyardfd, namebuf) < 0 && errno != ENOENT) oserror("Unable to rename unexpectedly named file: %s", de->d_name); } } /*****************************************************************************/ /* * insert an object into the cull table if its old enough */ static void insert_into_cull_table(struct object *object) { int y, o, m; if (!object) error("NULL object pointer"); /* just insert if table is empty */ if (nr_in_build_table == 0) { object->usage++; cullbuild[0] = object; nr_in_build_table++; return; } /* insert somewhere if table is not full */ if (nr_in_build_table < culltable_size) { object->usage++; /* just insert at end if new oldest object */ if (object->atime <= cullbuild[nr_in_build_table - 1]->atime) { cullbuild[nr_in_build_table] = object; nr_in_build_table++; return; } /* insert at front if new newest object */ if (object->atime > cullbuild[0]->atime) { memmove(&cullbuild[1], &cullbuild[0], nr_in_build_table * sizeof(cullbuild[0])); cullbuild[0] = object; nr_in_build_table++; return; } /* if only two objects in list then insert between them */ if (nr_in_build_table == 2) { cullbuild[2] = cullbuild[1]; cullbuild[1] = object; nr_in_build_table++; return; } /* insert somewhere in between front and back elements * of a three-plus object list * - oldest_build == #objects_currently_in_list */ y = 1; o = nr_in_build_table - 1; do { m = (y + o) / 2; if (object->atime > cullbuild[m]->atime) o = m; else y = m + 1; } while (y < o); memmove(&cullbuild[y + 1], &cullbuild[y], (nr_in_build_table - y) * sizeof(cullbuild[0])); cullbuild[y] = object; nr_in_build_table++; return; } /* if table is full then insert only if older than newest */ if (nr_in_build_table > culltable_size) error("Cull table overfull"); if (object->atime >= cullbuild[0]->atime) return; /* newest object in table will be displaced by this one */ put_object(cullbuild[0]); cullbuild[0] = (void *)(0x6b000000 | __LINE__); object->usage++; /* place directly in first slot if second is older */ if (object->atime >= cullbuild[1]->atime) { cullbuild[0] = object; return; } /* shift everything up one if older than oldest */ if (object->atime <= cullbuild[culltable_size - 1]->atime) { memmove(&cullbuild[0], &cullbuild[1], (culltable_size - 1) * sizeof(cullbuild[0])); cullbuild[culltable_size - 1] = object; return; } /* search the table to find the insertion point * - it will be between the first and last the slots * - we know second is younger */ cullbuild[0] = cullbuild[1]; y = 2; o = culltable_size - 1; do { m = (y + o) / 2; if (object->atime >= cullbuild[m]->atime) o = m; else y = m + 1; } while (y < o); if (y == 2) { cullbuild[1] = object; return; } memmove(&cullbuild[1], &cullbuild[2], (y - 2) * sizeof(cullbuild[0])); cullbuild[y - 1] = object; } /*****************************************************************************/ /* * Begin a scan to build a cull table. */ static void begin_building_cull_table(void) { debug(1, "Refilling cull table"); root.usage++; scan_cursor = &root; } /*****************************************************************************/ /* * Do the next step in building up the cull table. Returns false upon * completion of a scan. */ static bool build_cull_table(void) { struct dirent *de; struct object *curr, *child; struct stat64 st; unsigned loop; int fd; curr = scan_cursor; if (!curr->dir) { curr->empty = true; fd = openat(dirfd(curr->parent->dir), curr->name, O_DIRECTORY); if (fd < 0) { if (errno != ENOENT) oserror("Failed to open directory"); goto dir_read_complete; } curr->dir = fdopendir(fd); if (!curr->dir) oserror("Failed to open directory"); nopendir++; } debug(2, "--> build_cull_table({%s})", curr->name); if (fchdir(dirfd(curr->dir)) < 0) oserror("Failed to change current directory"); next: /* read the next directory entry */ errno = 0; de = readdir(curr->dir); if (!de) { if (errno == 0 || errno == ENOENT) goto dir_read_complete; oserror("Unable to read directory"); } if (de->d_name[0] == '.') { if (!de->d_name[1] || (de->d_name[1] == '.' && !de->d_name[2])) goto next; } debug(2, "readdir '%s'", de->d_name); switch (de->d_type) { case DT_UNKNOWN: case DT_DIR: case DT_REG: break; default: oserror("readdir returned unsupported type %d", de->d_type); } /* delete any funny looking files */ if (memchr("IDSJET+@", de->d_name[0], 8) == NULL) goto found_unexpected_object; /* see if this object is already known to us */ if (fstatat64(dirfd(curr->dir), de->d_name, &st, 0) < 0) { if (errno == ENOENT) goto next; oserror("Failed to stat directory"); } if (!S_ISDIR(st.st_mode) && (!S_ISREG(st.st_mode) || de->d_name[0] == 'I' || de->d_name[0] == 'J' || de->d_name[0] == '@' || de->d_name[0] == '+')) goto found_unexpected_object; /* create a representation for this object */ child = create_object(curr, de->d_name, &st); if (!child && errno == ENOENT) goto next; curr->empty = false; if (!child) oserror("Unable to create object"); /* we consider culling objects at the transition from index object to * non-index object */ switch (child->type) { case OBJTYPE_DATA: case OBJTYPE_SPECIAL: if (!child->new) { /* the child appears to have been retained in the * culling table already, so we see if it should be * removed therefrom */ debug(2, "- old child"); if (st.st_atime <= child->atime) { /* file on disk hasn't been touched */ put_object(child); goto next; } for (loop = 0; loop < nr_in_ready_table; loop++) if (cullready[loop] == child) break; if (loop == nr_in_ready_table - 1) { /* child was oldest object */ cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__); put_object(child); goto removed; } else if (loop < nr_in_ready_table - 1) { /* child was somewhere in between */ memmove(&cullready[loop], &cullready[loop + 1], (nr_in_ready_table - (loop + 1)) * sizeof(cullready[0])); cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__); put_object(child); goto removed; } for (loop = 0; loop < nr_in_build_table; loop++) if (cullbuild[loop] == child) break; if (loop == nr_in_build_table - 1) { /* child was oldest object */ cullbuild[--nr_in_build_table] = (void *)(0x6b000000 | __LINE__); put_object(child); } else if (loop < nr_in_build_table - 1) { /* child was somewhere in between */ memmove(&cullbuild[loop], &cullbuild[loop + 1], (nr_in_build_table - (loop + 1)) * sizeof(cullbuild[0])); cullbuild[--nr_in_build_table] = (void *)(0x6b000000 | __LINE__); put_object(child); } removed: ; } /* add objects that aren't in use to the cull table */ if (!is_object_in_use(de->d_name)) { debug(2, "- insert"); child->new = false; insert_into_cull_table(child); } put_object(child); goto next; /* investigate all index and index-intermediate directories */ case OBJTYPE_INDEX: case OBJTYPE_INTERMEDIATE: debug(2, "- descend"); child->new = false; scan_cursor = child; debug(2, "<-- build_cull_table({%s})", curr->name); return true; default: error("Unexpected type"); } /* we've finished reading a directory - see if we can cull it */ dir_read_complete: debug(2, "dir_read_complete: u=%d e=%d %s", curr->usage, curr->empty, curr->name); if (curr->dir) { if (curr != &root) { closedir(curr->dir); curr->dir = NULL; nopendir--; } else { rewinddir(curr->dir); } } if (curr->usage == 1 && curr->empty) { /* attempt to cull unpinned empty intermediate and index * objects */ if (fchdir(dirfd(curr->parent->dir)) < 0) oserror("Failed to change current directory"); switch (curr->type) { case OBJTYPE_INDEX: cull_file(curr->name); break; case OBJTYPE_INTERMEDIATE: unlinkat(dirfd(curr->parent->dir), curr->name, AT_REMOVEDIR); break; default: break; } } scan_cursor = curr->parent; if (!scan_cursor) debug(1, "Scan complete"); debug(2, "<-- build_cull_table({%s})", curr->name); put_object(curr); return scan_cursor != NULL; /* delete unexpected objects that we've found */ found_unexpected_object: debug(2, "found_unexpected_object"); destroy_unexpected_object(curr, de); goto next; } /*****************************************************************************/ /* * decant cull entries from the build table to the ready table and enable them */ static void decant_cull_table(void) { unsigned loop, avail, copy, leave, space, n; if (scan_cursor) error("Can't decant cull table whilst scanning"); /* mark the new entries cullable */ for (loop = 0; loop < nr_in_build_table; loop++) { if (!cullbuild[loop]->cullable) { cullbuild[loop]->cullable = true; ncullable++; } } /* if the ready table is empty, copy the whole lot across */ if (nr_in_ready_table == 0) { copy = nr_in_build_table; debug(1, "Decant (all %d)", copy); n = copy * sizeof(cullready[0]); memcpy(cullready, cullbuild, n); memset(cullbuild, 0x6e, n); nr_in_ready_table = nr_in_build_table; nr_in_build_table = 0; goto check; } /* decant some of the build table if there's space */ if (culltable_size < nr_in_ready_table) error("Less than zero space in ready table"); space = culltable_size - nr_in_ready_table; if (space == 0) goto check; /* work out how much of the build table we can copy */ copy = avail = nr_in_build_table; if (copy > space) copy = space; leave = avail - copy; debug(1, "Decant (%u/%u to %u)", copy, avail, space); /* make a hole in the ready table transfer "copy" elements from the end * of cullbuild (oldest) to the beginning of cullready (youngest) */ memmove(&cullready[copy], &cullready[0], nr_in_ready_table * sizeof(cullready[0])); nr_in_ready_table += copy; memcpy(&cullready[0], &cullbuild[leave], copy * sizeof(cullready[0])); memset(&cullbuild[leave], 0x6b, copy * sizeof(cullbuild[0])); nr_in_build_table = leave; if (copy + leave > culltable_size) error("Scan table exceeded (%d+%d)", copy, leave); check: for (loop = 0; loop < nr_in_ready_table; loop++) if (((long)cullready[loop] & 0xf0000000) == 0x60000000) abort(); } /*****************************************************************************/ /* * get the directory handle for the given directory */ static int get_dir_fd(struct object *dir) { int parentfd, fd; debug(1, "get_dir_fd(%s)", dir->name); if (dir->dir) { fd = dup(dirfd(dir->dir)); if (fd < 0) oserror("Failed to dup fd"); debug(1, "cache fd to %d", fd); return fd; } parentfd = get_dir_fd(dir->parent); fd = openat(parentfd, dir->name, O_DIRECTORY); if (fd < 0 && errno != ENOENT) oserror("Failed to open directory"); /* return parent fd or -1 if ENOENT */ debug(1, "<%d>/%s to %d", parentfd, dir->name, fd); close(parentfd); return fd; } /*****************************************************************************/ /* * cull an object */ static void cull_object(struct object *object) { struct stat64 st; int dirfd; debug(1, "CULL %s", object->name); dirfd = get_dir_fd(object->parent); if (dirfd >= 0) { if (fstatat64(dirfd, object->name, &st, 0) < 0) { if (errno != ENOENT) oserror("Failed to re-stat object"); close(dirfd); goto object_already_gone; } if (fchdir(dirfd) < 0) oserror("Failed to change current directory"); if (object->atime >= st.st_atime) cull_file(object->name); close(dirfd); } object_already_gone: put_object(object); } /*****************************************************************************/ /* * consider starting a cull */ static void cull_objects(void) { if (ncullable <= 0) error("Cullable object count is inconsistent"); if (cullready[nr_in_ready_table - 1]->cullable) { cull_object(cullready[nr_in_ready_table - 1]); cullready[--nr_in_ready_table] = (void *)(0x6b000000 | __LINE__); } }