diff options
Diffstat (limited to 'fs')
294 files changed, 12409 insertions, 11379 deletions
diff --git a/fs/9p/9p.h b/fs/9p/9p.h deleted file mode 100644 index 94e2f92ab2e8..000000000000 --- a/fs/9p/9p.h +++ /dev/null @@ -1,375 +0,0 @@ -/* - * linux/fs/9p/9p.h - * - * 9P protocol definitions. - * - * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -/* Message Types */ -enum { - TVERSION = 100, - RVERSION, - TAUTH = 102, - RAUTH, - TATTACH = 104, - RATTACH, - TERROR = 106, - RERROR, - TFLUSH = 108, - RFLUSH, - TWALK = 110, - RWALK, - TOPEN = 112, - ROPEN, - TCREATE = 114, - RCREATE, - TREAD = 116, - RREAD, - TWRITE = 118, - RWRITE, - TCLUNK = 120, - RCLUNK, - TREMOVE = 122, - RREMOVE, - TSTAT = 124, - RSTAT, - TWSTAT = 126, - RWSTAT, -}; - -/* modes */ -enum { - V9FS_OREAD = 0x00, - V9FS_OWRITE = 0x01, - V9FS_ORDWR = 0x02, - V9FS_OEXEC = 0x03, - V9FS_OEXCL = 0x04, - V9FS_OTRUNC = 0x10, - V9FS_OREXEC = 0x20, - V9FS_ORCLOSE = 0x40, - V9FS_OAPPEND = 0x80, -}; - -/* permissions */ -enum { - V9FS_DMDIR = 0x80000000, - V9FS_DMAPPEND = 0x40000000, - V9FS_DMEXCL = 0x20000000, - V9FS_DMMOUNT = 0x10000000, - V9FS_DMAUTH = 0x08000000, - V9FS_DMTMP = 0x04000000, - V9FS_DMSYMLINK = 0x02000000, - V9FS_DMLINK = 0x01000000, - /* 9P2000.u extensions */ - V9FS_DMDEVICE = 0x00800000, 
- V9FS_DMNAMEDPIPE = 0x00200000, - V9FS_DMSOCKET = 0x00100000, - V9FS_DMSETUID = 0x00080000, - V9FS_DMSETGID = 0x00040000, -}; - -/* qid.types */ -enum { - V9FS_QTDIR = 0x80, - V9FS_QTAPPEND = 0x40, - V9FS_QTEXCL = 0x20, - V9FS_QTMOUNT = 0x10, - V9FS_QTAUTH = 0x08, - V9FS_QTTMP = 0x04, - V9FS_QTSYMLINK = 0x02, - V9FS_QTLINK = 0x01, - V9FS_QTFILE = 0x00, -}; - -#define V9FS_NOTAG (u16)(~0) -#define V9FS_NOFID (u32)(~0) -#define V9FS_MAXWELEM 16 - -/* ample room for Twrite/Rread header (iounit) */ -#define V9FS_IOHDRSZ 24 - -struct v9fs_str { - u16 len; - char *str; -}; - -/* qids are the unique ID for a file (like an inode */ -struct v9fs_qid { - u8 type; - u32 version; - u64 path; -}; - -/* Plan 9 file metadata (stat) structure */ -struct v9fs_stat { - u16 size; - u16 type; - u32 dev; - struct v9fs_qid qid; - u32 mode; - u32 atime; - u32 mtime; - u64 length; - struct v9fs_str name; - struct v9fs_str uid; - struct v9fs_str gid; - struct v9fs_str muid; - struct v9fs_str extension; /* 9p2000.u extensions */ - u32 n_uid; /* 9p2000.u extensions */ - u32 n_gid; /* 9p2000.u extensions */ - u32 n_muid; /* 9p2000.u extensions */ -}; - -/* file metadata (stat) structure used to create Twstat message - The is similar to v9fs_stat, but the strings don't point to - the same memory block and should be freed separately -*/ -struct v9fs_wstat { - u16 size; - u16 type; - u32 dev; - struct v9fs_qid qid; - u32 mode; - u32 atime; - u32 mtime; - u64 length; - char *name; - char *uid; - char *gid; - char *muid; - char *extension; /* 9p2000.u extensions */ - u32 n_uid; /* 9p2000.u extensions */ - u32 n_gid; /* 9p2000.u extensions */ - u32 n_muid; /* 9p2000.u extensions */ -}; - -/* Structures for Protocol Operations */ - -struct Tversion { - u32 msize; - struct v9fs_str version; -}; - -struct Rversion { - u32 msize; - struct v9fs_str version; -}; - -struct Tauth { - u32 afid; - struct v9fs_str uname; - struct v9fs_str aname; -}; - -struct Rauth { - struct v9fs_qid qid; -}; - -struct 
Rerror { - struct v9fs_str error; - u32 errno; /* 9p2000.u extension */ -}; - -struct Tflush { - u16 oldtag; -}; - -struct Rflush { -}; - -struct Tattach { - u32 fid; - u32 afid; - struct v9fs_str uname; - struct v9fs_str aname; -}; - -struct Rattach { - struct v9fs_qid qid; -}; - -struct Twalk { - u32 fid; - u32 newfid; - u16 nwname; - struct v9fs_str wnames[16]; -}; - -struct Rwalk { - u16 nwqid; - struct v9fs_qid wqids[16]; -}; - -struct Topen { - u32 fid; - u8 mode; -}; - -struct Ropen { - struct v9fs_qid qid; - u32 iounit; -}; - -struct Tcreate { - u32 fid; - struct v9fs_str name; - u32 perm; - u8 mode; - struct v9fs_str extension; -}; - -struct Rcreate { - struct v9fs_qid qid; - u32 iounit; -}; - -struct Tread { - u32 fid; - u64 offset; - u32 count; -}; - -struct Rread { - u32 count; - u8 *data; -}; - -struct Twrite { - u32 fid; - u64 offset; - u32 count; - u8 *data; -}; - -struct Rwrite { - u32 count; -}; - -struct Tclunk { - u32 fid; -}; - -struct Rclunk { -}; - -struct Tremove { - u32 fid; -}; - -struct Rremove { -}; - -struct Tstat { - u32 fid; -}; - -struct Rstat { - struct v9fs_stat stat; -}; - -struct Twstat { - u32 fid; - struct v9fs_stat stat; -}; - -struct Rwstat { -}; - -/* - * fcall is the primary packet structure - * - */ - -struct v9fs_fcall { - u32 size; - u8 id; - u16 tag; - void *sdata; - - union { - struct Tversion tversion; - struct Rversion rversion; - struct Tauth tauth; - struct Rauth rauth; - struct Rerror rerror; - struct Tflush tflush; - struct Rflush rflush; - struct Tattach tattach; - struct Rattach rattach; - struct Twalk twalk; - struct Rwalk rwalk; - struct Topen topen; - struct Ropen ropen; - struct Tcreate tcreate; - struct Rcreate rcreate; - struct Tread tread; - struct Rread rread; - struct Twrite twrite; - struct Rwrite rwrite; - struct Tclunk tclunk; - struct Rclunk rclunk; - struct Tremove tremove; - struct Rremove rremove; - struct Tstat tstat; - struct Rstat rstat; - struct Twstat twstat; - struct Rwstat rwstat; - } 
params; -}; - -#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \ - fcall?fcall->params.rerror.error.len:0, \ - fcall?fcall->params.rerror.error.str:""); - -int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, - char *version, struct v9fs_fcall **rcall); - -int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, - u32 fid, u32 afid, struct v9fs_fcall **rcall); - -int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid); - -int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcall); - -int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_wstat *wstat, struct v9fs_fcall **rcall); - -int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, - char *name, struct v9fs_fcall **rcall); - -int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, - struct v9fs_fcall **rcall); - -int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcall); - -int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, - u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall); - -int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, - u64 offset, u32 count, struct v9fs_fcall **rcall); - -int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, const char __user * data, - struct v9fs_fcall **rcall); -int v9fs_printfcall(char *, int, struct v9fs_fcall *, int); diff --git a/fs/9p/Makefile b/fs/9p/Makefile index 87897f84dfb6..bc7f0d1551e6 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -1,18 +1,12 @@ obj-$(CONFIG_9P_FS) := 9p.o 9p-objs := \ - trans_fd.o \ - mux.o \ - fcall.o \ - conv.o \ vfs_super.o \ vfs_inode.o \ vfs_addr.o \ vfs_file.o \ vfs_dir.o \ vfs_dentry.o \ - error.o \ v9fs.o \ fid.o \ - fcprint.o diff --git a/fs/9p/conv.c b/fs/9p/conv.c deleted file mode 100644 index a3ed571eee31..000000000000 --- a/fs/9p/conv.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * linux/fs/9p/conv.c - * - * 
9P protocol conversion functions - * - * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/idr.h> -#include <asm/uaccess.h> -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "conv.h" - -/* - * Buffer to help with string parsing - */ -struct cbuf { - unsigned char *sp; - unsigned char *p; - unsigned char *ep; -}; - -static inline void buf_init(struct cbuf *buf, void *data, int datalen) -{ - buf->sp = buf->p = data; - buf->ep = data + datalen; -} - -static inline int buf_check_overflow(struct cbuf *buf) -{ - return buf->p > buf->ep; -} - -static int buf_check_size(struct cbuf *buf, int len) -{ - if (buf->p + len > buf->ep) { - if (buf->p < buf->ep) { - eprintk(KERN_ERR, "buffer overflow: want %d has %d\n", - len, (int)(buf->ep - buf->p)); - dump_stack(); - buf->p = buf->ep + 1; - } - - return 0; - } - - return 1; -} - -static void *buf_alloc(struct cbuf *buf, int len) -{ - void *ret = NULL; - - if (buf_check_size(buf, len)) { - ret = buf->p; - buf->p += len; - } - - return ret; -} - -static void buf_put_int8(struct cbuf *buf, u8 val) -{ - 
if (buf_check_size(buf, 1)) { - buf->p[0] = val; - buf->p++; - } -} - -static void buf_put_int16(struct cbuf *buf, u16 val) -{ - if (buf_check_size(buf, 2)) { - *(__le16 *) buf->p = cpu_to_le16(val); - buf->p += 2; - } -} - -static void buf_put_int32(struct cbuf *buf, u32 val) -{ - if (buf_check_size(buf, 4)) { - *(__le32 *)buf->p = cpu_to_le32(val); - buf->p += 4; - } -} - -static void buf_put_int64(struct cbuf *buf, u64 val) -{ - if (buf_check_size(buf, 8)) { - *(__le64 *)buf->p = cpu_to_le64(val); - buf->p += 8; - } -} - -static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) -{ - char *ret; - - ret = NULL; - if (buf_check_size(buf, slen + 2)) { - buf_put_int16(buf, slen); - ret = buf->p; - memcpy(buf->p, s, slen); - buf->p += slen; - } - - return ret; -} - -static inline void buf_put_string(struct cbuf *buf, const char *s) -{ - buf_put_stringn(buf, s, strlen(s)); -} - -static u8 buf_get_int8(struct cbuf *buf) -{ - u8 ret = 0; - - if (buf_check_size(buf, 1)) { - ret = buf->p[0]; - buf->p++; - } - - return ret; -} - -static u16 buf_get_int16(struct cbuf *buf) -{ - u16 ret = 0; - - if (buf_check_size(buf, 2)) { - ret = le16_to_cpu(*(__le16 *)buf->p); - buf->p += 2; - } - - return ret; -} - -static u32 buf_get_int32(struct cbuf *buf) -{ - u32 ret = 0; - - if (buf_check_size(buf, 4)) { - ret = le32_to_cpu(*(__le32 *)buf->p); - buf->p += 4; - } - - return ret; -} - -static u64 buf_get_int64(struct cbuf *buf) -{ - u64 ret = 0; - - if (buf_check_size(buf, 8)) { - ret = le64_to_cpu(*(__le64 *)buf->p); - buf->p += 8; - } - - return ret; -} - -static void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr) -{ - vstr->len = buf_get_int16(buf); - if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) { - vstr->str = buf->p; - buf->p += vstr->len; - } else { - vstr->len = 0; - vstr->str = NULL; - } -} - -static void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid) -{ - qid->type = buf_get_int8(bufp); - qid->version = buf_get_int32(bufp); - 
qid->path = buf_get_int64(bufp); -} - -/** - * v9fs_size_wstat - calculate the size of a variable length stat struct - * @stat: metadata (stat) structure - * @extended: non-zero if 9P2000.u - * - */ - -static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended) -{ - int size = 0; - - if (wstat == NULL) { - eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n"); - return 0; - } - - size = /* 2 + *//* size[2] */ - 2 + /* type[2] */ - 4 + /* dev[4] */ - 1 + /* qid.type[1] */ - 4 + /* qid.vers[4] */ - 8 + /* qid.path[8] */ - 4 + /* mode[4] */ - 4 + /* atime[4] */ - 4 + /* mtime[4] */ - 8 + /* length[8] */ - 8; /* minimum sum of string lengths */ - - if (wstat->name) - size += strlen(wstat->name); - if (wstat->uid) - size += strlen(wstat->uid); - if (wstat->gid) - size += strlen(wstat->gid); - if (wstat->muid) - size += strlen(wstat->muid); - - if (extended) { - size += 4 + /* n_uid[4] */ - 4 + /* n_gid[4] */ - 4 + /* n_muid[4] */ - 2; /* string length of extension[4] */ - if (wstat->extension) - size += strlen(wstat->extension); - } - - return size; -} - -/** - * buf_get_stat - safely decode a recieved metadata (stat) structure - * @bufp: buffer to deserialize - * @stat: metadata (stat) structure - * @extended: non-zero if 9P2000.u - * - */ - -static void -buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended) -{ - stat->size = buf_get_int16(bufp); - stat->type = buf_get_int16(bufp); - stat->dev = buf_get_int32(bufp); - stat->qid.type = buf_get_int8(bufp); - stat->qid.version = buf_get_int32(bufp); - stat->qid.path = buf_get_int64(bufp); - stat->mode = buf_get_int32(bufp); - stat->atime = buf_get_int32(bufp); - stat->mtime = buf_get_int32(bufp); - stat->length = buf_get_int64(bufp); - buf_get_str(bufp, &stat->name); - buf_get_str(bufp, &stat->uid); - buf_get_str(bufp, &stat->gid); - buf_get_str(bufp, &stat->muid); - - if (extended) { - buf_get_str(bufp, &stat->extension); - stat->n_uid = buf_get_int32(bufp); - stat->n_gid = 
buf_get_int32(bufp); - stat->n_muid = buf_get_int32(bufp); - } -} - -/** - * v9fs_deserialize_stat - decode a received metadata structure - * @buf: buffer to deserialize - * @buflen: length of received buffer - * @stat: metadata structure to decode into - * @extended: non-zero if 9P2000.u - * - * Note: stat will point to the buf region. - */ - -int -v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat, - int extended) -{ - struct cbuf buffer; - struct cbuf *bufp = &buffer; - unsigned char *p; - - buf_init(bufp, buf, buflen); - p = bufp->p; - buf_get_stat(bufp, stat, extended); - - if (buf_check_overflow(bufp)) - return 0; - else - return bufp->p - p; -} - -/** - * deserialize_fcall - unmarshal a response - * @buf: recieved buffer - * @buflen: length of received buffer - * @rcall: fcall structure to populate - * @rcalllen: length of fcall structure to populate - * @extended: non-zero if 9P2000.u - * - */ - -int -v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, - int extended) -{ - - struct cbuf buffer; - struct cbuf *bufp = &buffer; - int i = 0; - - buf_init(bufp, buf, buflen); - - rcall->size = buf_get_int32(bufp); - rcall->id = buf_get_int8(bufp); - rcall->tag = buf_get_int16(bufp); - - dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id, - rcall->tag); - - switch (rcall->id) { - default: - eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id); - return -EPROTO; - case RVERSION: - rcall->params.rversion.msize = buf_get_int32(bufp); - buf_get_str(bufp, &rcall->params.rversion.version); - break; - case RFLUSH: - break; - case RATTACH: - rcall->params.rattach.qid.type = buf_get_int8(bufp); - rcall->params.rattach.qid.version = buf_get_int32(bufp); - rcall->params.rattach.qid.path = buf_get_int64(bufp); - break; - case RWALK: - rcall->params.rwalk.nwqid = buf_get_int16(bufp); - if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) { - eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n", - V9FS_MAXWELEM, 
rcall->params.rwalk.nwqid); - return -EPROTO; - } - - for (i = 0; i < rcall->params.rwalk.nwqid; i++) - buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]); - break; - case ROPEN: - buf_get_qid(bufp, &rcall->params.ropen.qid); - rcall->params.ropen.iounit = buf_get_int32(bufp); - break; - case RCREATE: - buf_get_qid(bufp, &rcall->params.rcreate.qid); - rcall->params.rcreate.iounit = buf_get_int32(bufp); - break; - case RREAD: - rcall->params.rread.count = buf_get_int32(bufp); - rcall->params.rread.data = bufp->p; - buf_check_size(bufp, rcall->params.rread.count); - break; - case RWRITE: - rcall->params.rwrite.count = buf_get_int32(bufp); - break; - case RCLUNK: - break; - case RREMOVE: - break; - case RSTAT: - buf_get_int16(bufp); - buf_get_stat(bufp, &rcall->params.rstat.stat, extended); - break; - case RWSTAT: - break; - case RERROR: - buf_get_str(bufp, &rcall->params.rerror.error); - if (extended) - rcall->params.rerror.errno = buf_get_int16(bufp); - break; - } - - if (buf_check_overflow(bufp)) { - dprintk(DEBUG_ERROR, "buffer overflow\n"); - return -EIO; - } - - return bufp->p - bufp->sp; -} - -static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p) -{ - *p = val; - buf_put_int8(bufp, val); -} - -static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p) -{ - *p = val; - buf_put_int16(bufp, val); -} - -static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p) -{ - *p = val; - buf_put_int32(bufp, val); -} - -static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p) -{ - *p = val; - buf_put_int64(bufp, val); -} - -static void -v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str) -{ - int len; - char *s; - - if (data) - len = strlen(data); - else - len = 0; - - s = buf_put_stringn(bufp, data, len); - if (str) { - str->len = len; - str->str = s; - } -} - -static int -v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count, - unsigned char **pdata) -{ - *pdata = buf_alloc(bufp, count); - 
return copy_from_user(*pdata, data, count); -} - -static void -v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat, - struct v9fs_stat *stat, int statsz, int extended) -{ - v9fs_put_int16(bufp, statsz, &stat->size); - v9fs_put_int16(bufp, wstat->type, &stat->type); - v9fs_put_int32(bufp, wstat->dev, &stat->dev); - v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type); - v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version); - v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path); - v9fs_put_int32(bufp, wstat->mode, &stat->mode); - v9fs_put_int32(bufp, wstat->atime, &stat->atime); - v9fs_put_int32(bufp, wstat->mtime, &stat->mtime); - v9fs_put_int64(bufp, wstat->length, &stat->length); - - v9fs_put_str(bufp, wstat->name, &stat->name); - v9fs_put_str(bufp, wstat->uid, &stat->uid); - v9fs_put_str(bufp, wstat->gid, &stat->gid); - v9fs_put_str(bufp, wstat->muid, &stat->muid); - - if (extended) { - v9fs_put_str(bufp, wstat->extension, &stat->extension); - v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid); - v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid); - v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid); - } -} - -static struct v9fs_fcall * -v9fs_create_common(struct cbuf *bufp, u32 size, u8 id) -{ - struct v9fs_fcall *fc; - - size += 4 + 1 + 2; /* size[4] id[1] tag[2] */ - fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL); - if (!fc) - return ERR_PTR(-ENOMEM); - - fc->sdata = (char *)fc + sizeof(*fc); - - buf_init(bufp, (char *)fc->sdata, size); - v9fs_put_int32(bufp, size, &fc->size); - v9fs_put_int8(bufp, id, &fc->id); - v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag); - - return fc; -} - -void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag) -{ - fc->tag = tag; - *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag); -} - -struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(version); /* msize[4] version[s] */ - fc = 
v9fs_create_common(bufp, size, TVERSION); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, msize, &fc->params.tversion.msize); - v9fs_put_str(bufp, version, &fc->params.tversion.version); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -#if 0 -struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(uname) + 2 + strlen(aname); /* afid[4] uname[s] aname[s] */ - fc = v9fs_create_common(bufp, size, TAUTH); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, afid, &fc->params.tauth.afid); - v9fs_put_str(bufp, uname, &fc->params.tauth.uname); - v9fs_put_str(bufp, aname, &fc->params.tauth.aname); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} -#endif /* 0 */ - -struct v9fs_fcall * -v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); /* fid[4] afid[4] uname[s] aname[s] */ - fc = v9fs_create_common(bufp, size, TATTACH); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tattach.fid); - v9fs_put_int32(bufp, afid, &fc->params.tattach.afid); - v9fs_put_str(bufp, uname, &fc->params.tattach.uname); - v9fs_put_str(bufp, aname, &fc->params.tattach.aname); - - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tflush(u16 oldtag) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 2; /* oldtag[2] */ - fc = v9fs_create_common(bufp, size, TFLUSH); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_twalk(u32 fid, 
u32 newfid, u16 nwname, - char **wnames) -{ - int i, size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - if (nwname > V9FS_MAXWELEM) { - dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM); - return NULL; - } - - size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */ - for (i = 0; i < nwname; i++) { - size += 2 + strlen(wnames[i]); /* wname[s] */ - } - - fc = v9fs_create_common(bufp, size, TWALK); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.twalk.fid); - v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid); - v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname); - for (i = 0; i < nwname; i++) { - v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]); - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 1; /* fid[4] mode[1] */ - fc = v9fs_create_common(bufp, size, TOPEN); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.topen.fid); - v9fs_put_int8(bufp, mode, &fc->params.topen.mode); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, - char *extension, int extended) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */ - if (extended) { - size += 2 + /* extension[s] */ - (extension == NULL ? 
0 : strlen(extension)); - } - - fc = v9fs_create_common(bufp, size, TCREATE); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid); - v9fs_put_str(bufp, name, &fc->params.tcreate.name); - v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm); - v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode); - if (extended) - v9fs_put_str(bufp, extension, &fc->params.tcreate.extension); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */ - fc = v9fs_create_common(bufp, size, TREAD); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tread.fid); - v9fs_put_int64(bufp, offset, &fc->params.tread.offset); - v9fs_put_int32(bufp, count, &fc->params.tread.count); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, - const char __user * data) -{ - int size, err; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 8 + 4 + count; /* fid[4] offset[8] count[4] data[count] */ - fc = v9fs_create_common(bufp, size, TWRITE); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.twrite.fid); - v9fs_put_int64(bufp, offset, &fc->params.twrite.offset); - v9fs_put_int32(bufp, count, &fc->params.twrite.count); - err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data); - if (err) { - kfree(fc); - fc = ERR_PTR(err); - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tclunk(u32 fid) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] 
*/ - fc = v9fs_create_common(bufp, size, TCLUNK); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tremove(u32 fid) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = v9fs_create_common(bufp, size, TREMOVE); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tremove.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tstat(u32 fid) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = v9fs_create_common(bufp, size, TSTAT); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tstat.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat, - int extended) -{ - int size, statsz; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - statsz = v9fs_size_wstat(wstat, extended); - size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */ - fc = v9fs_create_common(bufp, size, TWSTAT); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.twstat.fid); - buf_put_int16(bufp, statsz + 2); - v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} diff --git a/fs/9p/conv.h b/fs/9p/conv.h deleted file mode 100644 index dd5b6b1b610f..000000000000 --- a/fs/9p/conv.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * linux/fs/9p/conv.h - * - * 9P protocol conversion definitions. 
- * - * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat, - int extended); -int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, - int extended); - -void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag); - -struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version); -struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname, - char *aname); -struct v9fs_fcall *v9fs_create_tflush(u16 oldtag); -struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname, - char **wnames); -struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode); -struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, - char *extension, int extended); -struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count); -struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, - const char __user *data); -struct v9fs_fcall *v9fs_create_tclunk(u32 fid); -struct v9fs_fcall *v9fs_create_tremove(u32 fid); -struct v9fs_fcall *v9fs_create_tstat(u32 fid); -struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat, - int extended); diff --git 
a/fs/9p/debug.h b/fs/9p/debug.h deleted file mode 100644 index 4228c0bb3c32..000000000000 --- a/fs/9p/debug.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * linux/fs/9p/debug.h - V9FS Debug Definitions - * - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#define DEBUG_ERROR (1<<0) -#define DEBUG_CURRENT (1<<1) -#define DEBUG_9P (1<<2) -#define DEBUG_VFS (1<<3) -#define DEBUG_CONV (1<<4) -#define DEBUG_MUX (1<<5) -#define DEBUG_TRANS (1<<6) -#define DEBUG_SLABS (1<<7) -#define DEBUG_FCALL (1<<8) - -#define DEBUG_DUMP_PKT 0 - -extern int v9fs_debug_level; - -#define dprintk(level, format, arg...) \ -do { \ - if((v9fs_debug_level & level)==level) \ - printk(KERN_NOTICE "-- %s (%d): " \ - format , __FUNCTION__, current->pid , ## arg); \ -} while(0) - -#define eprintk(level, format, arg...) 
\ -do { \ - printk(level "v9fs: %s (%d): " \ - format , __FUNCTION__, current->pid , ## arg); \ -} while(0) - -#if DEBUG_DUMP_PKT -static inline void dump_data(const unsigned char *data, unsigned int datalen) -{ - int i, n; - char buf[5*8]; - - n = 0; - i = 0; - while (i < datalen) { - n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]); - if (i%4 == 0) - n += snprintf(buf+n, sizeof(buf)-n, " "); - - if (i%16 == 0) { - dprintk(DEBUG_ERROR, "%s\n", buf); - n = 0; - } - } - - dprintk(DEBUG_ERROR, "%s\n", buf); -} -#else /* DEBUG_DUMP_PKT */ -static inline void dump_data(const unsigned char *data, unsigned int datalen) -{ - -} -#endif /* DEBUG_DUMP_PKT */ diff --git a/fs/9p/error.c b/fs/9p/error.c deleted file mode 100644 index 0d7fa4e08812..000000000000 --- a/fs/9p/error.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * linux/fs/9p/error.c - * - * Error string handling - * - * Plan 9 uses error strings, Unix uses error numbers. These functions - * try to help manage that and provide for dynamically adding error - * mappings. - * - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/module.h> - -#include <linux/list.h> -#include <linux/jhash.h> - -#include "debug.h" -#include "error.h" - -/** - * v9fs_error_init - preload - * @errstr: error string - * - */ - -int v9fs_error_init(void) -{ - struct errormap *c; - int bucket; - - /* initialize hash table */ - for (bucket = 0; bucket < ERRHASHSZ; bucket++) - INIT_HLIST_HEAD(&hash_errmap[bucket]); - - /* load initial error map into hash table */ - for (c = errmap; c->name != NULL; c++) { - c->namelen = strlen(c->name); - bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; - INIT_HLIST_NODE(&c->list); - hlist_add_head(&c->list, &hash_errmap[bucket]); - } - - return 1; -} - -/** - * errstr2errno - convert error string to error number - * @errstr: error string - * - */ - -int v9fs_errstr2errno(char *errstr, int len) -{ - int errno = 0; - struct hlist_node *p = NULL; - struct errormap *c = NULL; - int bucket = jhash(errstr, len, 0) % ERRHASHSZ; - - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { - if (c->namelen==len && !memcmp(c->name, errstr, len)) { - errno = c->val; - break; - } - } - - if (errno == 0) { - /* TODO: if error isn't found, add it dynamically */ - errstr[len] = 0; - printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__, - errstr); - errno = 1; - } - - return -errno; -} diff --git a/fs/9p/error.h b/fs/9p/error.h deleted file mode 100644 index 5f3ca522b316..000000000000 --- a/fs/9p/error.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * linux/fs/9p/error.h - * - * Huge Nasty Error Table - * - * Plan 9 uses error strings, Unix uses error numbers. This table tries to - * match UNIX strings and Plan 9 strings to unix error numbers. It is used - * to preload the dynamic error table which can also track user-specific error - * strings. 
- * - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/errno.h> -#include <asm/errno.h> - -struct errormap { - char *name; - int val; - - int namelen; - struct hlist_node list; -}; - -#define ERRHASHSZ 32 -static struct hlist_head hash_errmap[ERRHASHSZ]; - -/* FixMe - reduce to a reasonable size */ -static struct errormap errmap[] = { - {"Operation not permitted", EPERM}, - {"wstat prohibited", EPERM}, - {"No such file or directory", ENOENT}, - {"directory entry not found", ENOENT}, - {"file not found", ENOENT}, - {"Interrupted system call", EINTR}, - {"Input/output error", EIO}, - {"No such device or address", ENXIO}, - {"Argument list too long", E2BIG}, - {"Bad file descriptor", EBADF}, - {"Resource temporarily unavailable", EAGAIN}, - {"Cannot allocate memory", ENOMEM}, - {"Permission denied", EACCES}, - {"Bad address", EFAULT}, - {"Block device required", ENOTBLK}, - {"Device or resource busy", EBUSY}, - {"File exists", EEXIST}, - {"Invalid cross-device link", EXDEV}, - {"No such device", ENODEV}, - {"Not a directory", ENOTDIR}, - {"Is a directory", EISDIR}, - {"Invalid argument", EINVAL}, - {"Too many open files in system", ENFILE}, - {"Too many open files", EMFILE}, - {"Text file busy", ETXTBSY}, - {"File too 
large", EFBIG}, - {"No space left on device", ENOSPC}, - {"Illegal seek", ESPIPE}, - {"Read-only file system", EROFS}, - {"Too many links", EMLINK}, - {"Broken pipe", EPIPE}, - {"Numerical argument out of domain", EDOM}, - {"Numerical result out of range", ERANGE}, - {"Resource deadlock avoided", EDEADLK}, - {"File name too long", ENAMETOOLONG}, - {"No locks available", ENOLCK}, - {"Function not implemented", ENOSYS}, - {"Directory not empty", ENOTEMPTY}, - {"Too many levels of symbolic links", ELOOP}, - {"No message of desired type", ENOMSG}, - {"Identifier removed", EIDRM}, - {"No data available", ENODATA}, - {"Machine is not on the network", ENONET}, - {"Package not installed", ENOPKG}, - {"Object is remote", EREMOTE}, - {"Link has been severed", ENOLINK}, - {"Communication error on send", ECOMM}, - {"Protocol error", EPROTO}, - {"Bad message", EBADMSG}, - {"File descriptor in bad state", EBADFD}, - {"Streams pipe error", ESTRPIPE}, - {"Too many users", EUSERS}, - {"Socket operation on non-socket", ENOTSOCK}, - {"Message too long", EMSGSIZE}, - {"Protocol not available", ENOPROTOOPT}, - {"Protocol not supported", EPROTONOSUPPORT}, - {"Socket type not supported", ESOCKTNOSUPPORT}, - {"Operation not supported", EOPNOTSUPP}, - {"Protocol family not supported", EPFNOSUPPORT}, - {"Network is down", ENETDOWN}, - {"Network is unreachable", ENETUNREACH}, - {"Network dropped connection on reset", ENETRESET}, - {"Software caused connection abort", ECONNABORTED}, - {"Connection reset by peer", ECONNRESET}, - {"No buffer space available", ENOBUFS}, - {"Transport endpoint is already connected", EISCONN}, - {"Transport endpoint is not connected", ENOTCONN}, - {"Cannot send after transport endpoint shutdown", ESHUTDOWN}, - {"Connection timed out", ETIMEDOUT}, - {"Connection refused", ECONNREFUSED}, - {"Host is down", EHOSTDOWN}, - {"No route to host", EHOSTUNREACH}, - {"Operation already in progress", EALREADY}, - {"Operation now in progress", EINPROGRESS}, - {"Is a named type 
file", EISNAM}, - {"Remote I/O error", EREMOTEIO}, - {"Disk quota exceeded", EDQUOT}, -/* errors from fossil, vacfs, and u9fs */ - {"fid unknown or out of range", EBADF}, - {"permission denied", EACCES}, - {"file does not exist", ENOENT}, - {"authentication failed", ECONNREFUSED}, - {"bad offset in directory read", ESPIPE}, - {"bad use of fid", EBADF}, - {"wstat can't convert between files and directories", EPERM}, - {"directory is not empty", ENOTEMPTY}, - {"file exists", EEXIST}, - {"file already exists", EEXIST}, - {"file or directory already exists", EEXIST}, - {"fid already in use", EBADF}, - {"file in use", ETXTBSY}, - {"i/o error", EIO}, - {"file already open for I/O", ETXTBSY}, - {"illegal mode", EINVAL}, - {"illegal name", ENAMETOOLONG}, - {"not a directory", ENOTDIR}, - {"not a member of proposed group", EPERM}, - {"not owner", EACCES}, - {"only owner can change group in wstat", EACCES}, - {"read only file system", EROFS}, - {"no access to special file", EPERM}, - {"i/o count too large", EIO}, - {"unknown group", EINVAL}, - {"unknown user", EINVAL}, - {"bogus wstat buffer", EPROTO}, - {"exclusive use file already open", EAGAIN}, - {"corrupted directory entry", EIO}, - {"corrupted file entry", EIO}, - {"corrupted block label", EIO}, - {"corrupted meta data", EIO}, - {"illegal offset", EINVAL}, - {"illegal path element", ENOENT}, - {"root of file system is corrupted", EIO}, - {"corrupted super block", EIO}, - {"protocol botch", EPROTO}, - {"file system is full", ENOSPC}, - {"file is in use", EAGAIN}, - {"directory entry is not allocated", ENOENT}, - {"file is read only", EROFS}, - {"file has been removed", EIDRM}, - {"only support truncation to zero length", EPERM}, - {"cannot remove root", EPERM}, - {"file too big", EFBIG}, - {"venti i/o error", EIO}, - /* these are not errors */ - {"u9fs rhostsauth: no authentication required", 0}, - {"u9fs authnone: no authentication required", 0}, - {NULL, -1} -}; - -extern int v9fs_error_init(void); diff --git 
a/fs/9p/fcall.c b/fs/9p/fcall.c deleted file mode 100644 index dc336a67592f..000000000000 --- a/fs/9p/fcall.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * linux/fs/9p/fcall.c - * - * This file contains functions to perform synchronous 9P calls - * - * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/idr.h> - -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "conv.h" -#include "mux.h" - -/** - * v9fs_t_version - negotiate protocol parameters with sever - * @v9ses: 9P2000 session information - * @msize: requested max size packet - * @version: requested version.extension string - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, - char *version, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); - tc = v9fs_create_tversion(msize, version); - - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_attach - mount the 
server - * @v9ses: 9P2000 session information - * @uname: user name doing the attach - * @aname: remote name being attached to - * @fid: mount fid to attatch to root node - * @afid: authentication fid (in this case result key) - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, - u32 fid, u32 afid, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall* tc; - - dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, - aname, fid, afid); - - tc = v9fs_create_tattach(fid, afid, uname, aname); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, - struct v9fs_fcall *rc, int err) -{ - int fid, id; - struct v9fs_session_info *v9ses; - - id = 0; - fid = tc->params.tclunk.fid; - if (rc) - id = rc->id; - - kfree(tc); - kfree(rc); - if (id == RCLUNK) { - v9ses = a; - v9fs_put_idpool(fid, &v9ses->fidpool); - } -} - -/** - * v9fs_t_clunk - release a fid (finish a transaction) - * @v9ses: 9P2000 session information - * @fid: fid to release - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - rc = NULL; - tc = v9fs_create_tclunk(fid); - if (!IS_ERR(tc)) - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - else - ret = PTR_ERR(tc); - - if (ret) - dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret); - - v9fs_t_clunk_cb(v9ses, tc, rc, ret); - return ret; -} - -#if 0 -/** - * v9fs_v9fs_t_flush - flush a pending transaction - * @v9ses: 9P2000 session information - * @tag: tag to release - * - */ -int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "oldtag %d\n", oldtag); - - tc = v9fs_create_tflush(oldtag); - if (!IS_ERR(tc)) { - ret = 
v9fs_mux_rpc(v9ses->mux, tc, NULL); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} -#endif - -/** - * v9fs_t_stat - read a file's meta-data - * @v9ses: 9P2000 session information - * @fid: fid pointing to file or directory to get info about - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - ret = -ENOMEM; - tc = v9fs_create_tstat(fid); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_wstat - write a file's meta-data - * @v9ses: 9P2000 session information - * @fid: fid pointing to file or directory to write info about - * @stat: metadata - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_wstat *wstat, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - tc = v9fs_create_twstat(fid, wstat, v9ses->extended); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_walk - walk a fid to a new file or directory - * @v9ses: 9P2000 session information - * @fid: fid to walk - * @newfid: new fid (for clone operations) - * @name: path to walk fid to - * @fcall: pointer to response fcall - * - */ - -/* TODO: support multiple walk */ - -int -v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, - char *name, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - int nwname; - - dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); - - if (name) - nwname = 1; - else - nwname = 0; - - tc = v9fs_create_twalk(fid, newfid, nwname, &name); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} 
- -/** - * v9fs_t_open - open a file - * - * @v9ses - 9P2000 session information - * @fid - fid to open - * @mode - mode to open file (R, RW, etc) - * @fcall - pointer to response fcall - * - */ - -int -v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, - struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); - - tc = v9fs_create_topen(fid, mode); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_remove - remove a file or directory - * @v9ses: 9P2000 session information - * @fid: fid to remove - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - tc = v9fs_create_tremove(fid); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_create - create a file or directory - * @v9ses: 9P2000 session information - * @fid: fid to create - * @name: name of the file or directory to create - * @perm: permissions to create with - * @mode: mode to open file (R, RW, etc) - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm, - u8 mode, char *extension, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", - fid, name, perm, mode); - - tc = v9fs_create_tcreate(fid, name, perm, mode, extension, - v9ses->extended); - - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_read - read data - * @v9ses: 9P2000 session information - * @fid: fid to read from - * @offset: offset to start read at - * @count: how many bytes to read - * 
@fcall: pointer to response fcall (with data) - * - */ - -int -v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, - (long long unsigned) offset, count); - - tc = v9fs_create_tread(fid, offset, count); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - if (!ret) - ret = rc->params.rread.count; - if (rcp) - *rcp = rc; - else - kfree(rc); - - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_write - write data - * @v9ses: 9P2000 session information - * @fid: fid to write to - * @offset: offset to start write at - * @count: how many bytes to write - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, - const char __user *data, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, - (long long unsigned) offset, count); - - tc = v9fs_create_twrite(fid, offset, count, data); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - - if (!ret) - ret = rc->params.rwrite.count; - if (rcp) - *rcp = rc; - else - kfree(rc); - - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c deleted file mode 100644 index 34b96114a28d..000000000000 --- a/fs/9p/fcprint.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * linux/fs/9p/fcprint.c - * - * Print 9P call. - * - * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/idr.h> - -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "mux.h" - -static int -v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q) -{ - int n; - char b[10]; - - n = 0; - if (q->type & V9FS_QTDIR) - b[n++] = 'd'; - if (q->type & V9FS_QTAPPEND) - b[n++] = 'a'; - if (q->type & V9FS_QTAUTH) - b[n++] = 'A'; - if (q->type & V9FS_QTEXCL) - b[n++] = 'l'; - if (q->type & V9FS_QTTMP) - b[n++] = 't'; - if (q->type & V9FS_QTSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path, - q->version, b); -} - -static int -v9fs_printperm(char *buf, int buflen, int perm) -{ - int n; - char b[15]; - - n = 0; - if (perm & V9FS_DMDIR) - b[n++] = 'd'; - if (perm & V9FS_DMAPPEND) - b[n++] = 'a'; - if (perm & V9FS_DMAUTH) - b[n++] = 'A'; - if (perm & V9FS_DMEXCL) - b[n++] = 'l'; - if (perm & V9FS_DMTMP) - b[n++] = 't'; - if (perm & V9FS_DMDEVICE) - b[n++] = 'D'; - if (perm & V9FS_DMSOCKET) - b[n++] = 'S'; - if (perm & V9FS_DMNAMEDPIPE) - b[n++] = 'P'; - if (perm & V9FS_DMSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "%s%03o", b, perm&077); -} - -static int -v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended) -{ - int n; - - n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len, - st->name.str, st->uid.len, st->uid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid); - - n 
+= scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid); - - n += scnprintf(buf+n, buflen-n, " q "); - n += v9fs_printqid(buf+n, buflen-n, &st->qid); - n += scnprintf(buf+n, buflen-n, " m "); - n += v9fs_printperm(buf+n, buflen-n, st->mode); - n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld", - st->atime, st->mtime, (long long int) st->length); - - if (extended) - n += scnprintf(buf+n, buflen-n, " ext '%.*s'", - st->extension.len, st->extension.str); - - return n; -} - -static int -v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen) -{ - int i, n; - - i = n = 0; - while (i < datalen) { - n += scnprintf(buf + n, buflen - n, "%02x", data[i]); - if (i%4 == 3) - n += scnprintf(buf + n, buflen - n, " "); - if (i%32 == 31) - n += scnprintf(buf + n, buflen - n, "\n"); - - i++; - } - n += scnprintf(buf + n, buflen - n, "\n"); - - return n; -} - -static int -v9fs_printdata(char *buf, int buflen, u8 *data, int datalen) -{ - return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16); -} - -int -v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended) -{ - int i, ret, type, tag; - - if (!fc) - return scnprintf(buf, buflen, "<NULL>"); - - type = fc->id; - tag = fc->tag; - - ret = 0; - switch (type) { - case TVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Tversion tag %u msize %u version '%.*s'", tag, - fc->params.tversion.msize, fc->params.tversion.version.len, - fc->params.tversion.version.str); - break; - - case RVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Rversion tag %u msize %u version '%.*s'", tag, - fc->params.rversion.msize, fc->params.rversion.version.len, - fc->params.rversion.version.str); - break; - - case TAUTH: - ret += scnprintf(buf+ret, buflen-ret, - "Tauth tag %u afid %d uname '%.*s' aname 
'%.*s'", tag, - fc->params.tauth.afid, fc->params.tauth.uname.len, - fc->params.tauth.uname.str, fc->params.tauth.aname.len, - fc->params.tauth.aname.str); - break; - - case RAUTH: - ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag); - v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid); - break; - - case TATTACH: - ret += scnprintf(buf+ret, buflen-ret, - "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", - tag, fc->params.tattach.fid, fc->params.tattach.afid, - fc->params.tattach.uname.len, fc->params.tattach.uname.str, - fc->params.tattach.aname.len, fc->params.tattach.aname.str); - break; - - case RATTACH: - ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag); - v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid); - break; - - case RERROR: - ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'", - tag, fc->params.rerror.error.len, - fc->params.rerror.error.str); - if (extended) - ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n", - fc->params.rerror.errno); - break; - - case TFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u", - tag, fc->params.tflush.oldtag); - break; - - case RFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag); - break; - - case TWALK: - ret += scnprintf(buf+ret, buflen-ret, - "Twalk tag %u fid %d newfid %d nwname %d", tag, - fc->params.twalk.fid, fc->params.twalk.newfid, - fc->params.twalk.nwname); - for(i = 0; i < fc->params.twalk.nwname; i++) - ret += scnprintf(buf+ret, buflen-ret," '%.*s'", - fc->params.twalk.wnames[i].len, - fc->params.twalk.wnames[i].str); - break; - - case RWALK: - ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d", - tag, fc->params.rwalk.nwqid); - for(i = 0; i < fc->params.rwalk.nwqid; i++) - ret += v9fs_printqid(buf+ret, buflen-ret, - &fc->params.rwalk.wqids[i]); - break; - - case TOPEN: - ret += scnprintf(buf+ret, buflen-ret, - "Topen tag %u fid %d mode %d", tag, - fc->params.topen.fid, 
fc->params.topen.mode); - break; - - case ROPEN: - ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag); - ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid); - ret += scnprintf(buf+ret, buflen-ret," iounit %d", - fc->params.ropen.iounit); - break; - - case TCREATE: - ret += scnprintf(buf+ret, buflen-ret, - "Tcreate tag %u fid %d name '%.*s' perm ", tag, - fc->params.tcreate.fid, fc->params.tcreate.name.len, - fc->params.tcreate.name.str); - - ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm); - ret += scnprintf(buf+ret, buflen-ret, " mode %d", - fc->params.tcreate.mode); - break; - - case RCREATE: - ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag); - ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid); - ret += scnprintf(buf+ret, buflen-ret, " iounit %d", - fc->params.rcreate.iounit); - break; - - case TREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Tread tag %u fid %d offset %lld count %u", tag, - fc->params.tread.fid, - (long long int) fc->params.tread.offset, - fc->params.tread.count); - break; - - case RREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Rread tag %u count %u data ", tag, - fc->params.rread.count); - ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data, - fc->params.rread.count); - break; - - case TWRITE: - ret += scnprintf(buf+ret, buflen-ret, - "Twrite tag %u fid %d offset %lld count %u data ", - tag, fc->params.twrite.fid, - (long long int) fc->params.twrite.offset, - fc->params.twrite.count); - ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data, - fc->params.twrite.count); - break; - - case RWRITE: - ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u", - tag, fc->params.rwrite.count); - break; - - case TCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d", - tag, fc->params.tclunk.fid); - break; - - case RCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag); - break; - - case TREMOVE: - ret += 
scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d", - tag, fc->params.tremove.fid); - break; - - case RREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag); - break; - - case TSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d", - tag, fc->params.tstat.fid); - break; - - case RSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag); - ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat, - extended); - break; - - case TWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ", - tag, fc->params.twstat.fid); - ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat, - extended); - break; - - case RWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag); - break; - - default: - ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type); - break; - } - - return ret; -} diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 90419715c7e9..08fa320b7e6d 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -26,10 +26,10 @@ #include <linux/sched.h> #include <linux/idr.h> #include <asm/semaphore.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -40,67 +40,29 @@ * */ -int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry) +int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) { - struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; - dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid, - dentry->d_iname, dentry); - if (dentry->d_fsdata == NULL) { - dentry->d_fsdata = - kmalloc(sizeof(struct list_head), GFP_KERNEL); - if (dentry->d_fsdata == NULL) { - dprintk(DEBUG_ERROR, "Out of memory\n"); - return -ENOMEM; - } - fid_list = (struct list_head *)dentry->d_fsdata; - INIT_LIST_HEAD(fid_list); /* Initialize list head */ - } + struct v9fs_dentry *dent; - fid->uid = current->uid; - list_add(&fid->list, fid_list); - return 0; -} + P9_DPRINTK(P9_DEBUG_VFS, 
"fid %d dentry %s\n", + fid->fid, dentry->d_iname); -/** - * v9fs_fid_create - allocate a FID structure - * @dentry - dentry to link newly created fid to - * - */ - -struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *v9ses, int fid) -{ - struct v9fs_fid *new; + dent = dentry->d_fsdata; + if (!dent) { + dent = kmalloc(sizeof(struct v9fs_dentry), GFP_KERNEL); + if (!dent) + return -ENOMEM; - dprintk(DEBUG_9P, "fid create fid %d\n", fid); - new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); - if (new == NULL) { - dprintk(DEBUG_ERROR, "Out of Memory\n"); - return ERR_PTR(-ENOMEM); + spin_lock_init(&dent->lock); + INIT_LIST_HEAD(&dent->fidlist); + dentry->d_fsdata = dent; } - new->fid = fid; - new->v9ses = v9ses; - new->fidopen = 0; - new->fidclunked = 0; - new->iounit = 0; - new->rdir_pos = 0; - new->rdir_fcall = NULL; - init_MUTEX(&new->lock); - INIT_LIST_HEAD(&new->list); - - return new; -} - -/** - * v9fs_fid_destroy - deallocate a FID structure - * @fid: fid to destroy - * - */ + spin_lock(&dent->lock); + list_add(&fid->dlist, &dent->fidlist); + spin_unlock(&dent->lock); -void v9fs_fid_destroy(struct v9fs_fid *fid) -{ - list_del(&fid->list); - kfree(fid); + return 0; } /** @@ -114,30 +76,42 @@ void v9fs_fid_destroy(struct v9fs_fid *fid) * */ -struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry) +struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) { - struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; - struct v9fs_fid *return_fid = NULL; - - dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry); - - if (fid_list) - return_fid = list_entry(fid_list->next, struct v9fs_fid, list); + struct v9fs_dentry *dent; + struct p9_fid *fid; + + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + dent = dentry->d_fsdata; + if (dent) + fid = list_entry(dent->fidlist.next, struct p9_fid, dlist); + else + fid = ERR_PTR(-EBADF); + + P9_DPRINTK(P9_DEBUG_VFS, " fid: %p\n", fid); + return fid; +} - if (!return_fid) { - 
dprintk(DEBUG_ERROR, "Couldn't find a fid in dentry\n"); - return_fid = ERR_PTR(-EBADF); +struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry) +{ + struct p9_fid *fid; + struct v9fs_dentry *dent; + + dent = dentry->d_fsdata; + fid = v9fs_fid_lookup(dentry); + if (!IS_ERR(fid)) { + spin_lock(&dent->lock); + list_del(&fid->dlist); + spin_unlock(&dent->lock); } - if(down_interruptible(&return_fid->lock)) - return ERR_PTR(-EINTR); - - return return_fid; + return fid; } + /** * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and - * release it + * release it * @dentry: dentry to look for fid in * * find a fid in the dentry and then clone to a new private fid @@ -146,49 +120,15 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry) * */ -struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry) +struct p9_fid *v9fs_fid_clone(struct dentry *dentry) { - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *base_fid, *new_fid = ERR_PTR(-EBADF); - struct v9fs_fcall *fcall = NULL; - int fid, err; - - base_fid = v9fs_fid_lookup(dentry); - - if(IS_ERR(base_fid)) - return base_fid; - - if(base_fid) { /* clone fid */ - fid = v9fs_get_idpool(&v9ses->fidpool); - if (fid < 0) { - eprintk(KERN_WARNING, "newfid fails!\n"); - new_fid = ERR_PTR(-ENOSPC); - goto Release_Fid; - } - - err = v9fs_t_walk(v9ses, base_fid->fid, fid, NULL, &fcall); - if (err < 0) { - dprintk(DEBUG_ERROR, "clone walk didn't work\n"); - v9fs_put_idpool(fid, &v9ses->fidpool); - new_fid = ERR_PTR(err); - goto Free_Fcall; - } - new_fid = v9fs_fid_create(v9ses, fid); - if (new_fid == NULL) { - dprintk(DEBUG_ERROR, "out of memory\n"); - new_fid = ERR_PTR(-ENOMEM); - } -Free_Fcall: - kfree(fcall); - } + struct p9_fid *ofid, *fid; -Release_Fid: - up(&base_fid->lock); - return new_fid; -} + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + ofid = v9fs_fid_lookup(dentry); + if (IS_ERR(ofid)) + return ofid; -void v9fs_fid_clunk(struct 
v9fs_session_info *v9ses, struct v9fs_fid *fid) -{ - v9fs_t_clunk(v9ses, fid->fid); - v9fs_fid_destroy(fid); + fid = p9_client_walk(ofid, 0, NULL, 1); + return fid; } diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 48fc170c26c8..47a0ba742872 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -22,41 +22,12 @@ #include <linux/list.h> -#define FID_OP 0 -#define FID_WALK 1 -#define FID_CREATE 2 - -struct v9fs_fid { - struct list_head list; /* list of fids associated with a dentry */ - struct list_head active; /* XXX - debug */ - - struct semaphore lock; - - u32 fid; - unsigned char fidopen; /* set when fid is opened */ - unsigned char fidclunked; /* set when fid has already been clunked */ - - struct v9fs_qid qid; - u32 iounit; - - /* readdir stuff */ - int rdir_fpos; - loff_t rdir_pos; - struct v9fs_fcall *rdir_fcall; - - /* management stuff */ - uid_t uid; /* user associated with this fid */ - - /* private data */ - struct file *filp; /* backpointer to File struct for open files */ - struct v9fs_session_info *v9ses; /* session info for this FID */ +struct v9fs_dentry { + spinlock_t lock; /* protect fidlist */ + struct list_head fidlist; }; -struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry); -struct v9fs_fid *v9fs_fid_get_created(struct dentry *); -void v9fs_fid_destroy(struct v9fs_fid *fid); -struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *, int fid); -int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry); -struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry); -void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid); - +struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); +struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry); +struct p9_fid *v9fs_fid_clone(struct dentry *dentry); +int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); diff --git a/fs/9p/mux.c b/fs/9p/mux.c deleted file mode 100644 index c783874a9caf..000000000000 --- a/fs/9p/mux.c +++ /dev/null @@ -1,1033 +0,0 @@ -/* - * linux/fs/9p/mux.c - * - 
* Protocol Multiplexer - * - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/poll.h> -#include <linux/kthread.h> -#include <linux/idr.h> -#include <linux/mutex.h> - -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "conv.h" -#include "transport.h" -#include "mux.h" - -#define ERREQFLUSH 1 -#define SCHED_TIMEOUT 10 -#define MAXPOLLWADDR 2 - -enum { - Rworksched = 1, /* read work scheduled or running */ - Rpending = 2, /* can read */ - Wworksched = 4, /* write work scheduled or running */ - Wpending = 8, /* can write */ -}; - -enum { - None, - Flushing, - Flushed, -}; - -struct v9fs_mux_poll_task; - -struct v9fs_req { - spinlock_t lock; - int tag; - struct v9fs_fcall *tcall; - struct v9fs_fcall *rcall; - int err; - v9fs_mux_req_callback cb; - void *cba; - int flush; - struct list_head req_list; -}; - -struct v9fs_mux_data { - spinlock_t lock; - struct list_head mux_list; - struct v9fs_mux_poll_task *poll_task; - int msize; - unsigned char *extended; - struct v9fs_transport *trans; - struct v9fs_idpool tagpool; - int err; - wait_queue_head_t equeue; - struct list_head req_list; - struct list_head unsent_req_list; - 
struct v9fs_fcall *rcall; - int rpos; - char *rbuf; - int wpos; - int wsize; - char *wbuf; - wait_queue_t poll_wait[MAXPOLLWADDR]; - wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; - poll_table pt; - struct work_struct rq; - struct work_struct wq; - unsigned long wsched; -}; - -struct v9fs_mux_poll_task { - struct task_struct *task; - struct list_head mux_list; - int muxnum; -}; - -struct v9fs_mux_rpc { - struct v9fs_mux_data *m; - int err; - struct v9fs_fcall *tcall; - struct v9fs_fcall *rcall; - wait_queue_head_t wqueue; -}; - -static int v9fs_poll_proc(void *); -static void v9fs_read_work(struct work_struct *work); -static void v9fs_write_work(struct work_struct *work); -static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, - poll_table * p); -static u16 v9fs_mux_get_tag(struct v9fs_mux_data *); -static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16); - -static DEFINE_MUTEX(v9fs_mux_task_lock); -static struct workqueue_struct *v9fs_mux_wq; - -static int v9fs_mux_num; -static int v9fs_mux_poll_task_num; -static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100]; - -int v9fs_mux_global_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) - v9fs_mux_poll_tasks[i].task = NULL; - - v9fs_mux_wq = create_workqueue("v9fs"); - if (!v9fs_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - return 0; -} - -void v9fs_mux_global_exit(void) -{ - destroy_workqueue(v9fs_mux_wq); -} - -/** - * v9fs_mux_calc_poll_procs - calculates the number of polling procs - * based on the number of mounted v9fs filesystems. - * - * The current implementation returns sqrt of the number of mounts. - */ -static int v9fs_mux_calc_poll_procs(int muxnum) -{ - int n; - - if (v9fs_mux_poll_task_num) - n = muxnum / v9fs_mux_poll_task_num + - (muxnum % v9fs_mux_poll_task_num ? 
1 : 0); - else - n = 1; - - if (n > ARRAY_SIZE(v9fs_mux_poll_tasks)) - n = ARRAY_SIZE(v9fs_mux_poll_tasks); - - return n; -} - -static int v9fs_mux_poll_start(struct v9fs_mux_data *m) -{ - int i, n; - struct v9fs_mux_poll_task *vpt, *vptlast; - struct task_struct *pproc; - - dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num, - v9fs_mux_poll_task_num); - mutex_lock(&v9fs_mux_task_lock); - - n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1); - if (n > v9fs_mux_poll_task_num) { - for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) { - if (v9fs_mux_poll_tasks[i].task == NULL) { - vpt = &v9fs_mux_poll_tasks[i]; - dprintk(DEBUG_MUX, "create proc %p\n", vpt); - pproc = kthread_create(v9fs_poll_proc, vpt, - "v9fs-poll"); - - if (!IS_ERR(pproc)) { - vpt->task = pproc; - INIT_LIST_HEAD(&vpt->mux_list); - vpt->muxnum = 0; - v9fs_mux_poll_task_num++; - wake_up_process(vpt->task); - } - break; - } - } - - if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) - dprintk(DEBUG_ERROR, "warning: no free poll slots\n"); - } - - n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num + - ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 
1 : 0); - - vptlast = NULL; - for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) { - vpt = &v9fs_mux_poll_tasks[i]; - if (vpt->task != NULL) { - vptlast = vpt; - if (vpt->muxnum < n) { - dprintk(DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vpt->mux_list); - vpt->muxnum++; - m->poll_task = vpt; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, v9fs_pollwait); - break; - } - } - } - - if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) { - if (vptlast == NULL) - return -ENOMEM; - - dprintk(DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vptlast->mux_list); - vptlast->muxnum++; - m->poll_task = vptlast; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, v9fs_pollwait); - } - - v9fs_mux_num++; - mutex_unlock(&v9fs_mux_task_lock); - - return 0; -} - -static void v9fs_mux_poll_stop(struct v9fs_mux_data *m) -{ - int i; - struct v9fs_mux_poll_task *vpt; - - mutex_lock(&v9fs_mux_task_lock); - vpt = m->poll_task; - list_del(&m->mux_list); - for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (m->poll_waddr[i] != NULL) { - remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); - m->poll_waddr[i] = NULL; - } - } - vpt->muxnum--; - if (!vpt->muxnum) { - dprintk(DEBUG_MUX, "destroy proc %p\n", vpt); - kthread_stop(vpt->task); - vpt->task = NULL; - v9fs_mux_poll_task_num--; - } - v9fs_mux_num--; - mutex_unlock(&v9fs_mux_task_lock); -} - -/** - * v9fs_mux_init - allocate and initialize the per-session mux data - * Creates the polling task if this is the first session. 
- * - * @trans - transport structure - * @msize - maximum message size - * @extended - pointer to the extended flag - */ -struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, - unsigned char *extended) -{ - int i, n; - struct v9fs_mux_data *m, *mtmp; - - dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize); - m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&m->lock); - INIT_LIST_HEAD(&m->mux_list); - m->msize = msize; - m->extended = extended; - m->trans = trans; - idr_init(&m->tagpool.pool); - init_MUTEX(&m->tagpool.lock); - m->err = 0; - init_waitqueue_head(&m->equeue); - INIT_LIST_HEAD(&m->req_list); - INIT_LIST_HEAD(&m->unsent_req_list); - m->rcall = NULL; - m->rpos = 0; - m->rbuf = NULL; - m->wpos = m->wsize = 0; - m->wbuf = NULL; - INIT_WORK(&m->rq, v9fs_read_work); - INIT_WORK(&m->wq, v9fs_write_work); - m->wsched = 0; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - m->poll_task = NULL; - n = v9fs_mux_poll_start(m); - if (n) - return ERR_PTR(n); - - n = trans->poll(trans, &m->pt); - if (n & POLLIN) { - dprintk(DEBUG_MUX, "mux %p can read\n", m); - set_bit(Rpending, &m->wsched); - } - - if (n & POLLOUT) { - dprintk(DEBUG_MUX, "mux %p can write\n", m); - set_bit(Wpending, &m->wsched); - } - - for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (IS_ERR(m->poll_waddr[i])) { - v9fs_mux_poll_stop(m); - mtmp = (void *)m->poll_waddr; /* the error code */ - kfree(m); - m = mtmp; - break; - } - } - - return m; -} - -/** - * v9fs_mux_destroy - cancels all pending requests and frees mux resources - */ -void v9fs_mux_destroy(struct v9fs_mux_data *m) -{ - dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m, - m->mux_list.prev, m->mux_list.next); - v9fs_mux_cancel(m, -ECONNRESET); - - if (!list_empty(&m->req_list)) { - /* wait until all processes waiting on this session exit */ - dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n", - m); - 
wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); - dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m, - list_empty(&m->req_list)); - } - - v9fs_mux_poll_stop(m); - m->trans = NULL; - - kfree(m); -} - -/** - * v9fs_pollwait - called by files poll operation to add v9fs-poll task - * to files wait queue - */ -static void -v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, - poll_table * p) -{ - int i; - struct v9fs_mux_data *m; - - m = container_of(p, struct v9fs_mux_data, pt); - for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) - if (m->poll_waddr[i] == NULL) - break; - - if (i >= ARRAY_SIZE(m->poll_waddr)) { - dprintk(DEBUG_ERROR, "not enough wait_address slots\n"); - return; - } - - m->poll_waddr[i] = wait_address; - - if (!wait_address) { - dprintk(DEBUG_ERROR, "no wait_address\n"); - m->poll_waddr[i] = ERR_PTR(-EIO); - return; - } - - init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); - add_wait_queue(wait_address, &m->poll_wait[i]); -} - -/** - * v9fs_poll_mux - polls a mux and schedules read or write works if necessary - */ -static void v9fs_poll_mux(struct v9fs_mux_data *m) -{ - int n; - - if (m->err < 0) - return; - - n = m->trans->poll(m->trans, NULL); - if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { - dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n); - if (n >= 0) - n = -ECONNRESET; - v9fs_mux_cancel(m, n); - } - - if (n & POLLIN) { - set_bit(Rpending, &m->wsched); - dprintk(DEBUG_MUX, "mux %p can read\n", m); - if (!test_and_set_bit(Rworksched, &m->wsched)) { - dprintk(DEBUG_MUX, "schedule read work mux %p\n", m); - queue_work(v9fs_mux_wq, &m->rq); - } - } - - if (n & POLLOUT) { - set_bit(Wpending, &m->wsched); - dprintk(DEBUG_MUX, "mux %p can write\n", m); - if ((m->wsize || !list_empty(&m->unsent_req_list)) - && !test_and_set_bit(Wworksched, &m->wsched)) { - dprintk(DEBUG_MUX, "schedule write work mux %p\n", m); - queue_work(v9fs_mux_wq, &m->wq); - } - } -} - -/** - * v9fs_poll_proc - polls all v9fs 
transports for new events and queues - * the appropriate work to the work queue - */ -static int v9fs_poll_proc(void *a) -{ - struct v9fs_mux_data *m, *mtmp; - struct v9fs_mux_poll_task *vpt; - - vpt = a; - dprintk(DEBUG_MUX, "start %p %p\n", current, vpt); - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - - list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { - v9fs_poll_mux(m); - } - - dprintk(DEBUG_MUX, "sleeping...\n"); - schedule_timeout(SCHED_TIMEOUT * HZ); - } - - __set_current_state(TASK_RUNNING); - dprintk(DEBUG_MUX, "finish\n"); - return 0; -} - -/** - * v9fs_write_work - called when a transport can send some data - */ -static void v9fs_write_work(struct work_struct *work) -{ - int n, err; - struct v9fs_mux_data *m; - struct v9fs_req *req; - - m = container_of(work, struct v9fs_mux_data, wq); - - if (m->err < 0) { - clear_bit(Wworksched, &m->wsched); - return; - } - - if (!m->wsize) { - if (list_empty(&m->unsent_req_list)) { - clear_bit(Wworksched, &m->wsched); - return; - } - - spin_lock(&m->lock); -again: - req = list_entry(m->unsent_req_list.next, struct v9fs_req, - req_list); - list_move_tail(&req->req_list, &m->req_list); - if (req->err == ERREQFLUSH) - goto again; - - m->wbuf = req->tcall->sdata; - m->wsize = req->tcall->size; - m->wpos = 0; - dump_data(m->wbuf, m->wsize); - spin_unlock(&m->lock); - } - - dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); - clear_bit(Wpending, &m->wsched); - err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); - dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Wworksched, &m->wsched); - return; - } - - if (err <= 0) - goto error; - - m->wpos += err; - if (m->wpos == m->wsize) - m->wpos = m->wsize = 0; - - if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { - if (test_and_clear_bit(Wpending, &m->wsched)) - n = POLLOUT; - else - n = m->trans->poll(m->trans, NULL); - - if (n & POLLOUT) { - 
dprintk(DEBUG_MUX, "schedule write work mux %p\n", m); - queue_work(v9fs_mux_wq, &m->wq); - } else - clear_bit(Wworksched, &m->wsched); - } else - clear_bit(Wworksched, &m->wsched); - - return; - - error: - v9fs_mux_cancel(m, err); - clear_bit(Wworksched, &m->wsched); -} - -static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) -{ - int ecode; - struct v9fs_str *ename; - - if (!req->err && req->rcall->id == RERROR) { - ecode = req->rcall->params.rerror.errno; - ename = &req->rcall->params.rerror.error; - - dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str); - - if (*m->extended) - req->err = -ecode; - - if (!req->err) { - req->err = v9fs_errstr2errno(ename->str, ename->len); - - if (!req->err) { /* string match failed */ - PRINT_FCALL_ERROR("unknown error", req->rcall); - } - - if (!req->err) - req->err = -ESERVERFAULT; - } - } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { - dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n", - req->tcall->id + 1, req->rcall->id); - if (!req->err) - req->err = -EIO; - } -} - -/** - * v9fs_read_work - called when there is some data to be read from a transport - */ -static void v9fs_read_work(struct work_struct *work) -{ - int n, err; - struct v9fs_mux_data *m; - struct v9fs_req *req, *rptr, *rreq; - struct v9fs_fcall *rcall; - char *rbuf; - - m = container_of(work, struct v9fs_mux_data, rq); - - if (m->err < 0) - return; - - rcall = NULL; - dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); - - if (!m->rcall) { - m->rcall = - kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; - } - - m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall); - m->rpos = 0; - } - - clear_bit(Rpending, &m->wsched); - err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); - dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Rworksched, &m->wsched); - return; - } - - if (err <= 0) - 
goto error; - - m->rpos += err; - while (m->rpos > 4) { - n = le32_to_cpu(*(__le32 *) m->rbuf); - if (n >= m->msize) { - dprintk(DEBUG_ERROR, - "requested packet size too big: %d\n", n); - err = -EIO; - goto error; - } - - if (m->rpos < n) - break; - - dump_data(m->rbuf, n); - err = - v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended); - if (err < 0) { - goto error; - } - - if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { - char buf[150]; - - v9fs_printfcall(buf, sizeof(buf), m->rcall, - *m->extended); - printk(KERN_NOTICE ">>> %p %s\n", m, buf); - } - - rcall = m->rcall; - rbuf = m->rbuf; - if (m->rpos > n) { - m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize, - GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; - } - - m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall); - memmove(m->rbuf, rbuf + n, m->rpos - n); - m->rpos -= n; - } else { - m->rcall = NULL; - m->rbuf = NULL; - m->rpos = 0; - } - - dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id, - rcall->tag); - - req = NULL; - spin_lock(&m->lock); - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == rcall->tag) { - req = rreq; - if (req->flush != Flushing) - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); - - if (req) { - req->rcall = rcall; - process_request(m, req); - - if (req->flush != Flushing) { - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - - wake_up(&m->equeue); - } - } else { - if (err >= 0 && rcall->id != RFLUSH) - dprintk(DEBUG_ERROR, - "unexpected response mux %p id %d tag %d\n", - m, rcall->id, rcall->tag); - kfree(rcall); - } - } - - if (!list_empty(&m->req_list)) { - if (test_and_clear_bit(Rpending, &m->wsched)) - n = POLLIN; - else - n = m->trans->poll(m->trans, NULL); - - if (n & POLLIN) { - dprintk(DEBUG_MUX, "schedule read work mux %p\n", m); - queue_work(v9fs_mux_wq, &m->rq); - } else - clear_bit(Rworksched, &m->wsched); - } else - clear_bit(Rworksched, &m->wsched); 
- - return; - - error: - v9fs_mux_cancel(m, err); - clear_bit(Rworksched, &m->wsched); -} - -/** - * v9fs_send_request - send 9P request - * The function can sleep until the request is scheduled for sending. - * The function can be interrupted. Return from the function is not - * a guarantee that the request is sent successfully. Can return errors - * that can be retrieved by PTR_ERR macros. - * - * @m: mux data - * @tc: request to be sent - * @cb: callback function to call when response is received - * @cba: parameter to pass to the callback function - */ -static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, - struct v9fs_fcall *tc, - v9fs_mux_req_callback cb, void *cba) -{ - int n; - struct v9fs_req *req; - - dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, - tc, tc->id); - if (m->err < 0) - return ERR_PTR(m->err); - - req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); - - if (tc->id == TVERSION) - n = V9FS_NOTAG; - else - n = v9fs_mux_get_tag(m); - - if (n < 0) - return ERR_PTR(-ENOMEM); - - v9fs_set_tag(tc, n); - if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { - char buf[150]; - - v9fs_printfcall(buf, sizeof(buf), tc, *m->extended); - printk(KERN_NOTICE "<<< %p %s\n", m, buf); - } - - spin_lock_init(&req->lock); - req->tag = n; - req->tcall = tc; - req->rcall = NULL; - req->err = 0; - req->cb = cb; - req->cba = cba; - req->flush = None; - - spin_lock(&m->lock); - list_add_tail(&req->req_list, &m->unsent_req_list); - spin_unlock(&m->lock); - - if (test_and_clear_bit(Wpending, &m->wsched)) - n = POLLOUT; - else - n = m->trans->poll(m->trans, NULL); - - if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(v9fs_mux_wq, &m->wq); - - return req; -} - -static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req) -{ - v9fs_mux_put_tag(m, req->tag); - kfree(req); -} - -static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a) -{ - 
v9fs_mux_req_callback cb; - int tag; - struct v9fs_mux_data *m; - struct v9fs_req *req, *rreq, *rptr; - - m = a; - dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, - freq->tcall, freq->rcall, freq->err, - freq->tcall->params.tflush.oldtag); - - spin_lock(&m->lock); - cb = NULL; - tag = freq->tcall->params.tflush.oldtag; - req = NULL; - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == tag) { - req = rreq; - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); - - if (req) { - spin_lock(&req->lock); - req->flush = Flushed; - spin_unlock(&req->lock); - - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - - wake_up(&m->equeue); - } - - kfree(freq->tcall); - kfree(freq->rcall); - v9fs_mux_free_request(m, freq); -} - -static int -v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req) -{ - struct v9fs_fcall *fc; - struct v9fs_req *rreq, *rptr; - - dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); - - /* if a response was received for a request, do nothing */ - spin_lock(&req->lock); - if (req->rcall || req->err) { - spin_unlock(&req->lock); - dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req); - return 0; - } - - req->flush = Flushing; - spin_unlock(&req->lock); - - spin_lock(&m->lock); - /* if the request is not sent yet, just remove it from the list */ - list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { - if (rreq->tag == req->tag) { - dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req); - list_del(&rreq->req_list); - req->flush = Flushed; - spin_unlock(&m->lock); - if (req->cb) - (*req->cb) (req, req->cba); - return 0; - } - } - spin_unlock(&m->lock); - - clear_thread_flag(TIF_SIGPENDING); - fc = v9fs_create_tflush(req->tag); - v9fs_send_request(m, fc, v9fs_mux_flush_cb, m); - return 1; -} - -static void -v9fs_mux_rpc_cb(struct v9fs_req *req, void *a) -{ - struct v9fs_mux_rpc *r; - - 
dprintk(DEBUG_MUX, "req %p r %p\n", req, a); - r = a; - r->rcall = req->rcall; - r->err = req->err; - - if (req->flush!=None && !req->err) - r->err = -ERESTARTSYS; - - wake_up(&r->wqueue); -} - -/** - * v9fs_mux_rpc - sends 9P request and waits until a response is available. - * The function can be interrupted. - * @m: mux data - * @tc: request to be sent - * @rc: pointer where a pointer to the response is stored - */ -int -v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, - struct v9fs_fcall **rc) -{ - int err, sigpending; - unsigned long flags; - struct v9fs_req *req; - struct v9fs_mux_rpc r; - - r.err = 0; - r.tcall = tc; - r.rcall = NULL; - r.m = m; - init_waitqueue_head(&r.wqueue); - - if (rc) - *rc = NULL; - - sigpending = 0; - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } - - req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r); - if (IS_ERR(req)) { - err = PTR_ERR(req); - dprintk(DEBUG_MUX, "error %d\n", err); - return err; - } - - err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); - if (r.err < 0) - err = r.err; - - if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) { - if (v9fs_mux_flush_request(m, req)) { - /* wait until we get response of the flush message */ - do { - clear_thread_flag(TIF_SIGPENDING); - err = wait_event_interruptible(r.wqueue, - r.rcall || r.err); - } while (!r.rcall && !r.err && err==-ERESTARTSYS && - m->trans->status==Connected && !m->err); - - err = -ERESTARTSYS; - } - sigpending = 1; - } - - if (sigpending) { - spin_lock_irqsave(&current->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); - } - - if (rc) - *rc = r.rcall; - else - kfree(r.rcall); - - v9fs_mux_free_request(m, req); - if (err > 0) - err = -EIO; - - return err; -} - -#if 0 -/** - * v9fs_mux_rpcnb - sends 9P request without waiting for response.
- * @m: mux data - * @tc: request to be sent - * @cb: callback function to be called when response arrives - * @cba: value to pass to the callback function - */ -int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc, - v9fs_mux_req_callback cb, void *a) -{ - int err; - struct v9fs_req *req; - - req = v9fs_send_request(m, tc, cb, a); - if (IS_ERR(req)) { - err = PTR_ERR(req); - dprintk(DEBUG_MUX, "error %d\n", err); - return PTR_ERR(req); - } - - dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); - return 0; -} -#endif /* 0 */ - -/** - * v9fs_mux_cancel - cancel all pending requests with error - * @m: mux data - * @err: error code - */ -void v9fs_mux_cancel(struct v9fs_mux_data *m, int err) -{ - struct v9fs_req *req, *rtmp; - LIST_HEAD(cancel_list); - - dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err); - m->err = err; - spin_lock(&m->lock); - list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { - list_move(&req->req_list, &cancel_list); - } - list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { - list_move(&req->req_list, &cancel_list); - } - spin_unlock(&m->lock); - - list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - list_del(&req->req_list); - if (!req->err) - req->err = err; - - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - } - - wake_up(&m->equeue); -} - -static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m) -{ - int tag; - - tag = v9fs_get_idpool(&m->tagpool); - if (tag < 0) - return V9FS_NOTAG; - else - return (u16) tag; -} - -static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag) -{ - if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool)) - v9fs_put_idpool(tag, &m->tagpool); -} diff --git a/fs/9p/mux.h b/fs/9p/mux.h deleted file mode 100644 index fb10c50186a1..000000000000 --- a/fs/9p/mux.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * linux/fs/9p/mux.h - * - * Multiplexer Definitions - * - * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright 
(C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -struct v9fs_mux_data; -struct v9fs_req; - -/** - * v9fs_mux_req_callback - callback function that is called when the - * response of a request is received. The callback is called from - * a workqueue and shouldn't block. - * - * @a - the pointer that was specified when the request was send to be - * passed to the callback - * @tc - request call - * @rc - response call - * @err - error code (non-zero if error occured) - */ -typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a); - -int v9fs_mux_global_init(void); -void v9fs_mux_global_exit(void); - -struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, - unsigned char *extended); -void v9fs_mux_destroy(struct v9fs_mux_data *); - -int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc); -struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m); -int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc); - -void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush); -void v9fs_mux_cancel(struct v9fs_mux_data *m, int err); -int v9fs_errstr2errno(char *errstr, int len); diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c deleted file mode 100644 index 34d43355beb7..000000000000 --- a/fs/9p/trans_fd.c +++ /dev/null @@ -1,308 +0,0 @@ 
-/* - * linux/fs/9p/trans_fd.c - * - * Fd transport layer. Includes deprecated socket layer. - * - * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> - * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/in.h> -#include <linux/module.h> -#include <linux/net.h> -#include <linux/ipv6.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/un.h> -#include <asm/uaccess.h> -#include <linux/inet.h> -#include <linux/idr.h> -#include <linux/file.h> - -#include "debug.h" -#include "v9fs.h" -#include "transport.h" - -#define V9FS_PORT 564 - -struct v9fs_trans_fd { - struct file *rd; - struct file *wr; -}; - -/** - * v9fs_fd_read- read from a fd - * @v9ses: session information - * @v: buffer to receive data into - * @len: size of receive buffer - * - */ -static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len) -{ - int ret; - struct v9fs_trans_fd *ts; - - if (!trans || trans->status == Disconnected || !(ts = trans->priv)) - return -EREMOTEIO; - - if (!(ts->rd->f_flags & O_NONBLOCK)) - dprintk(DEBUG_ERROR, "blocking read ...\n"); - - ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); - if (ret <= 0 && ret != -ERESTARTSYS && 
ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -/** - * v9fs_fd_write - write to a socket - * @v9ses: session information - * @v: buffer to send data from - * @len: size of send buffer - * - */ -static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len) -{ - int ret; - mm_segment_t oldfs; - struct v9fs_trans_fd *ts; - - if (!trans || trans->status == Disconnected || !(ts = trans->priv)) - return -EREMOTEIO; - - if (!(ts->wr->f_flags & O_NONBLOCK)) - dprintk(DEBUG_ERROR, "blocking write ...\n"); - - oldfs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); - set_fs(oldfs); - - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -static unsigned int -v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt) -{ - int ret, n; - struct v9fs_trans_fd *ts; - mm_segment_t oldfs; - - if (!trans || trans->status != Connected || !(ts = trans->priv)) - return -EREMOTEIO; - - if (!ts->rd->f_op || !ts->rd->f_op->poll) - return -EIO; - - if (!ts->wr->f_op || !ts->wr->f_op->poll) - return -EIO; - - oldfs = get_fs(); - set_fs(get_ds()); - - ret = ts->rd->f_op->poll(ts->rd, pt); - if (ret < 0) - goto end; - - if (ts->rd != ts->wr) { - n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) { - ret = n; - goto end; - } - ret = (ret & ~POLLOUT) | (n & ~POLLIN); - } - - end: - set_fs(oldfs); - return ret; -} - -static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd) -{ - struct v9fs_transport *trans = v9ses->transport; - struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd), - GFP_KERNEL); - if (!ts) - return -ENOMEM; - - ts->rd = fget(rfd); - ts->wr = fget(wfd); - if (!ts->rd || !ts->wr) { - if (ts->rd) - fput(ts->rd); - if (ts->wr) - fput(ts->wr); - kfree(ts); - return -EIO; - } - - trans->priv = ts; - trans->status = Connected; - - return 0; -} - 
-static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, - char *data) -{ - if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); - return -ENOPROTOOPT; - } - - return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno); -} - -static int v9fs_socket_open(struct v9fs_session_info *v9ses, - struct socket *csocket) -{ - int fd, ret; - - csocket->sk->sk_allocation = GFP_NOIO; - if ((fd = sock_map_fd(csocket)) < 0) { - eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n"); - ret = fd; - release_csocket: - sock_release(csocket); - return ret; - } - - if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) { - sockfd_put(csocket); - eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n"); - goto release_csocket; - } - - ((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |= - O_NONBLOCK; - return 0; -} - -static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, - char *data) -{ - int ret; - struct socket *csocket = NULL; - struct sockaddr_in sin_server; - - sin_server.sin_family = AF_INET; - sin_server.sin_addr.s_addr = in_aton(addr); - sin_server.sin_port = htons(v9ses->port); - sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - - if (!csocket) { - eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n"); - return -1; - } - - ret = csocket->ops->connect(csocket, - (struct sockaddr *)&sin_server, - sizeof(struct sockaddr_in), 0); - if (ret < 0) { - eprintk(KERN_ERR, - "v9fs_trans_tcp: problem connecting socket to %s\n", - addr); - return ret; - } - - return v9fs_socket_open(v9ses, csocket); -} - -static int -v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data) -{ - int ret; - struct socket *csocket; - struct sockaddr_un sun_server; - - if (strlen(addr) > UNIX_PATH_MAX) { - eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", - addr); - return -ENAMETOOLONG; - } - - sun_server.sun_family = PF_UNIX; - 
strcpy(sun_server.sun_path, addr); - sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); - ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, - sizeof(struct sockaddr_un) - 1, 0); - if (ret < 0) { - eprintk(KERN_ERR, - "v9fs_trans_unix: problem connecting socket: %s: %d\n", - addr, ret); - return ret; - } - - return v9fs_socket_open(v9ses, csocket); -} - -/** - * v9fs_sock_close - shutdown socket - * @trans: private socket structure - * - */ -static void v9fs_fd_close(struct v9fs_transport *trans) -{ - struct v9fs_trans_fd *ts; - - if (!trans) - return; - - ts = xchg(&trans->priv, NULL); - - if (!ts) - return; - - trans->status = Disconnected; - if (ts->rd) - fput(ts->rd); - if (ts->wr) - fput(ts->wr); - kfree(ts); -} - -struct v9fs_transport v9fs_trans_fd = { - .init = v9fs_fd_init, - .write = v9fs_fd_write, - .read = v9fs_fd_read, - .close = v9fs_fd_close, - .poll = v9fs_fd_poll, -}; - -struct v9fs_transport v9fs_trans_tcp = { - .init = v9fs_tcp_init, - .write = v9fs_fd_write, - .read = v9fs_fd_read, - .close = v9fs_fd_close, - .poll = v9fs_fd_poll, -}; - -struct v9fs_transport v9fs_trans_unix = { - .init = v9fs_unix_init, - .write = v9fs_fd_write, - .read = v9fs_fd_read, - .close = v9fs_fd_close, - .poll = v9fs_fd_poll, -}; diff --git a/fs/9p/transport.h b/fs/9p/transport.h deleted file mode 100644 index b38a4b8a41ce..000000000000 --- a/fs/9p/transport.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * linux/fs/9p/transport.h - * - * Transport Definition - * - * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -enum v9fs_transport_status { - Connected, - Disconnected, - Hung, -}; - -struct v9fs_transport { - enum v9fs_transport_status status; - void *priv; - - int (*init) (struct v9fs_session_info *, const char *, char *); - int (*write) (struct v9fs_transport *, void *, int); - int (*read) (struct v9fs_transport *, void *, int); - void (*close) (struct v9fs_transport *); - unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *); -}; - -extern struct v9fs_transport v9fs_trans_tcp; -extern struct v9fs_transport v9fs_trans_unix; -extern struct v9fs_transport v9fs_trans_fd; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 6ad6f192b6e4..45c35986d49f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -29,16 +29,12 @@ #include <linux/sched.h> #include <linux/parser.h> #include <linux/idr.h> - -#include "debug.h" +#include <net/9p/9p.h> +#include <net/9p/transport.h> +#include <net/9p/conn.h> +#include <net/9p/client.h> #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" -#include "transport.h" -#include "mux.h" - -/* TODO: sysfs or debugfs interface */ -int v9fs_debug_level = 0; /* feature-rific global debug level */ /* * Option Parsing (code inspired by NFS code) @@ -47,12 +43,12 @@ int v9fs_debug_level = 0; /* feature-rific global debug level */ enum { /* Options that take integer arguments */ - Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug, + Opt_debug, Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_rfdno, Opt_wfdno, /* String options */ Opt_uname, Opt_remotename, /* 
Options that take no arguments */ - Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, + Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, Opt_pci, /* Cache options */ Opt_cache_loose, /* Error token */ @@ -60,6 +56,7 @@ enum { }; static match_table_t tokens = { + {Opt_debug, "debug=%x"}, {Opt_port, "port=%u"}, {Opt_msize, "msize=%u"}, {Opt_uid, "uid=%u"}, @@ -67,12 +64,14 @@ static match_table_t tokens = { {Opt_afid, "afid=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, - {Opt_debug, "debug=%x"}, {Opt_uname, "uname=%s"}, {Opt_remotename, "aname=%s"}, {Opt_unix, "proto=unix"}, {Opt_tcp, "proto=tcp"}, {Opt_fd, "proto=fd"}, +#ifdef CONFIG_PCI_9P + {Opt_pci, "proto=pci"}, +#endif {Opt_tcp, "tcp"}, {Opt_unix, "unix"}, {Opt_fd, "fd"}, @@ -83,6 +82,8 @@ static match_table_t tokens = { {Opt_err, NULL} }; +extern struct p9_transport *p9pci_trans_create(void); + /* * Parse option string. */ @@ -122,12 +123,16 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) token = match_token(p, tokens, args); if (token < Opt_uname) { if ((ret = match_int(&args[0], &option)) < 0) { - dprintk(DEBUG_ERROR, + P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; } } switch (token) { + case Opt_debug: + v9ses->debug = option; + p9_debug_level = option; + break; case Opt_port: v9ses->port = option; break; @@ -149,15 +154,15 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) case Opt_wfdno: v9ses->wfdno = option; break; - case Opt_debug: - v9ses->debug = option; - break; case Opt_tcp: v9ses->proto = PROTO_TCP; break; case Opt_unix: v9ses->proto = PROTO_UNIX; break; + case Opt_pci: + v9ses->proto = PROTO_PCI; + break; case Opt_fd: v9ses->proto = PROTO_FD; break; @@ -183,82 +188,6 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) } /** - * v9fs_inode2v9ses - safely extract v9fs session info from super block - * @inode: inode to extract information from - * - * Paranoid 
function to extract v9ses information from superblock, - * if anything is missing it will report an error. - * - */ - -struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) -{ - return (inode->i_sb->s_fs_info); -} - -/** - * v9fs_get_idpool - allocate numeric id from pool - * @p - pool to allocate from - * - * XXX - This seems to be an awful generic function, should it be in idr.c with - * the lock included in struct idr? - */ - -int v9fs_get_idpool(struct v9fs_idpool *p) -{ - int i = 0; - int error; - -retry: - if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) - return 0; - - if (down_interruptible(&p->lock) == -EINTR) { - eprintk(KERN_WARNING, "Interrupted while locking\n"); - return -1; - } - - /* no need to store exactly p, we just need something non-null */ - error = idr_get_new(&p->pool, p, &i); - up(&p->lock); - - if (error == -EAGAIN) - goto retry; - else if (error) - return -1; - - return i; -} - -/** - * v9fs_put_idpool - release numeric id from pool - * @p - pool to allocate from - * - * XXX - This seems to be an awful generic function, should it be in idr.c with - * the lock included in struct idr? 
- */ - -void v9fs_put_idpool(int id, struct v9fs_idpool *p) -{ - if (down_interruptible(&p->lock) == -EINTR) { - eprintk(KERN_WARNING, "Interrupted while locking\n"); - return; - } - idr_remove(&p->pool, id); - up(&p->lock); -} - -/** - * v9fs_check_idpool - check if the specified id is available - * @id - id to check - * @p - pool - */ -int v9fs_check_idpool(int id, struct v9fs_idpool *p) -{ - return idr_find(&p->pool, id) != NULL; -} - -/** * v9fs_session_init - initialize session * @v9ses: session information structure * @dev_name: device being mounted @@ -266,25 +195,21 @@ int v9fs_check_idpool(int id, struct v9fs_idpool *p) * */ -int -v9fs_session_init(struct v9fs_session_info *v9ses, +struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data) { - struct v9fs_fcall *fcall = NULL; - struct v9fs_transport *trans_proto; - int n = 0; - int newfid = -1; int retval = -EINVAL; - struct v9fs_str *version; + struct p9_transport *trans; + struct p9_fid *fid; v9ses->name = __getname(); if (!v9ses->name) - return -ENOMEM; + return ERR_PTR(-ENOMEM); v9ses->remotename = __getname(); if (!v9ses->remotename) { __putname(v9ses->name); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } strcpy(v9ses->name, V9FS_DEFUSER); @@ -292,130 +217,60 @@ v9fs_session_init(struct v9fs_session_info *v9ses, v9fs_parse_options(data, v9ses); - /* set global debug level */ - v9fs_debug_level = v9ses->debug; - - /* id pools that are session-dependent: fids and tags */ - idr_init(&v9ses->fidpool.pool); - init_MUTEX(&v9ses->fidpool.lock); - switch (v9ses->proto) { case PROTO_TCP: - trans_proto = &v9fs_trans_tcp; + trans = p9_trans_create_tcp(dev_name, v9ses->port); break; case PROTO_UNIX: - trans_proto = &v9fs_trans_unix; + trans = p9_trans_create_unix(dev_name); *v9ses->remotename = 0; break; case PROTO_FD: - trans_proto = &v9fs_trans_fd; + trans = p9_trans_create_fd(v9ses->rfdno, v9ses->wfdno); *v9ses->remotename = 0; break; +#ifdef CONFIG_PCI_9P + case 
PROTO_PCI: + trans = p9pci_trans_create(); + *v9ses->remotename = 0; + break; +#endif default: printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto); retval = -ENOPROTOOPT; - goto SessCleanUp; + goto error; }; - v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL); - if (!v9ses->transport) { - retval = -ENOMEM; - goto SessCleanUp; + if (IS_ERR(trans)) { + retval = PTR_ERR(trans); + trans = NULL; + goto error; } - memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport)); + v9ses->clnt = p9_client_create(trans, v9ses->maxdata + P9_IOHDRSZ, + v9ses->extended); - if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) { - eprintk(KERN_ERR, "problem initializing transport\n"); - goto SessCleanUp; + if (IS_ERR(v9ses->clnt)) { + retval = PTR_ERR(v9ses->clnt); + v9ses->clnt = NULL; + P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n"); + goto error; } - v9ses->inprogress = 0; - v9ses->shutdown = 0; - v9ses->session_hung = 0; - - v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ, - &v9ses->extended); - - if (IS_ERR(v9ses->mux)) { - retval = PTR_ERR(v9ses->mux); - v9ses->mux = NULL; - dprintk(DEBUG_ERROR, "problem initializing mux\n"); - goto SessCleanUp; + fid = p9_client_attach(v9ses->clnt, NULL, v9ses->name, + v9ses->remotename); + if (IS_ERR(fid)) { + retval = PTR_ERR(fid); + fid = NULL; + P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n"); + goto error; } - if (v9ses->afid == ~0) { - if (v9ses->extended) - retval = - v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u", - &fcall); - else - retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000", - &fcall); - - if (retval < 0) { - dprintk(DEBUG_ERROR, "v9fs_t_version failed\n"); - goto FreeFcall; - } - - version = &fcall->params.rversion.version; - if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) { - dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n"); - v9ses->extended = 1; - } else if (version->len==6 && !memcmp(version->str, "9P2000", 
6)) { - dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n"); - v9ses->extended = 0; - } else { - retval = -EREMOTEIO; - goto FreeFcall; - } + return fid; - n = fcall->params.rversion.msize; - kfree(fcall); - - if (n < v9ses->maxdata) - v9ses->maxdata = n; - } - - newfid = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { - eprintk(KERN_WARNING, "couldn't allocate FID\n"); - retval = -ENOMEM; - goto SessCleanUp; - } - /* it is a little bit ugly, but we have to prevent newfid */ - /* being the same as afid, so if it is, get a new fid */ - if (v9ses->afid != ~0 && newfid == v9ses->afid) { - newfid = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { - eprintk(KERN_WARNING, "couldn't allocate FID\n"); - retval = -ENOMEM; - goto SessCleanUp; - } - } - - if ((retval = - v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid, - v9ses->afid, NULL)) - < 0) { - dprintk(DEBUG_ERROR, "cannot attach\n"); - goto SessCleanUp; - } - - if (v9ses->afid != ~0) { - dprintk(DEBUG_ERROR, "afid not equal to ~0\n"); - if (v9fs_t_clunk(v9ses, v9ses->afid)) - dprintk(DEBUG_ERROR, "clunk failed\n"); - } - - return newfid; - - FreeFcall: - kfree(fcall); - - SessCleanUp: +error: v9fs_session_close(v9ses); - return retval; + return ERR_PTR(retval); } /** @@ -426,15 +281,9 @@ v9fs_session_init(struct v9fs_session_info *v9ses, void v9fs_session_close(struct v9fs_session_info *v9ses) { - if (v9ses->mux) { - v9fs_mux_destroy(v9ses->mux); - v9ses->mux = NULL; - } - - if (v9ses->transport) { - v9ses->transport->close(v9ses->transport); - kfree(v9ses->transport); - v9ses->transport = NULL; + if (v9ses->clnt) { + p9_client_destroy(v9ses->clnt); + v9ses->clnt = NULL; } __putname(v9ses->name); @@ -446,9 +295,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) * and cancel all pending requests. 
*/ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { - dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses); - v9ses->transport->status = Disconnected; - v9fs_mux_cancel(v9ses->mux, -EIO); + P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); + p9_client_disconnect(v9ses->clnt); } extern int v9fs_error_init(void); @@ -460,24 +308,9 @@ extern int v9fs_error_init(void); static int __init init_v9fs(void) { - int ret; - - v9fs_error_init(); - printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); - ret = v9fs_mux_global_init(); - if (ret) { - printk(KERN_WARNING "v9fs: starting mux failed\n"); - return ret; - } - ret = register_filesystem(&v9fs_fs_type); - if (ret) { - printk(KERN_WARNING "v9fs: registering file system failed\n"); - v9fs_mux_global_exit(); - } - - return ret; + return register_filesystem(&v9fs_fs_type); } /** @@ -487,13 +320,13 @@ static int __init init_v9fs(void) static void __exit exit_v9fs(void) { - v9fs_mux_global_exit(); unregister_filesystem(&v9fs_fs_type); } module_init(init_v9fs) module_exit(exit_v9fs) +MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); MODULE_LICENSE("GPL"); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 820bf5ca35d8..abc4b1668ace 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -22,16 +22,6 @@ */ /* - * Idpool structure provides lock and id management - * - */ - -struct v9fs_idpool { - struct semaphore lock; - struct idr pool; -}; - -/* * Session structure provides information for an opened session * */ @@ -54,15 +44,7 @@ struct v9fs_session_info { unsigned int uid; /* default uid/muid for legacy support */ unsigned int gid; /* default gid for legacy support */ - /* book keeping */ - struct v9fs_idpool fidpool; /* The FID pool for file descriptors */ - - struct v9fs_transport *transport; - struct v9fs_mux_data *mux; - - int inprogress; /* session in progress => true */ - int shutdown; /* 
session shutting down. no more attaches. */ - unsigned char session_hung; + struct p9_client *clnt; /* 9p client */ struct dentry *debugfs_dir; }; @@ -71,6 +53,7 @@ enum { PROTO_TCP, PROTO_UNIX, PROTO_FD, + PROTO_PCI, }; /* possible values of ->cache */ @@ -82,12 +65,9 @@ enum { extern struct dentry *v9fs_debugfs_root; -int v9fs_session_init(struct v9fs_session_info *, const char *, char *); -struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); +struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, + char *); void v9fs_session_close(struct v9fs_session_info *v9ses); -int v9fs_get_idpool(struct v9fs_idpool *p); -void v9fs_put_idpool(int id, struct v9fs_idpool *p); -int v9fs_check_idpool(int id, struct v9fs_idpool *p); void v9fs_session_cancel(struct v9fs_session_info *v9ses); #define V9FS_MAGIC 0x01021997 @@ -97,3 +77,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses); #define V9FS_DEFUSER "nobody" #define V9FS_DEFANAME "" +static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) +{ + return (inode->i_sb->s_fs_info); +} diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 6a82d39dc498..fd01d90cada5 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -45,10 +45,10 @@ extern struct dentry_operations v9fs_dentry_operations; extern struct dentry_operations v9fs_cached_dentry_operations; struct inode *v9fs_get_inode(struct super_block *sb, int mode); -ino_t v9fs_qid2ino(struct v9fs_qid *qid); -void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *); +ino_t v9fs_qid2ino(struct p9_qid *qid); +void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *); int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); -void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat); +void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); void v9fs_dentry_release(struct dentry *); int 
v9fs_uflags2omode(int uflags); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 9ac4ffe9ac7d..6248f0e727a3 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -33,10 +33,10 @@ #include <linux/pagemap.h> #include <linux/idr.h> #include <linux/sched.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -50,55 +50,26 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page) { - char *buffer = NULL; - int retval = -EIO; - loff_t offset = page_offset(page); - int count = PAGE_CACHE_SIZE; - struct inode *inode = filp->f_path.dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - int rsize = v9ses->maxdata - V9FS_IOHDRSZ; - struct v9fs_fid *v9f = filp->private_data; - struct v9fs_fcall *fcall = NULL; - int fid = v9f->fid; - int total = 0; - int result = 0; - - dprintk(DEBUG_VFS, "\n"); + int retval; + loff_t offset; + char *buffer; + struct p9_fid *fid; + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + fid = filp->private_data; buffer = kmap(page); - do { - if (count < rsize) - rsize = count; - - result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall); - - if (result < 0) { - printk(KERN_ERR "v9fs_t_read returned %d\n", - result); - - kfree(fcall); - goto UnmapAndUnlock; - } else - offset += result; - - memcpy(buffer, fcall->params.rread.data, result); - - count -= result; - buffer += result; - total += result; - - kfree(fcall); + offset = page_offset(page); - if (result < rsize) - break; - } while (count); + retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE); + if (retval < 0) + goto done; - memset(buffer, 0, count); + memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval); flush_dcache_page(page); SetPageUptodate(page); retval = 0; -UnmapAndUnlock: +done: kunmap(page); unlock_page(page); return retval; diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index d93960429c09..f9534f18df0a 100644 --- a/fs/9p/vfs_dentry.c +++ 
b/fs/9p/vfs_dentry.c @@ -34,10 +34,10 @@ #include <linux/namei.h> #include <linux/idr.h> #include <linux/sched.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -52,7 +52,7 @@ static int v9fs_dentry_delete(struct dentry *dentry) { - dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); return 1; } @@ -69,7 +69,7 @@ static int v9fs_dentry_delete(struct dentry *dentry) static int v9fs_cached_dentry_delete(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); if(!inode) return 1; @@ -85,26 +85,19 @@ static int v9fs_cached_dentry_delete(struct dentry *dentry) void v9fs_dentry_release(struct dentry *dentry) { - int err; - - dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); - - if (dentry->d_fsdata != NULL) { - struct list_head *fid_list = dentry->d_fsdata; - struct v9fs_fid *temp = NULL; - struct v9fs_fid *current_fid = NULL; - - list_for_each_entry_safe(current_fid, temp, fid_list, list) { - err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid); - - if (err < 0) - dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n", - err, dentry->d_iname); - - v9fs_fid_destroy(current_fid); + struct v9fs_dentry *dent; + struct p9_fid *temp, *current_fid; + + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + dent = dentry->d_fsdata; + if (dent) { + list_for_each_entry_safe(current_fid, temp, &dent->fidlist, + dlist) { + p9_client_clunk(current_fid); } - kfree(dentry->d_fsdata); /* free the list_head */ + kfree(dent); + dentry->d_fsdata = NULL; } } diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 1dd86ee90bc5..0924d4477da3 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -32,11 +32,10 @@ #include 
<linux/sched.h> #include <linux/inet.h> #include <linux/idr.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> -#include "debug.h" #include "v9fs.h" -#include "9p.h" -#include "conv.h" #include "v9fs_vfs.h" #include "fid.h" @@ -46,14 +45,14 @@ * */ -static inline int dt_type(struct v9fs_stat *mistat) +static inline int dt_type(struct p9_stat *mistat) { unsigned long perm = mistat->mode; int rettype = DT_REG; - if (perm & V9FS_DMDIR) + if (perm & P9_DMDIR) rettype = DT_DIR; - if (perm & V9FS_DMSYMLINK) + if (perm & P9_DMSYMLINK) rettype = DT_LNK; return rettype; @@ -69,106 +68,36 @@ static inline int dt_type(struct v9fs_stat *mistat) static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct v9fs_fcall *fcall = NULL; - struct inode *inode = filp->f_path.dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *file = filp->private_data; - unsigned int i, n, s; - int fid = -1; - int ret = 0; - struct v9fs_stat stat; - int over = 0; - - dprintk(DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); - - fid = file->fid; - - if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { - kfree(file->rdir_fcall); - file->rdir_fcall = NULL; - } - - if (file->rdir_fcall) { - n = file->rdir_fcall->params.rread.count; - i = file->rdir_fpos; - while (i < n) { - s = v9fs_deserialize_stat( - file->rdir_fcall->params.rread.data + i, - n - i, &stat, v9ses->extended); - - if (s == 0) { - dprintk(DEBUG_ERROR, - "error while deserializing stat\n"); - ret = -EIO; - goto FreeStructs; - } - - over = filldir(dirent, stat.name.str, stat.name.len, - filp->f_pos, v9fs_qid2ino(&stat.qid), - dt_type(&stat)); - - if (over) { - file->rdir_fpos = i; - file->rdir_pos = filp->f_pos; - break; - } - - i += s; - filp->f_pos += s; - } - - if (!over) { - kfree(file->rdir_fcall); - file->rdir_fcall = NULL; - } - } - - while (!over) { - ret = v9fs_t_read(v9ses, fid, filp->f_pos, - v9ses->maxdata-V9FS_IOHDRSZ, &fcall); - if (ret 
< 0) { - dprintk(DEBUG_ERROR, "error while reading: %d: %p\n", - ret, fcall); - goto FreeStructs; - } else if (ret == 0) + int over; + struct p9_fid *fid; + struct v9fs_session_info *v9ses; + struct inode *inode; + struct p9_stat *st; + + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); + inode = filp->f_path.dentry->d_inode; + v9ses = v9fs_inode2v9ses(inode); + fid = filp->private_data; + while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) { + if (IS_ERR(st)) + return PTR_ERR(st); + + over = filldir(dirent, st->name.str, st->name.len, filp->f_pos, + v9fs_qid2ino(&st->qid), dt_type(st)); + + if (over) break; - n = ret; - i = 0; - while (i < n) { - s = v9fs_deserialize_stat(fcall->params.rread.data + i, - n - i, &stat, v9ses->extended); - - if (s == 0) { - dprintk(DEBUG_ERROR, - "error while deserializing stat\n"); - return -EIO; - } - - over = filldir(dirent, stat.name.str, stat.name.len, - filp->f_pos, v9fs_qid2ino(&stat.qid), - dt_type(&stat)); - - if (over) { - file->rdir_fcall = fcall; - file->rdir_fpos = i; - file->rdir_pos = filp->f_pos; - fcall = NULL; - break; - } - - i += s; - filp->f_pos += s; - } - - kfree(fcall); + filp->f_pos += st->size; + kfree(st); + st = NULL; } - FreeStructs: - kfree(fcall); - return ret; + kfree(st); + return 0; } + /** * v9fs_dir_release - close a directory * @inode: inode of the directory @@ -178,29 +107,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) int v9fs_dir_release(struct inode *inode, struct file *filp) { - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *fid = filp->private_data; - int fidnum = -1; - - dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, - fid->fid); - fidnum = fid->fid; + struct p9_fid *fid; + fid = filp->private_data; + P9_DPRINTK(P9_DEBUG_VFS, + "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid); filemap_write_and_wait(inode->i_mapping); - - if (fidnum >= 0) { - dprintk(DEBUG_VFS, 
"fidopen: %d v9f->fid: %d\n", fid->fidopen, - fid->fid); - - if (v9fs_t_clunk(v9ses, fidnum)) - dprintk(DEBUG_ERROR, "clunk failed\n"); - - kfree(fid->rdir_fcall); - kfree(fid); - - filp->private_data = NULL; - } - + p9_client_clunk(fid); return 0; } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 6e7678e4852f..2a40c2946d0a 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -34,10 +34,10 @@ #include <linux/list.h> #include <asm/uaccess.h> #include <linux/idr.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -52,48 +52,40 @@ static const struct file_operations v9fs_cached_file_operations; int v9fs_file_open(struct inode *inode, struct file *file) { - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *vfid; - struct v9fs_fcall *fcall = NULL; - int omode; int err; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + int omode; - dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file); - - vfid = v9fs_fid_clone(file->f_path.dentry); - if (IS_ERR(vfid)) - return PTR_ERR(vfid); - + P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); + v9ses = v9fs_inode2v9ses(inode); omode = v9fs_uflags2omode(file->f_flags); - err = v9fs_t_open(v9ses, vfid->fid, omode, &fcall); - if (err < 0) { - PRINT_FCALL_ERROR("open failed", fcall); - goto Clunk_Fid; + fid = file->private_data; + if (!fid) { + fid = v9fs_fid_clone(file->f_path.dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + err = p9_client_open(fid, omode); + if (err < 0) { + p9_client_clunk(fid); + return err; + } + if (omode & P9_OTRUNC) { + inode->i_size = 0; + inode->i_blocks = 0; + } } - file->private_data = vfid; - vfid->fidopen = 1; - vfid->fidclunked = 0; - vfid->iounit = fcall->params.ropen.iounit; - vfid->rdir_pos = 0; - vfid->rdir_fcall = NULL; - vfid->filp = file; - kfree(fcall); - - if((vfid->qid.version) && (v9ses->cache)) { - dprintk(DEBUG_VFS, "cached"); 
+ file->private_data = fid; + if ((fid->qid.version) && (v9ses->cache)) { + P9_DPRINTK(P9_DEBUG_VFS, "cached"); /* enable cached file options */ if(file->f_op == &v9fs_file_operations) file->f_op = &v9fs_cached_file_operations; } return 0; - -Clunk_Fid: - v9fs_fid_clunk(v9ses, vfid); - kfree(fcall); - - return err; } /** @@ -110,7 +102,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) int res = 0; struct inode *inode = filp->f_path.dentry->d_inode; - dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); + P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); /* No mandatory locks */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) @@ -136,55 +128,16 @@ static ssize_t v9fs_file_read(struct file *filp, char __user * data, size_t count, loff_t * offset) { - struct inode *inode = filp->f_path.dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *v9f = filp->private_data; - struct v9fs_fcall *fcall = NULL; - int fid = v9f->fid; - int rsize = 0; - int result = 0; - int total = 0; - int n; - - dprintk(DEBUG_VFS, "\n"); - - rsize = v9ses->maxdata - V9FS_IOHDRSZ; - if (v9f->iounit != 0 && rsize > v9f->iounit) - rsize = v9f->iounit; - - do { - if (count < rsize) - rsize = count; + int ret; + struct p9_fid *fid; - result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall); + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + fid = filp->private_data; + ret = p9_client_uread(fid, data, *offset, count); + if (ret > 0) + *offset += ret; - if (result < 0) { - printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n", - result); - - kfree(fcall); - return total; - } else - *offset += result; - - n = copy_to_user(data, fcall->params.rread.data, result); - if (n) { - dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n); - kfree(fcall); - return -EFAULT; - } - - count -= result; - data += result; - total += result; - - kfree(fcall); - - if (result < rsize) - break; - } while (count); - - return total; + return ret; } /** @@ 
-200,50 +153,25 @@ static ssize_t v9fs_file_write(struct file *filp, const char __user * data, size_t count, loff_t * offset) { + int ret; + struct p9_fid *fid; struct inode *inode = filp->f_path.dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *v9fid = filp->private_data; - struct v9fs_fcall *fcall; - int fid = v9fid->fid; - int result = -EIO; - int rsize = 0; - int total = 0; - - dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, - (int)*offset); - rsize = v9ses->maxdata - V9FS_IOHDRSZ; - if (v9fid->iounit != 0 && rsize > v9fid->iounit) - rsize = v9fid->iounit; - - do { - if (count < rsize) - rsize = count; - result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall); - if (result < 0) { - PRINT_FCALL_ERROR("error while writing", fcall); - kfree(fcall); - return result; - } else - *offset += result; + P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, + (int)count, (int)*offset); - kfree(fcall); - fcall = NULL; + fid = filp->private_data; + ret = p9_client_uwrite(fid, data, *offset, count); + if (ret > 0) + *offset += ret; - if (result != rsize) { - eprintk(KERN_ERR, - "short write: v9fs_t_write returned %d\n", - result); - break; - } - - count -= result; - data += result; - total += result; - } while (count); + if (*offset > inode->i_size) { + inode->i_size = *offset; + inode->i_blocks = (inode->i_size + 512 - 1) >> 9; + } invalidate_inode_pages2(inode->i_mapping); - return total; + return ret; } static const struct file_operations v9fs_cached_file_operations = { diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c76cd8fa3f6c..e5c45eed58a9 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -34,10 +34,10 @@ #include <linux/namei.h> #include <linux/idr.h> #include <linux/sched.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -58,27 +58,27 @@ static int 
unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) int res; res = mode & 0777; if (S_ISDIR(mode)) - res |= V9FS_DMDIR; + res |= P9_DMDIR; if (v9ses->extended) { if (S_ISLNK(mode)) - res |= V9FS_DMSYMLINK; + res |= P9_DMSYMLINK; if (v9ses->nodev == 0) { if (S_ISSOCK(mode)) - res |= V9FS_DMSOCKET; + res |= P9_DMSOCKET; if (S_ISFIFO(mode)) - res |= V9FS_DMNAMEDPIPE; + res |= P9_DMNAMEDPIPE; if (S_ISBLK(mode)) - res |= V9FS_DMDEVICE; + res |= P9_DMDEVICE; if (S_ISCHR(mode)) - res |= V9FS_DMDEVICE; + res |= P9_DMDEVICE; } if ((mode & S_ISUID) == S_ISUID) - res |= V9FS_DMSETUID; + res |= P9_DMSETUID; if ((mode & S_ISGID) == S_ISGID) - res |= V9FS_DMSETGID; - if ((mode & V9FS_DMLINK)) - res |= V9FS_DMLINK; + res |= P9_DMSETGID; + if ((mode & P9_DMLINK)) + res |= P9_DMLINK; } return res; @@ -97,27 +97,27 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) res = mode & 0777; - if ((mode & V9FS_DMDIR) == V9FS_DMDIR) + if ((mode & P9_DMDIR) == P9_DMDIR) res |= S_IFDIR; - else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended)) + else if ((mode & P9_DMSYMLINK) && (v9ses->extended)) res |= S_IFLNK; - else if ((mode & V9FS_DMSOCKET) && (v9ses->extended) + else if ((mode & P9_DMSOCKET) && (v9ses->extended) && (v9ses->nodev == 0)) res |= S_IFSOCK; - else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended) + else if ((mode & P9_DMNAMEDPIPE) && (v9ses->extended) && (v9ses->nodev == 0)) res |= S_IFIFO; - else if ((mode & V9FS_DMDEVICE) && (v9ses->extended) + else if ((mode & P9_DMDEVICE) && (v9ses->extended) && (v9ses->nodev == 0)) res |= S_IFBLK; else res |= S_IFREG; if (v9ses->extended) { - if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID) + if ((mode & P9_DMSETUID) == P9_DMSETUID) res |= S_ISUID; - if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID) + if ((mode & P9_DMSETGID) == P9_DMSETGID) res |= S_ISGID; } @@ -132,26 +132,26 @@ int v9fs_uflags2omode(int uflags) switch (uflags&3) { default: case O_RDONLY: - ret = V9FS_OREAD; + ret = P9_OREAD; break; case O_WRONLY: 
- ret = V9FS_OWRITE; + ret = P9_OWRITE; break; case O_RDWR: - ret = V9FS_ORDWR; + ret = P9_ORDWR; break; } if (uflags & O_EXCL) - ret |= V9FS_OEXCL; + ret |= P9_OEXCL; if (uflags & O_TRUNC) - ret |= V9FS_OTRUNC; + ret |= P9_OTRUNC; if (uflags & O_APPEND) - ret |= V9FS_OAPPEND; + ret |= P9_OAPPEND; return ret; } @@ -164,7 +164,7 @@ int v9fs_uflags2omode(int uflags) */ static void -v9fs_blank_wstat(struct v9fs_wstat *wstat) +v9fs_blank_wstat(struct p9_wstat *wstat) { wstat->type = ~0; wstat->dev = ~0; @@ -197,7 +197,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; - dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); + P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); inode = new_inode(sb); if (inode) { @@ -215,7 +215,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) case S_IFCHR: case S_IFSOCK: if(!v9ses->extended) { - dprintk(DEBUG_ERROR, "special files without extended mode\n"); + P9_DPRINTK(P9_DEBUG_ERROR, + "special files without extended mode\n"); return ERR_PTR(-EINVAL); } init_special_inode(inode, inode->i_mode, @@ -227,7 +228,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) break; case S_IFLNK: if(!v9ses->extended) { - dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n"); + P9_DPRINTK(P9_DEBUG_ERROR, + "extended modes used w/o 9P2000.u\n"); return ERR_PTR(-EINVAL); } inode->i_op = &v9fs_symlink_inode_operations; @@ -241,71 +243,19 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) inode->i_fop = &v9fs_dir_operations; break; default: - dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", + P9_DPRINTK(P9_DEBUG_ERROR, + "BAD mode 0x%x S_IFMT 0x%x\n", mode, mode & S_IFMT); return ERR_PTR(-EINVAL); } } else { - eprintk(KERN_WARNING, "Problem allocating inode\n"); + P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); return ERR_PTR(-ENOMEM); } return inode; } -static int -v9fs_create(struct 
v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm, - u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit) -{ - int fid; - int err; - struct v9fs_fcall *fcall; - - fid = v9fs_get_idpool(&v9ses->fidpool); - if (fid < 0) { - eprintk(KERN_WARNING, "no free fids available\n"); - return -ENOSPC; - } - - err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall); - if (err < 0) { - PRINT_FCALL_ERROR("clone error", fcall); - if (fcall && fcall->id == RWALK) - goto clunk_fid; - else - goto put_fid; - } - kfree(fcall); - - err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall); - if (err < 0) { - PRINT_FCALL_ERROR("create fails", fcall); - goto clunk_fid; - } - - if (iounit) - *iounit = fcall->params.rcreate.iounit; - - if (qid) - *qid = fcall->params.rcreate.qid; - - if (fidp) - *fidp = fid; - - kfree(fcall); - return 0; - -clunk_fid: - v9fs_t_clunk(v9ses, fid); - fid = V9FS_NOFID; - -put_fid: - if (fid != V9FS_NOFID) - v9fs_put_idpool(fid, &v9ses->fidpool); - - kfree(fcall); - return err; -} - +/* static struct v9fs_fid* v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry) { @@ -355,23 +305,25 @@ error: kfree(fcall); return ERR_PTR(err); } +*/ static struct inode * -v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid, +v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { int err, umode; struct inode *ret; - struct v9fs_fcall *fcall; + struct p9_stat *st; ret = NULL; - err = v9fs_t_stat(v9ses, fid, &fcall); - if (err) { - PRINT_FCALL_ERROR("stat error", fcall); + st = p9_client_stat(fid); + if (IS_ERR(st)) { + err = PTR_ERR(st); + st = NULL; goto error; } - umode = p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode); + umode = p9mode2unixmode(v9ses, st->mode); ret = v9fs_get_inode(sb, umode); if (IS_ERR(ret)) { err = PTR_ERR(ret); @@ -379,12 +331,13 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid, goto error; } - 
v9fs_stat2inode(&fcall->params.rstat.stat, ret, sb); - kfree(fcall); + v9fs_stat2inode(st, ret, sb); + ret->i_ino = v9fs_qid2ino(&st->qid); + kfree(st); return ret; error: - kfree(fcall); + kfree(st); if (ret) iput(ret); @@ -401,43 +354,20 @@ error: static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) { - struct v9fs_fcall *fcall = NULL; - struct super_block *sb = NULL; - struct v9fs_session_info *v9ses = NULL; - struct v9fs_fid *v9fid = NULL; - struct inode *file_inode = NULL; - int fid = -1; - int result = 0; + struct inode *file_inode; + struct v9fs_session_info *v9ses; + struct p9_fid *v9fid; - dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, + P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, rmdir); file_inode = file->d_inode; - sb = file_inode->i_sb; v9ses = v9fs_inode2v9ses(file_inode); v9fid = v9fs_fid_clone(file); if(IS_ERR(v9fid)) return PTR_ERR(v9fid); - fid = v9fid->fid; - if (fid < 0) { - dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n", - file_inode->i_ino); - return -EBADF; - } - - result = v9fs_t_remove(v9ses, fid, &fcall); - if (result < 0) { - PRINT_FCALL_ERROR("remove fails", fcall); - goto Error; - } - - v9fs_put_idpool(fid, &v9ses->fidpool); - v9fs_fid_destroy(v9fid); - -Error: - kfree(fcall); - return result; + return p9_client_remove(v9fid); } static int @@ -446,61 +376,59 @@ v9fs_open_created(struct inode *inode, struct file *file) return 0; } + /** - * v9fs_vfs_create - VFS hook to create files - * @inode: directory inode that is being deleted - * @dentry: dentry that is being deleted - * @mode: create permissions - * @nd: path information + * v9fs_create - Create a file + * @dentry: dentry that is being created + * @perm: create permissions + * @mode: open mode * */ - -static int -v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) +static struct p9_fid * +v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, + struct dentry *dentry, 
char *extension, u32 perm, u8 mode) { int err; - u32 fid, perm, iounit; - int flags; - struct v9fs_session_info *v9ses; - struct v9fs_fid *dfid, *vfid, *ffid; + char *name; + struct p9_fid *dfid, *ofid, *fid; struct inode *inode; - struct v9fs_qid qid; - struct file *filp; - inode = NULL; - vfid = NULL; - v9ses = v9fs_inode2v9ses(dir); + err = 0; + ofid = NULL; + fid = NULL; + name = (char *) dentry->d_name.name; dfid = v9fs_fid_clone(dentry->d_parent); if(IS_ERR(dfid)) { err = PTR_ERR(dfid); + dfid = NULL; goto error; } - perm = unixmode2p9mode(v9ses, mode); - if (nd && nd->flags & LOOKUP_OPEN) - flags = nd->intent.open.flags - 1; - else - flags = O_RDWR; - - err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, - perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit); + /* clone a fid to use for creation */ + ofid = p9_client_walk(dfid, 0, NULL, 1); + if (IS_ERR(ofid)) { + err = PTR_ERR(ofid); + ofid = NULL; + goto error; + } - if (err) - goto clunk_dfid; + err = p9_client_fcreate(ofid, name, perm, mode, extension); + if (err < 0) + goto error; - vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); - v9fs_fid_clunk(v9ses, dfid); - if (IS_ERR(vfid)) { - err = PTR_ERR(vfid); - vfid = NULL; + /* now walk from the parent so we can get unopened fid */ + fid = p9_client_walk(dfid, 1, &name, 0); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; goto error; - } + } else + dfid = NULL; - inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); + /* instantiate inode and assign the unopened fid to the dentry */ + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); - inode = NULL; goto error; } @@ -508,35 +436,78 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, dentry->d_op = &v9fs_cached_dentry_operations; else dentry->d_op = &v9fs_dentry_operations; + d_instantiate(dentry, inode); + v9fs_fid_add(dentry, fid); + return ofid; - if (nd && nd->flags & LOOKUP_OPEN) { - ffid = 
v9fs_fid_create(v9ses, fid); - if (!ffid) - return -ENOMEM; +error: + if (dfid) + p9_client_clunk(dfid); + + if (ofid) + p9_client_clunk(ofid); + + if (fid) + p9_client_clunk(fid); + + return ERR_PTR(err); +} + +/** + * v9fs_vfs_create - VFS hook to create files + * @inode: directory inode that is being created + * @dentry: dentry that is being deleted + * @mode: create permissions + * @nd: path information + * + */ +static int +v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + int err; + u32 perm; + int flags; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct file *filp; + + err = 0; + fid = NULL; + v9ses = v9fs_inode2v9ses(dir); + perm = unixmode2p9mode(v9ses, mode); + if (nd && nd->flags & LOOKUP_OPEN) + flags = nd->intent.open.flags - 1; + else + flags = O_RDWR; + + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, + v9fs_uflags2omode(flags)); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + /* if we are opening a file, assign the open fid to the file */ + if (nd && nd->flags & LOOKUP_OPEN) { filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); if (IS_ERR(filp)) { - v9fs_fid_destroy(ffid); - return PTR_ERR(filp); + err = PTR_ERR(filp); + goto error; } - ffid->rdir_pos = 0; - ffid->rdir_fcall = NULL; - ffid->fidopen = 1; - ffid->iounit = iounit; - ffid->filp = filp; - filp->private_data = ffid; - } + filp->private_data = fid; + } else + p9_client_clunk(fid); return 0; -clunk_dfid: - v9fs_fid_clunk(v9ses, dfid); - error: - if (vfid) - v9fs_fid_destroy(vfid); + if (fid) + p9_client_clunk(fid); return err; } @@ -552,57 +523,23 @@ error: static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { int err; - u32 fid, perm; + u32 perm; struct v9fs_session_info *v9ses; - struct v9fs_fid *dfid, *vfid; - struct inode *inode; + struct p9_fid *fid; - inode = NULL; - vfid = NULL; + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + err = 0; 
v9ses = v9fs_inode2v9ses(dir); - dfid = v9fs_fid_clone(dentry->d_parent); - if(IS_ERR(dfid)) { - err = PTR_ERR(dfid); - goto error; - } - perm = unixmode2p9mode(v9ses, mode | S_IFDIR); - - err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, - perm, V9FS_OREAD, NULL, &fid, NULL, NULL); - - if (err) { - dprintk(DEBUG_ERROR, "create error %d\n", err); - goto clean_up_dfid; + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; } - vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); - if (IS_ERR(vfid)) { - err = PTR_ERR(vfid); - vfid = NULL; - goto clean_up_dfid; - } + if (fid) + p9_client_clunk(fid); - v9fs_fid_clunk(v9ses, dfid); - inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - inode = NULL; - v9fs_fid_destroy(vfid); - goto error; - } - - if(v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; - else - dentry->d_op = &v9fs_dentry_operations; - d_instantiate(dentry, inode); - return 0; - -clean_up_dfid: - v9fs_fid_clunk(v9ses, dfid); - -error: return err; } @@ -619,104 +556,54 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb; struct v9fs_session_info *v9ses; - struct v9fs_fid *dirfid; - struct v9fs_fid *fid; + struct p9_fid *dfid, *fid; struct inode *inode; - struct v9fs_fcall *fcall = NULL; - int dirfidnum = -1; - int newfid = -1; + char *name; int result = 0; - dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", + P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", dir, dentry->d_name.name, dentry, nameidata); sb = dir->i_sb; v9ses = v9fs_inode2v9ses(dir); - dirfid = v9fs_fid_lookup(dentry->d_parent); - - if(IS_ERR(dirfid)) - return ERR_PTR(PTR_ERR(dirfid)); - - dirfidnum = dirfid->fid; - - newfid = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { - eprintk(KERN_WARNING, "newfid fails!\n"); - result = -ENOSPC; - goto Release_Dirfid; 
- } - - result = v9fs_t_walk(v9ses, dirfidnum, newfid, - (char *)dentry->d_name.name, &fcall); - - up(&dirfid->lock); - - if (result < 0) { - if (fcall && fcall->id == RWALK) - v9fs_t_clunk(v9ses, newfid); - else - v9fs_put_idpool(newfid, &v9ses->fidpool); - + dfid = v9fs_fid_lookup(dentry->d_parent); + if (IS_ERR(dfid)) + return ERR_PTR(PTR_ERR(dfid)); + + name = (char *) dentry->d_name.name; + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + result = PTR_ERR(fid); if (result == -ENOENT) { d_add(dentry, NULL); - dprintk(DEBUG_VFS, - "Return negative dentry %p count %d\n", - dentry, atomic_read(&dentry->d_count)); - kfree(fcall); return NULL; } - dprintk(DEBUG_ERROR, "walk error:%d\n", result); - goto FreeFcall; - } - kfree(fcall); - - result = v9fs_t_stat(v9ses, newfid, &fcall); - if (result < 0) { - dprintk(DEBUG_ERROR, "stat error\n"); - goto FreeFcall; - } - - inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses, - fcall->params.rstat.stat.mode)); - if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { - eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n", - PTR_ERR(inode)); - - result = -ENOSPC; - goto FreeFcall; + return ERR_PTR(result); } - inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid); - - fid = v9fs_fid_create(v9ses, newfid); - if (fid == NULL) { - dprintk(DEBUG_ERROR, "couldn't insert\n"); - result = -ENOMEM; - goto FreeFcall; + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + result = PTR_ERR(inode); + inode = NULL; + goto error; } - result = v9fs_fid_insert(fid, dentry); + result = v9fs_fid_add(dentry, fid); if (result < 0) - goto FreeFcall; + goto error; - fid->qid = fcall->params.rstat.stat.qid; - v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); if((fid->qid.version)&&(v9ses->cache)) dentry->d_op = &v9fs_cached_dentry_operations; else dentry->d_op = &v9fs_dentry_operations; d_add(dentry, inode); - kfree(fcall); - return NULL; -Release_Dirfid: - up(&dirfid->lock); - -FreeFcall: - 
kfree(fcall); +error: + if (fid) + p9_client_clunk(fid); return ERR_PTR(result); } @@ -758,73 +645,54 @@ static int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct inode *old_inode = old_dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode); - struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); - struct v9fs_fid *olddirfid; - struct v9fs_fid *newdirfid; - struct v9fs_wstat wstat; - struct v9fs_fcall *fcall = NULL; - int fid = -1; - int olddirfidnum = -1; - int newdirfidnum = -1; - int retval = 0; - - dprintk(DEBUG_VFS, "\n"); + struct inode *old_inode; + struct v9fs_session_info *v9ses; + struct p9_fid *oldfid; + struct p9_fid *olddirfid; + struct p9_fid *newdirfid; + struct p9_wstat wstat; + int retval; + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + retval = 0; + old_inode = old_dentry->d_inode; + v9ses = v9fs_inode2v9ses(old_inode); + oldfid = v9fs_fid_lookup(old_dentry); if(IS_ERR(oldfid)) return PTR_ERR(oldfid); olddirfid = v9fs_fid_clone(old_dentry->d_parent); if(IS_ERR(olddirfid)) { retval = PTR_ERR(olddirfid); - goto Release_lock; + goto done; } newdirfid = v9fs_fid_clone(new_dentry->d_parent); if(IS_ERR(newdirfid)) { retval = PTR_ERR(newdirfid); - goto Clunk_olddir; + goto clunk_olddir; } /* 9P can only handle file rename in the same directory */ if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { - dprintk(DEBUG_ERROR, "old dir and new dir are different\n"); + P9_DPRINTK(P9_DEBUG_ERROR, + "old dir and new dir are different\n"); retval = -EXDEV; - goto Clunk_newdir; - } - - fid = oldfid->fid; - olddirfidnum = olddirfid->fid; - newdirfidnum = newdirfid->fid; - - if (fid < 0) { - dprintk(DEBUG_ERROR, "no fid for old file #%lu\n", - old_inode->i_ino); - retval = -EBADF; - goto Clunk_newdir; + goto clunk_newdir; } v9fs_blank_wstat(&wstat); wstat.muid = v9ses->name; wstat.name = (char *) new_dentry->d_name.name; + retval = p9_client_wstat(oldfid, 
&wstat); - retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall); +clunk_newdir: + p9_client_clunk(olddirfid); - if (retval < 0) - PRINT_FCALL_ERROR("wstat error", fcall); - - kfree(fcall); - -Clunk_newdir: - v9fs_fid_clunk(v9ses, newdirfid); - -Clunk_olddir: - v9fs_fid_clunk(v9ses, olddirfid); - -Release_lock: - up(&oldfid->lock); +clunk_olddir: + p9_client_clunk(newdirfid); +done: return retval; } @@ -840,28 +708,30 @@ static int v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - struct v9fs_fcall *fcall = NULL; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_clone(dentry); - int err = -EPERM; + int err; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_stat *st; - dprintk(DEBUG_VFS, "dentry: %p\n", dentry); - if(IS_ERR(fid)) + P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); + err = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + if (v9ses->cache == CACHE_LOOSE) + return simple_getattr(mnt, dentry, stat); + + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) return PTR_ERR(fid); - err = v9fs_t_stat(v9ses, fid->fid, &fcall); + st = p9_client_stat(fid); + if (IS_ERR(st)) + return PTR_ERR(st); - if (err < 0) - dprintk(DEBUG_ERROR, "stat error\n"); - else { - v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode, - dentry->d_inode->i_sb); + v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); generic_fillattr(dentry->d_inode, stat); - } - kfree(fcall); - v9fs_fid_clunk(v9ses, fid); - return err; + kfree(st); + return 0; } /** @@ -873,13 +743,15 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) { - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_clone(dentry); - struct v9fs_fcall *fcall = NULL; - struct v9fs_wstat wstat; - int res = -EPERM; + int retval; + struct v9fs_session_info *v9ses; + struct 
p9_fid *fid; + struct p9_wstat wstat; - dprintk(DEBUG_VFS, "\n"); + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + retval = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + fid = v9fs_fid_lookup(dentry); if(IS_ERR(fid)) return PTR_ERR(fid); @@ -904,17 +776,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) wstat.n_gid = iattr->ia_gid; } - res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall); + retval = p9_client_wstat(fid, &wstat); + if (retval >= 0) + retval = inode_setattr(dentry->d_inode, iattr); - if (res < 0) - PRINT_FCALL_ERROR("wstat error", fcall); - - kfree(fcall); - if (res >= 0) - res = inode_setattr(dentry->d_inode, iattr); - - v9fs_fid_clunk(v9ses, fid); - return res; + return retval; } /** @@ -926,7 +792,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) */ void -v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, +v9fs_stat2inode(struct p9_stat *stat, struct inode *inode, struct super_block *sb) { int n; @@ -967,8 +833,9 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, case 'b': break; default: - dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n", - type, stat->extension.len, stat->extension.str); + P9_DPRINTK(P9_DEBUG_ERROR, + "Unknown special type %c (%.*s)\n", type, + stat->extension.len, stat->extension.str); }; inode->i_rdev = MKDEV(major, minor); } else @@ -976,8 +843,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, inode->i_size = stat->length; - inode->i_blocks = - (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; + /* not real number of blocks, but 512 byte ones ... */ + inode->i_blocks = (inode->i_size + 512 - 1) >> 9; } /** @@ -987,7 +854,7 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, * BUG: potential for inode number collisions? 
*/ -ino_t v9fs_qid2ino(struct v9fs_qid *qid) +ino_t v9fs_qid2ino(struct p9_qid *qid) { u64 path = qid->path + 2; ino_t i = 0; @@ -1010,56 +877,46 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid) static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) { - int retval = -EPERM; + int retval; - struct v9fs_fcall *fcall = NULL; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_clone(dentry); + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_stat *st; + P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); + retval = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + fid = v9fs_fid_lookup(dentry); if(IS_ERR(fid)) return PTR_ERR(fid); - if (!v9ses->extended) { - retval = -EBADF; - dprintk(DEBUG_ERROR, "not extended\n"); - goto ClunkFid; - } - - dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name); - retval = v9fs_t_stat(v9ses, fid->fid, &fcall); - - if (retval < 0) { - dprintk(DEBUG_ERROR, "stat error\n"); - goto FreeFcall; - } + if (!v9ses->extended) + return -EBADF; - if (!fcall) { - retval = -EIO; - goto ClunkFid; - } + st = p9_client_stat(fid); + if (IS_ERR(st)) + return PTR_ERR(st); - if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) { + if (!(st->mode & P9_DMSYMLINK)) { retval = -EINVAL; - goto FreeFcall; + goto done; } /* copy extension buffer into buffer */ - if (fcall->params.rstat.stat.extension.len < buflen) - buflen = fcall->params.rstat.stat.extension.len + 1; + if (st->extension.len < buflen) + buflen = st->extension.len + 1; - memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); + memmove(buffer, st->extension.str, buflen - 1); buffer[buflen-1] = 0; - dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len, - fcall->params.rstat.stat.extension.str, buffer); - retval = buflen; + P9_DPRINTK(P9_DEBUG_VFS, + "%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len, + st->extension.str, buffer); 
-FreeFcall: - kfree(fcall); - -ClunkFid: - v9fs_fid_clunk(v9ses, fid); + retval = buflen; +done: + kfree(st); return retval; } @@ -1084,14 +941,14 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer, if (buflen > PATH_MAX) buflen = PATH_MAX; - dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); retval = v9fs_readlink(dentry, link, buflen); if (retval > 0) { if ((ret = copy_to_user(buffer, link, retval)) != 0) { - dprintk(DEBUG_ERROR, "problem copying to user: %d\n", - ret); + P9_DPRINTK(P9_DEBUG_ERROR, + "problem copying to user: %d\n", ret); retval = ret; } } @@ -1112,7 +969,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) int len = 0; char *link = __getname(); - dprintk(DEBUG_VFS, "%s n", dentry->d_name.name); + P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name); if (!link) link = ERR_PTR(-ENOMEM); @@ -1141,7 +998,7 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void { char *s = nd_get_link(nd); - dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); + P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); if (!IS_ERR(s)) __putname(s); } @@ -1149,66 +1006,24 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, int mode, const char *extension) { - int err; - u32 fid, perm; + u32 perm; struct v9fs_session_info *v9ses; - struct v9fs_fid *dfid, *vfid = NULL; - struct inode *inode = NULL; + struct p9_fid *fid; v9ses = v9fs_inode2v9ses(dir); if (!v9ses->extended) { - dprintk(DEBUG_ERROR, "not extended\n"); + P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n"); return -EPERM; } - dfid = v9fs_fid_clone(dentry->d_parent); - if(IS_ERR(dfid)) { - err = PTR_ERR(dfid); - goto error; - } - perm = unixmode2p9mode(v9ses, mode); + fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm, + 
P9_OREAD); + if (IS_ERR(fid)) + return PTR_ERR(fid); - err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, - perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL); - - if (err) - goto clunk_dfid; - - err = v9fs_t_clunk(v9ses, fid); - if (err) - goto clunk_dfid; - - vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); - if (IS_ERR(vfid)) { - err = PTR_ERR(vfid); - vfid = NULL; - goto clunk_dfid; - } - - inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - inode = NULL; - goto free_vfid; - } - - if(v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; - else - dentry->d_op = &v9fs_dentry_operations; - d_instantiate(dentry, inode); + p9_client_clunk(fid); return 0; - -free_vfid: - v9fs_fid_destroy(vfid); - -clunk_dfid: - v9fs_fid_clunk(v9ses, dfid); - -error: - return err; - } /** @@ -1224,8 +1039,8 @@ error: static int v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, - symname); + P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, + dentry->d_name.name, symname); return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); } @@ -1247,11 +1062,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int retval; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); - struct v9fs_fid *oldfid; + struct p9_fid *oldfid; char *name; - dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, + P9_DPRINTK(P9_DEBUG_VFS, + " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); oldfid = v9fs_fid_clone(old_dentry); @@ -1265,11 +1080,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, } sprintf(name, "%d\n", oldfid->fid); - retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); + retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); __putname(name); clunk_fid: - v9fs_fid_clunk(v9ses, oldfid); + 
p9_client_clunk(oldfid); return retval; } @@ -1288,7 +1103,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) int retval; char *name; - dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, + P9_DPRINTK(P9_DEBUG_VFS, + " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); if (!new_valid_dev(rdev)) diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 7bdf8b326841..ba904371218b 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -37,10 +37,10 @@ #include <linux/mount.h> #include <linux/idr.h> #include <linux/sched.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -107,41 +107,48 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, struct vfsmount *mnt) { struct super_block *sb = NULL; - struct v9fs_fcall *fcall = NULL; struct inode *inode = NULL; struct dentry *root = NULL; struct v9fs_session_info *v9ses = NULL; - struct v9fs_fid *root_fid = NULL; + struct p9_stat *st = NULL; int mode = S_IRWXUGO | S_ISVTX; uid_t uid = current->fsuid; gid_t gid = current->fsgid; - int stat_result = 0; - int newfid = 0; + struct p9_fid *fid; int retval = 0; - dprintk(DEBUG_VFS, " \n"); + P9_DPRINTK(P9_DEBUG_VFS, " \n"); v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); if (!v9ses) return -ENOMEM; - if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { - dprintk(DEBUG_ERROR, "problem initiating session\n"); - retval = newfid; - goto out_free_session; + fid = v9fs_session_init(v9ses, dev_name, data); + if (IS_ERR(fid)) { + retval = PTR_ERR(fid); + fid = NULL; + kfree(v9ses); + v9ses = NULL; + goto error; + } + + st = p9_client_stat(fid); + if (IS_ERR(st)) { + retval = PTR_ERR(st); + goto error; } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); - goto out_close_session; + goto error; } 
v9fs_fill_super(sb, v9ses, flags); inode = v9fs_get_inode(sb, S_IFDIR | mode); if (IS_ERR(inode)) { retval = PTR_ERR(inode); - goto put_back_sb; + goto error; } inode->i_uid = uid; @@ -150,54 +157,30 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, root = d_alloc_root(inode); if (!root) { retval = -ENOMEM; - goto put_back_sb; + goto error; } sb->s_root = root; + root->d_inode->i_ino = v9fs_qid2ino(&st->qid); + v9fs_stat2inode(st, root->d_inode, sb); + v9fs_fid_add(root, fid); - stat_result = v9fs_t_stat(v9ses, newfid, &fcall); - if (stat_result < 0) { - dprintk(DEBUG_ERROR, "stat error\n"); - v9fs_t_clunk(v9ses, newfid); - } else { - /* Setup the Root Inode */ - root_fid = v9fs_fid_create(v9ses, newfid); - if (root_fid == NULL) { - retval = -ENOMEM; - goto put_back_sb; - } - - retval = v9fs_fid_insert(root_fid, root); - if (retval < 0) { - kfree(fcall); - goto put_back_sb; - } - - root_fid->qid = fcall->params.rstat.stat.qid; - root->d_inode->i_ino = - v9fs_qid2ino(&fcall->params.rstat.stat.qid); - v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb); - } + return simple_set_mnt(mnt, sb); - kfree(fcall); +error: + if (fid) + p9_client_clunk(fid); - if (stat_result < 0) { - retval = stat_result; - goto put_back_sb; + if (v9ses) { + v9fs_session_close(v9ses); + kfree(v9ses); } - return simple_set_mnt(mnt, sb); - -out_close_session: - v9fs_session_close(v9ses); -out_free_session: - kfree(v9ses); - return retval; + if (sb) { + up_write(&sb->s_umount); + deactivate_super(sb); + } -put_back_sb: - /* deactivate_super calls v9fs_kill_super which will frees the rest */ - up_write(&sb->s_umount); - deactivate_super(sb); return retval; } @@ -211,7 +194,7 @@ static void v9fs_kill_super(struct super_block *s) { struct v9fs_session_info *v9ses = s->s_fs_info; - dprintk(DEBUG_VFS, " %p\n", s); + P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); v9fs_dentry_release(s->s_root); /* clunk root */ @@ -219,7 +202,7 @@ static void v9fs_kill_super(struct super_block 
*s) v9fs_session_close(v9ses); kfree(v9ses); - dprintk(DEBUG_VFS, "exiting kill_super\n"); + P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); } /** @@ -234,7 +217,7 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info; if (v9ses->debug != 0) - seq_printf(m, ",debug=%u", v9ses->debug); + seq_printf(m, ",debug=%x", v9ses->debug); if (v9ses->port != V9FS_PORT) seq_printf(m, ",port=%u", v9ses->port); if (v9ses->maxdata != 9000) diff --git a/fs/Kconfig b/fs/Kconfig index 0fa0c1193e81..94b9d861bf9b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -2048,7 +2048,7 @@ config AFS_DEBUG config 9P_FS tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" - depends on INET && EXPERIMENTAL + depends on INET && NET_9P && EXPERIMENTAL help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. diff --git a/fs/adfs/file.c b/fs/adfs/file.c index f544a2855923..36e381c6a99a 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = { .fsync = file_fsync, .write = do_sync_write, .aio_write = generic_file_aio_write, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; const struct inode_operations adfs_file_inode_operations = { diff --git a/fs/affs/file.c b/fs/affs/file.c index c8796906f584..c314a35f0918 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = { .open = affs_file_open, .release = affs_file_release, .fsync = file_fsync, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; const struct inode_operations affs_file_inode_operations = { diff --git a/fs/afs/file.c b/fs/afs/file.c index 9c0e721d9fc2..aede7eb66dd4 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -32,7 +32,7 @@ const struct file_operations afs_file_operations = { .aio_read = generic_file_aio_read, 
.aio_write = afs_file_write, .mmap = generic_file_readonly_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, .fsync = afs_fsync, }; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 2dac3ad2c44b..2c55dd94a1de 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -17,6 +17,8 @@ #include <linux/rxrpc.h> #include <linux/key.h> #include <linux/workqueue.h> +#include <linux/sched.h> + #include "afs.h" #include "afs_vl.h" diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 329ee473eede..521ff7caadbd 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl) return -EIO; } -static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos, - size_t count, read_actor_t actor, void *target) -{ - return -EIO; -} - static ssize_t bad_file_sendpage(struct file *file, struct page *page, int off, size_t len, loff_t *pos, int more) { @@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops = .aio_fsync = bad_file_aio_fsync, .fasync = bad_file_fasync, .lock = bad_file_lock, - .sendfile = bad_file_sendfile, .sendpage = bad_file_sendpage, .get_unmapped_area = bad_file_get_unmapped_area, .check_flags = bad_file_check_flags, diff --git a/fs/bfs/file.c b/fs/bfs/file.c index ef4d1fa04e65..24310e9ee05a 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fa8ea33ab0be..08e4414b8374 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1499,6 +1499,9 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) #endif int thread_status_size = 0; elf_addr_t 
*auxv; +#ifdef ELF_CORE_WRITE_EXTRA_NOTES + int extra_notes_size; +#endif /* * We no longer stop all VM operations. @@ -1628,7 +1631,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) sz += thread_status_size; #ifdef ELF_CORE_WRITE_EXTRA_NOTES - sz += ELF_CORE_EXTRA_NOTES_SIZE; + extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE; + sz += extra_notes_size; #endif fill_elf_note_phdr(&phdr, sz, offset); @@ -1674,6 +1678,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) #ifdef ELF_CORE_WRITE_EXTRA_NOTES ELF_CORE_WRITE_EXTRA_NOTES; + foffset += extra_notes_size; #endif /* write out the thread status notes section */ diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 7b0265d7f3a8..861141b4f6d6 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -558,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm, if (!realdatastart) realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", - (int)-datapos); + (int)-realdatastart); do_munmap(current->mm, textpos, text_len); ret = realdatastart; goto err; @@ -1223,8 +1223,6 @@ EXPORT_SYMBOL(bio_hw_segments); EXPORT_SYMBOL(bio_add_page); EXPORT_SYMBOL(bio_add_pc_page); EXPORT_SYMBOL(bio_get_nr_vecs); -EXPORT_SYMBOL(bio_map_user); -EXPORT_SYMBOL(bio_unmap_user); EXPORT_SYMBOL(bio_map_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); diff --git a/fs/block_dev.c b/fs/block_dev.c index ea1480a16f51..b3e9bfa748cf 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1346,7 +1346,6 @@ const struct file_operations def_blk_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif - .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 6017c465440e..07838b2ac1ce 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -7,16 +7,16 @@ * * This program is 
free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> @@ -39,7 +39,7 @@ cifs_dump_mem(char *label, void *data, int length) char *charptr = data; char buf[10], line[80]; - printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", + printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", label, length, data); for (i = 0; i < length; i += 16) { line[0] = 0; @@ -60,10 +60,10 @@ cifs_dump_mem(char *label, void *data, int length) #ifdef CONFIG_CIFS_DEBUG2 void cifs_dump_detail(struct smb_hdr * smb) { - cERROR(1,("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", + cERROR(1, ("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", smb->Command, smb->Status.CifsError, smb->Flags, smb->Flags2, smb->Mid, smb->Pid)); - cERROR(1,("smb buf %p len %d", smb, smbCalcSize_LE(smb))); + cERROR(1, ("smb buf %p len %d", smb, smbCalcSize_LE(smb))); } @@ -72,36 +72,35 @@ void cifs_dump_mids(struct TCP_Server_Info * server) struct list_head *tmp; struct mid_q_entry * mid_entry; - if(server == NULL) + if (server == NULL) return; - cERROR(1,("Dump pending requests:")); + cERROR(1, ("Dump pending requests:")); spin_lock(&GlobalMid_Lock); list_for_each(tmp, &server->pending_mid_q) { mid_entry 
= list_entry(tmp, struct mid_q_entry, qhead); - if(mid_entry) { - cERROR(1,("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", + if (mid_entry) { + cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", mid_entry->midState, (int)mid_entry->command, mid_entry->pid, mid_entry->tsk, mid_entry->mid)); #ifdef CONFIG_CIFS_STATS2 - cERROR(1,("IsLarge: %d buf: %p time rcv: %ld now: %ld", + cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", mid_entry->largeBuf, mid_entry->resp_buf, mid_entry->when_received, jiffies)); #endif /* STATS2 */ - cERROR(1,("IsMult: %d IsEnd: %d", mid_entry->multiRsp, + cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, mid_entry->multiEnd)); - if(mid_entry->resp_buf) { + if (mid_entry->resp_buf) { cifs_dump_detail(mid_entry->resp_buf); cifs_dump_mem("existing buf: ", mid_entry->resp_buf, 62 /* fixme */); } - } } spin_unlock(&GlobalMid_Lock); @@ -129,9 +128,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, "Display Internal CIFS Data Structures for Debugging\n" "---------------------------------------------------\n"); buf += length; - length = sprintf(buf,"CIFS Version %s\n",CIFS_VERSION); + length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION); buf += length; - length = sprintf(buf,"Active VFS Requests: %d\n", GlobalTotalActiveXid); + length = sprintf(buf, + "Active VFS Requests: %d\n", GlobalTotalActiveXid); buf += length; length = sprintf(buf, "Servers:"); buf += length; @@ -141,7 +141,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, list_for_each(tmp, &GlobalSMBSessionList) { i++; ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if((ses->serverDomain == NULL) || (ses->serverOS == NULL) || + if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || (ses->serverNOS == NULL)) { buf += sprintf(buf, "\nentry for %s not fully " "displayed\n\t", ses->serverName); @@ -149,15 +149,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, } else { length = sprintf(buf, - 
"\n%d) Name: %s Domain: %s Mounts: %d OS: %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB session status: %d\t", + "\n%d) Name: %s Domain: %s Mounts: %d OS:" + " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB" + " session status: %d\t", i, ses->serverName, ses->serverDomain, atomic_read(&ses->inUse), ses->serverOS, ses->serverNOS, - ses->capabilities,ses->status); + ses->capabilities, ses->status); buf += length; } - if(ses->server) { - buf += sprintf(buf, "TCP status: %d\n\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d", + if (ses->server) { + buf += sprintf(buf, "TCP status: %d\n\tLocal Users To " + "Server: %d SecMode: 0x%x Req On Wire: %d", ses->server->tcpStatus, atomic_read(&ses->server->socketUseCount), ses->server->secMode, @@ -165,7 +168,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, #ifdef CONFIG_CIFS_STATS2 buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d", - atomic_read(&ses->server->inSend), + atomic_read(&ses->server->inSend), atomic_read(&ses->server->num_waiters)); #endif @@ -177,17 +180,19 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, mid_entry = list_entry(tmp1, struct mid_q_entry, qhead); - if(mid_entry) { - length = sprintf(buf,"State: %d com: %d pid: %d tsk: %p mid %d\n", - mid_entry->midState, - (int)mid_entry->command, - mid_entry->pid, - mid_entry->tsk, - mid_entry->mid); + if (mid_entry) { + length = sprintf(buf, + "State: %d com: %d pid:" + " %d tsk: %p mid %d\n", + mid_entry->midState, + (int)mid_entry->command, + mid_entry->pid, + mid_entry->tsk, + mid_entry->mid); buf += length; } } - spin_unlock(&GlobalMid_Lock); + spin_unlock(&GlobalMid_Lock); } } @@ -207,7 +212,8 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); length = sprintf(buf, - "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x Attributes: 0x%x\nPathComponentMax: %d Status: %d", + "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x " + "Attributes: 0x%x\nPathComponentMax: %d 
Status: %d", i, tcon->treeName, atomic_read(&tcon->useCount), tcon->nativeFileSystem, @@ -215,7 +221,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, le32_to_cpu(tcon->fsAttrInfo.Attributes), le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), tcon->tidStatus); - buf += length; + buf += length; if (dev_type == FILE_DEVICE_DISK) length = sprintf(buf, " type: DISK "); else if (dev_type == FILE_DEVICE_CD_ROM) @@ -224,7 +230,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, length = sprintf(buf, " type: %d ", dev_type); buf += length; - if(tcon->tidStatus == CifsNeedReconnect) { + if (tcon->tidStatus == CifsNeedReconnect) { buf += sprintf(buf, "\tDISCONNECTED "); length += 14; } @@ -238,9 +244,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, /* Now calculate total size of returned data */ length = buf - original_buf; - if(offset + count >= length) + if (offset + count >= length) *eof = 1; - if(length < offset) { + if (length < offset) { *eof = 1; return 0; } else { @@ -256,18 +262,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, static int cifs_stats_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { - char c; - int rc; + char c; + int rc; struct list_head *tmp; struct cifsTconInfo *tcon; - rc = get_user(c, buffer); - if (rc) - return rc; + rc = get_user(c, buffer); + if (rc) + return rc; - if (c == '1' || c == 'y' || c == 'Y' || c == '0') { + if (c == '1' || c == 'y' || c == 'Y' || c == '0') { read_lock(&GlobalSMBSeslock); #ifdef CONFIG_CIFS_STATS2 atomic_set(&totBufAllocCount, 0); @@ -297,14 +303,14 @@ cifs_stats_write(struct file *file, const char __user *buffer, read_unlock(&GlobalSMBSeslock); } - return count; + return count; } static int cifs_stats_read(char *buf, char **beginBuffer, off_t offset, int count, int *eof, void *data) { - int item_length,i,length; + int item_length, i, length; struct 
list_head *tmp; struct cifsTconInfo *tcon; @@ -314,44 +320,44 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset, "Resources in use\nCIFS Session: %d\n", sesInfoAllocCount.counter); buf += length; - item_length = - sprintf(buf,"Share (unique mount targets): %d\n", + item_length = + sprintf(buf, "Share (unique mount targets): %d\n", tconInfoAllocCount.counter); length += item_length; - buf += item_length; - item_length = - sprintf(buf,"SMB Request/Response Buffer: %d Pool size: %d\n", + buf += item_length; + item_length = + sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n", bufAllocCount.counter, cifs_min_rcv + tcpSesAllocCount.counter); length += item_length; buf += item_length; - item_length = - sprintf(buf,"SMB Small Req/Resp Buffer: %d Pool size: %d\n", - smBufAllocCount.counter,cifs_min_small); + item_length = + sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n", + smBufAllocCount.counter, cifs_min_small); length += item_length; buf += item_length; #ifdef CONFIG_CIFS_STATS2 - item_length = sprintf(buf, "Total Large %d Small %d Allocations\n", + item_length = sprintf(buf, "Total Large %d Small %d Allocations\n", atomic_read(&totBufAllocCount), - atomic_read(&totSmBufAllocCount)); + atomic_read(&totSmBufAllocCount)); length += item_length; buf += item_length; #endif /* CONFIG_CIFS_STATS2 */ - item_length = - sprintf(buf,"Operations (MIDs): %d\n", + item_length = + sprintf(buf, "Operations (MIDs): %d\n", midCount.counter); length += item_length; buf += item_length; item_length = sprintf(buf, "\n%d session %d share reconnects\n", - tcpSesReconnectCount.counter,tconInfoReconnectCount.counter); + tcpSesReconnectCount.counter, tconInfoReconnectCount.counter); length += item_length; buf += item_length; item_length = sprintf(buf, "Total vfs operations: %d maximum at one time: %d\n", - GlobalCurrentXid,GlobalMaxActiveXid); + GlobalCurrentXid, GlobalMaxActiveXid); length += item_length; buf += item_length; @@ -360,10 +366,10 @@ 
cifs_stats_read(char *buf, char **beginBuffer, off_t offset, list_for_each(tmp, &GlobalTreeConnectionList) { i++; tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - item_length = sprintf(buf,"\n%d) %s",i, tcon->treeName); + item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName); buf += item_length; length += item_length; - if(tcon->tidStatus == CifsNeedReconnect) { + if (tcon->tidStatus == CifsNeedReconnect) { buf += sprintf(buf, "\tDISCONNECTED "); length += 14; } @@ -380,15 +386,15 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset, item_length = sprintf(buf, "\nWrites: %d Bytes: %lld", atomic_read(&tcon->num_writes), (long long)(tcon->bytes_written)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, + buf += item_length; + length += item_length; + item_length = sprintf(buf, "\nLocks: %d HardLinks: %d Symlinks: %d", - atomic_read(&tcon->num_locks), + atomic_read(&tcon->num_locks), atomic_read(&tcon->num_hardlinks), atomic_read(&tcon->num_symlinks)); - buf += item_length; - length += item_length; + buf += item_length; + length += item_length; item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d", atomic_read(&tcon->num_opens), @@ -415,12 +421,12 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset, } read_unlock(&GlobalSMBSeslock); - buf += sprintf(buf,"\n"); + buf += sprintf(buf, "\n"); length++; - if(offset + count >= length) + if (offset + count >= length) *eof = 1; - if(length < offset) { + if (length < offset) { *eof = 1; return 0; } else { @@ -428,7 +434,7 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset, } if (length > count) length = count; - + return length; } #endif @@ -547,11 +553,11 @@ cifs_proc_clean(void) remove_proc_entry("MultiuserMount", proc_fs_cifs); remove_proc_entry("OplockEnabled", proc_fs_cifs); /* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */ - remove_proc_entry("SecurityFlags",proc_fs_cifs); -/* 
remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); */ - remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs); - remove_proc_entry("Experimental",proc_fs_cifs); - remove_proc_entry("LookupCacheEnabled",proc_fs_cifs); + remove_proc_entry("SecurityFlags", proc_fs_cifs); +/* remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */ + remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); + remove_proc_entry("Experimental", proc_fs_cifs); + remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); remove_proc_entry("cifs", proc_root_fs); } @@ -590,7 +596,7 @@ cifsFYI_write(struct file *file, const char __user *buffer, cifsFYI = 0; else if (c == '1' || c == 'y' || c == 'Y') cifsFYI = 1; - else if((c > '1') && (c <= '9')) + else if ((c > '1') && (c <= '9')) cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */ return count; @@ -637,28 +643,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer, static int experimEnabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { - int len; + int len; - len = sprintf(page, "%d\n", experimEnabled); + len = sprintf(page, "%d\n", experimEnabled); - len -= off; - *start = page + off; + len -= off; + *start = page + off; - if (len > count) - len = count; - else - *eof = 1; + if (len > count) + len = count; + else + *eof = 1; - if (len < 0) - len = 0; + if (len < 0) + len = 0; - return len; + return len; } static int experimEnabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { char c; int rc; @@ -678,46 +684,46 @@ experimEnabled_write(struct file *file, const char __user *buffer, static int linuxExtensionsEnabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { - int len; + int len; - len = sprintf(page, "%d\n", linuxExtEnabled); - len -= off; - *start = page + off; + len = sprintf(page, "%d\n", 
linuxExtEnabled); + len -= off; + *start = page + off; - if (len > count) - len = count; - else - *eof = 1; + if (len > count) + len = count; + else + *eof = 1; - if (len < 0) - len = 0; + if (len < 0) + len = 0; - return len; + return len; } static int linuxExtensionsEnabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { - char c; - int rc; - - rc = get_user(c, buffer); - if (rc) - return rc; - if (c == '0' || c == 'n' || c == 'N') - linuxExtEnabled = 0; - else if (c == '1' || c == 'y' || c == 'Y') - linuxExtEnabled = 1; - - return count; + char c; + int rc; + + rc = get_user(c, buffer); + if (rc) + return rc; + if (c == '0' || c == 'n' || c == 'N') + linuxExtEnabled = 0; + else if (c == '1' || c == 'y' || c == 'Y') + linuxExtEnabled = 1; + + return count; } static int lookupFlag_read(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { int len; @@ -860,15 +866,15 @@ security_flags_write(struct file *file, const char __user *buffer, char flags_string[12]; char c; - if((count < 1) || (count > 11)) + if ((count < 1) || (count > 11)) return -EINVAL; memset(flags_string, 0, 12); - if(copy_from_user(flags_string, buffer, count)) + if (copy_from_user(flags_string, buffer, count)) return -EFAULT; - if(count < 3) { + if (count < 3) { /* single char or single char followed by null */ c = flags_string[0]; if (c == '0' || c == 'n' || c == 'N') @@ -881,15 +887,15 @@ security_flags_write(struct file *file, const char __user *buffer, flags = simple_strtoul(flags_string, NULL, 0); - cFYI(1,("sec flags 0x%x", flags)); + cFYI(1, ("sec flags 0x%x", flags)); - if(flags <= 0) { - cERROR(1,("invalid security flags %s",flags_string)); + if (flags <= 0) { + cERROR(1, ("invalid security flags %s", flags_string)); return -EINVAL; } - if(flags & ~CIFSSEC_MASK) { - cERROR(1,("attempt to set unsupported security flags 0x%x", + if (flags & ~CIFSSEC_MASK) { + 
cERROR(1, ("attempt to set unsupported security flags 0x%x", flags & ~CIFSSEC_MASK)); return -EINVAL; } diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 793c4b95c164..701e9a9185f2 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -6,16 +6,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> @@ -32,7 +32,7 @@ * */ int -cifs_strfromUCS_le(char *to, const __le16 * from, +cifs_strfromUCS_le(char *to, const __le16 * from, int len, const struct nls_table *codepage) { int i; @@ -66,7 +66,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len, { int charlen; int i; - wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */ + wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */ for (i = 0; len && *from; i++, from += charlen, len -= charlen) { @@ -79,7 +79,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len, /* A question mark */ to[i] = cpu_to_le16(0x003f); charlen = 1; - } else + } else to[i] = cpu_to_le16(wchar_to[i]); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d38c69b591cf..8b0cbf4a4ad0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -616,7 +616,7 @@ const struct file_operations 
cifs_file_ops = { .fsync = cifs_fsync, .flush = cifs_flush, .mmap = cifs_file_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, .llseek = cifs_llseek, #ifdef CONFIG_CIFS_POSIX .ioctl = cifs_ioctl, @@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = { .lock = cifs_lock, .fsync = cifs_fsync, .flush = cifs_flush, - .sendfile = generic_file_sendfile, /* BB removeme BB */ + .splice_read = generic_file_splice_read, #ifdef CONFIG_CIFS_POSIX .ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ @@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = { .fsync = cifs_fsync, .flush = cifs_flush, .mmap = cifs_file_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, .llseek = cifs_llseek, #ifdef CONFIG_CIFS_POSIX .ioctl = cifs_ioctl, @@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .release = cifs_close, .fsync = cifs_fsync, .flush = cifs_flush, - .sendfile = generic_file_sendfile, /* BB removeme BB */ + .splice_read = generic_file_splice_read, #ifdef CONFIG_CIFS_POSIX .ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ @@ -825,8 +825,8 @@ cifs_init_mids(void) sizeof (struct oplock_q_entry), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (cifs_oplock_cachep == NULL) { - kmem_cache_destroy(cifs_mid_cachep); mempool_destroy(cifs_mid_poolp); + kmem_cache_destroy(cifs_mid_cachep); return -ENOMEM; } diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 14de58fa1437..57419a176688 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -433,8 +433,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) cFYI(1,("secFlags 0x%x",secFlags)); pSMB->hdr.Mid = GetNextMid(server); - pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; - if((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) + pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); + if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; count = 0; diff --git 
a/fs/cifs/connect.c b/fs/cifs/connect.c index 216fb625843f..f4e92661b223 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2069,8 +2069,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, srvTcp->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); if (srvTcp->tsk) { + struct task_struct *tsk; + /* If we could verify that kthread_stop would + always wake up processes blocked in + tcp in recv_mesg then we could remove the + send_sig call */ send_sig(SIGKILL,srvTcp->tsk,1); - kthread_stop(srvTcp->tsk); + tsk = srvTcp->tsk; + if(tsk) + kthread_stop(tsk); } } /* If find_unc succeeded then rc == 0 so we can not end */ @@ -2085,8 +2092,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* if the socketUseCount is now zero */ if ((temp_rc == -ESHUTDOWN) && (pSesInfo->server) && (pSesInfo->server->tsk)) { + struct task_struct *tsk; send_sig(SIGKILL,pSesInfo->server->tsk,1); - kthread_stop(pSesInfo->server->tsk); + tsk = pSesInfo->server->tsk; + if (tsk) + kthread_stop(tsk); } } else cFYI(1, ("No session or bad tcon")); @@ -3334,7 +3344,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) return 0; } else if (rc == -ESHUTDOWN) { cFYI(1,("Waking up socket by sending it signal")); - if(cifsd_task) { + if (cifsd_task) { send_sig(SIGKILL,cifsd_task,1); kthread_stop(cifsd_task); } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index e5210519ac4b..8e86aaceb68a 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -2,7 +2,7 @@ * fs/cifs/dir.c * * vfs operations that deal with dentries - * + * * Copyright (C) International Business Machines Corp., 2002,2005 * Author(s): Steve French (sfrench@us.ibm.com) * @@ -34,11 +34,12 @@ static void renew_parental_timestamps(struct dentry *direntry) { - /* BB check if there is a way to get the kernel to do this or if we really need this */ + /* BB check if there is a way to get the kernel to do this or if we + really need this */ do { direntry->d_time = jiffies; direntry = 
direntry->d_parent; - } while (!IS_ROOT(direntry)); + } while (!IS_ROOT(direntry)); } /* Note: caller must free return buffer */ @@ -51,7 +52,7 @@ build_path_from_dentry(struct dentry *direntry) char *full_path; char dirsep; - if(direntry == NULL) + if (direntry == NULL) return NULL; /* not much we can do if dentry is freed and we need to reopen the file after it was closed implicitly when the server crashed */ @@ -59,18 +60,18 @@ build_path_from_dentry(struct dentry *direntry) dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb)); pplen = CIFS_SB(direntry->d_sb)->prepathlen; cifs_bp_rename_retry: - namelen = pplen; + namelen = pplen; for (temp = direntry; !IS_ROOT(temp);) { namelen += (1 + temp->d_name.len); temp = temp->d_parent; - if(temp == NULL) { - cERROR(1,("corrupt dentry")); + if (temp == NULL) { + cERROR(1, ("corrupt dentry")); return NULL; } } full_path = kmalloc(namelen+1, GFP_KERNEL); - if(full_path == NULL) + if (full_path == NULL) return full_path; full_path[namelen] = 0; /* trailing null */ for (temp = direntry; !IS_ROOT(temp);) { @@ -84,8 +85,8 @@ cifs_bp_rename_retry: cFYI(0, ("name: %s", full_path + namelen)); } temp = temp->d_parent; - if(temp == NULL) { - cERROR(1,("corrupt dentry")); + if (temp == NULL) { + cERROR(1, ("corrupt dentry")); kfree(full_path); return NULL; } @@ -94,7 +95,7 @@ cifs_bp_rename_retry: cERROR(1, ("did not end path lookup where expected namelen is %d", namelen)); - /* presumably this is only possible if racing with a rename + /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ kfree(full_path); @@ -106,7 +107,7 @@ cifs_bp_rename_retry: since the '\' is a valid posix character so we can not switch those safely to '/' if any are found in the middle of the prepath */ /* BB test paths to Windows with '/' in the midst of prepath */ - strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen); + 
strncpy(full_path, CIFS_SB(direntry->d_sb)->prepath, pplen); return full_path; } @@ -147,12 +148,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, pTcon = cifs_sb->tcon; full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return -ENOMEM; } - if(nd && (nd->flags & LOOKUP_OPEN)) { + if (nd && (nd->flags & LOOKUP_OPEN)) { int oflags = nd->intent.open.flags; desiredAccess = 0; @@ -164,28 +165,29 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, write_only = TRUE; } - if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) disposition = FILE_CREATE; - else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) disposition = FILE_OVERWRITE_IF; - else if((oflags & O_CREAT) == O_CREAT) + else if ((oflags & O_CREAT) == O_CREAT) disposition = FILE_OPEN_IF; else { - cFYI(1,("Create flag not set in create function")); + cFYI(1, ("Create flag not set in create function")); } } - /* BB add processing to set equivalent of mode - e.g. via CreateX with ACLs */ + /* BB add processing to set equivalent of mode - e.g. 
via CreateX with + ACLs */ if (oplockEnabled) oplock = REQ_OPLOCK; - buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); - if(buf == NULL) { + buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); + if (buf == NULL) { kfree(full_path); FreeXid(xid); return -ENOMEM; } - if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) + if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, CREATE_NOT_DIR, &fileHandle, &oplock, buf, cifs_sb->local_nls, @@ -193,27 +195,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, else rc = -EIO; /* no NT SMB support fall into legacy open below */ - if(rc == -EIO) { + if (rc == -EIO) { /* old server, retry the open legacy style */ rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, desiredAccess, CREATE_NOT_DIR, &fileHandle, &oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } + } if (rc) { cFYI(1, ("cifs_create returned 0x%x", rc)); } else { /* If Open reported that we actually created a file then we now have to set the mode if possible */ if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) && - (oplock & CIFS_CREATE_ACTION)) - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + (oplock & CIFS_CREATE_ACTION)) { + mode &= ~current->fs->umask; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, (__u64)current->fsuid, (__u64)current->fsgid, 0 /* dev */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } else { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, @@ -221,26 +224,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, (__u64)-1, 0 /* dev */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } - else { - /* BB implement mode setting via Windows security descriptors */ - /* eg 
CIFSSMBWinSetPerms(xid,pTcon,full_path,mode,-1,-1,local_nls);*/ - /* could set r/o dos attribute if mode & 0222 == 0 */ + } else { + /* BB implement mode setting via Windows security + descriptors e.g. */ + /* CIFSSMBWinSetPerms(xid,pTcon,path,mode,-1,-1,nls);*/ + + /* Could set r/o dos attribute if mode & 0222 == 0 */ } /* BB server might mask mode so we have to query for Unix case*/ if (pTcon->ses->capabilities & CAP_UNIX) rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb,xid); + inode->i_sb, xid); else { rc = cifs_get_inode_info(&newinode, full_path, - buf, inode->i_sb,xid); - if(newinode) { + buf, inode->i_sb, xid); + if (newinode) { newinode->i_mode = mode; - if((oplock & CIFS_CREATE_ACTION) && - (cifs_sb->mnt_cifs_flags & + if ((oplock & CIFS_CREATE_ACTION) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { newinode->i_uid = current->fsuid; newinode->i_gid = current->fsgid; @@ -259,14 +264,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, direntry->d_op = &cifs_dentry_ops; d_instantiate(direntry, newinode); } - if((nd->flags & LOOKUP_OPEN) == FALSE) { + if ((nd->flags & LOOKUP_OPEN) == FALSE) { /* mknod case - do not leave file open */ CIFSSMBClose(xid, pTcon, fileHandle); - } else if(newinode) { + } else if (newinode) { pCifsFile = kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL); - - if(pCifsFile == NULL) + + if (pCifsFile == NULL) goto cifs_create_out; pCifsFile->netfid = fileHandle; pCifsFile->pid = current->tgid; @@ -276,33 +281,33 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, init_MUTEX(&pCifsFile->fh_sem); mutex_init(&pCifsFile->lock_mutex); INIT_LIST_HEAD(&pCifsFile->llist); - atomic_set(&pCifsFile->wrtPending,0); + atomic_set(&pCifsFile->wrtPending, 0); - /* set the following in open now + /* set the following in open now pCifsFile->pfile = file; */ write_lock(&GlobalSMBSeslock); - list_add(&pCifsFile->tlist,&pTcon->openFileList); + list_add(&pCifsFile->tlist, 
&pTcon->openFileList); pCifsInode = CIFS_I(newinode); - if(pCifsInode) { + if (pCifsInode) { /* if readable file instance put first in list*/ if (write_only == TRUE) { - list_add_tail(&pCifsFile->flist, + list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); } else { list_add(&pCifsFile->flist, &pCifsInode->openFileList); } - if((oplock & 0xF) == OPLOCK_EXCLUSIVE) { + if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { pCifsInode->clientCanCacheAll = TRUE; pCifsInode->clientCanCacheRead = TRUE; - cFYI(1,("Exclusive Oplock for inode %p", + cFYI(1, ("Exclusive Oplock inode %p", newinode)); - } else if((oplock & 0xF) == OPLOCK_READ) + } else if ((oplock & 0xF) == OPLOCK_READ) pCifsInode->clientCanCacheRead = TRUE; } write_unlock(&GlobalSMBSeslock); } - } + } cifs_create_out: kfree(buf); kfree(full_path); @@ -310,8 +315,8 @@ cifs_create_out: return rc; } -int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, - dev_t device_number) +int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, + dev_t device_number) { int rc = -EPERM; int xid; @@ -329,43 +334,45 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, pTcon = cifs_sb->tcon; full_path = build_path_from_dentry(direntry); - if(full_path == NULL) + if (full_path == NULL) rc = -ENOMEM; else if (pTcon->ses->capabilities & CAP_UNIX) { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + mode &= ~current->fs->umask; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, - mode,(__u64)current->fsuid,(__u64)current->fsgid, + mode, (__u64)current->fsuid, + (__u64)current->fsgid, device_number, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } else { rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, (__u64)-1, (__u64)-1, device_number, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } - if(!rc) { + if (!rc) { rc 
= cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb,xid); + inode->i_sb, xid); if (pTcon->nocase) direntry->d_op = &cifs_ci_dentry_ops; else direntry->d_op = &cifs_dentry_ops; - if(rc == 0) + if (rc == 0) d_instantiate(direntry, newinode); } } else { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { int oplock = 0; u16 fileHandle; FILE_ALL_INFO * buf; - cFYI(1,("sfu compat create special file")); + cFYI(1, ("sfu compat create special file")); - buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); - if(buf == NULL) { + buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); + if (buf == NULL) { kfree(full_path); FreeXid(xid); return -ENOMEM; @@ -373,39 +380,38 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE, /* fail if exists */ - GENERIC_WRITE /* BB would + GENERIC_WRITE /* BB would WRITE_OWNER | WRITE_DAC be better? */, /* Create a file and set the file attribute to SYSTEM */ CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, &fileHandle, &oplock, buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); /* BB FIXME - add handling for backlevel servers which need legacy open and check for all - calls to SMBOpen for fallback to - SMBLeagcyOpen */ - if(!rc) { + calls to SMBOpen for fallback to SMBLeagcyOpen */ + if (!rc) { /* BB Do not bother to decode buf since no local inode yet to put timestamps in, but we can reuse it safely */ int bytes_written; struct win_dev *pdev; pdev = (struct win_dev *)buf; - if(S_ISCHR(mode)) { + if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); pdev->major = cpu_to_le64(MAJOR(device_number)); - pdev->minor = + pdev->minor = cpu_to_le64(MINOR(device_number)); rc = CIFSSMBWrite(xid, pTcon, fileHandle, sizeof(struct win_dev), 0, &bytes_written, (char *)pdev, NULL, 0); - } else if(S_ISBLK(mode)) { + } else if (S_ISBLK(mode)) { memcpy(pdev->type, "IntxBLK", 8); 
pdev->major = cpu_to_le64(MAJOR(device_number)); @@ -432,7 +438,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, struct dentry * -cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct nameidata *nd) +cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, + struct nameidata *nd) { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ @@ -447,8 +454,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name (" parent inode = 0x%p name is: %s and dentry = 0x%p", parent_dir_inode, direntry->d_name.name, direntry)); - /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */ - /* check whether path exists */ cifs_sb = CIFS_SB(parent_dir_inode->i_sb); @@ -472,7 +477,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name deadlock in the cases (beginning of sys_rename itself) in which we already have the sb rename sem */ full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return ERR_PTR(-ENOMEM); } @@ -487,10 +492,10 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name if (pTcon->ses->capabilities & CAP_UNIX) rc = cifs_get_inode_info_unix(&newInode, full_path, - parent_dir_inode->i_sb,xid); + parent_dir_inode->i_sb, xid); else rc = cifs_get_inode_info(&newInode, full_path, NULL, - parent_dir_inode->i_sb,xid); + parent_dir_inode->i_sb, xid); if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) @@ -499,7 +504,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name direntry->d_op = &cifs_dentry_ops; d_add(direntry, newInode); - /* since paths are not looked up by component - the parent + /* since paths are not looked up by component - the parent directories are presumed to be good here */ renew_parental_timestamps(direntry); @@ -511,13 +516,13 @@ 
cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name else direntry->d_op = &cifs_dentry_ops; d_add(direntry, NULL); - /* if it was once a directory (but how can we tell?) we could do - shrink_dcache_parent(direntry); */ + /* if it was once a directory (but how can we tell?) we could do + shrink_dcache_parent(direntry); */ } else { - cERROR(1,("Error 0x%x on cifs_get_inode_info in lookup of %s", - rc,full_path)); - /* BB special case check for Access Denied - watch security - exposure of returning dir info implicitly via different rc + cERROR(1, ("Error 0x%x on cifs_get_inode_info in lookup of %s", + rc, full_path)); + /* BB special case check for Access Denied - watch security + exposure of returning dir info implicitly via different rc if file exists or not but no access BB */ } @@ -538,11 +543,11 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) } else { cFYI(1, ("neg dentry 0x%p name = %s", direntry, direntry->d_name.name)); - if(time_after(jiffies, direntry->d_time + HZ) || + if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) { d_drop(direntry); isValid = 0; - } + } } return isValid; @@ -559,8 +564,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) struct dentry_operations cifs_dentry_ops = { .d_revalidate = cifs_d_revalidate, -/* d_delete: cifs_d_delete, *//* not needed except for debugging */ - /* no need for d_hash, d_compare, d_release, d_iput ... yet. 
BB confirm this BB */ +/* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c index da12b482ebe5..8e375bb4b379 100644 --- a/fs/cifs/fcntl.c +++ b/fs/cifs/fcntl.c @@ -2,7 +2,7 @@ * fs/cifs/fcntl.c * * vfs operations that deal with the file control API - * + * * Copyright (C) International Business Machines Corp., 2003,2004 * Author(s): Steve French (sfrench@us.ibm.com) * @@ -35,35 +35,34 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags) /* No way on Linux VFS to ask to monitor xattr changes (and no stream support either */ - if(fcntl_notify_flags & DN_ACCESS) { + if (fcntl_notify_flags & DN_ACCESS) { cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS; } - if(fcntl_notify_flags & DN_MODIFY) { + if (fcntl_notify_flags & DN_MODIFY) { /* What does this mean on directories? */ cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_SIZE; } - if(fcntl_notify_flags & DN_CREATE) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | + if (fcntl_notify_flags & DN_CREATE) { + cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | FILE_NOTIFY_CHANGE_LAST_WRITE; } - if(fcntl_notify_flags & DN_DELETE) { + if (fcntl_notify_flags & DN_DELETE) { cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE; } - if(fcntl_notify_flags & DN_RENAME) { + if (fcntl_notify_flags & DN_RENAME) { /* BB review this - checking various server behaviors */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | + cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | FILE_NOTIFY_CHANGE_FILE_NAME; } - if(fcntl_notify_flags & DN_ATTRIB) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | + if (fcntl_notify_flags & DN_ATTRIB) { + cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | FILE_NOTIFY_CHANGE_ATTRIBUTES; } -/* if(fcntl_notify_flags & DN_MULTISHOT) { +/* if (fcntl_notify_flags & DN_MULTISHOT) { cifs_ntfy_flags |= ; } */ /* BB fixme - not sure how to handle 
this with CIFS yet */ - return cifs_ntfy_flags; } @@ -78,8 +77,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg) __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES; __u16 netfid; - - if(experimEnabled == 0) + if (experimEnabled == 0) return 0; xid = GetXid(); @@ -88,21 +86,21 @@ int cifs_dir_notify(struct file * file, unsigned long arg) full_path = build_path_from_dentry(file->f_path.dentry); - if(full_path == NULL) { + if (full_path == NULL) { rc = -ENOMEM; } else { - cFYI(1,("dir notify on file %s Arg 0x%lx",full_path,arg)); - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, + cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg)); + rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, GENERIC_READ | SYNCHRONIZE, 0 /* create options */, - &netfid, &oplock,NULL, cifs_sb->local_nls, + &netfid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); /* BB fixme - add this handle to a notify handle list */ - if(rc) { - cFYI(1,("Could not open directory for notify")); + if (rc) { + cFYI(1, ("Could not open directory for notify")); } else { filter = convert_to_cifs_notify_flags(arg); - if(filter != 0) { - rc = CIFSSMBNotify(xid, pTcon, + if (filter != 0) { + rc = CIFSSMBNotify(xid, pTcon, 0 /* no subdirs */, netfid, filter, file, arg & DN_MULTISHOT, cifs_sb->local_nls); @@ -113,10 +111,10 @@ int cifs_dir_notify(struct file * file, unsigned long arg) it would close automatically but may be a way to do it easily when inode freed or when notify info is cleared/changed */ - cFYI(1,("notify rc %d",rc)); + cFYI(1, ("notify rc %d", rc)); } } - + FreeXid(xid); return rc; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3e87dad3367c..f0ff12b3f398 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -986,7 +986,8 @@ mkdir_get_info: * failed to get it from the server or was set bogus */ if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) direntry->d_inode->i_nlink = 2; - if 
(cifs_sb->tcon->ses->capabilities & CAP_UNIX) + if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) { + mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, @@ -1004,7 +1005,7 @@ mkdir_get_info: cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } - else { + } else { /* BB to be implemented via Windows secrty descriptors eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, -1, -1, local_nls); */ diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index e34c7db00f6f..a414f1775ae0 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -30,7 +30,7 @@ #define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) -int cifs_ioctl (struct inode * inode, struct file * filep, +int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg) { int rc = -ENOTTY; /* strange error - but the precedent */ @@ -47,13 +47,13 @@ int cifs_ioctl (struct inode * inode, struct file * filep, xid = GetXid(); - cFYI(1,("ioctl file %p cmd %u arg %lu",filep,command,arg)); + cFYI(1, ("ioctl file %p cmd %u arg %lu", filep, command, arg)); cifs_sb = CIFS_SB(inode->i_sb); #ifdef CONFIG_CIFS_POSIX tcon = cifs_sb->tcon; - if(tcon) + if (tcon) caps = le64_to_cpu(tcon->fsUnixInfo.Capability); else { rc = -EIO; @@ -62,24 +62,24 @@ int cifs_ioctl (struct inode * inode, struct file * filep, } #endif /* CONFIG_CIFS_POSIX */ - switch(command) { + switch (command) { case CIFS_IOC_CHECKUMOUNT: - cFYI(1,("User unmount attempted")); - if(cifs_sb->mnt_uid == current->uid) + cFYI(1, ("User unmount attempted")); + if (cifs_sb->mnt_uid == current->uid) rc = 0; else { rc = -EACCES; - cFYI(1,("uids do not match")); + cFYI(1, ("uids do not match")); } break; #ifdef CONFIG_CIFS_POSIX case FS_IOC_GETFLAGS: - if(CIFS_UNIX_EXTATTR_CAP & caps) { + if (CIFS_UNIX_EXTATTR_CAP & caps) { if (pSMBFile == NULL) break; rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, &ExtAttrBits, &ExtAttrMask); - if(rc == 0) + if (rc == 0) rc = 
put_user(ExtAttrBits & FS_FL_USER_VISIBLE, (int __user *)arg); @@ -87,8 +87,8 @@ int cifs_ioctl (struct inode * inode, struct file * filep, break; case FS_IOC_SETFLAGS: - if(CIFS_UNIX_EXTATTR_CAP & caps) { - if(get_user(ExtAttrBits,(int __user *)arg)) { + if (CIFS_UNIX_EXTATTR_CAP & caps) { + if (get_user(ExtAttrBits, (int __user *)arg)) { rc = -EFAULT; break; } @@ -96,16 +96,15 @@ int cifs_ioctl (struct inode * inode, struct file * filep, break; /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, extAttrBits, &ExtAttrMask);*/ - } - cFYI(1,("set flags not implemented yet")); + cFYI(1, ("set flags not implemented yet")); break; #endif /* CONFIG_CIFS_POSIX */ default: - cFYI(1,("unsupported ioctl")); + cFYI(1, ("unsupported ioctl")); break; } FreeXid(xid); return rc; -} +} diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h index aede606132aa..8b69fcceb597 100644 --- a/fs/cifs/rfc1002pdu.h +++ b/fs/cifs/rfc1002pdu.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */ diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 9ddf5ed62162..898a86dde8f5 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -470,7 +470,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir) ret = -ENOENT; if (!IS_DEADDIR(host_inode)) { - ret = host_file->f_op->readdir(host_file, filldir, dirent); + ret = host_file->f_op->readdir(host_file, dirent, filldir); file_accessed(host_file); } } diff --git a/fs/coda/file.c b/fs/coda/file.c index 5ef2b609ec7d..99dbe866816d 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p } static ssize_t 
-coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count, - read_actor_t actor, void *target) +coda_file_splice_read(struct file *coda_file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t count, + unsigned int flags) { struct coda_file_info *cfi; struct file *host_file; @@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count, BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->sendfile) + if (!host_file->f_op || !host_file->f_op->splice_read) return -EINVAL; - return host_file->f_op->sendfile(host_file, ppos, count, actor, target); + return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags); } static ssize_t @@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = { .flush = coda_flush, .release = coda_release, .fsync = coda_fsync, - .sendfile = coda_file_sendfile, + .splice_read = coda_file_splice_read, }; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index ec8896b264de..1d533a2ec3a6 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -368,6 +368,69 @@ void debugfs_remove(struct dentry *dentry) } EXPORT_SYMBOL_GPL(debugfs_remove); +/** + * debugfs_rename - rename a file/directory in the debugfs filesystem + * @old_dir: a pointer to the parent dentry for the renamed object. This + * should be a directory dentry. + * @old_dentry: dentry of an object to be renamed. + * @new_dir: a pointer to the parent dentry where the object should be + * moved. This should be a directory dentry. + * @new_name: a pointer to a string containing the target name. + * + * This function renames a file/directory in debugfs. The target must not + * exist for rename to succeed. + * + * This function will return a pointer to old_dentry (which is updated to + * reflect renaming) if it succeeds. If an error occurs, %NULL will be + * returned. 
+ * + * If debugfs is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, + struct dentry *new_dir, const char *new_name) +{ + int error; + struct dentry *dentry = NULL, *trap; + const char *old_name; + + trap = lock_rename(new_dir, old_dir); + /* Source or destination directories don't exist? */ + if (!old_dir->d_inode || !new_dir->d_inode) + goto exit; + /* Source does not exist, cyclic rename, or mountpoint? */ + if (!old_dentry->d_inode || old_dentry == trap || + d_mountpoint(old_dentry)) + goto exit; + dentry = lookup_one_len(new_name, new_dir, strlen(new_name)); + /* Lookup failed, cyclic rename or target exists? */ + if (IS_ERR(dentry) || dentry == trap || dentry->d_inode) + goto exit; + + old_name = fsnotify_oldname_init(old_dentry->d_name.name); + + error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, + dentry); + if (error) { + fsnotify_oldname_free(old_name); + goto exit; + } + d_move(old_dentry, dentry); + fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name, + old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode), + NULL, old_dentry->d_inode); + fsnotify_oldname_free(old_name); + unlock_rename(new_dir, old_dir); + dput(dentry); + return old_dentry; +exit: + if (dentry && !IS_ERR(dentry)) + dput(dentry); + unlock_rename(new_dir, old_dir); + return NULL; +} +EXPORT_SYMBOL_GPL(debugfs_rename); + static decl_subsys(debug, NULL, NULL); static int __init debugfs_init(void) diff --git a/fs/direct-io.c b/fs/direct-io.c index 8593f3dfd299..52bb2638f7ab 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1106,7 +1106,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, spin_lock_irqsave(&dio->bio_lock, flags); ret2 = --dio->refcount; spin_unlock_irqrestore(&dio->bio_lock, flags); - BUG_ON(!dio->is_async && ret2 != 0); + if (ret2 == 0) { ret = dio_complete(dio, offset, ret); kfree(dio); diff --git 
a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 69a94690e493..54bcc00ec8df 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -3,7 +3,7 @@ menu "Distributed Lock Manager" config DLM tristate "Distributed Lock Manager (DLM)" - depends on IPV6 || IPV6=n + depends on SYSFS && (IPV6 || IPV6=n) select CONFIGFS_FS select IP_SCTP help diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index 604cf7dc5f39..d248e60951ba 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile @@ -8,6 +8,7 @@ dlm-y := ast.o \ member.o \ memory.o \ midcomms.o \ + netlink.o \ lowcomms.o \ rcom.o \ recover.o \ diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 822abdcd1434..5069b2cb5a1f 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -90,6 +90,7 @@ struct cluster { unsigned int cl_scan_secs; unsigned int cl_log_debug; unsigned int cl_protocol; + unsigned int cl_timewarn_cs; }; enum { @@ -103,6 +104,7 @@ enum { CLUSTER_ATTR_SCAN_SECS, CLUSTER_ATTR_LOG_DEBUG, CLUSTER_ATTR_PROTOCOL, + CLUSTER_ATTR_TIMEWARN_CS, }; struct cluster_attribute { @@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1); CLUSTER_ATTR(scan_secs, 1); CLUSTER_ATTR(log_debug, 0); CLUSTER_ATTR(protocol, 0); +CLUSTER_ATTR(timewarn_cs, 1); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, @@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, + [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, NULL, }; @@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_toss_secs = dlm_config.ci_toss_secs; cl->cl_scan_secs = dlm_config.ci_scan_secs; cl->cl_log_debug = dlm_config.ci_log_debug; + cl->cl_protocol = dlm_config.ci_protocol; + cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -748,9 +754,16 @@ static 
ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) static struct space *get_space(char *name) { + struct config_item *i; + if (!space_list) return NULL; - return to_space(config_group_find_obj(space_list, name)); + + down(&space_list->cg_subsys->su_sem); + i = config_group_find_obj(space_list, name); + up(&space_list->cg_subsys->su_sem); + + return to_space(i); } static void put_space(struct space *sp) @@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) if (cm->nodeid != nodeid) continue; found = 1; + config_item_get(i); break; } else { if (!cm->addr_count || memcmp(cm->addr[0], addr, sizeof(*addr))) continue; found = 1; + config_item_get(i); break; } } up(&clusters_root.subsys.su_sem); - if (found) - config_item_get(i); - else + if (!found) cm = NULL; return cm; } @@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_SCAN_SECS 5 #define DEFAULT_LOG_DEBUG 0 #define DEFAULT_PROTOCOL 0 +#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, @@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = { .ci_toss_secs = DEFAULT_TOSS_SECS, .ci_scan_secs = DEFAULT_SCAN_SECS, .ci_log_debug = DEFAULT_LOG_DEBUG, - .ci_protocol = DEFAULT_PROTOCOL + .ci_protocol = DEFAULT_PROTOCOL, + .ci_timewarn_cs = DEFAULT_TIMEWARN_CS }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 967cc3d72e5e..a3170fe22090 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -27,6 +27,7 @@ struct dlm_config_info { int ci_scan_secs; int ci_log_debug; int ci_protocol; + int ci_timewarn_cs; }; extern struct dlm_config_info dlm_config; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 61ba670b9e02..12c3bfd5e660 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -17,6 +17,7 @@ #include <linux/debugfs.h> #include "dlm_internal.h" +#include "lock.h" #define DLM_DEBUG_BUF_LEN 4096 static char 
debug_buf[DLM_DEBUG_BUF_LEN]; @@ -26,6 +27,8 @@ static struct dentry *dlm_root; struct rsb_iter { int entry; + int locks; + int header; struct dlm_ls *ls; struct list_head *next; struct dlm_rsb *rsb; @@ -57,8 +60,8 @@ static char *print_lockmode(int mode) } } -static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, - struct dlm_rsb *res) +static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb, + struct dlm_rsb *res) { seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); @@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) struct dlm_lkb *lkb; int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list; + lock_rsb(res); + seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length); for (i = 0; i < res->res_length; i++) { if (isprint(res->res_name[i])) @@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) /* Print the locks attached to this resource */ seq_printf(s, "Granted Queue\n"); list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); seq_printf(s, "Conversion Queue\n"); list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); seq_printf(s, "Waiting Queue\n"); list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) - print_lock(s, lkb, res); + print_resource_lock(s, lkb, res); if (list_empty(&res->res_lookup)) goto out; @@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s) seq_printf(s, "\n"); } out: + unlock_rsb(res); + return 0; +} + +static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r) +{ + struct dlm_user_args *ua; + unsigned int waiting = 0; + uint64_t xid = 0; + + if (lkb->lkb_flags & DLM_IFL_USER) { + ua = (struct dlm_user_args *) lkb->lkb_astparam; + if (ua) + xid = ua->xid; + } + + if (lkb->lkb_timestamp) + waiting = 
jiffies_to_msecs(jiffies - lkb->lkb_timestamp); + + /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms + r_nodeid r_len r_name */ + + seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n", + lkb->lkb_id, + lkb->lkb_nodeid, + lkb->lkb_remid, + lkb->lkb_ownpid, + (unsigned long long)xid, + lkb->lkb_exflags, + lkb->lkb_flags, + lkb->lkb_status, + lkb->lkb_grmode, + lkb->lkb_rqmode, + waiting, + r->res_nodeid, + r->res_length, + r->res_name); +} + +static int print_locks(struct dlm_rsb *r, struct seq_file *s) +{ + struct dlm_lkb *lkb; + + lock_rsb(r); + + list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) + print_lock(s, lkb, r); + + list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) + print_lock(s, lkb, r); + + list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) + print_lock(s, lkb, r); + + unlock_rsb(r); return 0; } @@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri) read_lock(&ls->ls_rsbtbl[i].lock); if (!list_empty(&ls->ls_rsbtbl[i].list)) { ri->next = ls->ls_rsbtbl[i].list.next; + ri->rsb = list_entry(ri->next, struct dlm_rsb, + res_hashchain); + dlm_hold_rsb(ri->rsb); read_unlock(&ls->ls_rsbtbl[i].lock); break; } @@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri) if (ri->entry >= ls->ls_rsbtbl_size) return 1; } else { + struct dlm_rsb *old = ri->rsb; i = ri->entry; read_lock(&ls->ls_rsbtbl[i].lock); ri->next = ri->next->next; @@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri) ri->next = NULL; ri->entry++; read_unlock(&ls->ls_rsbtbl[i].lock); + dlm_put_rsb(old); goto top; } + ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain); + dlm_hold_rsb(ri->rsb); read_unlock(&ls->ls_rsbtbl[i].lock); + dlm_put_rsb(old); } - ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain); return 0; } @@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls) { struct rsb_iter *ri; - ri = kmalloc(sizeof *ri, GFP_KERNEL); + ri = kzalloc(sizeof *ri, 
GFP_KERNEL); if (!ri) return NULL; @@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr) { struct rsb_iter *ri = iter_ptr; - print_resource(ri->rsb, file); + if (ri->locks) { + if (ri->header) { + seq_printf(file, "id nodeid remid pid xid exflags flags " + "sts grmode rqmode time_ms r_nodeid " + "r_len r_name\n"); + ri->header = 0; + } + print_locks(ri->rsb, file); + } else { + print_resource(ri->rsb, file); + } return 0; } @@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = { }; /* + * Dump state in compact per-lock listing + */ + +static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos) +{ + struct rsb_iter *ri; + + ri = kzalloc(sizeof *ri, GFP_KERNEL); + if (!ri) + return NULL; + + ri->ls = ls; + ri->entry = 0; + ri->next = NULL; + ri->locks = 1; + + if (*pos == 0) + ri->header = 1; + + if (rsb_iter_next(ri)) { + rsb_iter_free(ri); + return NULL; + } + + return ri; +} + +static void *locks_seq_start(struct seq_file *file, loff_t *pos) +{ + struct rsb_iter *ri; + loff_t n = *pos; + + ri = locks_iter_init(file->private, pos); + if (!ri) + return NULL; + + while (n--) { + if (rsb_iter_next(ri)) { + rsb_iter_free(ri); + return NULL; + } + } + + return ri; +} + +static struct seq_operations locks_seq_ops = { + .start = locks_seq_start, + .next = rsb_seq_next, + .stop = rsb_seq_stop, + .show = rsb_seq_show, +}; + +static int locks_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &locks_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; + + return 0; +} + +static const struct file_operations locks_fops = { + .owner = THIS_MODULE, + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* * dump lkb's on the ls_waiters list */ @@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls) return -ENOMEM; } + memset(name, 0, sizeof(name)); + snprintf(name, 
DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name); + + ls->ls_debug_locks_dentry = debugfs_create_file(name, + S_IFREG | S_IRUGO, + dlm_root, + ls, + &locks_fops); + if (!ls->ls_debug_locks_dentry) { + debugfs_remove(ls->ls_debug_waiters_dentry); + debugfs_remove(ls->ls_debug_rsb_dentry); + return -ENOMEM; + } + return 0; } @@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_rsb_dentry); if (ls->ls_debug_waiters_dentry) debugfs_remove(ls->ls_debug_waiters_dentry); + if (ls->ls_debug_locks_dentry) + debugfs_remove(ls->ls_debug_locks_dentry); } int dlm_register_debugfs(void) diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 30994d68f6a0..74901e981e10 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -151,6 +151,7 @@ struct dlm_args { void *bastaddr; int mode; struct dlm_lksb *lksb; + unsigned long timeout; }; @@ -213,6 +214,9 @@ struct dlm_args { #define DLM_IFL_OVERLAP_UNLOCK 0x00080000 #define DLM_IFL_OVERLAP_CANCEL 0x00100000 #define DLM_IFL_ENDOFLIFE 0x00200000 +#define DLM_IFL_WATCH_TIMEWARN 0x00400000 +#define DLM_IFL_TIMEOUT_CANCEL 0x00800000 +#define DLM_IFL_DEADLOCK_CANCEL 0x01000000 #define DLM_IFL_USER 0x00000001 #define DLM_IFL_ORPHAN 0x00000002 @@ -243,6 +247,9 @@ struct dlm_lkb { struct list_head lkb_wait_reply; /* waiting for remote reply */ struct list_head lkb_astqueue; /* need ast to be sent */ struct list_head lkb_ownqueue; /* list of locks for a process */ + struct list_head lkb_time_list; + unsigned long lkb_timestamp; + unsigned long lkb_timeout_cs; char *lkb_lvbptr; struct dlm_lksb *lkb_lksb; /* caller's status block */ @@ -447,12 +454,16 @@ struct dlm_ls { struct mutex ls_orphans_mutex; struct list_head ls_orphans; + struct mutex ls_timeout_mutex; + struct list_head ls_timeout; + struct list_head ls_nodes; /* current nodes in ls */ struct list_head ls_nodes_gone; /* dead node list, recovery */ int ls_num_nodes; /* number of nodes in ls */ int ls_low_nodeid; int 
ls_total_weight; int *ls_node_array; + gfp_t ls_allocation; struct dlm_rsb ls_stub_rsb; /* for returning errors */ struct dlm_lkb ls_stub_lkb; /* for returning errors */ @@ -460,9 +471,12 @@ struct dlm_ls { struct dentry *ls_debug_rsb_dentry; /* debugfs */ struct dentry *ls_debug_waiters_dentry; /* debugfs */ + struct dentry *ls_debug_locks_dentry; /* debugfs */ wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; + struct completion ls_members_done; + int ls_members_result; struct miscdevice ls_device; @@ -472,6 +486,7 @@ struct dlm_ls { struct task_struct *ls_recoverd_task; struct mutex ls_recoverd_active; spinlock_t ls_recover_lock; + unsigned long ls_recover_begin; /* jiffies timestamp */ uint32_t ls_recover_status; /* DLM_RS_ */ uint64_t ls_recover_seq; struct dlm_recover *ls_recover_args; @@ -501,6 +516,7 @@ struct dlm_ls { #define LSFL_RCOM_READY 3 #define LSFL_RCOM_WAIT 4 #define LSFL_UEVENT_WAIT 5 +#define LSFL_TIMEWARN 6 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ @@ -518,6 +534,7 @@ struct dlm_user_args { void __user *castaddr; void __user *bastparam; void __user *bastaddr; + uint64_t xid; }; #define DLM_PROC_FLAGS_CLOSING 1 diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index d8d6e729f96b..b455919c1998 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode); static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); static int send_remove(struct dlm_rsb *r); static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); +static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, struct dlm_message *ms); static int receive_extralen(struct dlm_message *ms); static void do_purge(struct dlm_ls *ls, int nodeid, int pid); +static void del_timeout(struct dlm_lkb *lkb); +void 
dlm_timeout_warn(struct dlm_lkb *lkb); /* * Lock compatibilty matrix - thanks Steve @@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r) /* Threads cannot use the lockspace while it's being recovered */ -static inline void lock_recovery(struct dlm_ls *ls) +static inline void dlm_lock_recovery(struct dlm_ls *ls) { down_read(&ls->ls_in_recovery); } -static inline void unlock_recovery(struct dlm_ls *ls) +void dlm_unlock_recovery(struct dlm_ls *ls) { up_read(&ls->ls_in_recovery); } -static inline int lock_recovery_try(struct dlm_ls *ls) +int dlm_lock_recovery_try(struct dlm_ls *ls) { return down_read_trylock(&ls->ls_in_recovery); } @@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) if (is_master_copy(lkb)) return; + del_timeout(lkb); + DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); + /* if the operation was a cancel, then return -DLM_ECANCEL, if a + timeout caused the cancel then return -ETIMEDOUT */ + if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) { + lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL; + rv = -ETIMEDOUT; + } + + if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) { + lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL; + rv = -EDEADLK; + } + lkb->lkb_lksb->sb_status = rv; lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; @@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) kref_init(&lkb->lkb_ref); INIT_LIST_HEAD(&lkb->lkb_ownqueue); INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); + INIT_LIST_HEAD(&lkb->lkb_time_list); get_random_bytes(&bucket, sizeof(bucket)); bucket &= (ls->ls_lkbtbl_size - 1); @@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls) { int i; - if (dlm_locking_stopped(ls)) - return; - for (i = 0; i < ls->ls_rsbtbl_size; i++) { shrink_bucket(ls, i); + if (dlm_locking_stopped(ls)) + break; cond_resched(); } } +static void add_timeout(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + + if (is_master_copy(lkb)) { + 
lkb->lkb_timestamp = jiffies; + return; + } + + if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) && + !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { + lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN; + goto add_it; + } + if (lkb->lkb_exflags & DLM_LKF_TIMEOUT) + goto add_it; + return; + + add_it: + DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb);); + mutex_lock(&ls->ls_timeout_mutex); + hold_lkb(lkb); + lkb->lkb_timestamp = jiffies; + list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout); + mutex_unlock(&ls->ls_timeout_mutex); +} + +static void del_timeout(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + + mutex_lock(&ls->ls_timeout_mutex); + if (!list_empty(&lkb->lkb_time_list)) { + list_del_init(&lkb->lkb_time_list); + unhold_lkb(lkb); + } + mutex_unlock(&ls->ls_timeout_mutex); +} + +/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and + lkb_lksb_timeout without lock_rsb? Note: we can't lock timeout_mutex + and then lock rsb because of lock ordering in add_timeout. We may need + to specify some special timeout-related bits in the lkb that are just to + be accessed under the timeout_mutex. 
*/ + +void dlm_scan_timeout(struct dlm_ls *ls) +{ + struct dlm_rsb *r; + struct dlm_lkb *lkb; + int do_cancel, do_warn; + + for (;;) { + if (dlm_locking_stopped(ls)) + break; + + do_cancel = 0; + do_warn = 0; + mutex_lock(&ls->ls_timeout_mutex); + list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) { + + if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) && + time_after_eq(jiffies, lkb->lkb_timestamp + + lkb->lkb_timeout_cs * HZ/100)) + do_cancel = 1; + + if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) && + time_after_eq(jiffies, lkb->lkb_timestamp + + dlm_config.ci_timewarn_cs * HZ/100)) + do_warn = 1; + + if (!do_cancel && !do_warn) + continue; + hold_lkb(lkb); + break; + } + mutex_unlock(&ls->ls_timeout_mutex); + + if (!do_cancel && !do_warn) + break; + + r = lkb->lkb_resource; + hold_rsb(r); + lock_rsb(r); + + if (do_warn) { + /* clear flag so we only warn once */ + lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN; + if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT)) + del_timeout(lkb); + dlm_timeout_warn(lkb); + } + + if (do_cancel) { + log_debug(ls, "timeout cancel %x node %d %s", + lkb->lkb_id, lkb->lkb_nodeid, r->res_name); + lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN; + lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL; + del_timeout(lkb); + _cancel_lock(r, lkb); + } + + unlock_rsb(r); + unhold_rsb(r); + dlm_put_lkb(lkb); + } +} + +/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping + dlm_recoverd before checking/setting ls_recover_begin. 
*/ + +void dlm_adjust_timeouts(struct dlm_ls *ls) +{ + struct dlm_lkb *lkb; + long adj = jiffies - ls->ls_recover_begin; + + ls->ls_recover_begin = 0; + mutex_lock(&ls->ls_timeout_mutex); + list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) + lkb->lkb_timestamp += adj; + mutex_unlock(&ls->ls_timeout_mutex); +} + /* lkb is master or local copy */ static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) @@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) * queue for one resource. The granted mode of each lock blocks the requested * mode of the other lock." * - * Part 2: if the granted mode of lkb is preventing the first lkb in the - * convert queue from being granted, then demote lkb (set grmode to NL). - * This second form requires that we check for conv-deadlk even when - * now == 0 in _can_be_granted(). + * Part 2: if the granted mode of lkb is preventing an earlier lkb in the + * convert queue from being granted, then deadlk/demote lkb. * * Example: * Granted Queue: empty @@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) * * The first lock can't be granted because of the granted mode of the second * lock and the second lock can't be granted because it's not first in the - * list. We demote the granted mode of the second lock (the lkb passed to this - * function). + * list. We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we + * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK + * flag set and return DEMOTED in the lksb flags. + * + * Originally, this function detected conv-deadlk in a more limited scope: + * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or + * - if lkb1 was the first entry in the queue (not just earlier), and was + * blocked by the granted mode of lkb2, and there was nothing on the + * granted queue preventing lkb1 from being granted immediately, i.e. 
+ * lkb2 was the only thing preventing lkb1 from being granted. + * + * That second condition meant we'd only say there was conv-deadlk if + * resolving it (by demotion) would lead to the first lock on the convert + * queue being granted right away. It allowed conversion deadlocks to exist + * between locks on the convert queue while they couldn't be granted anyway. * - * After the resolution, the "grant pending" function needs to go back and try - * to grant locks on the convert queue again since the first lock can now be - * granted. + * Now, we detect and take action on conversion deadlocks immediately when + * they're created, even if they may not be immediately consequential. If + * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted + * mode that would prevent lkb1's conversion from being granted, we do a + * deadlk/demote on lkb2 right away and don't let it onto the convert queue. + * I think this means that the lkb_is_ahead condition below should always + * be zero, i.e. there will never be conv-deadlk between two locks that are + * both already on the convert queue. */ -static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) +static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2) { - struct dlm_lkb *this, *first = NULL, *self = NULL; + struct dlm_lkb *lkb1; + int lkb_is_ahead = 0; - list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { - if (!first) - first = this; - if (this == lkb) { - self = lkb; + list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) { + if (lkb1 == lkb2) { + lkb_is_ahead = 1; continue; } - if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) - return 1; - } - - /* if lkb is on the convert queue and is preventing the first - from being granted, then there's deadlock and we demote lkb. - multiple converting locks may need to do this before the first - converting lock can be granted. 
*/ - - if (self && self != first) { - if (!modes_compat(lkb, first) && - !queue_conflict(&rsb->res_grantqueue, first)) - return 1; + if (!lkb_is_ahead) { + if (!modes_compat(lkb2, lkb1)) + return 1; + } else { + if (!modes_compat(lkb2, lkb1) && + !modes_compat(lkb1, lkb2)) + return 1; + } } - return 0; } @@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) if (!now && !conv && list_empty(&r->res_convertqueue) && first_in_list(lkb, &r->res_waitqueue)) return 1; - out: - /* - * The following, enabled by CONVDEADLK, departs from VMS. - */ - - if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) && - conversion_deadlock_detect(r, lkb)) { - lkb->lkb_grmode = DLM_LOCK_NL; - lkb->lkb_sbflags |= DLM_SBF_DEMOTED; - } - return 0; } -/* - * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a - * simple way to provide a big optimization to applications that can use them. - */ - -static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) +static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, + int *err) { - uint32_t flags = lkb->lkb_exflags; int rv; int8_t alt = 0, rqmode = lkb->lkb_rqmode; + int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV); + + if (err) + *err = 0; rv = _can_be_granted(r, lkb, now); if (rv) goto out; - if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) + /* + * The CONVDEADLK flag is non-standard and tells the dlm to resolve + * conversion deadlocks by demoting grmode to NL, otherwise the dlm + * cancels one of the locks. 
+ */ + + if (is_convert && can_be_queued(lkb) && + conversion_deadlock_detect(r, lkb)) { + if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) { + lkb->lkb_grmode = DLM_LOCK_NL; + lkb->lkb_sbflags |= DLM_SBF_DEMOTED; + } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { + if (err) + *err = -EDEADLK; + else { + log_print("can_be_granted deadlock %x now %d", + lkb->lkb_id, now); + dlm_dump_rsb(r); + } + } goto out; + } - if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) + /* + * The ALTPR and ALTCW flags are non-standard and tell the dlm to try + * to grant a request in a mode other than the normal rqmode. It's a + * simple way to provide a big optimization to applications that can + * use them. + */ + + if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR)) alt = DLM_LOCK_PR; - else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) + else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW)) alt = DLM_LOCK_CW; if (alt) { @@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) return rv; } +/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock + for locks pending on the convert list. Once verified (watch for these + log_prints), we should be able to just call _can_be_granted() and not + bother with the demote/deadlk cases here (and there's no easy way to deal + with a deadlk here, we'd have to generate something like grant_lock with + the deadlk error.) 
*/ + +/* returns the highest requested mode of all blocked conversions */ + static int grant_pending_convert(struct dlm_rsb *r, int high) { struct dlm_lkb *lkb, *s; int hi, demoted, quit, grant_restart, demote_restart; + int deadlk; quit = 0; restart: @@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high) list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { demoted = is_demoted(lkb); - if (can_be_granted(r, lkb, 0)) { + deadlk = 0; + + if (can_be_granted(r, lkb, 0, &deadlk)) { grant_lock_pending(r, lkb); grant_restart = 1; - } else { - hi = max_t(int, lkb->lkb_rqmode, hi); - if (!demoted && is_demoted(lkb)) - demote_restart = 1; + continue; } + + if (!demoted && is_demoted(lkb)) { + log_print("WARN: pending demoted %x node %d %s", + lkb->lkb_id, lkb->lkb_nodeid, r->res_name); + demote_restart = 1; + continue; + } + + if (deadlk) { + log_print("WARN: pending deadlock %x node %d %s", + lkb->lkb_id, lkb->lkb_nodeid, r->res_name); + dlm_dump_rsb(r); + continue; + } + + hi = max_t(int, lkb->lkb_rqmode, hi); } if (grant_restart) @@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high) struct dlm_lkb *lkb, *s; list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { - if (can_be_granted(r, lkb, 0)) + if (can_be_granted(r, lkb, 0, NULL)) grant_lock_pending(r, lkb); else high = max_t(int, lkb->lkb_rqmode, high); @@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error) } static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, - int namelen, uint32_t parent_lkid, void *ast, + int namelen, unsigned long timeout_cs, void *ast, void *astarg, void *bast, struct dlm_args *args) { int rv = -EINVAL; @@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr) goto out; - /* parent/child locks not yet supported */ - if (parent_lkid) - goto out; - if (flags & DLM_LKF_CONVERT 
&& !lksb->sb_lkid) goto out; @@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, args->astaddr = ast; args->astparam = (long) astarg; args->bastaddr = bast; + args->timeout = timeout_cs; args->mode = mode; args->lksb = lksb; rv = 0; @@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_lksb = args->lksb; lkb->lkb_lvbptr = args->lksb->sb_lvbptr; lkb->lkb_ownpid = (int) current->pid; + lkb->lkb_timeout_cs = args->timeout; rv = 0; out: return rv; @@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) if (is_overlap(lkb)) goto out; + /* don't let scand try to do a cancel */ + del_timeout(lkb); + if (lkb->lkb_flags & DLM_IFL_RESEND) { lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL; rv = -EBUSY; @@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) if (is_overlap_unlock(lkb)) goto out; + /* don't let scand try to do a cancel */ + del_timeout(lkb); + if (lkb->lkb_flags & DLM_IFL_RESEND) { lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK; rv = -EBUSY; @@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error = 0; - if (can_be_granted(r, lkb, 1)) { + if (can_be_granted(r, lkb, 1, NULL)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); goto out; @@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) error = -EINPROGRESS; add_lkb(r, lkb, DLM_LKSTS_WAITING); send_blocking_asts(r, lkb); + add_timeout(lkb); goto out; } @@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error = 0; + int deadlk = 0; /* changing an existing lock may allow others to be granted */ - if (can_be_granted(r, lkb, 1)) { + if (can_be_granted(r, lkb, 1, &deadlk)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); grant_pending_locks(r); goto out; } + /* can_be_granted() detected that this 
lock would block in a conversion + deadlock, so we leave it on the granted queue and return EDEADLK in + the ast for the convert. */ + + if (deadlk) { + /* it's left on the granted queue */ + log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s", + lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status, + lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name); + revert_lock(r, lkb); + queue_cast(r, lkb, -EDEADLK); + error = -EDEADLK; + goto out; + } + /* is_demoted() means the can_be_granted() above set the grmode to NL, and left us on the granted queue. This auto-demotion (due to CONVDEADLK) might mean other locks, and/or this lock, are @@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) del_lkb(r, lkb); add_lkb(r, lkb, DLM_LKSTS_CONVERT); send_blocking_asts(r, lkb); + add_timeout(lkb); goto out; } @@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, if (!ls) return -EINVAL; - lock_recovery(ls); + dlm_lock_recovery(ls); if (convert) error = find_lkb(ls, lksb->sb_lkid, &lkb); @@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, if (error) goto out; - error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast, + error = set_lock_args(mode, lksb, flags, namelen, 0, ast, astarg, bast, &args); if (error) goto out_put; @@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace, out_put: if (convert || error) __put_lkb(ls, lkb); - if (error == -EAGAIN) + if (error == -EAGAIN || error == -EDEADLK) error = 0; out: - unlock_recovery(ls); + dlm_unlock_recovery(ls); dlm_put_lockspace(ls); return error; } @@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace, if (!ls) return -EINVAL; - lock_recovery(ls); + dlm_lock_recovery(ls); error = find_lkb(ls, lkid, &lkb); if (error) @@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace, out_put: dlm_put_lkb(lkb); out: - unlock_recovery(ls); + dlm_unlock_recovery(ls); dlm_put_lockspace(ls); return error; } @@ -2384,7 +2594,7 @@ static int _create_message(struct 
dlm_ls *ls, int mb_len, pass into lowcomms_commit and a message buffer (mb) that we write our data into */ - mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); + mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb); if (!mh) return -ENOBUFS; @@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) lkb->lkb_remid = ms->m_lkid; if (is_altmode(lkb)) munge_altmode(lkb, ms); - if (result) + if (result) { add_lkb(r, lkb, DLM_LKSTS_WAITING); - else { + add_timeout(lkb); + } else { grant_lock_pc(r, lkb, ms); queue_cast(r, lkb, 0); } @@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, queue_cast(r, lkb, -EAGAIN); break; + case -EDEADLK: + receive_flags_reply(lkb, ms); + revert_lock_pc(r, lkb); + queue_cast(r, lkb, -EDEADLK); + break; + case -EINPROGRESS: /* convert was queued on remote master */ receive_flags_reply(lkb, ms); @@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, munge_demoted(lkb, ms); del_lkb(r, lkb); add_lkb(r, lkb, DLM_LKSTS_CONVERT); + add_timeout(lkb); break; case 0: @@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) case -DLM_ECANCEL: receive_flags_reply(lkb, ms); revert_lock_pc(r, lkb); - if (ms->m_result) - queue_cast(r, lkb, -DLM_ECANCEL); + queue_cast(r, lkb, -DLM_ECANCEL); break; case 0: break; @@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) } } - if (lock_recovery_try(ls)) + if (dlm_lock_recovery_try(ls)) break; schedule(); } @@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) log_error(ls, "unknown message type %d", ms->m_type); } - unlock_recovery(ls); + dlm_unlock_recovery(ls); out: dlm_put_lockspace(ls); dlm_astd_wake(); @@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) int dlm_user_request(struct 
dlm_ls *ls, struct dlm_user_args *ua, int mode, uint32_t flags, void *name, unsigned int namelen, - uint32_t parent_lkid) + unsigned long timeout_cs) { struct dlm_lkb *lkb; struct dlm_args args; int error; - lock_recovery(ls); + dlm_lock_recovery(ls); error = create_lkb(ls, &lkb); if (error) { @@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, When DLM_IFL_USER is set, the dlm knows that this is a userspace lock and that lkb_astparam is the dlm_user_args structure. */ - error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid, + error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs, DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args); lkb->lkb_flags |= DLM_IFL_USER; ua->old_mode = DLM_LOCK_IV; @@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); spin_unlock(&ua->proc->locks_spin); out: - unlock_recovery(ls); + dlm_unlock_recovery(ls); return error; } int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, - int mode, uint32_t flags, uint32_t lkid, char *lvb_in) + int mode, uint32_t flags, uint32_t lkid, char *lvb_in, + unsigned long timeout_cs) { struct dlm_lkb *lkb; struct dlm_args args; struct dlm_user_args *ua; int error; - lock_recovery(ls); + dlm_lock_recovery(ls); error = find_lkb(ls, lkid, &lkb); if (error) @@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (lvb_in && ua->lksb.sb_lvbptr) memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); + ua->xid = ua_tmp->xid; ua->castparam = ua_tmp->castparam; ua->castaddr = ua_tmp->castaddr; ua->bastparam = ua_tmp->bastparam; @@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, ua->user_lksb = ua_tmp->user_lksb; ua->old_mode = lkb->lkb_grmode; - error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST, - ua, DLM_FAKE_USER_AST, &args); + error = set_lock_args(mode, 
&ua->lksb, flags, 0, timeout_cs, + DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args); if (error) goto out_put; error = convert_lock(ls, lkb, &args); - if (error == -EINPROGRESS || error == -EAGAIN) + if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK) error = 0; out_put: dlm_put_lkb(lkb); out: - unlock_recovery(ls); + dlm_unlock_recovery(ls); kfree(ua_tmp); return error; } @@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, struct dlm_user_args *ua; int error; - lock_recovery(ls); + dlm_lock_recovery(ls); error = find_lkb(ls, lkid, &lkb); if (error) @@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, out_put: dlm_put_lkb(lkb); out: - unlock_recovery(ls); + dlm_unlock_recovery(ls); kfree(ua_tmp); return error; } @@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, struct dlm_user_args *ua; int error; - lock_recovery(ls); + dlm_lock_recovery(ls); error = find_lkb(ls, lkid, &lkb); if (error) @@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, out_put: dlm_put_lkb(lkb); out: - unlock_recovery(ls); + dlm_unlock_recovery(ls); kfree(ua_tmp); return error; } +int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid) +{ + struct dlm_lkb *lkb; + struct dlm_args args; + struct dlm_user_args *ua; + struct dlm_rsb *r; + int error; + + dlm_lock_recovery(ls); + + error = find_lkb(ls, lkid, &lkb); + if (error) + goto out; + + ua = (struct dlm_user_args *)lkb->lkb_astparam; + + error = set_unlock_args(flags, ua, &args); + if (error) + goto out_put; + + /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */ + + r = lkb->lkb_resource; + hold_rsb(r); + lock_rsb(r); + + error = validate_unlock_args(lkb, &args); + if (error) + goto out_r; + lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL; + + error = _cancel_lock(r, lkb); + out_r: + unlock_rsb(r); + put_rsb(r); + + if (error == -DLM_ECANCEL) + 
error = 0; + /* from validate_unlock_args() */ + if (error == -EBUSY) + error = 0; + out_put: + dlm_put_lkb(lkb); + out: + dlm_unlock_recovery(ls); + return error; +} + /* lkb's that are removed from the waiters list by revert are just left on the orphans list with the granted orphan locks, to be freed by purge */ @@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) { struct dlm_lkb *lkb, *safe; - lock_recovery(ls); + dlm_lock_recovery(ls); while (1) { lkb = del_proc_lock(ls, proc); if (!lkb) break; + del_timeout(lkb); if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) orphan_proc_lock(ls, lkb); else @@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) } mutex_unlock(&ls->ls_clear_proc_locks); - unlock_recovery(ls); + dlm_unlock_recovery(ls); } static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) @@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, if (nodeid != dlm_our_nodeid()) { error = send_purge(ls, nodeid, pid); } else { - lock_recovery(ls); + dlm_lock_recovery(ls); if (pid == current->pid) purge_proc_locks(ls, proc); else do_purge(ls, nodeid, pid); - unlock_recovery(ls); + dlm_unlock_recovery(ls); } return error; } diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 64fc4ec40668..1720313c22df 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. 
** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r); void dlm_hold_rsb(struct dlm_rsb *r); int dlm_put_lkb(struct dlm_lkb *lkb); void dlm_scan_rsbs(struct dlm_ls *ls); +int dlm_lock_recovery_try(struct dlm_ls *ls); +void dlm_unlock_recovery(struct dlm_ls *ls); +void dlm_scan_timeout(struct dlm_ls *ls); +void dlm_adjust_timeouts(struct dlm_ls *ls); int dlm_purge_locks(struct dlm_ls *ls); void dlm_purge_mstcpy_locks(struct dlm_rsb *r); @@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc); int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc); int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, - uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid); + uint32_t flags, void *name, unsigned int namelen, + unsigned long timeout_cs); int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, - int mode, uint32_t flags, uint32_t lkid, char *lvb_in); + int mode, uint32_t flags, uint32_t lkid, char *lvb_in, + unsigned long timeout_cs); int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, uint32_t flags, uint32_t lkid, char *lvb_in); int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, uint32_t flags, uint32_t lkid); int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, int nodeid, int pid); +int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid); void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); static inline int is_master(struct dlm_rsb *r) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index a677b2a5eed4..1dc72105ab12 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in) else kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); + log_debug(ls, "%s the lockspace group...", 
in ? "joining" : "leaving"); + + /* dlm_controld will see the uevent, do the necessary group management + and then write to sysfs to wake us */ + error = wait_event_interruptible(ls->ls_uevent_wait, test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); + + log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result); + if (error) goto out; error = ls->ls_uevent_result; out: + if (error) + log_error(ls, "group %s failed %d %d", in ? "join" : "leave", + error, ls->ls_uevent_result); return error; } @@ -234,8 +245,13 @@ static int dlm_scand(void *data) struct dlm_ls *ls; while (!kthread_should_stop()) { - list_for_each_entry(ls, &lslist, ls_list) - dlm_scan_rsbs(ls); + list_for_each_entry(ls, &lslist, ls_list) { + if (dlm_lock_recovery_try(ls)) { + dlm_scan_rsbs(ls); + dlm_scan_timeout(ls); + dlm_unlock_recovery(ls); + } + } schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); } return 0; @@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, { struct dlm_ls *ls; int i, size, error = -ENOMEM; + int do_unreg = 0; if (namelen > DLM_LOCKSPACE_LEN) return -EINVAL; @@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace, goto out; memcpy(ls->ls_name, name, namelen); ls->ls_namelen = namelen; - ls->ls_exflags = flags; ls->ls_lvblen = lvblen; ls->ls_count = 0; ls->ls_flags = 0; + if (flags & DLM_LSFL_TIMEWARN) + set_bit(LSFL_TIMEWARN, &ls->ls_flags); + + if (flags & DLM_LSFL_FS) + ls->ls_allocation = GFP_NOFS; + else + ls->ls_allocation = GFP_KERNEL; + + /* ls_exflags are forced to match among nodes, and we don't + need to require all nodes to have TIMEWARN or FS set */ + ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS)); + size = dlm_config.ci_rsbtbl_size; ls->ls_rsbtbl_size = size; @@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace, mutex_init(&ls->ls_waiters_mutex); INIT_LIST_HEAD(&ls->ls_orphans); mutex_init(&ls->ls_orphans_mutex); + 
INIT_LIST_HEAD(&ls->ls_timeout); + mutex_init(&ls->ls_timeout_mutex); INIT_LIST_HEAD(&ls->ls_nodes); INIT_LIST_HEAD(&ls->ls_nodes_gone); @@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace, init_waitqueue_head(&ls->ls_uevent_wait); ls->ls_uevent_result = 0; + init_completion(&ls->ls_members_done); + ls->ls_members_result = -1; ls->ls_recoverd_task = NULL; mutex_init(&ls->ls_recoverd_active); @@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace, error = dlm_recoverd_start(ls); if (error) { log_error(ls, "can't start dlm_recoverd %d", error); - goto out_rcomfree; + goto out_delist; } - dlm_create_debug_file(ls); - error = kobject_setup(ls); if (error) - goto out_del; + goto out_stop; error = kobject_register(&ls->ls_kobj); if (error) - goto out_del; + goto out_stop; + + /* let kobject handle freeing of ls if there's an error */ + do_unreg = 1; + + /* This uevent triggers dlm_controld in userspace to add us to the + group of nodes that are members of this lockspace (managed by the + cluster infrastructure.) Once it's done that, it tells us who the + current lockspace members are (via configfs) and then tells the + lockspace to start running (via sysfs) in dlm_ls_start(). 
*/ error = do_uevent(ls, 1); if (error) - goto out_unreg; + goto out_stop; + + wait_for_completion(&ls->ls_members_done); + error = ls->ls_members_result; + if (error) + goto out_members; + + dlm_create_debug_file(ls); + + log_debug(ls, "join complete"); *lockspace = ls; return 0; - out_unreg: - kobject_unregister(&ls->ls_kobj); - out_del: - dlm_delete_debug_file(ls); + out_members: + do_uevent(ls, 0); + dlm_clear_members(ls); + kfree(ls->ls_node_array); + out_stop: dlm_recoverd_stop(ls); - out_rcomfree: + out_delist: spin_lock(&lslist_lock); list_del(&ls->ls_list); spin_unlock(&lslist_lock); @@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace, out_rsbfree: kfree(ls->ls_rsbtbl); out_lsfree: - kfree(ls); + if (do_unreg) + kobject_unregister(&ls->ls_kobj); + else + kfree(ls); out: module_put(THIS_MODULE); return error; @@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, error = new_lockspace(name, namelen, lockspace, flags, lvblen); if (!error) ls_count++; + else if (!ls_count) + threads_stop(); out: mutex_unlock(&ls_lock); return error; @@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_clear_members_gone(ls); kfree(ls->ls_node_array); kobject_unregister(&ls->ls_kobj); - /* The ls structure will be freed when the kobject is done with */ + /* The ls structure will be freed when the kobject is done with */ mutex_lock(&ls_lock); ls_count--; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 27970a58d29b..0553a6158dcb 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) static void lowcomms_data_ready(struct sock *sk, int count_unused) { struct connection *con = sock2con(sk); - if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) + if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags)) queue_work(recv_workqueue, &con->rwork); } @@ -268,7 +268,7 @@ static void 
lowcomms_write_space(struct sock *sk) { struct connection *con = sock2con(sk); - if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) + if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags)) queue_work(send_workqueue, &con->swork); } @@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con) INIT_WORK(&othercon->rwork, process_recv_sockets); set_bit(CF_IS_OTHERCON, &othercon->flags); newcon->othercon = othercon; + othercon->sock = newsock; + newsock->sk->sk_user_data = othercon; + add_sock(newsock, othercon); + addcon = othercon; + } + else { + printk("Extra connection from node %d attempted\n", nodeid); + result = -EAGAIN; + mutex_unlock(&newcon->sock_mutex); + goto accept_err; } - othercon->sock = newsock; - newsock->sk->sk_user_data = othercon; - add_sock(newsock, othercon); - addcon = othercon; } else { newsock->sk->sk_user_data = newcon; @@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void) down(&connections_lock); for (i = 0; i <= max_nodeid; i++) { con = __nodeid2con(i, 0); - if (con) + if (con) { con->flags |= 0xFF; + if (con->sock) + con->sock->sk->sk_user_data = NULL; + } } up(&connections_lock); diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 162fbae58fe5..eca2907f2386 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 
** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void); static inline int dlm_register_debugfs(void) { return 0; } static inline void dlm_unregister_debugfs(void) { } #endif +int dlm_netlink_init(void); +void dlm_netlink_exit(void); static int __init init_dlm(void) { @@ -50,10 +52,16 @@ static int __init init_dlm(void) if (error) goto out_debug; + error = dlm_netlink_init(); + if (error) + goto out_user; + printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; + out_user: + dlm_user_exit(); out_debug: dlm_unregister_debugfs(); out_config: @@ -68,6 +76,7 @@ static int __init init_dlm(void) static void __exit exit_dlm(void) { + dlm_netlink_exit(); dlm_user_exit(); dlm_config_exit(); dlm_memory_exit(); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 85e2897bd740..073599dced2a 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. 
** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) *neg_out = neg; error = ping_members(ls); + if (!error || error == -EPROTO) { + /* new_lockspace() may be waiting to know if the config + is good or bad */ + ls->ls_members_result = error; + complete(&ls->ls_members_done); + } if (error) goto out; @@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls) dlm_recoverd_suspend(ls); ls->ls_recover_status = 0; dlm_recoverd_resume(ls); + + if (!ls->ls_recover_begin) + ls->ls_recover_begin = jiffies; return 0; } diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c new file mode 100644 index 000000000000..863b87d0dc71 --- /dev/null +++ b/fs/dlm/netlink.c @@ -0,0 +1,153 @@ +/* + * Copyright (C) 2007 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ */ + +#include <net/genetlink.h> +#include <linux/dlm.h> +#include <linux/dlm_netlink.h> + +#include "dlm_internal.h" + +static uint32_t dlm_nl_seqnum; +static uint32_t listener_nlpid; + +static struct genl_family family = { + .id = GENL_ID_GENERATE, + .name = DLM_GENL_NAME, + .version = DLM_GENL_VERSION, +}; + +static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size) +{ + struct sk_buff *skb; + void *data; + + skb = genlmsg_new(size, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* add the message headers */ + data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd); + if (!data) { + nlmsg_free(skb); + return -EINVAL; + } + + *skbp = skb; + return 0; +} + +static struct dlm_lock_data *mk_data(struct sk_buff *skb) +{ + struct nlattr *ret; + + ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data)); + if (!ret) + return NULL; + return nla_data(ret); +} + +static int send_data(struct sk_buff *skb) +{ + struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); + void *data = genlmsg_data(genlhdr); + int rv; + + rv = genlmsg_end(skb, data); + if (rv < 0) { + nlmsg_free(skb); + return rv; + } + + return genlmsg_unicast(skb, listener_nlpid); +} + +static int user_cmd(struct sk_buff *skb, struct genl_info *info) +{ + listener_nlpid = info->snd_pid; + printk("user_cmd nlpid %u\n", listener_nlpid); + return 0; +} + +static struct genl_ops dlm_nl_ops = { + .cmd = DLM_CMD_HELLO, + .doit = user_cmd, +}; + +int dlm_netlink_init(void) +{ + int rv; + + rv = genl_register_family(&family); + if (rv) + return rv; + + rv = genl_register_ops(&family, &dlm_nl_ops); + if (rv < 0) + goto err; + return 0; + err: + genl_unregister_family(&family); + return rv; +} + +void dlm_netlink_exit(void) +{ + genl_unregister_ops(&family, &dlm_nl_ops); + genl_unregister_family(&family); +} + +static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb) +{ + struct dlm_rsb *r = lkb->lkb_resource; + struct dlm_user_args *ua = (struct dlm_user_args *) 
lkb->lkb_astparam; + + memset(data, 0, sizeof(struct dlm_lock_data)); + + data->version = DLM_LOCK_DATA_VERSION; + data->nodeid = lkb->lkb_nodeid; + data->ownpid = lkb->lkb_ownpid; + data->id = lkb->lkb_id; + data->remid = lkb->lkb_remid; + data->status = lkb->lkb_status; + data->grmode = lkb->lkb_grmode; + data->rqmode = lkb->lkb_rqmode; + data->timestamp = lkb->lkb_timestamp; + if (ua) + data->xid = ua->xid; + if (r) { + data->lockspace_id = r->res_ls->ls_global_id; + data->resource_namelen = r->res_length; + memcpy(data->resource_name, r->res_name, r->res_length); + } +} + +void dlm_timeout_warn(struct dlm_lkb *lkb) +{ + struct dlm_lock_data *data; + struct sk_buff *send_skb; + size_t size; + int rv; + + size = nla_total_size(sizeof(struct dlm_lock_data)) + + nla_total_size(0); /* why this? */ + + rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size); + if (rv < 0) + return; + + data = mk_data(send_skb); + if (!data) { + nlmsg_free(send_skb); + return; + } + + fill_data(data, lkb); + + send_data(send_skb); +} + diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 6bfbd6153809..e3a1527cbdbe 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, char *mb; int mb_len = sizeof(struct dlm_rcom) + len; - mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); + mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb); if (!mh) { log_print("create_rcom to %d type %d len %d ENOBUFS", to_nodeid, type, len); @@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) log_error(ls, "version mismatch: %x nodeid %d: %x", DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, rc->rc_header.h_version); - return -EINVAL; + return -EPROTO; } if (rf->rf_lvblen != ls->ls_lvblen || @@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", ls->ls_lvblen, 
ls->ls_exflags, nodeid, rf->rf_lvblen, rf->rf_lsflags); - return -EINVAL; + return -EPROTO; } return 0; } @@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) dlm_recover_process_copy(ls, rc_in); } -static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) +static int send_ls_not_ready(struct dlm_ls *ls, int nodeid, + struct dlm_rcom *rc_in) { struct dlm_rcom *rc; struct rcom_config *rf; @@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) char *mb; int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); - mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb); + mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb); if (!mh) return -ENOBUFS; memset(mb, 0, mb_len); @@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) log_print("lockspace %x from %d type %x not found", hd->h_lockspace, nodeid, rc->rc_type); if (rc->rc_type == DLM_RCOM_STATUS) - send_ls_not_ready(nodeid, rc); + send_ls_not_ready(ls, nodeid, rc); return; } diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 3cb636d60249..66575997861c 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 
** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_clear_members_gone(ls); + dlm_adjust_timeouts(ls); + error = enable_locking(ls, rv->seq); if (error) { log_debug(ls, "enable_locking failed %d", error); diff --git a/fs/dlm/user.c b/fs/dlm/user.c index b0201ec325a7..6438941ab1f8 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -33,16 +33,17 @@ static const struct file_operations device_fops; struct dlm_lock_params32 { __u8 mode; __u8 namelen; - __u16 flags; + __u16 unused; + __u32 flags; __u32 lkid; __u32 parent; - + __u64 xid; + __u64 timeout; __u32 castparam; __u32 castaddr; __u32 bastparam; __u32 bastaddr; __u32 lksb; - char lvb[DLM_USER_LVB_LEN]; char name[0]; }; @@ -68,6 +69,7 @@ struct dlm_lksb32 { }; struct dlm_lock_result32 { + __u32 version[3]; __u32 length; __u32 user_astaddr; __u32 user_astparam; @@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb, kb->i.lock.flags = kb32->i.lock.flags; kb->i.lock.lkid = kb32->i.lock.lkid; kb->i.lock.parent = kb32->i.lock.parent; + kb->i.lock.xid = kb32->i.lock.xid; + kb->i.lock.timeout = kb32->i.lock.timeout; kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam; kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr; kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam; @@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb, static void compat_output(struct dlm_lock_result *res, struct dlm_lock_result32 *res32) { + res32->version[0] = res->version[0]; + res32->version[1] = res->version[1]; + res32->version[2] = res->version[2]; + res32->user_astaddr = (__u32)(long)res->user_astaddr; res32->user_astparam = (__u32)(long)res->user_astparam; res32->user_lksb = (__u32)(long)res->user_lksb; @@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res, } #endif +/* Figure 
out if this lock is at the end of its life and no longer + available for the application to use. The lkb still exists until + the final ast is read. A lock becomes EOL in three situations: + 1. a noqueue request fails with EAGAIN + 2. an unlock completes with EUNLOCK + 3. a cancel of a waiting request completes with ECANCEL/EDEADLK + An EOL lock needs to be removed from the process's list of locks. + And we can't allow any new operation on an EOL lock. This is + not related to the lifetime of the lkb struct which is managed + entirely by refcount. */ + +static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type) +{ + switch (sb_status) { + case -DLM_EUNLOCK: + return 1; + case -DLM_ECANCEL: + case -ETIMEDOUT: + case -EDEADLK: + if (lkb->lkb_grmode == DLM_LOCK_IV) + return 1; + break; + case -EAGAIN: + if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV) + return 1; + break; + } + return 0; +} + /* we could possibly check if the cancel of an orphan has resulted in the lkb being removed and then remove that lkb from the orphans list and free it */ @@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) log_debug(ls, "ast overlap %x status %x %x", lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); - /* Figure out if this lock is at the end of its life and no longer - available for the application to use. The lkb still exists until - the final ast is read. A lock becomes EOL in three situations: - 1. a noqueue request fails with EAGAIN - 2. an unlock completes with EUNLOCK - 3. a cancel of a waiting request completes with ECANCEL - An EOL lock needs to be removed from the process's list of locks. - And we can't allow any new operation on an EOL lock. This is - not related to the lifetime of the lkb struct which is managed - entirely by refcount. 
*/ - - if (type == AST_COMP && - lkb->lkb_grmode == DLM_LOCK_IV && - ua->lksb.sb_status == -EAGAIN) - eol = 1; - else if (ua->lksb.sb_status == -DLM_EUNLOCK || - (ua->lksb.sb_status == -DLM_ECANCEL && - lkb->lkb_grmode == DLM_LOCK_IV)) - eol = 1; + eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type); if (eol) { lkb->lkb_ast_type &= ~AST_BAST; lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; @@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc, ua->castaddr = params->castaddr; ua->bastparam = params->bastparam; ua->bastaddr = params->bastaddr; + ua->xid = params->xid; if (params->flags & DLM_LKF_CONVERT) error = dlm_user_convert(ls, ua, params->mode, params->flags, - params->lkid, params->lvb); + params->lkid, params->lvb, + (unsigned long) params->timeout); else { error = dlm_user_request(ls, ua, params->mode, params->flags, params->name, params->namelen, - params->parent); + (unsigned long) params->timeout); if (!error) error = ua->lksb.sb_lkid; } @@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc, return error; } +static int device_user_deadlock(struct dlm_user_proc *proc, + struct dlm_lock_params *params) +{ + struct dlm_ls *ls; + int error; + + ls = dlm_find_lockspace_local(proc->lockspace); + if (!ls) + return -ENOENT; + + error = dlm_user_deadlock(ls, params->flags, params->lkid); + + dlm_put_lockspace(ls); + return error; +} + static int create_misc_device(struct dlm_ls *ls, char *name) { int error, len; @@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) return -EPERM; error = dlm_new_lockspace(params->name, strlen(params->name), - &lockspace, 0, DLM_USER_LVB_LEN); + &lockspace, params->flags, DLM_USER_LVB_LEN); if (error) return error; @@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf, error = device_user_unlock(proc, &kbuf->i.lock); break; + case DLM_USER_DEADLOCK: + if (!proc) { + log_print("no locking on control device"); + goto 
out_sig; + } + error = device_user_deadlock(proc, &kbuf->i.lock); + break; + case DLM_USER_CREATE_LOCKSPACE: if (proc) { log_print("create/remove only on control device"); @@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type, int struct_len; memset(&result, 0, sizeof(struct dlm_lock_result)); + result.version[0] = DLM_DEVICE_VERSION_MAJOR; + result.version[1] = DLM_DEVICE_VERSION_MINOR; + result.version[2] = DLM_DEVICE_VERSION_PATCH; memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb)); result.user_lksb = ua->user_lksb; @@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type, return error; } +static int copy_version_to_user(char __user *buf, size_t count) +{ + struct dlm_device_version ver; + + memset(&ver, 0, sizeof(struct dlm_device_version)); + ver.version[0] = DLM_DEVICE_VERSION_MAJOR; + ver.version[1] = DLM_DEVICE_VERSION_MINOR; + ver.version[2] = DLM_DEVICE_VERSION_PATCH; + + if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version))) + return -EFAULT; + return sizeof(struct dlm_device_version); +} + /* a read returns a single ast described in a struct dlm_lock_result */ static ssize_t device_read(struct file *file, char __user *buf, size_t count, @@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, DECLARE_WAITQUEUE(wait, current); int error, type=0, bmode=0, removed = 0; + if (count == sizeof(struct dlm_device_version)) { + error = copy_version_to_user(buf, count); + return error; + } + + if (!proc) { + log_print("non-version read from control device %zu", count); + return -EINVAL; + } + #ifdef CONFIG_COMPAT if (count < sizeof(struct dlm_lock_result32)) #else @@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, } } - if (list_empty(&proc->asts)) { - spin_unlock(&proc->asts_spin); - return -EAGAIN; - } - /* there may be both completion and blocking asts to return for the 
lkb, don't remove lkb from asts list unless no asts remain */ @@ -823,6 +891,7 @@ static const struct file_operations device_fops = { static const struct file_operations ctl_device_fops = { .open = ctl_device_open, .release = ctl_device_close, + .read = device_read, .write = device_write, .owner = THIS_MODULE, }; diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 403e3bad1455..1b9dd9a96f19 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -580,5 +580,7 @@ void ecryptfs_write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat, size_t *written); +int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, + int num_zeros); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 59288d817078..94f456fe4d9b 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) return rc; } -static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos, - size_t count, read_actor_t actor, void *target) +static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos, + struct pipe_inode_info *pipe, size_t count, + unsigned int flags) { struct file *lower_file = NULL; int rc = -EINVAL; lower_file = ecryptfs_file_to_lower(file); - if (lower_file->f_op && lower_file->f_op->sendfile) - rc = lower_file->f_op->sendfile(lower_file, ppos, count, - actor, target); + if (lower_file->f_op && lower_file->f_op->splice_read) + rc = lower_file->f_op->splice_read(lower_file, ppos, pipe, + count, flags); return rc; } @@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = { .release = ecryptfs_release, .fsync = ecryptfs_fsync, .fasync = ecryptfs_fasync, - .sendfile = ecryptfs_sendfile, + .splice_read = ecryptfs_splice_read, }; const struct file_operations ecryptfs_main_fops = { @@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = { .release = 
ecryptfs_release, .fsync = ecryptfs_fsync, .fasync = ecryptfs_fasync, - .sendfile = ecryptfs_sendfile, + .splice_read = ecryptfs_splice_read, }; static int diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 1548be26b5e6..83e94fedd4e9 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -800,6 +800,25 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) goto out_fput; } } else { /* new_length < i_size_read(inode) */ + pgoff_t index = 0; + int end_pos_in_page = -1; + + if (new_length != 0) { + index = ((new_length - 1) >> PAGE_CACHE_SHIFT); + end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK); + } + if (end_pos_in_page != (PAGE_CACHE_SIZE - 1)) { + if ((rc = ecryptfs_write_zeros(&fake_ecryptfs_file, + index, + (end_pos_in_page + 1), + ((PAGE_CACHE_SIZE - 1) + - end_pos_in_page)))) { + printk(KERN_ERR "Error attempting to zero out " + "the remainder of the end page on " + "reducing truncate; rc = [%d]\n", rc); + goto out_fput; + } + } vmtruncate(inode, new_length); rc = ecryptfs_write_inode_size_to_metadata( lower_file, lower_dentry->d_inode, inode, dentry, @@ -875,9 +894,54 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) struct ecryptfs_crypt_stat *crypt_stat; crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; - lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) + ecryptfs_init_crypt_stat(crypt_stat); inode = dentry->d_inode; lower_inode = ecryptfs_inode_to_lower(inode); + lower_dentry = ecryptfs_dentry_to_lower(dentry); + mutex_lock(&crypt_stat->cs_mutex); + if (S_ISDIR(dentry->d_inode->i_mode)) + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + else if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) + || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { + struct vfsmount *lower_mnt; + struct file *lower_file = NULL; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + int lower_flags; + + lower_mnt = 
ecryptfs_dentry_to_lower_mnt(dentry); + lower_flags = O_RDONLY; + if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, + lower_mnt, lower_flags))) { + printk(KERN_ERR + "Error opening lower file; rc = [%d]\n", rc); + mutex_unlock(&crypt_stat->cs_mutex); + goto out; + } + mount_crypt_stat = &ecryptfs_superblock_to_private( + dentry->d_sb)->mount_crypt_stat; + if ((rc = ecryptfs_read_metadata(dentry, lower_file))) { + if (!(mount_crypt_stat->flags + & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { + rc = -EIO; + printk(KERN_WARNING "Attempt to read file that " + "is not in a valid eCryptfs format, " + "and plaintext passthrough mode is not " + "enabled; returning -EIO\n"); + + mutex_unlock(&crypt_stat->cs_mutex); + fput(lower_file); + goto out; + } + rc = 0; + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + mutex_unlock(&crypt_stat->cs_mutex); + fput(lower_file); + goto out; + } + fput(lower_file); + } + mutex_unlock(&crypt_stat->cs_mutex); if (ia->ia_valid & ATTR_SIZE) { ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n", diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 606128f5c927..02ca6f1e55d7 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -840,8 +840,6 @@ static int __init ecryptfs_init(void) goto out; } kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); - sysfs_attr_version.attr.owner = THIS_MODULE; - sysfs_attr_version_str.attr.owner = THIS_MODULE; rc = do_sysfs_registration(); if (rc) { printk(KERN_ERR "sysfs registration failed\n"); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 55cec98a84e7..7d5a43cb0d5c 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -56,9 +56,6 @@ static struct page *ecryptfs_get1page(struct file *file, int index) return read_mapping_page(mapping, index, (void *)file); } -static -int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); - /** * ecryptfs_fill_zeros * @file: The ecryptfs file @@ -101,10 +98,13 @@ int ecryptfs_fill_zeros(struct file 
*file, loff_t new_length) if (old_end_page_index == new_end_page_index) { /* Start and end are in the same page; we just need to * set a portion of the existing page to zero's */ - rc = write_zeros(file, index, (old_end_pos_in_page + 1), - (new_end_pos_in_page - old_end_pos_in_page)); + rc = ecryptfs_write_zeros(file, index, + (old_end_pos_in_page + 1), + (new_end_pos_in_page + - old_end_pos_in_page)); if (rc) - ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(" + "file=[%p], " "index=[0x%.16x], " "old_end_pos_in_page=[d], " "(PAGE_CACHE_SIZE - new_end_pos_in_page" @@ -117,10 +117,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length) goto out; } /* Fill the remainder of the previous last page with zeros */ - rc = write_zeros(file, index, (old_end_pos_in_page + 1), + rc = ecryptfs_write_zeros(file, index, (old_end_pos_in_page + 1), ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page)); if (rc) { - ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=[%p], " "index=[0x%.16x], old_end_pos_in_page=[d], " "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) " "returned [%d]\n", file, index, @@ -131,9 +131,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length) index++; while (index < new_end_page_index) { /* Fill all intermediate pages with zeros */ - rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE); + rc = ecryptfs_write_zeros(file, index, 0, PAGE_CACHE_SIZE); if (rc) { - ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(" + "file=[%p], " "index=[0x%.16x], " "old_end_pos_in_page=[d], " "(PAGE_CACHE_SIZE - new_end_pos_in_page" @@ -149,9 +150,9 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length) } /* Fill the portion at the beginning of the last new page with * zero's */ - rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1)); + rc = ecryptfs_write_zeros(file, index, 0, 
(new_end_pos_in_page + 1)); if (rc) { - ecryptfs_printk(KERN_ERR, "write_zeros(file=" + ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=" "[%p], index=[0x%.16x], 0, " "new_end_pos_in_page=[%d]" "returned [%d]\n", file, index, @@ -400,7 +401,6 @@ out: static int ecryptfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - loff_t pos; int rc = 0; if (from == 0 && to == PAGE_CACHE_SIZE) @@ -408,15 +408,22 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, up to date. */ if (!PageUptodate(page)) rc = ecryptfs_do_readpage(file, page, page->index); - pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (pos > i_size_read(page->mapping->host)) { - rc = ecryptfs_truncate(file->f_path.dentry, pos); - if (rc) { - printk(KERN_ERR "Error on attempt to " - "truncate to (higher) offset [%lld];" - " rc = [%d]\n", pos, rc); - goto out; + if (page->index != 0) { + loff_t end_of_prev_pg_pos = + (((loff_t)page->index << PAGE_CACHE_SHIFT) - 1); + + if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) { + rc = ecryptfs_truncate(file->f_path.dentry, + end_of_prev_pg_pos); + if (rc) { + printk(KERN_ERR "Error on attempt to " + "truncate to (higher) offset [%lld];" + " rc = [%d]\n", end_of_prev_pg_pos, rc); + goto out; + } } + if (end_of_prev_pg_pos + 1 > i_size_read(page->mapping->host)) + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); } out: return rc; @@ -753,7 +760,7 @@ out: } /** - * write_zeros + * ecryptfs_write_zeros * @file: The ecryptfs file * @index: The index in which we are writing * @start: The position after the last block of data @@ -763,8 +770,8 @@ out: * * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE */ -static -int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) +int +ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) { int rc = 0; struct page *tmp_page; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 
566d4e2d3852..04afeecaaef3 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = { .open = generic_file_open, .release = ext2_release_file, .fsync = ext2_sync_file, - .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -71,7 +70,6 @@ const struct file_operations ext2_xip_file_operations = { .open = generic_file_open, .release = ext2_release_file, .fsync = ext2_sync_file, - .sendfile = xip_file_sendfile, }; #endif diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 16337bff0272..5de5061eb331 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1038,6 +1038,15 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset + EXT2_MOUNT_XIP if not */ + + if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) { + printk("XIP: Unsupported blocksize\n"); + err = -EINVAL; + goto restore_opts; + } + es = sbi->s_es; if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != (old_mount_opt & EXT2_MOUNT_XIP)) && diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 1e6f13864536..acc4913d3019 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = { .open = generic_file_open, .release = ext3_release_file, .fsync = ext3_sync_file, - .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a6cb6171c3af..2a85ddee4740 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2677,8 +2677,10 @@ void ext3_read_inode(struct inode * inode) */ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT3_INODE_SIZE(inode->i_sb)) + 
EXT3_INODE_SIZE(inode->i_sb)) { + brelse (bh); goto bad_inode; + } if (ei->i_extra_isize == 0) { /* The extra space is currently unused. Use it. */ ei->i_extra_isize = sizeof(struct ext3_inode) - diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 8a23483ca8d0..3b64bb16c727 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -30,15 +30,15 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, unsigned long *blockgrpp, ext4_grpblk_t *offsetp) { - struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; ext4_grpblk_t offset; - blocknr = blocknr - le32_to_cpu(es->s_first_data_block); + blocknr = blocknr - le32_to_cpu(es->s_first_data_block); offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); if (offsetp) *offsetp = offset; if (blockgrpp) - *blockgrpp = blocknr; + *blockgrpp = blocknr; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a0f0c04e79b2..b9ce24129070 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -374,7 +374,7 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc le32_to_cpu(ix[-1].ei_block)); } BUG_ON(k && le32_to_cpu(ix->ei_block) - <= le32_to_cpu(ix[-1].ei_block)); + <= le32_to_cpu(ix[-1].ei_block)); if (block < le32_to_cpu(ix->ei_block)) break; chix = ix; @@ -423,8 +423,8 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) path->p_ext = l - 1; ext_debug(" -> %d:%llu:%d ", - le32_to_cpu(path->p_ext->ee_block), - ext_pblock(path->p_ext), + le32_to_cpu(path->p_ext->ee_block), + ext_pblock(path->p_ext), le16_to_cpu(path->p_ext->ee_len)); #ifdef CHECK_BINSEARCH @@ -435,7 +435,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) chex = ex = EXT_FIRST_EXTENT(eh); for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { BUG_ON(k && le32_to_cpu(ex->ee_block) - <= le32_to_cpu(ex[-1].ee_block)); + <= le32_to_cpu(ex[-1].ee_block)); if (block < le32_to_cpu(ex->ee_block)) break; 
chex = ex; @@ -577,7 +577,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) - > le16_to_cpu(curp->p_hdr->eh_max)); + > le16_to_cpu(curp->p_hdr->eh_max)); BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); err = ext4_ext_dirty(handle, inode, curp); @@ -621,12 +621,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, border = path[depth].p_ext[1].ee_block; ext_debug("leaf will be split." " next leaf starts at %d\n", - le32_to_cpu(border)); + le32_to_cpu(border)); } else { border = newext->ee_block; ext_debug("leaf will be added." " next leaf starts at %d\n", - le32_to_cpu(border)); + le32_to_cpu(border)); } /* @@ -684,9 +684,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, while (path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)) { ext_debug("move %d:%llu:%d in new leaf %llu\n", - le32_to_cpu(path[depth].p_ext->ee_block), - ext_pblock(path[depth].p_ext), - le16_to_cpu(path[depth].p_ext->ee_len), + le32_to_cpu(path[depth].p_ext->ee_block), + ext_pblock(path[depth].p_ext), + le16_to_cpu(path[depth].p_ext->ee_len), newblock); /*memmove(ex++, path[depth].p_ext++, sizeof(struct ext4_extent)); @@ -765,9 +765,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, EXT_LAST_INDEX(path[i].p_hdr)); while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { ext_debug("%d: move %d:%d in new index %llu\n", i, - le32_to_cpu(path[i].p_idx->ei_block), - idx_pblock(path[i].p_idx), - newblock); + le32_to_cpu(path[i].p_idx->ei_block), + idx_pblock(path[i].p_idx), + newblock); /*memmove(++fidx, path[i].p_idx++, sizeof(struct ext4_extent_idx)); neh->eh_entries++; @@ -1128,6 +1128,55 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, } /* + * check if a portion of the "newext" extent overlaps with an + * existing extent. 
+ * + * If there is an overlap discovered, it updates the length of the newext + * such that there will be no overlap, and then returns 1. + * If there is no overlap found, it returns 0. + */ +unsigned int ext4_ext_check_overlap(struct inode *inode, + struct ext4_extent *newext, + struct ext4_ext_path *path) +{ + unsigned long b1, b2; + unsigned int depth, len1; + unsigned int ret = 0; + + b1 = le32_to_cpu(newext->ee_block); + len1 = le16_to_cpu(newext->ee_len); + depth = ext_depth(inode); + if (!path[depth].p_ext) + goto out; + b2 = le32_to_cpu(path[depth].p_ext->ee_block); + + /* + * get the next allocated block if the extent in the path + * is before the requested block(s) + */ + if (b2 < b1) { + b2 = ext4_ext_next_allocated_block(path); + if (b2 == EXT_MAX_BLOCK) + goto out; + } + + /* check for wrap through zero */ + if (b1 + len1 < b1) { + len1 = EXT_MAX_BLOCK - b1; + newext->ee_len = cpu_to_le16(len1); + ret = 1; + } + + /* check for overlap */ + if (b1 + len1 > b2) { + newext->ee_len = cpu_to_le16(b2 - b1); + ret = 1; + } +out: + return ret; +} + +/* * ext4_ext_insert_extent: * tries to merge requsted extent into the existing extent or * inserts requested extent as new one into the tree, @@ -1212,12 +1261,12 @@ has_space: if (!nearex) { /* there is no extent in this leaf, create first one */ ext_debug("first extent in the leaf: %d:%llu:%d\n", - le32_to_cpu(newext->ee_block), - ext_pblock(newext), - le16_to_cpu(newext->ee_len)); + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len)); path[depth].p_ext = EXT_FIRST_EXTENT(eh); } else if (le32_to_cpu(newext->ee_block) - > le32_to_cpu(nearex->ee_block)) { + > le32_to_cpu(nearex->ee_block)) { /* BUG_ON(newext->ee_block == nearex->ee_block); */ if (nearex != EXT_LAST_EXTENT(eh)) { len = EXT_MAX_EXTENT(eh) - nearex; @@ -1225,9 +1274,9 @@ has_space: len = len < 0 ? 
0 : len; ext_debug("insert %d:%llu:%d after: nearest 0x%p, " "move %d from 0x%p to 0x%p\n", - le32_to_cpu(newext->ee_block), - ext_pblock(newext), - le16_to_cpu(newext->ee_len), + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 2, nearex + 1, len); } @@ -1358,9 +1407,9 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, cbex.ec_start = 0; cbex.ec_type = EXT4_EXT_CACHE_GAP; } else { - cbex.ec_block = le32_to_cpu(ex->ee_block); - cbex.ec_len = le16_to_cpu(ex->ee_len); - cbex.ec_start = ext_pblock(ex); + cbex.ec_block = le32_to_cpu(ex->ee_block); + cbex.ec_len = le16_to_cpu(ex->ee_len); + cbex.ec_start = ext_pblock(ex); cbex.ec_type = EXT4_EXT_CACHE_EXTENT; } @@ -1431,16 +1480,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, len = le32_to_cpu(ex->ee_block) - block; ext_debug("cache gap(before): %lu [%lu:%lu]", (unsigned long) block, - (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len)); + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len)); } else if (block >= le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len)) { - lblock = le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len); + + le16_to_cpu(ex->ee_len)) { + lblock = le32_to_cpu(ex->ee_block) + + le16_to_cpu(ex->ee_len); len = ext4_ext_next_allocated_block(path); ext_debug("cache gap(after): [%lu:%lu] %lu", - (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len), + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len), (unsigned long) block); BUG_ON(len == lblock); len = len - lblock; @@ -1468,9 +1517,9 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block, BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && cex->ec_type != EXT4_EXT_CACHE_EXTENT); if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { - ex->ee_block = 
cpu_to_le32(cex->ec_block); + ex->ee_block = cpu_to_le32(cex->ec_block); ext4_ext_store_pblock(ex, cex->ec_start); - ex->ee_len = cpu_to_le16(cex->ec_len); + ex->ee_len = cpu_to_le16(cex->ec_len); ext_debug("%lu cached by %lu:%lu:%llu\n", (unsigned long) block, (unsigned long) cex->ec_block, @@ -1956,9 +2005,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* we should allocate requested block */ } else if (goal == EXT4_EXT_CACHE_EXTENT) { /* block is already allocated */ - newblock = iblock - - le32_to_cpu(newex.ee_block) - + ext_pblock(&newex); + newblock = iblock + - le32_to_cpu(newex.ee_block) + + ext_pblock(&newex); /* number of remaining blocks in the extent */ allocated = le16_to_cpu(newex.ee_len) - (iblock - le32_to_cpu(newex.ee_block)); @@ -1987,7 +2036,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ex = path[depth].p_ext; if (ex) { - unsigned long ee_block = le32_to_cpu(ex->ee_block); + unsigned long ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext_pblock(ex); unsigned short ee_len = le16_to_cpu(ex->ee_len); @@ -2000,7 +2049,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (ee_len > EXT_MAX_LEN) goto out2; /* if found extent covers block, simply return it */ - if (iblock >= ee_block && iblock < ee_block + ee_len) { + if (iblock >= ee_block && iblock < ee_block + ee_len) { newblock = iblock - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (iblock - ee_block); @@ -2031,7 +2080,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* allocate new block */ goal = ext4_ext_find_goal(inode, path, iblock); - allocated = max_blocks; + + /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ + newex.ee_block = cpu_to_le32(iblock); + newex.ee_len = cpu_to_le16(max_blocks); + err = ext4_ext_check_overlap(inode, &newex, path); + if (err) + allocated = le16_to_cpu(newex.ee_len); + else + allocated = max_blocks; 
newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); if (!newblock) goto out2; @@ -2039,12 +2096,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, goal, newblock, allocated); /* try to insert new extent into found leaf and return */ - newex.ee_block = cpu_to_le32(iblock); ext4_ext_store_pblock(&newex, newblock); newex.ee_len = cpu_to_le16(allocated); err = ext4_ext_insert_extent(handle, inode, path, &newex); - if (err) + if (err) { + /* free data blocks we just allocated */ + ext4_free_blocks(handle, inode, ext_pblock(&newex), + le16_to_cpu(newex.ee_len)); goto out2; + } if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = inode->i_size; @@ -2157,11 +2217,3 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) return needed; } - -EXPORT_SYMBOL(ext4_mark_inode_dirty); -EXPORT_SYMBOL(ext4_ext_invalidate_cache); -EXPORT_SYMBOL(ext4_ext_insert_extent); -EXPORT_SYMBOL(ext4_ext_walk_space); -EXPORT_SYMBOL(ext4_ext_find_goal); -EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert); - diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3c6c1fd2be90..d4c8186aed64 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = { .open = generic_file_open, .release = ext4_release_file, .fsync = ext4_sync_file, - .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b34182b6ee4d..8416fa28c422 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -255,8 +255,8 @@ static int verify_chain(Indirect *from, Indirect *to) * @inode: inode in question (we are only interested in its superblock) * @i_block: block number to be parsed * @offsets: array to store the offsets in - * @boundary: set this non-zero if the referred-to block is likely to be - * followed (on disk) by an indirect block. 
+ * @boundary: set this non-zero if the referred-to block is likely to be + * followed (on disk) by an indirect block. * * To store the locations of file's data ext4 uses a data structure common * for UNIX filesystems - tree of pointers anchored in the inode, with @@ -2673,8 +2673,10 @@ void ext4_read_inode(struct inode * inode) */ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT4_INODE_SIZE(inode->i_sb)) + EXT4_INODE_SIZE(inode->i_sb)) { + brelse (bh); goto bad_inode; + } if (ei->i_extra_isize == 0) { /* The extra space is currently unused. Use it. */ ei->i_extra_isize = sizeof(struct ext4_inode) - diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4ec57be5baf5..2811e5720ad0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -46,7 +46,7 @@ */ #define NAMEI_RA_CHUNKS 2 #define NAMEI_RA_BLOCKS 4 -#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) +#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) static struct buffer_head *ext4_append(handle_t *handle, @@ -241,7 +241,7 @@ static inline unsigned dx_node_limit (struct inode *dir) static void dx_show_index (char * label, struct dx_entry *entries) { int i, n = dx_get_count (entries); - printk("%s index ", label); + printk("%s index ", label); for (i = 0; i < n; i++) { printk("%x->%u ", i? 
dx_get_hash(entries + i) : 0, dx_get_block(entries + i)); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cb9afdd0e26e..175b68c60968 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1985,7 +1985,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR - "EXT4: failed to claim external journal device.\n"); + "EXT4: failed to claim external journal device.\n"); blkdev_put(bdev); return NULL; } diff --git a/fs/fat/file.c b/fs/fat/file.c index 55d3c7461c5b..69a83b59dce8 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = { .release = fat_file_release, .ioctl = fat_generic_ioctl, .fsync = file_fsync, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; static int fat_cont_expand(struct inode *inode, loff_t size) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index adf7995232b8..f79de7c8cdfa 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = { .release = fuse_release, .fsync = fuse_fsync, .lock = fuse_file_lock, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; static const struct file_operations fuse_direct_io_file_operations = { @@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = { .release = fuse_release, .fsync = fuse_fsync, .lock = fuse_file_lock, - /* no mmap and sendfile */ + /* no mmap and splice_read */ }; static const struct address_space_operations fuse_file_aops = { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9804c0cdcb42..cc5efc13496a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -655,10 +655,9 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type, static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", - .fs_flags = FS_HAS_SUBTYPE, .get_sb = fuse_get_sb_blk, .kill_sb = kill_block_super, - 
.fs_flags = FS_REQUIRES_DEV, + .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; static inline int register_fuseblk(void) diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index e3f1ada643ac..04ad0caebedb 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ - mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ + mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c53a5d2d0590..cd805a66880d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -718,7 +718,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, for (x = 0; x < rlist.rl_rgrps; x++) { struct gfs2_rgrpd *rgd; rgd = rlist.rl_ghs[x].gh_gl->gl_object; - rg_blocks += rgd->rd_ri.ri_length; + rg_blocks += rgd->rd_length; } error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); @@ -772,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_free_data(ip, bstart, blen); } - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(ip, dibh->b_data); @@ -824,7 +824,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) goto out_gunlock_q; error = gfs2_trans_begin(sdp, - sdp->sd_max_height + al->al_rgd->rd_ri.ri_length + + sdp->sd_max_height + al->al_rgd->rd_length + RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipres; @@ -847,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) } ip->i_di.di_size = size; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) @@ -885,7 +885,6 @@ 
static int gfs2_block_truncate_page(struct address_space *mapping) unsigned blocksize, iblock, length, pos; struct buffer_head *bh; struct page *page; - void *kaddr; int err; page = grab_cache_page(mapping, index); @@ -928,15 +927,13 @@ static int gfs2_block_truncate_page(struct address_space *mapping) /* Uhhuh. Read error. Complain and punt. */ if (!buffer_uptodate(bh)) goto unlock; + err = 0; } if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) gfs2_trans_add_bh(ip->i_gl, bh, 0); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, length, KM_USER0); unlock: unlock_page(page); @@ -962,7 +959,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (gfs2_is_stuffed(ip)) { ip->i_di.di_size = size; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); @@ -974,7 +971,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (!error) { ip->i_di.di_size = size; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1044,10 +1041,10 @@ static int trunc_end(struct gfs2_inode *ip) ip->i_di.di_height = 0; ip->i_di.di_goal_meta = ip->i_di.di_goal_data = - ip->i_num.no_addr; + ip->i_no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); } - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c index 683cb5bda870..3548d9f31e0d 100644 
--- a/fs/gfs2/daemon.c +++ b/fs/gfs2/daemon.c @@ -16,6 +16,7 @@ #include <linux/delay.h> #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> +#include <linux/freezer.h> #include "gfs2.h" #include "incore.h" @@ -49,6 +50,8 @@ int gfs2_scand(void *data) while (!kthread_should_stop()) { gfs2_scand_internal(sdp); t = gfs2_tune_get(sdp, gt_scand_secs) * HZ; + if (freezing(current)) + refrigerator(); schedule_timeout_interruptible(t); } @@ -74,6 +77,8 @@ int gfs2_glockd(void *data) wait_event_interruptible(sdp->sd_reclaim_wq, (atomic_read(&sdp->sd_reclaim_count) || kthread_should_stop())); + if (freezing(current)) + refrigerator(); } return 0; @@ -93,6 +98,8 @@ int gfs2_recoverd(void *data) while (!kthread_should_stop()) { gfs2_check_journals(sdp); t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; + if (freezing(current)) + refrigerator(); schedule_timeout_interruptible(t); } @@ -141,6 +148,8 @@ int gfs2_logd(void *data) } t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; + if (freezing(current)) + refrigerator(); schedule_timeout_interruptible(t); } @@ -191,6 +200,8 @@ int gfs2_quotad(void *data) gfs2_quota_scan(sdp); t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ; + if (freezing(current)) + refrigerator(); schedule_timeout_interruptible(t); } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index a96fa07b3f3b..2beb2f401aa2 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_di.di_size < offset + size) ip->i_di.di_size = offset + size; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -228,7 +228,7 @@ out: if (ip->i_di.di_size < offset + copied) ip->i_di.di_size = offset + copied; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_mtime = ip->i_inode.i_ctime 
= CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1456,7 +1456,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, if (dip->i_di.di_entries != g.offset) { fs_warn(sdp, "Number of entries corrupt in dir %llu, " "ip->i_di.di_entries (%u) != g.offset (%u)\n", - (unsigned long long)dip->i_num.no_addr, + (unsigned long long)dip->i_no_addr, dip->i_di.di_entries, g.offset); error = -EIO; @@ -1488,24 +1488,55 @@ out: * Returns: errno */ -int gfs2_dir_search(struct inode *dir, const struct qstr *name, - struct gfs2_inum_host *inum, unsigned int *type) +struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) { struct buffer_head *bh; struct gfs2_dirent *dent; + struct inode *inode; + + dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); + if (dent) { + if (IS_ERR(dent)) + return ERR_PTR(PTR_ERR(dent)); + inode = gfs2_inode_lookup(dir->i_sb, + be16_to_cpu(dent->de_type), + be64_to_cpu(dent->de_inum.no_addr), + be64_to_cpu(dent->de_inum.no_formal_ino)); + brelse(bh); + return inode; + } + return ERR_PTR(-ENOENT); +} + +int gfs2_dir_check(struct inode *dir, const struct qstr *name, + const struct gfs2_inode *ip) +{ + struct buffer_head *bh; + struct gfs2_dirent *dent; + int ret = -ENOENT; dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); if (dent) { if (IS_ERR(dent)) return PTR_ERR(dent); - if (inum) - gfs2_inum_in(inum, (char *)&dent->de_inum); - if (type) - *type = be16_to_cpu(dent->de_type); + if (ip) { + if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr) + goto out; + if (be64_to_cpu(dent->de_inum.no_formal_ino) != + ip->i_no_formal_ino) + goto out; + if (unlikely(IF2DT(ip->i_inode.i_mode) != + be16_to_cpu(dent->de_type))) { + gfs2_consist_inode(GFS2_I(dir)); + ret = -EIO; + goto out; + } + } + ret = 0; +out: brelse(bh); - return 0; } - return -ENOENT; + return ret; } static int dir_new_leaf(struct inode *inode, const struct qstr *name) @@ -1565,7 +1596,7 @@ static int 
dir_new_leaf(struct inode *inode, const struct qstr *name) */ int gfs2_dir_add(struct inode *inode, const struct qstr *name, - const struct gfs2_inum_host *inum, unsigned type) + const struct gfs2_inode *nip, unsigned type) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh; @@ -1580,7 +1611,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, if (IS_ERR(dent)) return PTR_ERR(dent); dent = gfs2_init_dirent(inode, dent, name, bh); - gfs2_inum_out(inum, (char *)&dent->de_inum); + gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(type); if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { leaf = (struct gfs2_leaf *)bh->b_data; @@ -1592,7 +1623,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, break; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_entries++; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(ip, bh->b_data); brelse(bh); error = 0; @@ -1678,7 +1709,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) gfs2_consist_inode(dip); gfs2_trans_add_bh(dip->i_gl, bh, 1); dip->i_di.di_entries--; - dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; + dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(dip, bh->b_data); brelse(bh); mark_inode_dirty(&dip->i_inode); @@ -1700,7 +1731,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) */ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - struct gfs2_inum_host *inum, unsigned int new_type) + const struct gfs2_inode *nip, unsigned int new_type) { struct buffer_head *bh; struct gfs2_dirent *dent; @@ -1715,7 +1746,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, return PTR_ERR(dent); gfs2_trans_add_bh(dip->i_gl, bh, 1); - gfs2_inum_out(inum, (char *)&dent->de_inum); + gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(new_type); if (dip->i_di.di_flags & GFS2_DIF_EXHASH) 
{ @@ -1726,7 +1757,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, gfs2_trans_add_bh(dip->i_gl, bh, 1); } - dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; + dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(dip, bh->b_data); brelse(bh); return 0; @@ -1867,7 +1898,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, for (x = 0; x < rlist.rl_rgrps; x++) { struct gfs2_rgrpd *rgd; rgd = rlist.rl_ghs[x].gh_gl->gl_object; - rg_blocks += rgd->rd_ri.ri_length; + rg_blocks += rgd->rd_length; } error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 48fe89046bba..8a468cac9328 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -16,15 +16,16 @@ struct inode; struct gfs2_inode; struct gfs2_inum; -int gfs2_dir_search(struct inode *dir, const struct qstr *filename, - struct gfs2_inum_host *inum, unsigned int *type); +struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename); +int gfs2_dir_check(struct inode *dir, const struct qstr *filename, + const struct gfs2_inode *ip); int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inum_host *inum, unsigned int type); + const struct gfs2_inode *ip, unsigned int type); int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir); int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - struct gfs2_inum_host *new_inum, unsigned int new_type); + const struct gfs2_inode *nip, unsigned int new_type); int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 5b83ca6acab1..2a7435b5c4dc 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -254,7 +254,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, if (error) return error; - error = gfs2_trans_begin(sdp, 
rgd->rd_ri.ri_length + RES_DINODE + + error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE + RES_EATTR + RES_STATFS + RES_QUOTA, blks); if (error) goto out_gunlock; @@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -700,7 +700,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out_gunlock_q; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), - blks + al->al_rgd->rd_ri.ri_length + + blks + al->al_rgd->rd_length + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipres; @@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, (er->er_mode & S_IFMT)); ip->i_inode.i_mode = er->er_mode; } - ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); ip->i_inode.i_mode = er->er_mode; } - ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) for (x = 0; x < rlist.rl_rgrps; x++) { struct gfs2_rgrpd *rgd; rgd = rlist.rl_ghs[x].gh_gl->gl_object; - 
rg_blocks += rgd->rd_ri.ri_length; + rg_blocks += rgd->rd_length; } error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1815429a2978..3f0974e1afef 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) static void gfs2_holder_wake(struct gfs2_holder *gh) { clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb(); + smp_mb__after_clear_bit(); wake_up_bit(&gh->gh_iflags, HIF_WAIT); } -static int holder_wait(void *word) +static int just_schedule(void *word) { schedule(); return 0; @@ -435,7 +435,20 @@ static int holder_wait(void *word) static void wait_on_holder(struct gfs2_holder *gh) { might_sleep(); - wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); +} + +static void gfs2_demote_wake(struct gfs2_glock *gl) +{ + clear_bit(GLF_DEMOTE, &gl->gl_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&gl->gl_flags, GLF_DEMOTE); +} + +static void wait_on_demote(struct gfs2_glock *gl) +{ + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); } /** @@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl) if (gl->gl_state == gl->gl_demote_state || gl->gl_state == LM_ST_UNLOCKED) { - clear_bit(GLF_DEMOTE, &gl->gl_flags); + gfs2_demote_wake(gl); return 0; } set_bit(GLF_LOCK, &gl->gl_flags); @@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ -static void handle_callback(struct gfs2_glock *gl, unsigned int state) +static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote) { spin_lock(&gl->gl_spin); if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; + if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && + gl->gl_object) { + struct inode *inode = 
igrab(gl->gl_object); + spin_unlock(&gl->gl_spin); + if (inode) { + d_prune_aliases(inode); + iput(inode); + } + return; + } } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { gl->gl_demote_state = state; } @@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) if (ret & LM_OUT_CANCELED) op_done = 0; else - clear_bit(GLF_DEMOTE, &gl->gl_flags); + gfs2_demote_wake(gl); } else { spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); @@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_assert_warn(sdp, !ret); state_change(gl, LM_ST_UNLOCKED); - clear_bit(GLF_DEMOTE, &gl->gl_flags); + gfs2_demote_wake(gl); if (glops->go_inval) glops->go_inval(gl, DIO_METADATA); @@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) const struct gfs2_glock_operations *glops = gl->gl_ops; if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED); + handle_callback(gl, LM_ST_UNLOCKED, 0); gfs2_glmutex_lock(gl); @@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh) spin_unlock(&gl->gl_spin); } +void gfs2_glock_dq_wait(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + gfs2_glock_dq(gh); + wait_on_demote(gl); +} + /** * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it * @gh: the holder structure @@ -1297,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, * @num_gh: the number of structures * @ghs: an array of struct gfs2_holder structures * - * Figure out how big an impact this function has. 
Either: - * 1) Replace this code with code that calls gfs2_glock_prefetch() - * 2) Forget async stuff and just call nq_m_sync() - * 3) Leave it like it is * * Returns: 0 on success (all glocks acquired), * errno on failure (no glocks acquired) @@ -1308,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) { - int *e; - unsigned int x; - int borked = 0, serious = 0; + struct gfs2_holder *tmp[4]; + struct gfs2_holder **pph = tmp; int error = 0; - if (!num_gh) + switch(num_gh) { + case 0: return 0; - - if (num_gh == 1) { + case 1: ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); return gfs2_glock_nq(ghs); - } - - e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL); - if (!e) - return -ENOMEM; - - for (x = 0; x < num_gh; x++) { - ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC; - error = gfs2_glock_nq(&ghs[x]); - if (error) { - borked = 1; - serious = error; - num_gh = x; + default: + if (num_gh <= 4) break; - } - } - - for (x = 0; x < num_gh; x++) { - error = e[x] = glock_wait_internal(&ghs[x]); - if (error) { - borked = 1; - if (error != GLR_TRYFAILED && error != GLR_CANCELED) - serious = error; - } + pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS); + if (!pph) + return -ENOMEM; } - if (!borked) { - kfree(e); - return 0; - } - - for (x = 0; x < num_gh; x++) - if (!e[x]) - gfs2_glock_dq(&ghs[x]); - - if (serious) - error = serious; - else { - for (x = 0; x < num_gh; x++) - gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags, - &ghs[x]); - error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e); - } + error = nq_m_sync(num_gh, ghs, pph); - kfree(e); + if (pph != tmp) + kfree(pph); return error; } @@ -1456,7 +1448,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, if (!gl) return; - handle_callback(gl, state); + handle_callback(gl, state, 1); spin_lock(&gl->gl_spin); run_queue(gl); @@ -1596,7 +1588,7 @@ void gfs2_reclaim_glock(struct 
gfs2_sbd *sdp) if (gfs2_glmutex_trylock(gl)) { if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED); + handle_callback(gl, LM_ST_UNLOCKED, 0); gfs2_glmutex_unlock(gl); } @@ -1709,7 +1701,7 @@ static void clear_glock(struct gfs2_glock *gl) if (gfs2_glmutex_trylock(gl)) { if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED); + handle_callback(gl, LM_ST_UNLOCKED, 0); gfs2_glmutex_unlock(gl); } } @@ -1823,7 +1815,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) print_dbg(gi, " Inode:\n"); print_dbg(gi, " num = %llu/%llu\n", - ip->i_num.no_formal_ino, ip->i_num.no_addr); + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr); print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); print_dbg(gi, " i_flags ="); for (x = 0; x < 32; x++) @@ -1909,8 +1902,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) } if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", - gl->gl_demote_state, - (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ)); + gl->gl_demote_state, (unsigned long long) + (jiffies - gl->gl_demote_time)*(1000000/HZ)); } if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { if (!test_bit(GLF_LOCK, &gl->gl_flags) && diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index b3e152db70c8..7721ca3fff9e 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -87,6 +87,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh); int gfs2_glock_poll(struct gfs2_holder *gh); int gfs2_glock_wait(struct gfs2_holder *gh); void gfs2_glock_dq(struct gfs2_holder *gh); +void gfs2_glock_dq_wait(struct gfs2_holder *gh); void gfs2_glock_dq_uninit(struct gfs2_holder *gh); int gfs2_glock_nq_num(struct gfs2_sbd *sdp, diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 7b82657a9910..777ca46010e8 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -156,9 +156,9 @@ 
static void inode_go_sync(struct gfs2_glock *gl) ip = NULL; if (test_bit(GLF_DIRTY, &gl->gl_flags)) { - gfs2_log_flush(gl->gl_sbd, gl); if (ip) filemap_fdatawrite(ip->i_inode.i_mapping); + gfs2_log_flush(gl->gl_sbd, gl); gfs2_meta_sync(gl); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d995441373ab..170ba93829c0 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -28,6 +28,14 @@ struct gfs2_sbd; typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret); +struct gfs2_log_header_host { + u64 lh_sequence; /* Sequence number of this transaction */ + u32 lh_flags; /* GFS2_LOG_HEAD_... */ + u32 lh_tail; /* Block number of log tail */ + u32 lh_blkno; + u32 lh_hash; +}; + /* * Structure of operations that are associated with each * type of element in the log. @@ -60,12 +68,23 @@ struct gfs2_bitmap { u32 bi_len; }; +struct gfs2_rgrp_host { + u32 rg_flags; + u32 rg_free; + u32 rg_dinodes; + u64 rg_igeneration; +}; + struct gfs2_rgrpd { struct list_head rd_list; /* Link with superblock */ struct list_head rd_list_mru; struct list_head rd_recent; /* Recently used rgrps */ struct gfs2_glock *rd_gl; /* Glock for this rgrp */ - struct gfs2_rindex_host rd_ri; + u64 rd_addr; /* grp block disk address */ + u64 rd_data0; /* first data location */ + u32 rd_length; /* length of rgrp header in fs blocks */ + u32 rd_data; /* num of data blocks in rgrp */ + u32 rd_bitbytes; /* number of bytes in data bitmaps */ struct gfs2_rgrp_host rd_rg; u64 rd_rg_vn; struct gfs2_bitmap *rd_bits; @@ -76,6 +95,8 @@ struct gfs2_rgrpd { u32 rd_last_alloc_data; u32 rd_last_alloc_meta; struct gfs2_sbd *rd_sbd; + unsigned long rd_flags; +#define GFS2_RDF_CHECK 0x0001 /* Need to check for unlinked inodes */ }; enum gfs2_state_bits { @@ -211,10 +232,24 @@ enum { GIF_SW_PAGED = 3, }; +struct gfs2_dinode_host { + u64 di_size; /* number of bytes in file */ + u64 di_blocks; /* number of blocks in file */ + u64 di_goal_meta; 
/* rgrp to alloc from next */ + u64 di_goal_data; /* data block goal */ + u64 di_generation; /* generation number for NFS */ + u32 di_flags; /* GFS2_DIF_... */ + u16 di_height; /* height of metadata */ + /* These only apply to directories */ + u16 di_depth; /* Number of bits in the table */ + u32 di_entries; /* The number of entries in the directory */ + u64 di_eattr; /* extended attribute block number */ +}; + struct gfs2_inode { struct inode i_inode; - struct gfs2_inum_host i_num; - + u64 i_no_addr; + u64 i_no_formal_ino; unsigned long i_flags; /* GIF_... */ struct gfs2_dinode_host i_di; /* To be replaced by ref to block */ @@ -275,14 +310,6 @@ enum { QDF_LOCKED = 2, }; -struct gfs2_quota_lvb { - __be32 qb_magic; - u32 __pad; - __be64 qb_limit; /* Hard limit of # blocks to alloc */ - __be64 qb_warn; /* Warn user when alloc is above this # */ - __be64 qb_value; /* Current # blocks allocated */ -}; - struct gfs2_quota_data { struct list_head qd_list; unsigned int qd_count; @@ -327,7 +354,9 @@ struct gfs2_trans { unsigned int tr_num_buf; unsigned int tr_num_buf_new; + unsigned int tr_num_databuf_new; unsigned int tr_num_buf_rm; + unsigned int tr_num_databuf_rm; struct list_head tr_list_buf; unsigned int tr_num_revoke; @@ -354,6 +383,12 @@ struct gfs2_jdesc { unsigned int jd_blocks; }; +struct gfs2_statfs_change_host { + s64 sc_total; + s64 sc_free; + s64 sc_dinodes; +}; + #define GFS2_GLOCKD_DEFAULT 1 #define GFS2_GLOCKD_MAX 16 @@ -426,6 +461,28 @@ enum { #define GFS2_FSNAME_LEN 256 +struct gfs2_inum_host { + u64 no_formal_ino; + u64 no_addr; +}; + +struct gfs2_sb_host { + u32 sb_magic; + u32 sb_type; + u32 sb_format; + + u32 sb_fs_format; + u32 sb_multihost_format; + u32 sb_bsize; + u32 sb_bsize_shift; + + struct gfs2_inum_host sb_master_dir; + struct gfs2_inum_host sb_root_dir; + + char sb_lockproto[GFS2_LOCKNAME_LEN]; + char sb_locktable[GFS2_LOCKNAME_LEN]; +}; + struct gfs2_sbd { struct super_block *sd_vfs; struct super_block *sd_vfs_meta; @@ -544,6 +601,7 @@ 
struct gfs2_sbd { unsigned int sd_log_blks_reserved; unsigned int sd_log_commited_buf; + unsigned int sd_log_commited_databuf; unsigned int sd_log_commited_revoke; unsigned int sd_log_num_gl; @@ -552,7 +610,6 @@ struct gfs2_sbd { unsigned int sd_log_num_rg; unsigned int sd_log_num_databuf; unsigned int sd_log_num_jdata; - unsigned int sd_log_num_hdrs; struct list_head sd_log_le_gl; struct list_head sd_log_le_buf; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index df0b8b3018b9..34f7bcdea1e9 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -38,12 +38,17 @@ #include "trans.h" #include "util.h" +struct gfs2_inum_range_host { + u64 ir_start; + u64 ir_length; +}; + static int iget_test(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_inum_host *inum = opaque; + u64 *no_addr = opaque; - if (ip->i_num.no_addr == inum->no_addr && + if (ip->i_no_addr == *no_addr && inode->i_private != NULL) return 1; @@ -53,37 +58,70 @@ static int iget_test(struct inode *inode, void *opaque) static int iget_set(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_inum_host *inum = opaque; + u64 *no_addr = opaque; - ip->i_num = *inum; - inode->i_ino = inum->no_addr; + inode->i_ino = (unsigned long)*no_addr; + ip->i_no_addr = *no_addr; return 0; } -struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum) +struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) +{ + unsigned long hash = (unsigned long)no_addr; + return ilookup5(sb, hash, iget_test, &no_addr); +} + +static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) { - return ilookup5(sb, (unsigned long)inum->no_addr, - iget_test, inum); + unsigned long hash = (unsigned long)no_addr; + return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); } -static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum) +/** + * GFS2 lookup code fills in vfs inode contents based on info obtained + * 
from directory entry inside gfs2_inode_lookup(). This has caused issues + * with NFS code path since its get_dentry routine doesn't have the relevant + * directory entry when gfs2_inode_lookup() is invoked. Part of the code + * segment inside gfs2_inode_lookup code needs to get moved around. + * + * Clean up I_LOCK and I_NEW as well. + **/ + +void gfs2_set_iop(struct inode *inode) { - return iget5_locked(sb, (unsigned long)inum->no_addr, - iget_test, iget_set, inum); + umode_t mode = inode->i_mode; + + if (S_ISREG(mode)) { + inode->i_op = &gfs2_file_iops; + inode->i_fop = &gfs2_file_fops; + inode->i_mapping->a_ops = &gfs2_file_aops; + } else if (S_ISDIR(mode)) { + inode->i_op = &gfs2_dir_iops; + inode->i_fop = &gfs2_dir_fops; + } else if (S_ISLNK(mode)) { + inode->i_op = &gfs2_symlink_iops; + } else { + inode->i_op = &gfs2_dev_iops; + } + + unlock_new_inode(inode); } /** * gfs2_inode_lookup - Lookup an inode * @sb: The super block - * @inum: The inode number + * @no_addr: The inode number * @type: The type of the inode * * Returns: A VFS inode, or an error */ -struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type) +struct inode *gfs2_inode_lookup(struct super_block *sb, + unsigned int type, + u64 no_addr, + u64 no_formal_ino) { - struct inode *inode = gfs2_iget(sb, inum); + struct inode *inode = gfs2_iget(sb, no_addr); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_glock *io_gl; int error; @@ -93,29 +131,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); - umode_t mode = DT2IF(type); inode->i_private = ip; - inode->i_mode = mode; - - if (S_ISREG(mode)) { - inode->i_op = &gfs2_file_iops; - inode->i_fop = &gfs2_file_fops; - inode->i_mapping->a_ops = &gfs2_file_aops; - } else if (S_ISDIR(mode)) { - inode->i_op = &gfs2_dir_iops; - inode->i_fop = &gfs2_dir_fops; - } else if (S_ISLNK(mode)) { - inode->i_op = 
&gfs2_symlink_iops; - } else { - inode->i_op = &gfs2_dev_iops; - } + ip->i_no_formal_ino = no_formal_ino; - error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); + error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (unlikely(error)) goto fail; ip->i_gl->gl_object = ip; - error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl); + error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (unlikely(error)) goto fail_put; @@ -123,12 +147,38 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail_iopen; + ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); - unlock_new_inode(inode); + + if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) + goto gfs2_nfsbypass; + + inode->i_mode = DT2IF(type); + + /* + * We must read the inode in order to work out its type in + * this case. Note that this doesn't happen often as we normally + * know the type beforehand. This code path only occurs during + * unlinked inode recovery (where it is safe to do this glock, + * which is not true in the general case). 
+ */ + if (type == DT_UNKNOWN) { + struct gfs2_holder gh; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (unlikely(error)) + goto fail_glock; + /* Inode is now uptodate */ + gfs2_glock_dq_uninit(&gh); + } + + gfs2_set_iop(inode); } +gfs2_nfsbypass: return inode; +fail_glock: + gfs2_glock_dq(&ip->i_iopen_gh); fail_iopen: gfs2_glock_put(io_gl); fail_put: @@ -144,14 +194,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) struct gfs2_dinode_host *di = &ip->i_di; const struct gfs2_dinode *str = buf; - if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) { + if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) { if (gfs2_consist_inode(ip)) gfs2_dinode_print(ip); return -EIO; } - if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino)) - return -ESTALE; - + ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); ip->i_inode.i_mode = be32_to_cpu(str->di_mode); ip->i_inode.i_rdev = 0; switch (ip->i_inode.i_mode & S_IFMT) { @@ -175,11 +223,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) di->di_blocks = be64_to_cpu(str->di_blocks); gfs2_set_inode_blocks(&ip->i_inode); ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); - ip->i_inode.i_atime.tv_nsec = 0; + ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); - ip->i_inode.i_mtime.tv_nsec = 0; + ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); - ip->i_inode.i_ctime.tv_nsec = 0; + ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); di->di_goal_meta = be64_to_cpu(str->di_goal_meta); di->di_goal_data = be64_to_cpu(str->di_goal_data); @@ -247,7 +295,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip) if (error) goto out_qs; - rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); if (!rgd) { gfs2_consist_inode(ip); error = -EIO; @@ -314,7 +362,7 @@ int 
gfs2_change_nlink(struct gfs2_inode *ip, int diff) else drop_nlink(&ip->i_inode); - ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -366,9 +414,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, struct super_block *sb = dir->i_sb; struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_holder d_gh; - struct gfs2_inum_host inum; - unsigned int type; - int error; + int error = 0; struct inode *inode = NULL; int unlock = 0; @@ -395,12 +441,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, goto out; } - error = gfs2_dir_search(dir, name, &inum, &type); - if (error) - goto out; - - inode = gfs2_inode_lookup(sb, &inum, type); - + inode = gfs2_dir_search(dir, name); + if (IS_ERR(inode)) + error = PTR_ERR(inode); out: if (unlock) gfs2_glock_dq_uninit(&d_gh); @@ -409,6 +452,22 @@ out: return inode ? inode : ERR_PTR(error); } +static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) +{ + const struct gfs2_inum_range *str = buf; + + ir->ir_start = be64_to_cpu(str->ir_start); + ir->ir_length = be64_to_cpu(str->ir_length); +} + +static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) +{ + struct gfs2_inum_range *str = buf; + + str->ir_start = cpu_to_be64(ir->ir_start); + str->ir_length = cpu_to_be64(ir->ir_length); +} + static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) { struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); @@ -548,7 +607,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, if (!dip->i_inode.i_nlink) return -EPERM; - error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL); + error = gfs2_dir_check(&dip->i_inode, name, NULL); switch (error) { case -ENOENT: error = 0; @@ -588,8 +647,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, *gid = current->fsgid; } -static int alloc_dinode(struct gfs2_inode 
*dip, struct gfs2_inum_host *inum, - u64 *generation) +static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); int error; @@ -605,7 +663,7 @@ static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum, if (error) goto out_ipreserv; - inum->no_addr = gfs2_alloc_di(dip, generation); + *no_addr = gfs2_alloc_di(dip, generation); gfs2_trans_end(sdp); @@ -635,6 +693,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_dinode *di; struct buffer_head *dibh; + struct timespec tv = CURRENT_TIME; dibh = gfs2_meta_new(gl, inum->no_addr); gfs2_trans_add_bh(gl, dibh, 1); @@ -650,7 +709,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_nlink = 0; di->di_size = 0; di->di_blocks = cpu_to_be64(1); - di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds()); + di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); di->di_major = cpu_to_be32(MAJOR(dev)); di->di_minor = cpu_to_be32(MINOR(dev)); di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); @@ -680,6 +739,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_entries = 0; memset(&di->__pad4, 0, sizeof(di->__pad4)); di->di_eattr = 0; + di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); + di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); + di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); memset(&di->di_reserved, 0, sizeof(di->di_reserved)); brelse(dibh); @@ -749,7 +811,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_quota_locks; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - al->al_rgd->rd_ri.ri_length + + al->al_rgd->rd_length + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) @@ -760,7 +822,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_quota_locks; } - error = 
gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode)); + error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); if (error) goto fail_end_trans; @@ -840,11 +902,11 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev) { - struct inode *inode; + struct inode *inode = NULL; struct gfs2_inode *dip = ghs->gh_gl->gl_object; struct inode *dir = &dip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_inum_host inum; + struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; int error; u64 generation; @@ -864,7 +926,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock; - error = alloc_dinode(dip, &inum, &generation); + error = alloc_dinode(dip, &inum.no_addr, &generation); if (error) goto fail_gunlock; @@ -877,34 +939,36 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock2; - inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode)); + inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), + inum.no_addr, + inum.no_formal_ino); if (IS_ERR(inode)) goto fail_gunlock2; error = gfs2_inode_refresh(GFS2_I(inode)); if (error) - goto fail_iput; + goto fail_gunlock2; error = gfs2_acl_create(dip, GFS2_I(inode)); if (error) - goto fail_iput; + goto fail_gunlock2; error = gfs2_security_init(dip, GFS2_I(inode)); if (error) - goto fail_iput; + goto fail_gunlock2; error = link_dinode(dip, name, GFS2_I(inode)); if (error) - goto fail_iput; + goto fail_gunlock2; if (!inode) return ERR_PTR(-ENOMEM); return inode; -fail_iput: - iput(inode); fail_gunlock2: gfs2_glock_dq_uninit(ghs + 1); + if (inode) + iput(inode); fail_gunlock: gfs2_glock_dq(ghs); fail: @@ -976,10 +1040,8 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, */ int gfs2_unlink_ok(struct gfs2_inode *dip, const 
struct qstr *name, - struct gfs2_inode *ip) + const struct gfs2_inode *ip) { - struct gfs2_inum_host inum; - unsigned int type; int error; if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) @@ -997,18 +1059,10 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (error) return error; - error = gfs2_dir_search(&dip->i_inode, name, &inum, &type); + error = gfs2_dir_check(&dip->i_inode, name, ip); if (error) return error; - if (!gfs2_inum_equal(&inum, &ip->i_num)) - return -ENOENT; - - if (IF2DT(ip->i_inode.i_mode) != type) { - gfs2_consist_inode(dip); - return -EIO; - } - return 0; } @@ -1132,10 +1186,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_inode *ip = gl->gl_object; - s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum); + s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum); unsigned int state; int flags; int error; + struct timespec tv = CURRENT_TIME; if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || @@ -1153,8 +1208,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) (sdp->sd_vfs->s_flags & MS_RDONLY)) return 0; - curtime = get_seconds(); - if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) { + if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { gfs2_glock_dq(gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, gh); @@ -1165,8 +1219,8 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) /* Verify that atime hasn't been updated while we were trying to get exclusive lock. 
*/ - curtime = get_seconds(); - if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) { + tv = CURRENT_TIME; + if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { struct buffer_head *dibh; struct gfs2_dinode *di; @@ -1180,11 +1234,12 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) if (error) goto fail_end_trans; - ip->i_inode.i_atime.tv_sec = curtime; + ip->i_inode.i_atime = tv; gfs2_trans_add_bh(ip->i_gl, dibh, 1); di = (struct gfs2_dinode *)dibh->b_data; di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); + di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); brelse(dibh); gfs2_trans_end(sdp); @@ -1252,3 +1307,66 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) return error; } +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) +{ + const struct gfs2_dinode_host *di = &ip->i_di; + struct gfs2_dinode *str = buf; + + str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); + str->di_header.__pad0 = 0; + str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); + str->di_header.__pad1 = 0; + str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); + str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); + str->di_mode = cpu_to_be32(ip->i_inode.i_mode); + str->di_uid = cpu_to_be32(ip->i_inode.i_uid); + str->di_gid = cpu_to_be32(ip->i_inode.i_gid); + str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); + str->di_size = cpu_to_be64(di->di_size); + str->di_blocks = cpu_to_be64(di->di_blocks); + str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); + str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); + str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); + + str->di_goal_meta = cpu_to_be64(di->di_goal_meta); + str->di_goal_data = cpu_to_be64(di->di_goal_data); + str->di_generation = cpu_to_be64(di->di_generation); + + str->di_flags = cpu_to_be32(di->di_flags); + str->di_height = cpu_to_be16(di->di_height); + str->di_payload_format = 
cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && + !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? + GFS2_FORMAT_DE : 0); + str->di_depth = cpu_to_be16(di->di_depth); + str->di_entries = cpu_to_be32(di->di_entries); + + str->di_eattr = cpu_to_be64(di->di_eattr); + str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); + str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); + str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); +} + +void gfs2_dinode_print(const struct gfs2_inode *ip) +{ + const struct gfs2_dinode_host *di = &ip->i_di; + + printk(KERN_INFO " no_formal_ino = %llu\n", + (unsigned long long)ip->i_no_formal_ino); + printk(KERN_INFO " no_addr = %llu\n", + (unsigned long long)ip->i_no_addr); + printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); + printk(KERN_INFO " di_blocks = %llu\n", + (unsigned long long)di->di_blocks); + printk(KERN_INFO " di_goal_meta = %llu\n", + (unsigned long long)di->di_goal_meta); + printk(KERN_INFO " di_goal_data = %llu\n", + (unsigned long long)di->di_goal_data); + printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags); + printk(KERN_INFO " di_height = %u\n", di->di_height); + printk(KERN_INFO " di_depth = %u\n", di->di_depth); + printk(KERN_INFO " di_entries = %u\n", di->di_entries); + printk(KERN_INFO " di_eattr = %llu\n", + (unsigned long long)di->di_eattr); +} + diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index b57f448b15bc..4517ac82c01c 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -10,17 +10,17 @@ #ifndef __INODE_DOT_H__ #define __INODE_DOT_H__ -static inline int gfs2_is_stuffed(struct gfs2_inode *ip) +static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) { return !ip->i_di.di_height; } -static inline int gfs2_is_jdata(struct gfs2_inode *ip) +static inline int gfs2_is_jdata(const struct gfs2_inode *ip) { return ip->i_di.di_flags & GFS2_DIF_JDATA; } -static inline int gfs2_is_dir(struct gfs2_inode *ip) +static inline int gfs2_is_dir(const struct gfs2_inode *ip) { 
return S_ISDIR(ip->i_inode.i_mode); } @@ -32,9 +32,25 @@ static inline void gfs2_set_inode_blocks(struct inode *inode) (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); } +static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr, + u64 no_formal_ino) +{ + return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino; +} + +static inline void gfs2_inum_out(const struct gfs2_inode *ip, + struct gfs2_dirent *dent) +{ + dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); + dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); +} + + void gfs2_inode_attr_in(struct gfs2_inode *ip); -struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type); -struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum); +void gfs2_set_iop(struct inode *inode); +struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, + u64 no_addr, u64 no_formal_ino); +struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); int gfs2_inode_refresh(struct gfs2_inode *ip); @@ -47,12 +63,14 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip); int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip); + const struct gfs2_inode *ip); int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); int gfs2_glock_nq_atime(struct gfs2_holder *gh); int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); +void gfs2_dinode_print(const struct gfs2_inode *ip); #endif /* __INODE_DOT_H__ */ diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index c305255bfe8a..542a797ac89a 100644 --- 
a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c @@ -174,7 +174,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, lp->cur = DLM_LOCK_IV; lp->lvb = NULL; lp->hold_null = NULL; - init_completion(&lp->ast_wait); INIT_LIST_HEAD(&lp->clist); INIT_LIST_HEAD(&lp->blist); INIT_LIST_HEAD(&lp->delay_list); @@ -399,6 +398,12 @@ static void gdlm_del_lvb(struct gdlm_lock *lp) lp->lksb.sb_lvbptr = NULL; } +static int gdlm_ast_wait(void *word) +{ + schedule(); + return 0; +} + /* This can do a synchronous dlm request (requiring a lock_dlm thread to get the completion) because gfs won't call hold_lvb() during a callback (from the context of a lock_dlm thread). */ @@ -424,10 +429,10 @@ static int hold_null_lock(struct gdlm_lock *lp) lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE; set_bit(LFL_NOBAST, &lpn->flags); set_bit(LFL_INLOCK, &lpn->flags); + set_bit(LFL_AST_WAIT, &lpn->flags); - init_completion(&lpn->ast_wait); gdlm_do_lock(lpn); - wait_for_completion(&lpn->ast_wait); + wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE); error = lpn->lksb.sb_status; if (error) { printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n", diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index d074c6e6f9bf..24d70f73b651 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -101,6 +101,7 @@ enum { LFL_NOBAST = 10, LFL_HEADQUE = 11, LFL_UNLOCK_DELETE = 12, + LFL_AST_WAIT = 13, }; struct gdlm_lock { @@ -117,7 +118,6 @@ struct gdlm_lock { unsigned long flags; /* lock_dlm flags LFL_ */ int bast_mode; /* protected by async_lock */ - struct completion ast_wait; struct list_head clist; /* complete */ struct list_head blist; /* blocking */ diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 1d8faa3da8af..41c5b04caaba 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c @@ -147,7 +147,7 @@ static int gdlm_mount(char *table_name, char 
*host_data, error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), &ls->dlm_lockspace, - nodir ? DLM_LSFL_NODIR : 0, + DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0), GDLM_LVB_SIZE); if (error) { log_error("dlm_new_lockspace error %d", error); diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c index f82495e18c2d..fba1f1d87e4f 100644 --- a/fs/gfs2/locking/dlm/plock.c +++ b/fs/gfs2/locking/dlm/plock.c @@ -242,7 +242,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, op->info.number = name->ln_number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - + op->info.owner = (__u64)(long) fl->fl_owner; send_op(op); wait_event(recv_wq, (op->done != 0)); @@ -254,16 +254,20 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, } spin_unlock(&ops_lock); + /* info.rv from userspace is 1 for conflict, 0 for no-conflict, + -ENOENT if there are no locks on the file */ + rv = op->info.rv; fl->fl_type = F_UNLCK; if (rv == -ENOENT) rv = 0; - else if (rv == 0 && op->info.pid != fl->fl_pid) { + else if (rv > 0) { fl->fl_type = (op->info.ex) ? 
F_WRLCK : F_RDLCK; fl->fl_pid = op->info.pid; fl->fl_start = op->info.start; fl->fl_end = op->info.end; + rv = 0; } kfree(op); diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index 9cf1f168eaf8..1aca51e45092 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c @@ -44,6 +44,13 @@ static void process_blocking(struct gdlm_lock *lp, int bast_mode) ls->fscb(ls->sdp, cb, &lp->lockname); } +static void wake_up_ast(struct gdlm_lock *lp) +{ + clear_bit(LFL_AST_WAIT, &lp->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&lp->flags, LFL_AST_WAIT); +} + static void process_complete(struct gdlm_lock *lp) { struct gdlm_ls *ls = lp->ls; @@ -136,7 +143,7 @@ static void process_complete(struct gdlm_lock *lp) */ if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { - complete(&lp->ast_wait); + wake_up_ast(lp); return; } @@ -214,7 +221,7 @@ out: if (test_bit(LFL_INLOCK, &lp->flags)) { clear_bit(LFL_NOBLOCK, &lp->flags); lp->cur = lp->req; - complete(&lp->ast_wait); + wake_up_ast(lp); return; } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 291415ddfe51..f49a12e24086 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) gfs2_assert(sdp, bd->bd_ail == ai); + if (!bh){ + list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); + continue; + } + if (!buffer_busy(bh)) { if (!buffer_uptodate(bh)) { gfs2_log_unlock(sdp); @@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl bd_ail_st_list) { bh = bd->bd_bh; + if (!bh){ + list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); + continue; + } + gfs2_assert(sdp, bd->bd_ail == ai); if (buffer_busy(bh)) { @@ -262,8 +272,8 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) * @sdp: The GFS2 superblock * @blks: The number of blocks to reserve * - * Note that we never give out the last 6 blocks of the journal. 
Thats - * due to the fact that there is are a small number of header blocks + * Note that we never give out the last few blocks of the journal. Thats + * due to the fact that there is a small number of header blocks * associated with each log flush. The exact number can't be known until * flush time, so we ensure that we have just enough free blocks at all * times to avoid running out during a log flush. @@ -274,6 +284,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { unsigned int try = 0; + unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize); if (gfs2_assert_warn(sdp, blks) || gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) @@ -281,7 +292,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) mutex_lock(&sdp->sd_log_reserve_mutex); gfs2_log_lock(sdp); - while(sdp->sd_log_blks_free <= (blks + 6)) { + while(sdp->sd_log_blks_free <= (blks + reserved_blks)) { gfs2_log_unlock(sdp); gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL); @@ -357,6 +368,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer return dist; } +/** + * calc_reserved - Calculate the number of blocks to reserve when + * refunding a transaction's unused buffers. + * @sdp: The GFS2 superblock + * + * This is complex. We need to reserve room for all our currently used + * metadata buffers (e.g. normal file I/O rewriting file time stamps) and + * all our journaled data buffers for journaled files (e.g. files in the + * meta_fs like rindex, or files for which chattr +j was done.) + * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush + * will count it as free space (sd_log_blks_free) and corruption will follow. + * + * We can have metadata bufs and jdata bufs in the same journal. So each + * type gets its own log header, for which we need to reserve a block. 
+ * In fact, each type has the potential for needing more than one header + * in cases where we have more buffers than will fit on a journal page. + * Metadata journal entries take up half the space of journaled buffer entries. + * Thus, metadata entries have buf_limit (502) and journaled buffers have + * databuf_limit (251) before they cause a wrap around. + * + * Also, we need to reserve blocks for revoke journal entries and one for an + * overall header for the lot. + * + * Returns: the number of blocks reserved + */ +static unsigned int calc_reserved(struct gfs2_sbd *sdp) +{ + unsigned int reserved = 0; + unsigned int mbuf_limit, metabufhdrs_needed; + unsigned int dbuf_limit, databufhdrs_needed; + unsigned int revokes = 0; + + mbuf_limit = buf_limit(sdp); + metabufhdrs_needed = (sdp->sd_log_commited_buf + + (mbuf_limit - 1)) / mbuf_limit; + dbuf_limit = databuf_limit(sdp); + databufhdrs_needed = (sdp->sd_log_commited_databuf + + (dbuf_limit - 1)) / dbuf_limit; + + if (sdp->sd_log_commited_revoke) + revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, + sizeof(u64)); + + reserved = sdp->sd_log_commited_buf + metabufhdrs_needed + + sdp->sd_log_commited_databuf + databufhdrs_needed + + revokes; + /* One for the overall header */ + if (reserved) + reserved++; + return reserved; +} + static unsigned int current_tail(struct gfs2_sbd *sdp) { struct gfs2_ail *ai; @@ -447,14 +510,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, return bh; } -static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull) +static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) { unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); ail2_empty(sdp, new_tail); gfs2_log_lock(sdp); - sdp->sd_log_blks_free += dist - (pull ? 
1 : 0); + sdp->sd_log_blks_free += dist; gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); @@ -504,7 +567,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) brelse(bh); if (sdp->sd_log_tail != tail) - log_pull_tail(sdp, tail, pull); + log_pull_tail(sdp, tail); else gfs2_assert_withdraw(sdp, !pull); @@ -517,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp) struct list_head *head = &sdp->sd_log_flush_list; struct gfs2_log_buf *lb; struct buffer_head *bh; + int flushcount = 0; while (!list_empty(head)) { lb = list_entry(head->next, struct gfs2_log_buf, lb_list); @@ -533,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp) } else brelse(bh); kfree(lb); + flushcount++; } - log_write_header(sdp, 0, 0); + /* If nothing was journaled, the header is unplanned and unwanted. */ + if (flushcount) { + log_write_header(sdp, 0, 0); + } else { + unsigned int tail; + tail = current_tail(sdp); + + gfs2_ail1_empty(sdp, 0); + if (sdp->sd_log_tail != tail) + log_pull_tail(sdp, tail); + } } /** @@ -565,7 +640,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) INIT_LIST_HEAD(&ai->ai_ail1_list); INIT_LIST_HEAD(&ai->ai_ail2_list); - gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf); + gfs2_assert_withdraw(sdp, + sdp->sd_log_num_buf + sdp->sd_log_num_jdata == + sdp->sd_log_commited_buf + + sdp->sd_log_commited_databuf); gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); @@ -576,16 +654,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) lops_before_commit(sdp); if (!list_empty(&sdp->sd_log_flush_list)) log_flush_commit(sdp); - else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) + else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ + gfs2_log_lock(sdp); + sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */ + gfs2_log_unlock(sdp); log_write_header(sdp, 0, 
PULL); + } lops_after_commit(sdp, ai); gfs2_log_lock(sdp); sdp->sd_log_head = sdp->sd_log_flush_head; - sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs; sdp->sd_log_blks_reserved = 0; sdp->sd_log_commited_buf = 0; - sdp->sd_log_num_hdrs = 0; + sdp->sd_log_commited_databuf = 0; sdp->sd_log_commited_revoke = 0; if (!list_empty(&ai->ai_ail1_list)) { @@ -602,32 +683,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { - unsigned int reserved = 0; + unsigned int reserved; unsigned int old; gfs2_log_lock(sdp); sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm; - gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0); + sdp->sd_log_commited_databuf += tr->tr_num_databuf_new - + tr->tr_num_databuf_rm; + gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) || + (((int)sdp->sd_log_commited_databuf) >= 0)); sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); - - if (sdp->sd_log_commited_buf) - reserved += sdp->sd_log_commited_buf; - if (sdp->sd_log_commited_revoke) - reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, - sizeof(u64)); - if (reserved) - reserved++; - + reserved = calc_reserved(sdp); old = sdp->sd_log_blks_free; sdp->sd_log_blks_free += tr->tr_reserved - (reserved - sdp->sd_log_blks_reserved); gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old); - gfs2_assert_withdraw(sdp, - sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks + - sdp->sd_log_num_hdrs); + gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= + sdp->sd_jdesc->jd_blocks); sdp->sd_log_blks_reserved = reserved; @@ -673,13 +748,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs); 
gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); sdp->sd_log_flush_head = sdp->sd_log_head; sdp->sd_log_flush_wrapped = 0; - log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0); + log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, + (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index f82d84d05d23..aff70f0698fd 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -17,6 +17,7 @@ #include "gfs2.h" #include "incore.h" +#include "inode.h" #include "glock.h" #include "log.h" #include "lops.h" @@ -117,15 +118,13 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) struct gfs2_log_descriptor *ld; struct gfs2_bufdata *bd1 = NULL, *bd2; unsigned int total = sdp->sd_log_num_buf; - unsigned int offset = sizeof(struct gfs2_log_descriptor); + unsigned int offset = BUF_OFFSET; unsigned int limit; unsigned int num; unsigned n; __be64 *ptr; - offset += sizeof(__be64) - 1; - offset &= ~(sizeof(__be64) - 1); - limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64); + limit = buf_limit(sdp); /* for 4k blocks, limit = 503 */ bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); @@ -134,7 +133,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) if (total > limit) num = limit; bh = gfs2_log_get_buf(sdp); - sdp->sd_log_num_hdrs++; ld = (struct gfs2_log_descriptor *)bh->b_data; ptr = (__be64 *)(bh->b_data + offset); ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); @@ -469,25 +467,28 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct gfs2_inode *ip = GFS2_I(mapping->host); gfs2_log_lock(sdp); + if (!list_empty(&bd->bd_list_tr)) { + gfs2_log_unlock(sdp); + return; + } tr->tr_touched = 1; - if (list_empty(&bd->bd_list_tr) && - (ip->i_di.di_flags & GFS2_DIF_JDATA)) { + if (gfs2_is_jdata(ip)) { tr->tr_num_buf++; list_add(&bd->bd_list_tr, 
&tr->tr_list_buf); - gfs2_log_unlock(sdp); - gfs2_pin(sdp, bd->bd_bh); - tr->tr_num_buf_new++; - } else { - gfs2_log_unlock(sdp); } + gfs2_log_unlock(sdp); + if (!list_empty(&le->le_list)) + return; + gfs2_trans_add_gl(bd->bd_gl); - gfs2_log_lock(sdp); - if (list_empty(&le->le_list)) { - if (ip->i_di.di_flags & GFS2_DIF_JDATA) - sdp->sd_log_num_jdata++; - sdp->sd_log_num_databuf++; - list_add(&le->le_list, &sdp->sd_log_le_databuf); + if (gfs2_is_jdata(ip)) { + sdp->sd_log_num_jdata++; + gfs2_pin(sdp, bd->bd_bh); + tr->tr_num_databuf_new++; } + sdp->sd_log_num_databuf++; + gfs2_log_lock(sdp); + list_add(&le->le_list, &sdp->sd_log_le_databuf); gfs2_log_unlock(sdp); } @@ -520,7 +521,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) LIST_HEAD(started); struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; struct buffer_head *bh = NULL,*bh1 = NULL; - unsigned int offset = sizeof(struct gfs2_log_descriptor); struct gfs2_log_descriptor *ld; unsigned int limit; unsigned int total_dbuf = sdp->sd_log_num_databuf; @@ -528,9 +528,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) unsigned int num, n; __be64 *ptr = NULL; - offset += 2*sizeof(__be64) - 1; - offset &= ~(2*sizeof(__be64) - 1); - limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64); + limit = databuf_limit(sdp); /* * Start writing ordered buffers, write journaled buffers @@ -581,10 +579,10 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_unlock(sdp); if (!bh) { bh = gfs2_log_get_buf(sdp); - sdp->sd_log_num_hdrs++; ld = (struct gfs2_log_descriptor *) bh->b_data; - ptr = (__be64 *)(bh->b_data + offset); + ptr = (__be64 *)(bh->b_data + + DATABUF_OFFSET); ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); ld->ld_header.mh_type = @@ -605,7 +603,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) if (unlikely(magic != 0)) set_buffer_escaped(bh1); gfs2_log_lock(sdp); - if (n++ > num) + if (++n >= num) break; } else if (!bh1) { total_dbuf--; @@ -622,6 +620,7 @@ static 
void databuf_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_log_unlock(sdp); if (bh) { + set_buffer_mapped(bh); set_buffer_dirty(bh); ll_rw_block(WRITE, 1, &bh); bh = NULL; diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 965bc65c7c64..41a00df75587 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -13,6 +13,13 @@ #include <linux/list.h> #include "incore.h" +#define BUF_OFFSET \ + ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \ + ~(sizeof(__be64) - 1)) +#define DATABUF_OFFSET \ + ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \ + ~(2 * sizeof(__be64) - 1)) + extern const struct gfs2_log_operations gfs2_glock_lops; extern const struct gfs2_log_operations gfs2_buf_lops; extern const struct gfs2_log_operations gfs2_revoke_lops; @@ -21,6 +28,22 @@ extern const struct gfs2_log_operations gfs2_databuf_lops; extern const struct gfs2_log_operations *gfs2_log_ops[]; +static inline unsigned int buf_limit(struct gfs2_sbd *sdp) +{ + unsigned int limit; + + limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64); + return limit; +} + +static inline unsigned int databuf_limit(struct gfs2_sbd *sdp) +{ + unsigned int limit; + + limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64)); + return limit; +} + static inline void lops_init_le(struct gfs2_log_element *le, const struct gfs2_log_operations *lops) { diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index e62d4f620c58..8da343b34ae7 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -387,12 +387,18 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) if (test_clear_buffer_pinned(bh)) { struct gfs2_trans *tr = current->journal_info; + struct gfs2_inode *bh_ip = + GFS2_I(bh->b_page->mapping->host); + gfs2_log_lock(sdp); list_del_init(&bd->bd_le.le_list); gfs2_assert_warn(sdp, sdp->sd_log_num_buf); sdp->sd_log_num_buf--; gfs2_log_unlock(sdp); - tr->tr_num_buf_rm++; + if (bh_ip->i_inode.i_private != NULL) + tr->tr_num_databuf_rm++; + else + 
tr->tr_num_buf_rm++; brelse(bh); } if (bd) { diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index e037425bc042..527bf19d9690 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -63,7 +63,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, struct buffer_head **bhp) { - return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp); + return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp); } struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index 4864659555d4..6f006a804db3 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -82,20 +82,19 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) char *options, *o, *v; int error = 0; - if (!remount) { - /* If someone preloaded options, use those instead */ - spin_lock(&gfs2_sys_margs_lock); - if (gfs2_sys_margs) { - data = gfs2_sys_margs; - gfs2_sys_margs = NULL; - } - spin_unlock(&gfs2_sys_margs_lock); - - /* Set some defaults */ - args->ar_num_glockd = GFS2_GLOCKD_DEFAULT; - args->ar_quota = GFS2_QUOTA_DEFAULT; - args->ar_data = GFS2_DATA_DEFAULT; + /* If someone preloaded options, use those instead */ + spin_lock(&gfs2_sys_margs_lock); + if (!remount && gfs2_sys_margs) { + data = gfs2_sys_margs; + gfs2_sys_margs = NULL; } + spin_unlock(&gfs2_sys_margs_lock); + + /* Set some defaults */ + memset(args, 0, sizeof(struct gfs2_args)); + args->ar_num_glockd = GFS2_GLOCKD_DEFAULT; + args->ar_quota = GFS2_QUOTA_DEFAULT; + args->ar_data = GFS2_DATA_DEFAULT; /* Split the options into tokens with the "," character and process them */ diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c deleted file mode 100644 index d9ecfd23a49e..000000000000 --- a/fs/gfs2/ondisk.c +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. 
All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/completion.h> -#include <linux/buffer_head.h> - -#include "gfs2.h" -#include <linux/gfs2_ondisk.h> -#include <linux/lm_interface.h> -#include "incore.h" - -#define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \ - struct->member); - -/* - * gfs2_xxx_in - read in an xxx struct - * first arg: the cpu-order structure - * buf: the disk-order buffer - * - * gfs2_xxx_out - write out an xxx struct - * first arg: the cpu-order structure - * buf: the disk-order buffer - * - * gfs2_xxx_print - print out an xxx struct - * first arg: the cpu-order structure - */ - -void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf) -{ - const struct gfs2_inum *str = buf; - - no->no_formal_ino = be64_to_cpu(str->no_formal_ino); - no->no_addr = be64_to_cpu(str->no_addr); -} - -void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf) -{ - struct gfs2_inum *str = buf; - - str->no_formal_ino = cpu_to_be64(no->no_formal_ino); - str->no_addr = cpu_to_be64(no->no_addr); -} - -static void gfs2_inum_print(const struct gfs2_inum_host *no) -{ - printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino); - printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr); -} - -static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf) -{ - const struct gfs2_meta_header *str = buf; - - mh->mh_magic = be32_to_cpu(str->mh_magic); - mh->mh_type = be32_to_cpu(str->mh_type); - mh->mh_format = be32_to_cpu(str->mh_format); -} - -void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) -{ - const struct gfs2_sb *str = buf; - - gfs2_meta_header_in(&sb->sb_header, buf); - - sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); - 
sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); - sb->sb_bsize = be32_to_cpu(str->sb_bsize); - sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); - - gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir); - gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir); - - memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); - memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); -} - -void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf) -{ - const struct gfs2_rindex *str = buf; - - ri->ri_addr = be64_to_cpu(str->ri_addr); - ri->ri_length = be32_to_cpu(str->ri_length); - ri->ri_data0 = be64_to_cpu(str->ri_data0); - ri->ri_data = be32_to_cpu(str->ri_data); - ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes); - -} - -void gfs2_rindex_print(const struct gfs2_rindex_host *ri) -{ - printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr); - pv(ri, ri_length, "%u"); - - printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)ri->ri_data0); - pv(ri, ri_data, "%u"); - - pv(ri, ri_bitbytes, "%u"); -} - -void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf) -{ - const struct gfs2_rgrp *str = buf; - - rg->rg_flags = be32_to_cpu(str->rg_flags); - rg->rg_free = be32_to_cpu(str->rg_free); - rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); - rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); -} - -void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) -{ - struct gfs2_rgrp *str = buf; - - str->rg_flags = cpu_to_be32(rg->rg_flags); - str->rg_free = cpu_to_be32(rg->rg_free); - str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); - str->__pad = cpu_to_be32(0); - str->rg_igeneration = cpu_to_be64(rg->rg_igeneration); - memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); -} - -void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) -{ - const struct gfs2_quota *str = buf; - - qu->qu_limit = be64_to_cpu(str->qu_limit); - qu->qu_warn = be64_to_cpu(str->qu_warn); - qu->qu_value = 
be64_to_cpu(str->qu_value); -} - -void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) -{ - const struct gfs2_dinode_host *di = &ip->i_di; - struct gfs2_dinode *str = buf; - - str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); - str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); - str->di_header.__pad0 = 0; - str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); - str->di_header.__pad1 = 0; - - gfs2_inum_out(&ip->i_num, &str->di_num); - - str->di_mode = cpu_to_be32(ip->i_inode.i_mode); - str->di_uid = cpu_to_be32(ip->i_inode.i_uid); - str->di_gid = cpu_to_be32(ip->i_inode.i_gid); - str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); - str->di_size = cpu_to_be64(di->di_size); - str->di_blocks = cpu_to_be64(di->di_blocks); - str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); - str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); - str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); - - str->di_goal_meta = cpu_to_be64(di->di_goal_meta); - str->di_goal_data = cpu_to_be64(di->di_goal_data); - str->di_generation = cpu_to_be64(di->di_generation); - - str->di_flags = cpu_to_be32(di->di_flags); - str->di_height = cpu_to_be16(di->di_height); - str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && - !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? 
- GFS2_FORMAT_DE : 0); - str->di_depth = cpu_to_be16(di->di_depth); - str->di_entries = cpu_to_be32(di->di_entries); - - str->di_eattr = cpu_to_be64(di->di_eattr); -} - -void gfs2_dinode_print(const struct gfs2_inode *ip) -{ - const struct gfs2_dinode_host *di = &ip->i_di; - - gfs2_inum_print(&ip->i_num); - - printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); - printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks); - printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta); - printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data); - - pv(di, di_flags, "0x%.8X"); - pv(di, di_height, "%u"); - - pv(di, di_depth, "%u"); - pv(di, di_entries, "%u"); - - printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr); -} - -void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf) -{ - const struct gfs2_log_header *str = buf; - - gfs2_meta_header_in(&lh->lh_header, buf); - lh->lh_sequence = be64_to_cpu(str->lh_sequence); - lh->lh_flags = be32_to_cpu(str->lh_flags); - lh->lh_tail = be32_to_cpu(str->lh_tail); - lh->lh_blkno = be32_to_cpu(str->lh_blkno); - lh->lh_hash = be32_to_cpu(str->lh_hash); -} - -void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) -{ - const struct gfs2_inum_range *str = buf; - - ir->ir_start = be64_to_cpu(str->ir_start); - ir->ir_length = be64_to_cpu(str->ir_length); -} - -void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) -{ - struct gfs2_inum_range *str = buf; - - str->ir_start = cpu_to_be64(ir->ir_start); - str->ir_length = cpu_to_be64(ir->ir_length); -} - -void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) -{ - const struct gfs2_statfs_change *str = buf; - - sc->sc_total = be64_to_cpu(str->sc_total); - sc->sc_free = be64_to_cpu(str->sc_free); - sc->sc_dinodes = be64_to_cpu(str->sc_dinodes); -} - -void gfs2_statfs_change_out(const struct 
gfs2_statfs_change_host *sc, void *buf) -{ - struct gfs2_statfs_change *str = buf; - - str->sc_total = cpu_to_be64(sc->sc_total); - str->sc_free = cpu_to_be64(sc->sc_free); - str->sc_dinodes = cpu_to_be64(sc->sc_dinodes); -} - -void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) -{ - const struct gfs2_quota_change *str = buf; - - qc->qc_change = be64_to_cpu(str->qc_change); - qc->qc_flags = be32_to_cpu(str->qc_flags); - qc->qc_id = be32_to_cpu(str->qc_id); -} - diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 30c15622174f..26c888890c24 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -32,6 +32,7 @@ #include "trans.h" #include "rgrp.h" #include "ops_file.h" +#include "super.h" #include "util.h" #include "glops.h" @@ -49,6 +50,8 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, end = start + bsize; if (end <= from || start >= to) continue; + if (gfs2_is_jdata(ip)) + set_buffer_uptodate(bh); gfs2_trans_add_bh(ip->i_gl, bh, 0); } } @@ -134,7 +137,9 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) return 0; /* don't care */ } - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) { + if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) && + PageChecked(page)) { + ClearPageChecked(page); error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); if (error) goto out_ignore; @@ -203,11 +208,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) * so we need to supply one here. It doesn't happen often. 
*/ if (unlikely(page->index)) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); - SetPageUptodate(page); + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); return 0; } @@ -450,6 +451,31 @@ out_uninit: } /** + * adjust_fs_space - Adjusts the free space available due to gfs2_grow + * @inode: the rindex inode + */ +static void adjust_fs_space(struct inode *inode) +{ + struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + u64 fs_total, new_free; + + /* Total up the file system space, according to the latest rindex. */ + fs_total = gfs2_ri_total(sdp); + + spin_lock(&sdp->sd_statfs_spin); + if (fs_total > (m_sc->sc_total + l_sc->sc_total)) + new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); + else + new_free = 0; + spin_unlock(&sdp->sd_statfs_spin); + fs_warn(sdp, "File system extended by %llu blocks.\n", + (unsigned long long)new_free); + gfs2_statfs_change(sdp, new_free, new_free, 0); +} + +/** * gfs2_commit_write - Commit write to a file * @file: The file to write to * @page: The page containing the data @@ -511,6 +537,9 @@ static int gfs2_commit_write(struct file *file, struct page *page, di->di_size = cpu_to_be64(inode->i_size); } + if (inode == sdp->sd_rindex) + adjust_fs_space(inode); + brelse(dibh); gfs2_trans_end(sdp); if (al->al_requested) { @@ -543,6 +572,23 @@ fail_nounlock: } /** + * gfs2_set_page_dirty - Page dirtying function + * @page: The page to dirty + * + * Returns: 1 if it dirtyed the page, or 0 otherwise + */ + +static int gfs2_set_page_dirty(struct page *page) +{ + struct gfs2_inode *ip = GFS2_I(page->mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); + + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + SetPageChecked(page); + return __set_page_dirty_buffers(page); +} + +/** * gfs2_bmap - 
Block map function * @mapping: Address space info * @lblock: The block to map @@ -578,6 +624,8 @@ static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh) if (bd) { bd->bd_bh = NULL; bh->b_private = NULL; + if (!bd->bd_ail && list_empty(&bd->bd_le.le_list)) + kmem_cache_free(gfs2_bufdata_cachep, bd); } gfs2_log_unlock(sdp); @@ -598,6 +646,8 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset) unsigned int curr_off = 0; BUG_ON(!PageLocked(page)); + if (offset == 0) + ClearPageChecked(page); if (!page_has_buffers(page)) return; @@ -728,8 +778,8 @@ static unsigned limit = 0; return; fs_warn(sdp, "ip = %llu %llu\n", - (unsigned long long)ip->i_num.no_formal_ino, - (unsigned long long)ip->i_num.no_addr); + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr); for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) fs_warn(sdp, "ip->i_cache[%u] = %s\n", @@ -810,6 +860,7 @@ const struct address_space_operations gfs2_file_aops = { .sync_page = block_sync_page, .prepare_write = gfs2_prepare_write, .commit_write = gfs2_commit_write, + .set_page_dirty = gfs2_set_page_dirty, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h index 35aaee4aa7e1..fa1b5b3d28b9 100644 --- a/fs/gfs2/ops_address.h +++ b/fs/gfs2/ops_address.h @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 
* * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index a6fdc52f554a..793e334d098e 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c @@ -21,6 +21,7 @@ #include "glock.h" #include "ops_dentry.h" #include "util.h" +#include "inode.h" /** * gfs2_drevalidate - Check directory lookup consistency @@ -40,14 +41,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) struct gfs2_inode *dip = GFS2_I(parent->d_inode); struct inode *inode = dentry->d_inode; struct gfs2_holder d_gh; - struct gfs2_inode *ip; - struct gfs2_inum_host inum; - unsigned int type; + struct gfs2_inode *ip = NULL; int error; int had_lock=0; - if (inode && is_bad_inode(inode)) - goto invalid; + if (inode) { + if (is_bad_inode(inode)) + goto invalid; + ip = GFS2_I(inode); + } if (sdp->sd_args.ar_localcaching) goto valid; @@ -59,7 +61,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) goto fail; } - error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); + error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip); switch (error) { case 0: if (!inode) @@ -73,16 +75,6 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) goto fail_gunlock; } - ip = GFS2_I(inode); - - if (!gfs2_inum_equal(&ip->i_num, &inum)) - goto invalid_gunlock; - - if (IF2DT(ip->i_inode.i_mode) != type) { - gfs2_consist_inode(dip); - goto fail_gunlock; - } - valid_gunlock: if (!had_lock) gfs2_glock_dq_uninit(&d_gh); diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index aad918337a46..99ea5659bc2c 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -22,10 +22,14 @@ #include "glops.h" #include "inode.h" #include "ops_dentry.h" -#include "ops_export.h" +#include "ops_fstype.h" #include "rgrp.h" #include "util.h" +#define GFS2_SMALL_FH_SIZE 4 +#define GFS2_LARGE_FH_SIZE 8 
+#define GFS2_OLD_FH_SIZE 10 + static struct dentry *gfs2_decode_fh(struct super_block *sb, __u32 *p, int fh_len, @@ -35,31 +39,28 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb, void *context) { __be32 *fh = (__force __be32 *)p; - struct gfs2_fh_obj fh_obj; - struct gfs2_inum_host *this, parent; + struct gfs2_inum_host inum, parent; - this = &fh_obj.this; - fh_obj.imode = DT_UNKNOWN; memset(&parent, 0, sizeof(struct gfs2_inum)); switch (fh_len) { case GFS2_LARGE_FH_SIZE: + case GFS2_OLD_FH_SIZE: parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; parent.no_formal_ino |= be32_to_cpu(fh[5]); parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; parent.no_addr |= be32_to_cpu(fh[7]); - fh_obj.imode = be32_to_cpu(fh[8]); case GFS2_SMALL_FH_SIZE: - this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; - this->no_formal_ino |= be32_to_cpu(fh[1]); - this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32; - this->no_addr |= be32_to_cpu(fh[3]); + inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; + inum.no_formal_ino |= be32_to_cpu(fh[1]); + inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; + inum.no_addr |= be32_to_cpu(fh[3]); break; default: return NULL; } - return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent, + return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent, acceptable, context); } @@ -75,10 +76,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, (connectable && *len < GFS2_LARGE_FH_SIZE)) return 255; - fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32); - fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF); - fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32); - fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF); + fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); + fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); + fh[2] = cpu_to_be32(ip->i_no_addr >> 32); + fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_SMALL_FH_SIZE; if (!connectable || inode == sb->s_root->d_inode) @@ -90,13 +91,10 @@ 
static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, igrab(inode); spin_unlock(&dentry->d_lock); - fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32); - fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF); - fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32); - fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF); - - fh[8] = cpu_to_be32(inode->i_mode); - fh[9] = 0; /* pad to double word */ + fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); + fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); + fh[6] = cpu_to_be32(ip->i_no_addr >> 32); + fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_LARGE_FH_SIZE; iput(inode); @@ -144,7 +142,8 @@ static int gfs2_get_name(struct dentry *parent, char *name, ip = GFS2_I(inode); *name = 0; - gnfd.inum = ip->i_num; + gnfd.inum.no_addr = ip->i_no_addr; + gnfd.inum.no_formal_ino = ip->i_no_formal_ino; gnfd.name = name; error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); @@ -192,8 +191,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) { struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj; - struct gfs2_inum_host *inum = &fh_obj->this; + struct gfs2_inum_host *inum = inum_obj; struct gfs2_holder i_gh, ri_gh, rgd_gh; struct gfs2_rgrpd *rgd; struct inode *inode; @@ -202,9 +200,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) /* System files? 
*/ - inode = gfs2_ilookup(sb, inum); + inode = gfs2_ilookup(sb, inum->no_addr); if (inode) { - if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) { + if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { iput(inode); return ERR_PTR(-ESTALE); } @@ -236,7 +234,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) gfs2_glock_dq_uninit(&rgd_gh); gfs2_glock_dq_uninit(&ri_gh); - inode = gfs2_inode_lookup(sb, inum, fh_obj->imode); + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, + inum->no_addr, + 0); if (!inode) goto fail; if (IS_ERR(inode)) { @@ -250,6 +250,15 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) goto fail; } + /* Pick up the works we bypass in gfs2_inode_lookup */ + if (inode->i_state & I_NEW) + gfs2_set_iop(inode); + + if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { + iput(inode); + goto fail; + } + error = -EIO; if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) { iput(inode); diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h deleted file mode 100644 index f925a955b3b8..000000000000 --- a/fs/gfs2/ops_export.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. 
- */ - -#ifndef __OPS_EXPORT_DOT_H__ -#define __OPS_EXPORT_DOT_H__ - -#define GFS2_SMALL_FH_SIZE 4 -#define GFS2_LARGE_FH_SIZE 10 - -extern struct export_operations gfs2_export_ops; -struct gfs2_fh_obj { - struct gfs2_inum_host this; - __u32 imode; -}; - -#endif /* __OPS_EXPORT_DOT_H__ */ diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 064df8804582..196d83266e34 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -502,7 +502,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); struct lm_lockname name = - { .ln_number = ip->i_num.no_addr, + { .ln_number = ip->i_no_addr, .ln_type = LM_TYPE_PLOCK }; if (!(fl->fl_flags & FL_POSIX)) @@ -557,7 +557,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) gfs2_glock_dq_uninit(fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), - ip->i_num.no_addr, &gfs2_flock_glops, + ip->i_no_addr, &gfs2_flock_glops, CREATE, &gl); if (error) goto out; @@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = { .release = gfs2_close, .fsync = gfs2_fsync, .lock = gfs2_lock, - .sendfile = generic_file_sendfile, .flock = gfs2_flock, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 2c5f8e7def0d..cf5aa5050548 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -27,7 +27,6 @@ #include "inode.h" #include "lm.h" #include "mount.h" -#include "ops_export.h" #include "ops_fstype.h" #include "ops_super.h" #include "recovery.h" @@ -105,6 +104,7 @@ static void init_vfs(struct super_block *sb, unsigned noatime) sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; sb->s_export_op = &gfs2_export_ops; + sb->s_time_gran = 1; sb->s_maxbytes = MAX_LFS_FILESIZE; if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) @@ -116,7 +116,6 @@ static void init_vfs(struct 
super_block *sb, unsigned noatime) static int init_names(struct gfs2_sbd *sdp, int silent) { - struct page *page; char *proto, *table; int error = 0; @@ -126,14 +125,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent) /* Try to autodetect */ if (!proto[0] || !table[0]) { - struct gfs2_sb *sb; - page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); - if (!page) - return -ENOBUFS; - sb = kmap(page); - gfs2_sb_in(&sdp->sd_sb, sb); - kunmap(page); - __free_page(page); + error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + if (error) + return error; error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); if (error) @@ -151,6 +145,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent) snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); + while ((table = strchr(sdp->sd_table_name, '/'))) + *table = '_'; + out: return error; } @@ -236,17 +233,17 @@ fail: return error; } -static struct inode *gfs2_lookup_root(struct super_block *sb, - struct gfs2_inum_host *inum) +static inline struct inode *gfs2_lookup_root(struct super_block *sb, + u64 no_addr) { - return gfs2_inode_lookup(sb, inum, DT_DIR); + return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); } static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) { struct super_block *sb = sdp->sd_vfs; struct gfs2_holder sb_gh; - struct gfs2_inum_host *inum; + u64 no_addr; struct inode *inode; int error = 0; @@ -289,10 +286,10 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) sb_set_blocksize(sb, sdp->sd_sb.sb_bsize); /* Get the root inode */ - inum = &sdp->sd_sb.sb_root_dir; + no_addr = sdp->sd_sb.sb_root_dir.no_addr; if (sb->s_type == &gfs2meta_fs_type) - inum = &sdp->sd_sb.sb_master_dir; - inode = gfs2_lookup_root(sb, inum); + no_addr = sdp->sd_sb.sb_master_dir.no_addr; + inode = gfs2_lookup_root(sb, no_addr); if (IS_ERR(inode)) { error = PTR_ERR(inode); fs_err(sdp, "can't read in root inode: 
%d\n", error); @@ -449,7 +446,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) if (undo) goto fail_qinode; - inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir); + inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr); if (IS_ERR(inode)) { error = PTR_ERR(inode); fs_err(sdp, "can't read in master directory: %d\n", error); diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h index 7cc2c296271b..407029b3b2b3 100644 --- a/fs/gfs2/ops_fstype.h +++ b/fs/gfs2/ops_fstype.h @@ -14,5 +14,6 @@ extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; +extern struct export_operations gfs2_export_ops; #endif /* __OPS_FSTYPE_DOT_H__ */ diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index d85f6e05cb95..911c115b5c6c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -157,7 +157,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_gunlock; - error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL); + error = gfs2_dir_check(dir, &dentry->d_name, NULL); switch (error) { case -ENOENT: break; @@ -206,7 +206,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - al->al_rgd->rd_ri.ri_length + + al->al_rgd->rd_length + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) @@ -217,8 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_ipres; } - error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num, - IF2DT(inode->i_mode)); + error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode)); if (error) goto out_end_trans; @@ -275,7 +274,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); @@ -420,7 +419,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); - gfs2_inum_out(&dip->i_num, &dent->de_inum); + gfs2_inum_out(dip, dent); dent->de_type = cpu_to_be16(DT_DIR); gfs2_dinode_out(ip, di); @@ -472,7 +471,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); error = gfs2_glock_nq_m(3, ghs); @@ -614,7 +613,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, * this is the case of the target file already existing * so we unlink before doing the rename */ - nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr); + nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr); if (nrgd) gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); } @@ -653,7 +652,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_gunlock; - error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL); + error = gfs2_dir_check(ndir, &ndentry->d_name, NULL); switch (error) { case -ENOENT: error = 0; @@ -712,7 +711,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - al->al_rgd->rd_ri.ri_length + + al->al_rgd->rd_length + 4 * RES_DINODE + 4 * RES_LEAF + RES_STATFS + RES_QUOTA + 4, 0); if (error) @@ -750,7 +749,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_end_trans; - error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR); + error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR); if (error) goto 
out_end_trans; } else { @@ -758,7 +757,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out_end_trans; - ip->i_inode.i_ctime = CURRENT_TIME_SEC; + ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -768,8 +767,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_end_trans; - error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num, - IF2DT(ip->i_inode.i_mode)); + error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode)); if (error) goto out_end_trans; @@ -905,8 +903,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr) } error = gfs2_truncatei(ip, attr->ia_size); - if (error) - return error; + if (error && (inode->i_size != ip->i_di.di_size)) + i_size_write(inode, ip->i_di.di_size); return error; } diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 485ce3d49923..603d940f1159 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -326,8 +326,10 @@ static void gfs2_clear_inode(struct inode *inode) gfs2_glock_schedule_for_reclaim(ip->i_gl); gfs2_glock_put(ip->i_gl); ip->i_gl = NULL; - if (ip->i_iopen_gh.gh_gl) + if (ip->i_iopen_gh.gh_gl) { + ip->i_iopen_gh.gh_gl->gl_object = NULL; gfs2_glock_dq_uninit(&ip->i_iopen_gh); + } } } @@ -422,13 +424,13 @@ static void gfs2_delete_inode(struct inode *inode) if (!inode->i_private) goto out; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (unlikely(error)) { gfs2_glock_dq_uninit(&ip->i_iopen_gh); goto out; } - gfs2_glock_dq(&ip->i_iopen_gh); + gfs2_glock_dq_wait(&ip->i_iopen_gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); error = gfs2_glock_nq(&ip->i_iopen_gh); if (error) diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 
aa0dbd2aac1b..404b7cc9f8c4 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -66,7 +66,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) if (error) goto out_gunlock_q; - error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length + + error = gfs2_trans_begin(sdp, al->al_rgd->rd_length + ind_blocks + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c186857e48a8..6e546ee8f3d4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -66,6 +66,18 @@ #define QUOTA_USER 1 #define QUOTA_GROUP 0 +struct gfs2_quota_host { + u64 qu_limit; + u64 qu_warn; + s64 qu_value; +}; + +struct gfs2_quota_change_host { + u64 qc_change; + u32 qc_flags; /* GFS2_QCF_... */ + u32 qc_id; +}; + static u64 qd2offset(struct gfs2_quota_data *qd) { u64 offset; @@ -561,6 +573,25 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) mutex_unlock(&sdp->sd_quota_mutex); } +static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) +{ + const struct gfs2_quota *str = buf; + + qu->qu_limit = be64_to_cpu(str->qu_limit); + qu->qu_warn = be64_to_cpu(str->qu_warn); + qu->qu_value = be64_to_cpu(str->qu_value); +} + +static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) +{ + struct gfs2_quota *str = buf; + + str->qu_limit = cpu_to_be64(qu->qu_limit); + str->qu_warn = cpu_to_be64(qu->qu_warn); + str->qu_value = cpu_to_be64(qu->qu_value); + memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); +} + /** * gfs2_adjust_quota * @@ -573,12 +604,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct inode *inode = &ip->i_inode; struct address_space *mapping = inode->i_mapping; unsigned long index = loc >> PAGE_CACHE_SHIFT; - unsigned offset = loc & (PAGE_CACHE_SHIFT - 1); + unsigned offset = loc & (PAGE_CACHE_SIZE - 1); unsigned blocksize, iblock, pos; struct buffer_head *bh; struct page *page; void *kaddr; - __be64 *ptr; + char *ptr; + struct gfs2_quota_host qp; s64 value; 
int err = -EIO; @@ -620,13 +652,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, kaddr = kmap_atomic(page, KM_USER0); ptr = kaddr + offset; - value = (s64)be64_to_cpu(*ptr) + change; - *ptr = cpu_to_be64(value); + gfs2_quota_in(&qp, ptr); + qp.qu_value += change; + value = qp.qu_value; + gfs2_quota_out(&qp, ptr); flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); err = 0; qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); qd->qd_qb.qb_value = cpu_to_be64(value); + ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC); + ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value); unlock: unlock_page(page); page_cache_release(page); @@ -689,7 +725,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) goto out_alloc; error = gfs2_trans_begin(sdp, - al->al_rgd->rd_ri.ri_length + + al->al_rgd->rd_length + num_qd * data_blocks + nalloc * ind_blocks + RES_DINODE + num_qd + @@ -709,7 +745,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) offset = qd2offset(qd); error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, (struct gfs2_quota_data *) - qd->qd_gl->gl_lvb); + qd); if (error) goto out_end_trans; @@ -1050,6 +1086,15 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) return error; } +static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) +{ + const struct gfs2_quota_change *str = buf; + + qc->qc_change = be64_to_cpu(str->qc_change); + qc->qc_flags = be32_to_cpu(str->qc_flags); + qc->qc_id = be32_to_cpu(str->qc_id); +} + int gfs2_quota_init(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 8bc182c7e2ef..5ada38c99a2c 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -116,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp) } } +static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf) +{ + const 
struct gfs2_log_header *str = buf; + + if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) || + str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH)) + return 1; + + lh->lh_sequence = be64_to_cpu(str->lh_sequence); + lh->lh_flags = be32_to_cpu(str->lh_flags); + lh->lh_tail = be32_to_cpu(str->lh_tail); + lh->lh_blkno = be32_to_cpu(str->lh_blkno); + lh->lh_hash = be32_to_cpu(str->lh_hash); + return 0; +} + /** * get_log_header - read the log header for a given segment * @jd: the journal @@ -147,12 +163,10 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, sizeof(u32)); hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing)); hash ^= (u32)~0; - gfs2_log_header_in(&lh, bh->b_data); + error = gfs2_log_header_in(&lh, bh->b_data); brelse(bh); - if (lh.lh_header.mh_magic != GFS2_MAGIC || - lh.lh_header.mh_type != GFS2_METATYPE_LH || - lh.lh_blkno != blk || lh.lh_hash != hash) + if (error || lh.lh_blkno != blk || lh.lh_hash != hash) return 1; *head = lh; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 1727f5012efe..e4e040625153 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 
* * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -28,6 +28,7 @@ #include "ops_file.h" #include "util.h" #include "log.h" +#include "inode.h" #define BFITNOENT ((u32)~0) @@ -50,6 +51,9 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; +static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, + unsigned char old_state, unsigned char new_state); + /** * gfs2_setbit - Set a bit in the bitmaps * @buffer: the buffer that holds the bitmaps @@ -204,7 +208,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_bitmap *bi = NULL; - u32 length = rgd->rd_ri.ri_length; + u32 length = rgd->rd_length; u32 count[4], tmp; int buf, x; @@ -227,7 +231,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) return; } - tmp = rgd->rd_ri.ri_data - + tmp = rgd->rd_data - rgd->rd_rg.rg_free - rgd->rd_rg.rg_dinodes; if (count[1] + count[2] != tmp) { @@ -253,10 +257,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } -static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block) +static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) { - u64 first = ri->ri_data0; - u64 last = first + ri->ri_data; + u64 first = rgd->rd_data0; + u64 last = first + rgd->rd_data; return first <= block && block < last; } @@ -275,7 +279,7 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) spin_lock(&sdp->sd_rindex_spin); list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) { - if (rgrp_contains_block(&rgd->rd_ri, blk)) { + if (rgrp_contains_block(rgd, blk)) { list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); spin_unlock(&sdp->sd_rindex_spin); return rgd; @@ -354,6 +358,15 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) mutex_unlock(&sdp->sd_rindex_mutex); } +static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) +{ + printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); + 
printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); + printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); + printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); + printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); +} + /** * gfs2_compute_bitstructs - Compute the bitmap sizes * @rgd: The resource group descriptor @@ -367,7 +380,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_bitmap *bi; - u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */ + u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */ u32 bytes_left, bytes; int x; @@ -378,7 +391,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) if (!rgd->rd_bits) return -ENOMEM; - bytes_left = rgd->rd_ri.ri_bitbytes; + bytes_left = rgd->rd_bitbytes; for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; @@ -399,14 +412,14 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) } else if (x + 1 == length) { bytes = bytes_left; bi->bi_offset = sizeof(struct gfs2_meta_header); - bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; + bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; /* other blocks */ } else { bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); bi->bi_offset = sizeof(struct gfs2_meta_header); - bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; + bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; } @@ -418,9 +431,9 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) return -EIO; } bi = rgd->rd_bits + (length - 1); - if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) { + if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) { if (gfs2_consist_rgrpd(rgd)) { - gfs2_rindex_print(&rgd->rd_ri); + gfs2_rindex_print(rgd); fs_err(sdp, "start=%u len=%u offset=%u\n", bi->bi_start, bi->bi_len, bi->bi_offset); } @@ -431,9 +444,104 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) } /** - * gfs2_ri_update - Pull in a new 
resource index from the disk + * gfs2_ri_total - Total up the file system space, according to the rindex. + * + */ +u64 gfs2_ri_total(struct gfs2_sbd *sdp) +{ + u64 total_data = 0; + struct inode *inode = sdp->sd_rindex; + struct gfs2_inode *ip = GFS2_I(inode); + char buf[sizeof(struct gfs2_rindex)]; + struct file_ra_state ra_state; + int error, rgrps; + + mutex_lock(&sdp->sd_rindex_mutex); + file_ra_state_init(&ra_state, inode->i_mapping); + for (rgrps = 0;; rgrps++) { + loff_t pos = rgrps * sizeof(struct gfs2_rindex); + + if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size) + break; + error = gfs2_internal_read(ip, &ra_state, buf, &pos, + sizeof(struct gfs2_rindex)); + if (error != sizeof(struct gfs2_rindex)) + break; + total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); + } + mutex_unlock(&sdp->sd_rindex_mutex); + return total_data; +} + +static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf) +{ + const struct gfs2_rindex *str = buf; + + rgd->rd_addr = be64_to_cpu(str->ri_addr); + rgd->rd_length = be32_to_cpu(str->ri_length); + rgd->rd_data0 = be64_to_cpu(str->ri_data0); + rgd->rd_data = be32_to_cpu(str->ri_data); + rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes); +} + +/** + * read_rindex_entry - Pull in a new resource index entry from the disk * @gl: The glock covering the rindex inode * + * Returns: 0 on success, error code otherwise + */ + +static int read_rindex_entry(struct gfs2_inode *ip, + struct file_ra_state *ra_state) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); + char buf[sizeof(struct gfs2_rindex)]; + int error; + struct gfs2_rgrpd *rgd; + + error = gfs2_internal_read(ip, ra_state, buf, &pos, + sizeof(struct gfs2_rindex)); + if (!error) + return 0; + if (error != sizeof(struct gfs2_rindex)) { + if (error > 0) + error = -EIO; + return error; + } + + rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS); + error = -ENOMEM; + if (!rgd) + return error; + + 
mutex_init(&rgd->rd_mutex); + lops_init_le(&rgd->rd_le, &gfs2_rg_lops); + rgd->rd_sbd = sdp; + + list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list); + list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); + + gfs2_rindex_in(rgd, buf); + error = compute_bitstructs(rgd); + if (error) + return error; + + error = gfs2_glock_get(sdp, rgd->rd_addr, + &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); + if (error) + return error; + + rgd->rd_gl->gl_object = rgd; + rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1; + rgd->rd_flags |= GFS2_RDF_CHECK; + return error; +} + +/** + * gfs2_ri_update - Pull in a new resource index from the disk + * @ip: pointer to the rindex inode + * * Returns: 0 on successful update, error code otherwise */ @@ -441,13 +549,11 @@ static int gfs2_ri_update(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct inode *inode = &ip->i_inode; - struct gfs2_rgrpd *rgd; - char buf[sizeof(struct gfs2_rindex)]; struct file_ra_state ra_state; - u64 junk = ip->i_di.di_size; + u64 rgrp_count = ip->i_di.di_size; int error; - if (do_div(junk, sizeof(struct gfs2_rindex))) { + if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) { gfs2_consist_inode(ip); return -EIO; } @@ -455,50 +561,50 @@ static int gfs2_ri_update(struct gfs2_inode *ip) clear_rgrpdi(sdp); file_ra_state_init(&ra_state, inode->i_mapping); - for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { - loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); - error = gfs2_internal_read(ip, &ra_state, buf, &pos, - sizeof(struct gfs2_rindex)); - if (!error) - break; - if (error != sizeof(struct gfs2_rindex)) { - if (error > 0) - error = -EIO; - goto fail; + for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) { + error = read_rindex_entry(ip, &ra_state); + if (error) { + clear_rgrpdi(sdp); + return error; } + } - rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS); - error = -ENOMEM; - if (!rgd) - goto fail; - - mutex_init(&rgd->rd_mutex); - lops_init_le(&rgd->rd_le, &gfs2_rg_lops); - 
rgd->rd_sbd = sdp; - - list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list); - list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); - - gfs2_rindex_in(&rgd->rd_ri, buf); - error = compute_bitstructs(rgd); - if (error) - goto fail; + sdp->sd_rindex_vn = ip->i_gl->gl_vn; + return 0; +} - error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr, - &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); - if (error) - goto fail; +/** + * gfs2_ri_update_special - Pull in a new resource index from the disk + * + * This is a special version that's safe to call from gfs2_inplace_reserve_i. + * In this case we know that we don't have any resource groups in memory yet. + * + * @ip: pointer to the rindex inode + * + * Returns: 0 on successful update, error code otherwise + */ +static int gfs2_ri_update_special(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct inode *inode = &ip->i_inode; + struct file_ra_state ra_state; + int error; - rgd->rd_gl->gl_object = rgd; - rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1; + file_ra_state_init(&ra_state, inode->i_mapping); + for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { + /* Ignore partials */ + if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > + ip->i_di.di_size) + break; + error = read_rindex_entry(ip, &ra_state); + if (error) { + clear_rgrpdi(sdp); + return error; + } } sdp->sd_rindex_vn = ip->i_gl->gl_vn; return 0; - -fail: - clear_rgrpdi(sdp); - return error; } /** @@ -543,6 +649,28 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) return error; } +static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf) +{ + const struct gfs2_rgrp *str = buf; + + rg->rg_flags = be32_to_cpu(str->rg_flags); + rg->rg_free = be32_to_cpu(str->rg_free); + rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); + rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); +} + +static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) +{ + struct gfs2_rgrp *str = buf; + + str->rg_flags = cpu_to_be32(rg->rg_flags); + 
str->rg_free = cpu_to_be32(rg->rg_free); + str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); + str->__pad = cpu_to_be32(0); + str->rg_igeneration = cpu_to_be64(rg->rg_igeneration); + memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); +} + /** * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps * @rgd: the struct gfs2_rgrpd describing the RG to read in @@ -557,7 +685,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl = rgd->rd_gl; - unsigned int length = rgd->rd_ri.ri_length; + unsigned int length = rgd->rd_length; struct gfs2_bitmap *bi; unsigned int x, y; int error; @@ -575,7 +703,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; - error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh); + error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); if (error) goto fail; } @@ -637,7 +765,7 @@ void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd) void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; - int x, length = rgd->rd_ri.ri_length; + int x, length = rgd->rd_length; spin_lock(&sdp->sd_rindex_spin); gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); @@ -660,7 +788,7 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd) void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; - unsigned int length = rgd->rd_ri.ri_length; + unsigned int length = rgd->rd_length; unsigned int x; for (x = 0; x < length; x++) { @@ -722,6 +850,38 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) } /** + * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes + * @rgd: The rgrp + * + * Returns: The inode, if one has been found + */ + +static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) +{ + struct inode *inode; + u32 goal = 0; + u64 no_addr; + + for(;;) { + goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, + GFS2_BLKST_UNLINKED); + if 
(goal == 0) + return 0; + no_addr = goal + rgd->rd_data0; + if (no_addr <= *last_unlinked) + continue; + *last_unlinked = no_addr; + inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, + no_addr, -1); + if (!IS_ERR(inode)) + return inode; + } + + rgd->rd_flags &= ~GFS2_RDF_CHECK; + return NULL; +} + +/** * recent_rgrp_first - get first RG from "recent" list * @sdp: The GFS2 superblock * @rglast: address of the rgrp used last @@ -743,7 +903,7 @@ static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, goto first; list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { - if (rgd->rd_ri.ri_addr == rglast) + if (rgd->rd_addr == rglast) goto out; } @@ -882,8 +1042,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd) * Returns: errno */ -static int get_local_rgrp(struct gfs2_inode *ip) +static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) { + struct inode *inode = NULL; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd, *begin = NULL; struct gfs2_alloc *al = &ip->i_alloc; @@ -903,7 +1064,11 @@ static int get_local_rgrp(struct gfs2_inode *ip) case 0: if (try_rgrp_fit(rgd, al)) goto out; + if (rgd->rd_flags & GFS2_RDF_CHECK) + inode = try_rgrp_unlink(rgd, last_unlinked); gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (inode) + return inode; rgd = recent_rgrp_next(rgd, 1); break; @@ -912,7 +1077,7 @@ static int get_local_rgrp(struct gfs2_inode *ip) break; default: - return error; + return ERR_PTR(error); } } @@ -927,7 +1092,11 @@ static int get_local_rgrp(struct gfs2_inode *ip) case 0: if (try_rgrp_fit(rgd, al)) goto out; + if (rgd->rd_flags & GFS2_RDF_CHECK) + inode = try_rgrp_unlink(rgd, last_unlinked); gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (inode) + return inode; break; case GLR_TRYFAILED: @@ -935,7 +1104,7 @@ static int get_local_rgrp(struct gfs2_inode *ip) break; default: - return error; + return ERR_PTR(error); } rgd = gfs2_rgrpd_get_next(rgd); @@ -944,7 +1113,7 @@ 
static int get_local_rgrp(struct gfs2_inode *ip) if (rgd == begin) { if (++loops >= 3) - return -ENOSPC; + return ERR_PTR(-ENOSPC); if (!skipped) loops++; flags = 0; @@ -954,7 +1123,7 @@ static int get_local_rgrp(struct gfs2_inode *ip) } out: - ip->i_last_rg_alloc = rgd->rd_ri.ri_addr; + ip->i_last_rg_alloc = rgd->rd_addr; if (begin) { recent_rgrp_add(rgd); @@ -964,7 +1133,7 @@ out: forward_rgrp_set(sdp, rgd); } - return 0; + return NULL; } /** @@ -978,19 +1147,33 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_alloc *al = &ip->i_alloc; - int error; + struct inode *inode; + int error = 0; + u64 last_unlinked = 0; if (gfs2_assert_warn(sdp, al->al_requested)) return -EINVAL; - error = gfs2_rindex_hold(sdp, &al->al_ri_gh); +try_again: + /* We need to hold the rindex unless the inode we're using is + the rindex itself, in which case it's already held. */ + if (ip != GFS2_I(sdp->sd_rindex)) + error = gfs2_rindex_hold(sdp, &al->al_ri_gh); + else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ + error = gfs2_ri_update_special(ip); + if (error) return error; - error = get_local_rgrp(ip); - if (error) { - gfs2_glock_dq_uninit(&al->al_ri_gh); - return error; + inode = get_local_rgrp(ip, &last_unlinked); + if (inode) { + if (ip != GFS2_I(sdp->sd_rindex)) + gfs2_glock_dq_uninit(&al->al_ri_gh); + if (IS_ERR(inode)) + return PTR_ERR(inode); + iput(inode); + gfs2_log_flush(sdp, NULL); + goto try_again; } al->al_file = file; @@ -1019,7 +1202,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) al->al_rgd = NULL; gfs2_glock_dq_uninit(&al->al_rgd_gh); - gfs2_glock_dq_uninit(&al->al_ri_gh); + if (ip != GFS2_I(sdp->sd_rindex)) + gfs2_glock_dq_uninit(&al->al_ri_gh); } /** @@ -1037,8 +1221,8 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) unsigned int buf; unsigned char type; - length = rgd->rd_ri.ri_length; - rgrp_block = block - 
rgd->rd_ri.ri_data0; + length = rgd->rd_length; + rgrp_block = block - rgd->rd_data0; for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; @@ -1077,10 +1261,10 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) */ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, unsigned char new_state) + unsigned char old_state, unsigned char new_state) { struct gfs2_bitmap *bi = NULL; - u32 length = rgd->rd_ri.ri_length; + u32 length = rgd->rd_length; u32 blk = 0; unsigned int buf, x; @@ -1118,17 +1302,18 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, goal = 0; } - if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length)) - blk = 0; + if (old_state != new_state) { + gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT); - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, blk, new_state); - if (bi->bi_clone) - gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset, + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, bi->bi_len, blk, new_state); + if (bi->bi_clone) + gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset, + bi->bi_len, blk, new_state); + } - return bi->bi_start * GFS2_NBBY + blk; + return (blk == BFITNOENT) ? 
0 : (bi->bi_start * GFS2_NBBY) + blk; } /** @@ -1156,9 +1341,9 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, return NULL; } - length = rgd->rd_ri.ri_length; + length = rgd->rd_length; - rgrp_blk = bstart - rgd->rd_ri.ri_data0; + rgrp_blk = bstart - rgd->rd_data0; while (blen--) { for (buf = 0; buf < length; buf++) { @@ -1202,15 +1387,15 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip) u32 goal, blk; u64 block; - if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data)) - goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0; + if (rgrp_contains_block(rgd, ip->i_di.di_goal_data)) + goal = ip->i_di.di_goal_data - rgd->rd_data0; else goal = rgd->rd_last_alloc_data; blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); rgd->rd_last_alloc_data = blk; - block = rgd->rd_ri.ri_data0 + blk; + block = rgd->rd_data0 + blk; ip->i_di.di_goal_data = block; gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); @@ -1246,15 +1431,15 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip) u32 goal, blk; u64 block; - if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta)) - goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0; + if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta)) + goal = ip->i_di.di_goal_meta - rgd->rd_data0; else goal = rgd->rd_last_alloc_meta; blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); rgd->rd_last_alloc_meta = blk; - block = rgd->rd_ri.ri_data0 + blk; + block = rgd->rd_data0 + blk; ip->i_di.di_goal_meta = block; gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); @@ -1296,7 +1481,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) rgd->rd_last_alloc_meta = blk; - block = rgd->rd_ri.ri_data0 + blk; + block = rgd->rd_data0 + blk; gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); rgd->rd_rg.rg_free--; @@ -1379,7 +1564,7 @@ void gfs2_unlink_di(struct inode *inode) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_rgrpd *rgd; - u64 blkno = ip->i_num.no_addr; + u64 blkno = 
ip->i_no_addr; rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); if (!rgd) @@ -1414,9 +1599,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { - gfs2_free_uninit_di(rgd, ip->i_num.no_addr); + gfs2_free_uninit_di(rgd, ip->i_no_addr); gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); - gfs2_meta_wipe(ip, ip->i_num.no_addr, 1); + gfs2_meta_wipe(ip, ip->i_no_addr, 1); } /** diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b01e0cfc99b5..b4c6adfc6f2e 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -65,5 +65,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, int flags); void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); +u64 gfs2_ri_total(struct gfs2_sbd *sdp); #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 4fdda974dc83..f916b9740c75 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -95,8 +95,8 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) { unsigned int x; - if (sb->sb_header.mh_magic != GFS2_MAGIC || - sb->sb_header.mh_type != GFS2_METATYPE_SB) { + if (sb->sb_magic != GFS2_MAGIC || + sb->sb_type != GFS2_METATYPE_SB) { if (!silent) printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); return -EINVAL; @@ -174,10 +174,31 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error) return 0; } +static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) +{ + const struct gfs2_sb *str = buf; + + sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); + sb->sb_type = be32_to_cpu(str->sb_header.mh_type); + sb->sb_format = be32_to_cpu(str->sb_header.mh_format); + sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); + sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); + sb->sb_bsize = be32_to_cpu(str->sb_bsize); + sb->sb_bsize_shift = 
be32_to_cpu(str->sb_bsize_shift); + sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); + sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); + sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); + sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); + + memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); + memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); +} + /** * gfs2_read_super - Read the gfs2 super block from disk - * @sb: The VFS super block + * @sdp: The GFS2 super block * @sector: The location of the super block + * @error: The error code to return * * This uses the bio functions to read the super block from disk * because we want to be 100% sure that we never read cached data. @@ -189,17 +210,19 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error) * the master directory (contains pointers to journals etc) and the * root directory. * - * Returns: A page containing the sb or NULL + * Returns: 0 on success or error */ -struct page *gfs2_read_super(struct super_block *sb, sector_t sector) +int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) { + struct super_block *sb = sdp->sd_vfs; + struct gfs2_sb *p; struct page *page; struct bio *bio; page = alloc_page(GFP_KERNEL); if (unlikely(!page)) - return NULL; + return -ENOBUFS; ClearPageUptodate(page); ClearPageDirty(page); @@ -208,7 +231,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector) bio = bio_alloc(GFP_KERNEL, 1); if (unlikely(!bio)) { __free_page(page); - return NULL; + return -ENOBUFS; } bio->bi_sector = sector * (sb->s_blocksize >> 9); @@ -222,9 +245,13 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector) bio_put(bio); if (!PageUptodate(page)) { __free_page(page); - return NULL; + return -EIO; } - return page; + p = kmap(page); + gfs2_sb_in(&sdp->sd_sb, p); + kunmap(page); + __free_page(page); + return 0; } /** @@ 
-241,19 +268,13 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) u32 tmp_blocks; unsigned int x; int error; - struct page *page; - char *sb; - page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); - if (!page) { + error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + if (error) { if (!silent) fs_err(sdp, "can't read superblock\n"); - return -EIO; + return error; } - sb = kmap(page); - gfs2_sb_in(&sdp->sd_sb, sb); - kunmap(page); - __free_page(page); error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); if (error) @@ -360,7 +381,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) name.len = sprintf(buf, "journal%u", sdp->sd_journals); name.hash = gfs2_disk_hash(name.name, name.len); - error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL); + error = gfs2_dir_check(sdp->sd_jindex, &name, NULL); if (error == -ENOENT) { error = 0; break; @@ -593,6 +614,24 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) return error; } +static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) +{ + const struct gfs2_statfs_change *str = buf; + + sc->sc_total = be64_to_cpu(str->sc_total); + sc->sc_free = be64_to_cpu(str->sc_free); + sc->sc_dinodes = be64_to_cpu(str->sc_dinodes); +} + +static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf) +{ + struct gfs2_statfs_change *str = buf; + + str->sc_total = cpu_to_be64(sc->sc_total); + str->sc_free = cpu_to_be64(sc->sc_free); + str->sc_dinodes = cpu_to_be64(sc->sc_dinodes); +} + int gfs2_statfs_init(struct gfs2_sbd *sdp) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -772,7 +811,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd, struct gfs2_statfs_change_host *sc) { gfs2_rgrp_verify(rgd); - sc->sc_total += rgd->rd_ri.ri_data; + sc->sc_total += rgd->rd_data; sc->sc_free += rgd->rd_rg.rg_free; sc->sc_dinodes += rgd->rd_rg.rg_dinodes; return 0; diff --git a/fs/gfs2/super.h 
b/fs/gfs2/super.h index e590b2df11dc..60a870e430be 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -16,7 +16,7 @@ void gfs2_tune_init(struct gfs2_tune *gt); int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); -struct page *gfs2_read_super(struct super_block *sb, sector_t sector); +int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) { diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 601eaa1b9ed6..424a0774eda8 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -115,8 +115,8 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, "GFS2: fsid=%s: inode = %llu %llu\n" "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino, - (unsigned long long)ip->i_num.no_addr, + sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr, sdp->sd_fsname, function, file, line); return rv; } @@ -137,7 +137,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, "GFS2: fsid=%s: RG = %llu\n" "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr, + sdp->sd_fsname, (unsigned long long)rgd->rd_addr, sdp->sd_fsname, function, file, line); return rv; } diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9a934db0bd8a..bc835f272a6e 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, .fsync = file_fsync, .open = hfs_file_open, .release = hfs_file_release, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 45dab5d6cc10..409ce5429c91 100644 --- 
a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -288,7 +288,7 @@ static const struct file_operations hfsplus_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, .fsync = file_fsync, .open = hfsplus_file_open, .release = hfsplus_file_release, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 8286491dbf31..c77862032e84 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) static const struct file_operations hostfs_file_fops = { .llseek = generic_file_llseek, .read = do_sync_read, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .write = do_sync_write, diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index b4eafc0f1e54..5b53e5c5d8df 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops = .mmap = generic_file_mmap, .release = hpfs_file_release, .fsync = hpfs_file_fsync, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; const struct inode_operations hpfs_file_iops = diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index aa083dd34e92..e6b46b3ac2fe 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -736,15 +736,13 @@ static int can_do_hugetlb_shm(void) can_do_mlock()); } -struct file *hugetlb_zero_setup(size_t size) +struct file *hugetlb_file_setup(const char *name, size_t size) { int error = -ENOMEM; struct file *file; struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; - char buf[16]; - static atomic_t counter; if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); @@ -756,8 +754,7 @@ struct file *hugetlb_zero_setup(size_t size) return ERR_PTR(-ENOMEM); root = 
hugetlbfs_vfsmount->mnt_root; - snprintf(buf, 16, "%u", atomic_inc_return(&counter)); - quick_string.name = buf; + quick_string.name = name; quick_string.len = strlen(quick_string.name); quick_string.hash = 0; dentry = d_alloc(root, &quick_string); diff --git a/fs/ioctl.c b/fs/ioctl.c index 479c1038ed4a..8c90cbc903fa 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -12,6 +12,7 @@ #include <linux/fs.h> #include <linux/security.h> #include <linux/module.h> +#include <linux/kallsyms.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -20,6 +21,7 @@ static long do_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int error = -ENOTTY; + void *f; if (!filp->f_op) goto out; @@ -29,10 +31,16 @@ static long do_ioctl(struct file *filp, unsigned int cmd, if (error == -ENOIOCTLCMD) error = -EINVAL; goto out; - } else if (filp->f_op->ioctl) { + } else if ((f = filp->f_op->ioctl)) { lock_kernel(); - error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, - filp, cmd, arg); + if (!filp->f_op->ioctl) { + printk("%s: ioctl %p disappeared\n", __FUNCTION__, f); + print_symbol("symbol: %s\n", (unsigned long)f); + dump_stack(); + } else { + error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, + filp, cmd, arg); + } unlock_kernel(); } diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 99871279a1ed..c2530197be0c 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations = .ioctl = jffs2_ioctl, .mmap = generic_file_readonly_mmap, .fsync = jffs2_fsync, - .sendfile = generic_file_sendfile + .splice_read = generic_file_splice_read, }; /* jffs2_file_inode_operations */ diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 4884d5edfe65..7b363786c2d2 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -210,8 +210,7 @@ static void jffs2_kill_tn(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info * * offset, and the one with the smallest length will come first in the * ordering. 
* - * Returns 0 if the node was inserted - * 1 if the node is obsolete (because we can't mark it so yet) + * Returns 0 if the node was handled (including marking it obsolete) * < 0 an if error occurred */ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, @@ -229,9 +228,16 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, check anyway. */ if (!tn->fn->size) { if (rii->mdata_tn) { - /* We had a candidate mdata node already */ - dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version); - jffs2_kill_tn(c, rii->mdata_tn); + if (rii->mdata_tn->version < tn->version) { + /* We had a candidate mdata node already */ + dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version); + jffs2_kill_tn(c, rii->mdata_tn); + } else { + dbg_readinode("kill new mdata with ver %d (older than existing %d\n", + tn->version, rii->mdata_tn->version); + jffs2_kill_tn(c, tn); + return 0; + } } rii->mdata_tn = tn; dbg_readinode("keep new mdata with ver %d\n", tn->version); @@ -565,8 +571,7 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r * Helper function for jffs2_get_inode_nodes(). * It is called every time an directory entry node is found. * - * Returns: 0 on succes; - * 1 if the node should be marked obsolete; + * Returns: 0 on success; * negative error code on failure. */ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, @@ -673,8 +678,7 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r * Helper function for jffs2_get_inode_nodes(). * It is called every time an inode node is found. * - * Returns: 0 on success; - * 1 if the node should be marked obsolete; + * Returns: 0 on success (possibly after marking a bad node obsolete); * negative error code on failure. 
*/ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, @@ -683,7 +687,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref { struct jffs2_tmp_dnode_info *tn; uint32_t len, csize; - int ret = 1; + int ret = 0; uint32_t crc; /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */ @@ -712,8 +716,9 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref /* Sanity checks */ if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) || unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) { - JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref)); - jffs2_dbg_dump_node(c, ref_offset(ref)); + JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref)); + jffs2_dbg_dump_node(c, ref_offset(ref)); + jffs2_mark_node_obsolete(c, ref); goto free_out; } @@ -768,6 +773,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) { JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n", ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc)); + jffs2_mark_node_obsolete(c, ref); goto free_out; } @@ -847,7 +853,6 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref * It is called every time an unknown node is found. * * Returns: 0 on success; - * 1 if the node should be marked obsolete; * negative error code on failure. 
*/ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un) @@ -1044,7 +1049,8 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf case JFFS2_NODETYPE_DIRENT: - if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) { + if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent) && + len < sizeof(struct jffs2_raw_dirent)) { err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf); if (unlikely(err)) goto free_out; @@ -1058,7 +1064,8 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf case JFFS2_NODETYPE_INODE: - if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) { + if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode) && + len < sizeof(struct jffs2_raw_inode)) { err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf); if (unlikely(err)) goto free_out; @@ -1071,17 +1078,15 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf break; default: - if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) { + if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node) && + len < sizeof(struct jffs2_unknown_node)) { err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf); if (unlikely(err)) goto free_out; } err = read_unknown(c, ref, &node->u); - if (err == 1) { - jffs2_mark_node_obsolete(c, ref); - break; - } else if (unlikely(err)) + if (unlikely(err)) goto free_out; } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 6488af43bc9b..e220d3bd610d 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -19,7 +19,7 @@ #include <linux/mount.h> #include <linux/jffs2.h> #include <linux/pagemap.h> -#include <linux/mtd/mtd.h> +#include <linux/mtd/super.h> #include <linux/ctype.h> #include <linux/namei.h> #include "compr.h" @@ -75,69 +75,27 @@ static const struct super_operations jffs2_super_operations = .sync_fs = jffs2_sync_fs, }; -static int 
jffs2_sb_compare(struct super_block *sb, void *data) -{ - struct jffs2_sb_info *p = data; - struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - - /* The superblocks are considered to be equivalent if the underlying MTD - device is the same one */ - if (c->mtd == p->mtd) { - D1(printk(KERN_DEBUG "jffs2_sb_compare: match on device %d (\"%s\")\n", p->mtd->index, p->mtd->name)); - return 1; - } else { - D1(printk(KERN_DEBUG "jffs2_sb_compare: No match, device %d (\"%s\"), device %d (\"%s\")\n", - c->mtd->index, c->mtd->name, p->mtd->index, p->mtd->name)); - return 0; - } -} - -static int jffs2_sb_set(struct super_block *sb, void *data) -{ - struct jffs2_sb_info *p = data; - - /* For persistence of NFS exports etc. we use the same s_dev - each time we mount the device, don't just use an anonymous - device */ - sb->s_fs_info = p; - p->os_priv = sb; - sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, p->mtd->index); - - return 0; -} - -static int jffs2_get_sb_mtd(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct mtd_info *mtd, - struct vfsmount *mnt) +/* + * fill in the superblock + */ +static int jffs2_fill_super(struct super_block *sb, void *data, int silent) { - struct super_block *sb; struct jffs2_sb_info *c; - int ret; + + D1(printk(KERN_DEBUG "jffs2_get_sb_mtd():" + " New superblock for device %d (\"%s\")\n", + sb->s_mtd->index, sb->s_mtd->name)); c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return -ENOMEM; - c->mtd = mtd; - - sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c); - - if (IS_ERR(sb)) - goto out_error; - - if (sb->s_root) { - /* New mountpoint for JFFS2 which is already mounted */ - D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n", - mtd->index, mtd->name)); - ret = simple_set_mnt(mnt, sb); - goto out_put; - } - D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n", - mtd->index, mtd->name)); + c->mtd = sb->s_mtd; + c->os_priv = sb; + sb->s_fs_info = c; - /* Initialize 
JFFS2 superblock locks, the further initialization will be - * done later */ + /* Initialize JFFS2 superblock locks, the further initialization will + * be done later */ init_MUTEX(&c->alloc_sem); init_MUTEX(&c->erase_free_sem); init_waitqueue_head(&c->erase_wait); @@ -146,133 +104,20 @@ static int jffs2_get_sb_mtd(struct file_system_type *fs_type, spin_lock_init(&c->inocache_lock); sb->s_op = &jffs2_super_operations; - sb->s_flags = flags | MS_NOATIME; + sb->s_flags = sb->s_flags | MS_NOATIME; sb->s_xattr = jffs2_xattr_handlers; #ifdef CONFIG_JFFS2_FS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif - ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); - - if (ret) { - /* Failure case... */ - up_write(&sb->s_umount); - deactivate_super(sb); - return ret; - } - - sb->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, sb); - -out_error: - ret = PTR_ERR(sb); - out_put: - kfree(c); - put_mtd_device(mtd); - - return ret; -} - -static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, int mtdnr, - struct vfsmount *mnt) -{ - struct mtd_info *mtd; - - mtd = get_mtd_device(NULL, mtdnr); - if (IS_ERR(mtd)) { - D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr)); - return PTR_ERR(mtd); - } - - return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); + return jffs2_do_fill_super(sb, data, silent); } static int jffs2_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - int err; - struct nameidata nd; - int mtdnr; - - if (!dev_name) - return -EINVAL; - - D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name)); - - /* The preferred way of mounting in future; especially when - CONFIG_BLK_DEV is implemented - we specify the underlying - MTD device by number or by name, so that we don't require - block device support to be present in the kernel. */ - - /* FIXME: How to do the root fs this way? 
*/ - - if (dev_name[0] == 'm' && dev_name[1] == 't' && dev_name[2] == 'd') { - /* Probably mounting without the blkdev crap */ - if (dev_name[3] == ':') { - struct mtd_info *mtd; - - /* Mount by MTD device name */ - D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd:%%s, name \"%s\"\n", dev_name+4)); - for (mtdnr = 0; mtdnr < MAX_MTD_DEVICES; mtdnr++) { - mtd = get_mtd_device(NULL, mtdnr); - if (!IS_ERR(mtd)) { - if (!strcmp(mtd->name, dev_name+4)) - return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); - put_mtd_device(mtd); - } - } - printk(KERN_NOTICE "jffs2_get_sb(): MTD device with name \"%s\" not found.\n", dev_name+4); - } else if (isdigit(dev_name[3])) { - /* Mount by MTD device number name */ - char *endptr; - - mtdnr = simple_strtoul(dev_name+3, &endptr, 0); - if (!*endptr) { - /* It was a valid number */ - D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr)); - return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt); - } - } - } - - /* Try the old way - the hack where we allowed users to mount - /dev/mtdblock$(n) but didn't actually _use_ the blkdev */ - - err = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); - - D1(printk(KERN_DEBUG "jffs2_get_sb(): path_lookup() returned %d, inode %p\n", - err, nd.dentry->d_inode)); - - if (err) - return err; - - err = -EINVAL; - - if (!S_ISBLK(nd.dentry->d_inode->i_mode)) - goto out; - - if (nd.mnt->mnt_flags & MNT_NODEV) { - err = -EACCES; - goto out; - } - - if (imajor(nd.dentry->d_inode) != MTD_BLOCK_MAJOR) { - if (!(flags & MS_SILENT)) - printk(KERN_NOTICE "Attempt to mount non-MTD device \"%s\" as JFFS2\n", - dev_name); - goto out; - } - - mtdnr = iminor(nd.dentry->d_inode); - path_release(&nd); - - return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt); - -out: - path_release(&nd); - return err; + return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super, + mnt); } static void jffs2_put_super (struct super_block *sb) @@ -307,8 +152,7 @@ static void 
jffs2_kill_sb(struct super_block *sb) struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); if (!(sb->s_flags & MS_RDONLY)) jffs2_stop_garbage_collect_thread(c); - generic_shutdown_super(sb); - put_mtd_device(c->mtd); + kill_mtd_super(sb); kfree(c); } diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 78fc08893a6c..e48665984cb3 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -754,6 +754,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c) list_del(&xd->xindex); jffs2_free_xattr_datum(xd); } + list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) { + list_del(&xd->xindex); + jffs2_free_xattr_datum(xd); + } } #define XREF_TMPHASH_SIZE (128) @@ -825,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) ref->xd and ref->ic are not valid yet. */ xd = jffs2_find_xattr_datum(c, ref->xid); ic = jffs2_get_ino_cache(c, ref->ino); - if (!xd || !ic) { + if (!xd || !ic || !ic->nlink) { dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", ref->ino, ref->xid, ref->xseqno); ref->xseqno |= XREF_DELETE_MARKER; diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h index 79494c4f2b10..fa92f7f1d0d0 100644 --- a/fs/jfs/endian24.h +++ b/fs/jfs/endian24.h @@ -29,7 +29,7 @@ __u32 __x = (x); \ ((__u32)( \ ((__x & (__u32)0x000000ffUL) << 16) | \ - (__x & (__u32)0x0000ff00UL) | \ + (__x & (__u32)0x0000ff00UL) | \ ((__x & (__u32)0x00ff0000UL) >> 16) )); \ }) diff --git a/fs/jfs/file.c b/fs/jfs/file.c index f7f8eff19b7b..87eb93694af7 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .fsync = jfs_fsync, diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 9c5d59632aac..887f5759e536 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -26,34 +26,6 @@ #include 
"jfs_filsys.h" #include "jfs_debug.h" -#ifdef CONFIG_JFS_DEBUG -void dump_mem(char *label, void *data, int length) -{ - int i, j; - int *intptr = data; - char *charptr = data; - char buf[10], line[80]; - - printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length, - data); - for (i = 0; i < length; i += 16) { - line[0] = 0; - for (j = 0; (j < 4) && (i + j * 4 < length); j++) { - sprintf(buf, " %08x", intptr[i / 4 + j]); - strcat(line, buf); - } - buf[0] = ' '; - buf[2] = 0; - for (j = 0; (j < 16) && (i + j < length); j++) { - buf[1] = - isprint(charptr[i + j]) ? charptr[i + j] : '.'; - strcat(line, buf); - } - printk("%s\n", line); - } -} -#endif - #ifdef PROC_FS_JFS /* see jfs_debug.h */ static struct proc_dir_entry *base; diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index 7378798f0b21..044c1e654cc0 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h @@ -62,7 +62,6 @@ extern void jfs_proc_clean(void); extern int jfsloglevel; -extern void dump_mem(char *label, void *data, int length); extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); /* information message: e.g., configuration, major event */ @@ -94,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); * --------- */ #else /* CONFIG_JFS_DEBUG */ -#define dump_mem(label,data,length) do {} while (0) #define ASSERT(p) do {} while (0) #define jfs_info(fmt, arg...) do {} while (0) #define jfs_debug(fmt, arg...) 
do {} while (0) diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 40b20111383c..c387540d3425 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -19,23 +19,23 @@ #define _H_JFS_DINODE /* - * jfs_dinode.h: on-disk inode manager + * jfs_dinode.h: on-disk inode manager */ -#define INODESLOTSIZE 128 -#define L2INODESLOTSIZE 7 -#define log2INODESIZE 9 /* log2(bytes per dinode) */ +#define INODESLOTSIZE 128 +#define L2INODESLOTSIZE 7 +#define log2INODESIZE 9 /* log2(bytes per dinode) */ /* - * on-disk inode : 512 bytes + * on-disk inode : 512 bytes * * note: align 64-bit fields on 8-byte boundary. */ struct dinode { /* - * I. base area (128 bytes) - * ------------------------ + * I. base area (128 bytes) + * ------------------------ * * define generic/POSIX attributes */ @@ -70,16 +70,16 @@ struct dinode { __le32 di_acltype; /* 4: Type of ACL */ /* - * Extension Areas. + * Extension Areas. * - * Historically, the inode was partitioned into 4 128-byte areas, - * the last 3 being defined as unions which could have multiple - * uses. The first 96 bytes had been completely unused until - * an index table was added to the directory. It is now more - * useful to describe the last 3/4 of the inode as a single - * union. We would probably be better off redesigning the - * entire structure from scratch, but we don't want to break - * commonality with OS/2's JFS at this time. + * Historically, the inode was partitioned into 4 128-byte areas, + * the last 3 being defined as unions which could have multiple + * uses. The first 96 bytes had been completely unused until + * an index table was added to the directory. It is now more + * useful to describe the last 3/4 of the inode as a single + * union. We would probably be better off redesigning the + * entire structure from scratch, but we don't want to break + * commonality with OS/2's JFS at this time. 
*/ union { struct { @@ -95,7 +95,7 @@ struct dinode { } _dir; /* (384) */ #define di_dirtable u._dir._table #define di_dtroot u._dir._dtroot -#define di_parent di_dtroot.header.idotdot +#define di_parent di_dtroot.header.idotdot #define di_DASD di_dtroot.header.DASD struct { @@ -127,14 +127,14 @@ struct dinode { #define di_inlinedata u._file._u2._special._u #define di_rdev u._file._u2._special._u._rdev #define di_fastsymlink u._file._u2._special._u._fastsymlink -#define di_inlineea u._file._u2._special._inlineea +#define di_inlineea u._file._u2._special._inlineea } u; }; /* extended mode bits (on-disk inode di_mode) */ -#define IFJOURNAL 0x00010000 /* journalled file */ -#define ISPARSE 0x00020000 /* sparse file enabled */ -#define INLINEEA 0x00040000 /* inline EA area free */ +#define IFJOURNAL 0x00010000 /* journalled file */ +#define ISPARSE 0x00020000 /* sparse file enabled */ +#define INLINEEA 0x00040000 /* inline EA area free */ #define ISWAPFILE 0x00800000 /* file open for pager swap space */ /* more extended mode bits: attributes for OS/2 */ diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index f3b1ebb22280..e1985066b1c6 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -154,12 +154,12 @@ static const s8 budtab[256] = { * the in-core descriptor is initialized from disk. * * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. + * ipbmap - pointer to in-core inode for the block map. * * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient memory - * -EIO - i/o error + * 0 - success + * -ENOMEM - insufficient memory + * -EIO - i/o error */ int dbMount(struct inode *ipbmap) { @@ -232,11 +232,11 @@ int dbMount(struct inode *ipbmap) * the memory for this descriptor is freed. * * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. + * ipbmap - pointer to in-core inode for the block map. 
* * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error */ int dbUnmount(struct inode *ipbmap, int mounterror) { @@ -320,13 +320,13 @@ int dbSync(struct inode *ipbmap) * at a time. * * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - starting block number to be freed. - * nblocks - number of blocks to be freed. + * ip - pointer to in-core inode; + * blkno - starting block number to be freed. + * nblocks - number of blocks to be freed. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error */ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) { @@ -395,23 +395,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) /* * NAME: dbUpdatePMap() * - * FUNCTION: update the allocation state (free or allocate) of the + * FUNCTION: update the allocation state (free or allocate) of the * specified block range in the persistent block allocation map. * * the blocks will be updated in the persistent map one * dmap at a time. * * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. - * free - 'true' if block range is to be freed from the persistent - * map; 'false' if it is to be allocated. - * blkno - starting block number of the range. - * nblocks - number of contiguous blocks in the range. - * tblk - transaction block; + * ipbmap - pointer to in-core inode for the block map. + * free - 'true' if block range is to be freed from the persistent + * map; 'false' if it is to be allocated. + * blkno - starting block number of the range. + * nblocks - number of contiguous blocks in the range. + * tblk - transaction block; * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error */ int dbUpdatePMap(struct inode *ipbmap, @@ -573,7 +573,7 @@ dbUpdatePMap(struct inode *ipbmap, /* * NAME: dbNextAG() * - * FUNCTION: find the preferred allocation group for new allocations. + * FUNCTION: find the preferred allocation group for new allocations. 
* * Within the allocation groups, we maintain a preferred * allocation group which consists of a group with at least @@ -589,10 +589,10 @@ dbUpdatePMap(struct inode *ipbmap, * empty ags around for large allocations. * * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. + * ipbmap - pointer to in-core inode for the block map. * * RETURN VALUES: - * the preferred allocation group number. + * the preferred allocation group number. */ int dbNextAG(struct inode *ipbmap) { @@ -656,7 +656,7 @@ unlock: /* * NAME: dbAlloc() * - * FUNCTION: attempt to allocate a specified number of contiguous free + * FUNCTION: attempt to allocate a specified number of contiguous free * blocks from the working allocation block map. * * the block allocation policy uses hints and a multi-step @@ -680,16 +680,16 @@ unlock: * size or requests that specify no hint value. * * PARAMETERS: - * ip - pointer to in-core inode; - * hint - allocation hint. - * nblocks - number of contiguous blocks in the range. - * results - on successful return, set to the starting block number + * ip - pointer to in-core inode; + * hint - allocation hint. + * nblocks - number of contiguous blocks in the range. + * results - on successful return, set to the starting block number * of the newly allocated contiguous range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error */ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) { @@ -706,12 +706,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) /* assert that nblocks is valid */ assert(nblocks > 0); -#ifdef _STILL_TO_PORT - /* DASD limit check F226941 */ - if (OVER_LIMIT(ip, nblocks)) - return -ENOSPC; -#endif /* _STILL_TO_PORT */ - /* get the log2 number of blocks to be allocated. * if the number of blocks is not a log2 multiple, * it will be rounded up to the next log2 multiple. 
@@ -720,7 +714,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) bmp = JFS_SBI(ip->i_sb)->bmap; -//retry: /* serialize w.r.t.extendfs() */ mapSize = bmp->db_mapsize; /* the hint should be within the map */ @@ -879,17 +872,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) /* * NAME: dbAllocExact() * - * FUNCTION: try to allocate the requested extent; + * FUNCTION: try to allocate the requested extent; * * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - extent address; - * nblocks - extent length; + * ip - pointer to in-core inode; + * blkno - extent address; + * nblocks - extent length; * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error */ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) { @@ -946,7 +939,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) /* * NAME: dbReAlloc() * - * FUNCTION: attempt to extend a current allocation by a specified + * FUNCTION: attempt to extend a current allocation by a specified * number of blocks. * * this routine attempts to satisfy the allocation request @@ -959,21 +952,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) * number of blocks required. * * PARAMETERS: - * ip - pointer to in-core inode requiring allocation. - * blkno - starting block of the current allocation. - * nblocks - number of contiguous blocks within the current + * ip - pointer to in-core inode requiring allocation. + * blkno - starting block of the current allocation. + * nblocks - number of contiguous blocks within the current * allocation. - * addnblocks - number of blocks to add to the allocation. - * results - on successful return, set to the starting block number + * addnblocks - number of blocks to add to the allocation. 
+ * results - on successful return, set to the starting block number * of the existing allocation if the existing allocation * was extended in place or to a newly allocated contiguous * range if the existing allocation could not be extended * in place. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error */ int dbReAlloc(struct inode *ip, @@ -1004,7 +997,7 @@ dbReAlloc(struct inode *ip, /* * NAME: dbExtend() * - * FUNCTION: attempt to extend a current allocation by a specified + * FUNCTION: attempt to extend a current allocation by a specified * number of blocks. * * this routine attempts to satisfy the allocation request @@ -1013,16 +1006,16 @@ dbReAlloc(struct inode *ip, * immediately following the current allocation. * * PARAMETERS: - * ip - pointer to in-core inode requiring allocation. - * blkno - starting block of the current allocation. - * nblocks - number of contiguous blocks within the current + * ip - pointer to in-core inode requiring allocation. + * blkno - starting block of the current allocation. + * nblocks - number of contiguous blocks within the current * allocation. - * addnblocks - number of blocks to add to the allocation. + * addnblocks - number of blocks to add to the allocation. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error */ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) { @@ -1109,19 +1102,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) /* * NAME: dbAllocNext() * - * FUNCTION: attempt to allocate the blocks of the specified block + * FUNCTION: attempt to allocate the blocks of the specified block * range within a dmap. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap. 
- * blkno - starting block number of the range. - * nblocks - number of contiguous free blocks of the range. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap. + * blkno - starting block number of the range. + * nblocks - number of contiguous free blocks of the range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) held on entry/exit; */ @@ -1233,7 +1226,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, /* * NAME: dbAllocNear() * - * FUNCTION: attempt to allocate a number of contiguous free blocks near + * FUNCTION: attempt to allocate a number of contiguous free blocks near * a specified block (hint) within a dmap. * * starting with the dmap leaf that covers the hint, we'll @@ -1242,18 +1235,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, * the desired free space. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap. - * blkno - block number to allocate near. - * nblocks - actual number of contiguous free blocks desired. - * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number + * bmp - pointer to bmap descriptor + * dp - pointer to dmap. + * blkno - block number to allocate near. + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number * of the newly allocated range. 
* * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) held on entry/exit; */ @@ -1316,7 +1309,7 @@ dbAllocNear(struct bmap * bmp, /* * NAME: dbAllocAG() * - * FUNCTION: attempt to allocate the specified number of contiguous + * FUNCTION: attempt to allocate the specified number of contiguous * free blocks within the specified allocation group. * * unless the allocation group size is equal to the number @@ -1353,17 +1346,17 @@ dbAllocNear(struct bmap * bmp, * the allocation group. * * PARAMETERS: - * bmp - pointer to bmap descriptor + * bmp - pointer to bmap descriptor * agno - allocation group number. - * nblocks - actual number of contiguous free blocks desired. - * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number * of the newly allocated range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * note: IWRITE_LOCK(ipmap) held on entry/exit; */ @@ -1546,7 +1539,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) /* * NAME: dbAllocAny() * - * FUNCTION: attempt to allocate the specified number of contiguous + * FUNCTION: attempt to allocate the specified number of contiguous * free blocks anywhere in the file system. * * dbAllocAny() attempts to find the sufficient free space by @@ -1556,16 +1549,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * desired free space is allocated. 
* * PARAMETERS: - * bmp - pointer to bmap descriptor - * nblocks - actual number of contiguous free blocks desired. - * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number + * bmp - pointer to bmap descriptor + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number * of the newly allocated range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -1598,9 +1591,9 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) /* * NAME: dbFindCtl() * - * FUNCTION: starting at a specified dmap control page level and block + * FUNCTION: starting at a specified dmap control page level and block * number, search down the dmap control levels for a range of - * contiguous free blocks large enough to satisfy an allocation + * contiguous free blocks large enough to satisfy an allocation * request for the specified number of free blocks. * * if sufficient contiguous free blocks are found, this routine @@ -1609,17 +1602,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) * is sufficient in size. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * level - starting dmap control page level. - * l2nb - log2 number of contiguous free blocks desired. - * *blkno - on entry, starting block number for conducting the search. + * bmp - pointer to bmap descriptor + * level - starting dmap control page level. + * l2nb - log2 number of contiguous free blocks desired. + * *blkno - on entry, starting block number for conducting the search. 
* on successful return, the first block within a dmap page * that contains or starts a range of contiguous free blocks. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -1699,7 +1692,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) /* * NAME: dbAllocCtl() * - * FUNCTION: attempt to allocate a specified number of contiguous + * FUNCTION: attempt to allocate a specified number of contiguous * blocks starting within a specific dmap. * * this routine is called by higher level routines that search @@ -1726,18 +1719,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) * first dmap (i.e. blkno). * * PARAMETERS: - * bmp - pointer to bmap descriptor - * nblocks - actual number of contiguous free blocks to allocate. - * l2nb - log2 number of contiguous free blocks to allocate. - * blkno - starting block number of the dmap to start the allocation + * bmp - pointer to bmap descriptor + * nblocks - actual number of contiguous free blocks to allocate. + * l2nb - log2 number of contiguous free blocks to allocate. + * blkno - starting block number of the dmap to start the allocation * from. - * results - on successful return, set to the starting block number + * results - on successful return, set to the starting block number * of the newly allocated range. 
* * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -1870,7 +1863,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) /* * NAME: dbAllocDmapLev() * - * FUNCTION: attempt to allocate a specified number of contiguous blocks + * FUNCTION: attempt to allocate a specified number of contiguous blocks * from a specified dmap. * * this routine checks if the contiguous blocks are available. @@ -1878,17 +1871,17 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) * returned. * * PARAMETERS: - * mp - pointer to bmap descriptor - * dp - pointer to dmap to attempt to allocate blocks from. - * l2nb - log2 number of contiguous block desired. - * nblocks - actual number of contiguous block desired. - * results - on successful return, set to the starting block number + * mp - pointer to bmap descriptor + * dp - pointer to dmap to attempt to allocate blocks from. + * l2nb - log2 number of contiguous block desired. + * nblocks - actual number of contiguous block desired. + * results - on successful return, set to the starting block number * of the newly allocated range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; @@ -1933,7 +1926,7 @@ dbAllocDmapLev(struct bmap * bmp, /* * NAME: dbAllocDmap() * - * FUNCTION: adjust the disk allocation map to reflect the allocation + * FUNCTION: adjust the disk allocation map to reflect the allocation * of a specified block range within a dmap. 
* * this routine allocates the specified blocks from the dmap @@ -1946,14 +1939,14 @@ dbAllocDmapLev(struct bmap * bmp, * covers this dmap. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to allocate the block range from. - * blkno - starting block number of the block to be allocated. - * nblocks - number of blocks to be allocated. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to allocate the block range from. + * blkno - starting block number of the block to be allocated. + * nblocks - number of blocks to be allocated. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -1989,7 +1982,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, /* * NAME: dbFreeDmap() * - * FUNCTION: adjust the disk allocation map to reflect the allocation + * FUNCTION: adjust the disk allocation map to reflect the allocation * of a specified block range within a dmap. * * this routine frees the specified blocks from the dmap through @@ -1997,18 +1990,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, * causes the maximum string of free blocks within the dmap to * change (i.e. the value of the root of the dmap's dmtree), this * routine will cause this change to be reflected up through the - * appropriate levels of the dmap control pages by a call to + * appropriate levels of the dmap control pages by a call to * dbAdjCtl() for the L0 dmap control page that covers this dmap. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to free the block range from. - * blkno - starting block number of the block to be freed. - * nblocks - number of blocks to be freed. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to free the block range from. + * blkno - starting block number of the block to be freed. 
+ * nblocks - number of blocks to be freed. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -2055,7 +2048,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, /* * NAME: dbAllocBits() * - * FUNCTION: allocate a specified block range from a dmap. + * FUNCTION: allocate a specified block range from a dmap. * * this routine updates the dmap to reflect the working * state allocation of the specified block range. it directly @@ -2065,10 +2058,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, * dmap's dmtree, as a whole, to reflect the allocated range. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to allocate bits from. - * blkno - starting block number of the bits to be allocated. - * nblocks - number of bits to be allocated. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to allocate bits from. + * blkno - starting block number of the bits to be allocated. + * nblocks - number of bits to be allocated. * * RETURN VALUES: none * @@ -2149,7 +2142,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * the allocated words. */ for (; nwords > 0; nwords -= nw) { - if (leaf[word] < BUDMIN) { + if (leaf[word] < BUDMIN) { jfs_error(bmp->db_ipbmap->i_sb, "dbAllocBits: leaf page " "corrupt"); @@ -2202,7 +2195,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, /* * NAME: dbFreeBits() * - * FUNCTION: free a specified block range from a dmap. + * FUNCTION: free a specified block range from a dmap. * * this routine updates the dmap to reflect the working * state allocation of the specified block range. it directly @@ -2212,10 +2205,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * dmtree, as a whole, to reflect the deallocated range. 
* * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to free bits from. - * blkno - starting block number of the bits to be freed. - * nblocks - number of bits to be freed. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to free bits from. + * blkno - starting block number of the bits to be freed. + * nblocks - number of bits to be freed. * * RETURN VALUES: 0 for success * @@ -2388,19 +2381,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * the new root value and the next dmap control page level to * be adjusted. * PARAMETERS: - * bmp - pointer to bmap descriptor - * blkno - the first block of a block range within a dmap. it is + * bmp - pointer to bmap descriptor + * blkno - the first block of a block range within a dmap. it is * the allocation or deallocation of this block range that * requires the dmap control page to be adjusted. - * newval - the new value of the lower level dmap or dmap control + * newval - the new value of the lower level dmap or dmap control * page root. - * alloc - 'true' if adjustment is due to an allocation. - * level - current level of dmap control page (i.e. L0, L1, L2) to + * alloc - 'true' if adjustment is due to an allocation. + * level - current level of dmap control page (i.e. L0, L1, L2) to * be adjusted. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -2544,16 +2537,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) /* * NAME: dbSplit() * - * FUNCTION: update the leaf of a dmtree with a new value, splitting + * FUNCTION: update the leaf of a dmtree with a new value, splitting * the leaf from the binary buddy system of the dmtree's * leaves, as required. * * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. 
- * splitsz - the size the binary buddy system starting at the leaf + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * splitsz - the size the binary buddy system starting at the leaf * must be split to, specified as the log2 number of blocks. - * newval - the new value for the leaf. + * newval - the new value for the leaf. * * RETURN VALUES: none * @@ -2600,7 +2593,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) /* * NAME: dbBackSplit() * - * FUNCTION: back split the binary buddy system of dmtree leaves + * FUNCTION: back split the binary buddy system of dmtree leaves * that hold a specified leaf until the specified leaf * starts its own binary buddy system. * @@ -2617,8 +2610,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) * in which a previous join operation must be backed out. * * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. * * RETURN VALUES: none * @@ -2692,14 +2685,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno) /* * NAME: dbJoin() * - * FUNCTION: update the leaf of a dmtree with a new value, joining + * FUNCTION: update the leaf of a dmtree with a new value, joining * the leaf with other leaves of the dmtree into a multi-leaf * binary buddy system, as required. * * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. - * newval - the new value for the leaf. + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * newval - the new value for the leaf. 
* * RETURN VALUES: none */ @@ -2785,15 +2778,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) /* * NAME: dbAdjTree() * - * FUNCTION: update a leaf of a dmtree with a new value, adjusting + * FUNCTION: update a leaf of a dmtree with a new value, adjusting * the dmtree, as required, to reflect the new leaf value. * the combination of any buddies must already be done before * this is called. * * PARAMETERS: - * tp - pointer to the tree to be adjusted. - * leafno - the number of the leaf to be updated. - * newval - the new value for the leaf. + * tp - pointer to the tree to be adjusted. + * leafno - the number of the leaf to be updated. + * newval - the new value for the leaf. * * RETURN VALUES: none */ @@ -2852,7 +2845,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) /* * NAME: dbFindLeaf() * - * FUNCTION: search a dmtree_t for sufficient free blocks, returning + * FUNCTION: search a dmtree_t for sufficient free blocks, returning * the index of a leaf describing the free blocks if * sufficient free blocks are found. * @@ -2861,15 +2854,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) * free space. * * PARAMETERS: - * tp - pointer to the tree to be searched. - * l2nb - log2 number of free blocks to search for. + * tp - pointer to the tree to be searched. + * l2nb - log2 number of free blocks to search for. * leafidx - return pointer to be set to the index of the leaf * describing at least l2nb free blocks if sufficient * free blocks are found. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient free blocks. + * 0 - success + * -ENOSPC - insufficient free blocks. */ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) { @@ -2916,18 +2909,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) /* * NAME: dbFindBits() * - * FUNCTION: find a specified number of binary buddy free bits within a + * FUNCTION: find a specified number of binary buddy free bits within a * dmap bitmap word value. 
* * this routine searches the bitmap value for (1 << l2nb) free * bits at (1 << l2nb) alignments within the value. * * PARAMETERS: - * word - dmap bitmap word value. - * l2nb - number of free bits specified as a log2 number. + * word - dmap bitmap word value. + * l2nb - number of free bits specified as a log2 number. * * RETURN VALUES: - * starting bit number of free bits. + * starting bit number of free bits. */ static int dbFindBits(u32 word, int l2nb) { @@ -2963,14 +2956,14 @@ static int dbFindBits(u32 word, int l2nb) /* * NAME: dbMaxBud(u8 *cp) * - * FUNCTION: determine the largest binary buddy string of free + * FUNCTION: determine the largest binary buddy string of free * bits within 32-bits of the map. * * PARAMETERS: - * cp - pointer to the 32-bit value. + * cp - pointer to the 32-bit value. * * RETURN VALUES: - * largest binary buddy of free bits within a dmap word. + * largest binary buddy of free bits within a dmap word. */ static int dbMaxBud(u8 * cp) { @@ -3000,14 +2993,14 @@ static int dbMaxBud(u8 * cp) /* * NAME: cnttz(uint word) * - * FUNCTION: determine the number of trailing zeros within a 32-bit + * FUNCTION: determine the number of trailing zeros within a 32-bit * value. * * PARAMETERS: - * value - 32-bit value to be examined. + * value - 32-bit value to be examined. * * RETURN VALUES: - * count of trailing zeros + * count of trailing zeros */ static int cnttz(u32 word) { @@ -3025,14 +3018,14 @@ static int cnttz(u32 word) /* * NAME: cntlz(u32 value) * - * FUNCTION: determine the number of leading zeros within a 32-bit + * FUNCTION: determine the number of leading zeros within a 32-bit * value. * * PARAMETERS: - * value - 32-bit value to be examined. + * value - 32-bit value to be examined. * * RETURN VALUES: - * count of leading zeros + * count of leading zeros */ static int cntlz(u32 value) { @@ -3050,14 +3043,14 @@ static int cntlz(u32 value) * NAME: blkstol2(s64 nb) * * FUNCTION: convert a block count to its log2 value. 
if the block - * count is not a l2 multiple, it is rounded up to the next + * count is not a l2 multiple, it is rounded up to the next * larger l2 multiple. * * PARAMETERS: - * nb - number of blocks + * nb - number of blocks * * RETURN VALUES: - * log2 number of blocks + * log2 number of blocks */ static int blkstol2(s64 nb) { @@ -3099,13 +3092,13 @@ static int blkstol2(s64 nb) * at a time. * * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - starting block number to be freed. - * nblocks - number of blocks to be freed. + * ip - pointer to in-core inode; + * blkno - starting block number to be freed. + * nblocks - number of blocks to be freed. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error */ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) { @@ -3278,10 +3271,10 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, * L2 * | * L1---------------------------------L1 - * | | - * L0---------L0---------L0 L0---------L0---------L0 - * | | | | | | - * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; + * | | + * L0---------L0---------L0 L0---------L0---------L0 + * | | | | | | + * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm * * <---old---><----------------------------extend-----------------------> @@ -3307,7 +3300,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) (long long) blkno, (long long) nblocks, (long long) newsize); /* - * initialize bmap control page. + * initialize bmap control page. * * all the data in bmap control page should exclude * the mkfs hidden dmap page. @@ -3330,7 +3323,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 
1 : 0; /* - * reconfigure db_agfree[] + * reconfigure db_agfree[] * from old AG configuration to new AG configuration; * * coalesce contiguous k (newAGSize/oldAGSize) AGs; @@ -3362,7 +3355,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) bmp->db_maxag = bmp->db_maxag / k; /* - * extend bmap + * extend bmap * * update bit maps and corresponding level control pages; * global control page db_nfree, db_agfree[agno], db_maxfreebud; @@ -3410,7 +3403,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) /* compute start L0 */ j = 0; l1leaf = l1dcp->stree + CTLLEAFIND; - p += nbperpage; /* 1st L0 of L1.k */ + p += nbperpage; /* 1st L0 of L1.k */ } /* @@ -3548,7 +3541,7 @@ errout: return -EIO; /* - * finalize bmap control page + * finalize bmap control page */ finalize: @@ -3567,7 +3560,7 @@ void dbFinalizeBmap(struct inode *ipbmap) int i, n; /* - * finalize bmap control page + * finalize bmap control page */ //finalize: /* @@ -3953,8 +3946,8 @@ static int dbGetL2AGSize(s64 nblocks) * convert number of map pages to the zero origin top dmapctl level */ #define BMAPPGTOLEV(npages) \ - (((npages) <= 3 + MAXL0PAGES) ? 0 \ - : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) + (((npages) <= 3 + MAXL0PAGES) ? 0 : \ + ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) s64 dbMapFileSizeToMapSize(struct inode * ipbmap) { @@ -3981,8 +3974,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap) factor = (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); complete = (u32) npages / factor; - ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL - : ((i == 1) ? LPERCTL : 1)); + ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL : + ((i == 1) ? 
LPERCTL : 1)); /* pages in last/incomplete child */ npages = (u32) npages % factor; diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 45ea454c74bd..11e6d471b364 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp) * - 1 is added to account for the control page of the map. */ #define BLKTODMAP(b,s) \ - ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) + ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) /* * convert disk block number to the logical block number of the LEVEL 0 @@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp) * - 1 is added to account for the control page of the map. */ #define BLKTOL0(b,s) \ - (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) + (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) /* * convert disk block number to the logical block number of the LEVEL 1 @@ -120,7 +120,7 @@ static __inline signed char TREEMAX(signed char *cp) * at the specified level which describes the disk block. */ #define BLKTOCTL(b,s,l) \ - (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) + (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) /* * convert aggregate map size to the zero origin dmapctl level of the @@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp) * dmaptree must be consistent with dmapctl. 
*/ struct dmaptree { - __le32 nleafs; /* 4: number of tree leafs */ - __le32 l2nleafs; /* 4: l2 number of tree leafs */ - __le32 leafidx; /* 4: index of first tree leaf */ - __le32 height; /* 4: height of the tree */ + __le32 nleafs; /* 4: number of tree leafs */ + __le32 l2nleafs; /* 4: l2 number of tree leafs */ + __le32 leafidx; /* 4: index of first tree leaf */ + __le32 height; /* 4: height of the tree */ s8 budmin; /* 1: min l2 tree leaf value to combine */ - s8 stree[TREESIZE]; /* TREESIZE: tree */ - u8 pad[2]; /* 2: pad to word boundary */ -}; /* - 360 - */ + s8 stree[TREESIZE]; /* TREESIZE: tree */ + u8 pad[2]; /* 2: pad to word boundary */ +}; /* - 360 - */ /* * dmap page per 8K blocks bitmap */ struct dmap { - __le32 nblocks; /* 4: num blks covered by this dmap */ - __le32 nfree; /* 4: num of free blks in this dmap */ - __le64 start; /* 8: starting blkno for this dmap */ - struct dmaptree tree; /* 360: dmap tree */ - u8 pad[1672]; /* 1672: pad to 2048 bytes */ - __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ - __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ -}; /* - 4096 - */ + __le32 nblocks; /* 4: num blks covered by this dmap */ + __le32 nfree; /* 4: num of free blks in this dmap */ + __le64 start; /* 8: starting blkno for this dmap */ + struct dmaptree tree; /* 360: dmap tree */ + u8 pad[1672]; /* 1672: pad to 2048 bytes */ + __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ + __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ +}; /* - 4096 - */ /* * disk map control page per level. @@ -173,14 +173,14 @@ struct dmap { * dmapctl must be consistent with dmaptree. 
*/ struct dmapctl { - __le32 nleafs; /* 4: number of tree leafs */ - __le32 l2nleafs; /* 4: l2 number of tree leafs */ - __le32 leafidx; /* 4: index of the first tree leaf */ - __le32 height; /* 4: height of tree */ - s8 budmin; /* 1: minimum l2 tree leaf value */ - s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ - u8 pad[2714]; /* 2714: pad to 4096 */ -}; /* - 4096 - */ + __le32 nleafs; /* 4: number of tree leafs */ + __le32 l2nleafs; /* 4: l2 number of tree leafs */ + __le32 leafidx; /* 4: index of the first tree leaf */ + __le32 height; /* 4: height of tree */ + s8 budmin; /* 1: minimum l2 tree leaf value */ + s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ + u8 pad[2714]; /* 2714: pad to 4096 */ +}; /* - 4096 - */ /* * common definition for dmaptree within dmap and dmapctl @@ -202,41 +202,41 @@ typedef union dmtree { * on-disk aggregate disk allocation map descriptor. */ struct dbmap_disk { - __le64 dn_mapsize; /* 8: number of blocks in aggregate */ - __le64 dn_nfree; /* 8: num free blks in aggregate map */ - __le32 dn_l2nbperpage; /* 4: number of blks per page */ - __le32 dn_numag; /* 4: total number of ags */ - __le32 dn_maxlevel; /* 4: number of active ags */ - __le32 dn_maxag; /* 4: max active alloc group number */ - __le32 dn_agpref; /* 4: preferred alloc group (hint) */ - __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ - __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ - __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ - __le32 dn_agstart; /* 4: start tree index at AG height */ - __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ - __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ - __le64 dn_agsize; /* 8: num of blks per alloc group */ - s8 dn_maxfreebud; /* 1: max free buddy system */ - u8 pad[3007]; /* 3007: pad to 4096 */ -}; /* - 4096 - */ + __le64 dn_mapsize; /* 8: number of blocks in aggregate */ + __le64 dn_nfree; /* 8: num free blks in aggregate map */ + __le32 dn_l2nbperpage; /* 4: number of 
blks per page */ + __le32 dn_numag; /* 4: total number of ags */ + __le32 dn_maxlevel; /* 4: number of active ags */ + __le32 dn_maxag; /* 4: max active alloc group number */ + __le32 dn_agpref; /* 4: preferred alloc group (hint) */ + __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ + __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ + __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ + __le32 dn_agstart; /* 4: start tree index at AG height */ + __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ + __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ + __le64 dn_agsize; /* 8: num of blks per alloc group */ + s8 dn_maxfreebud; /* 1: max free buddy system */ + u8 pad[3007]; /* 3007: pad to 4096 */ +}; /* - 4096 - */ struct dbmap { - s64 dn_mapsize; /* number of blocks in aggregate */ - s64 dn_nfree; /* num free blks in aggregate map */ - int dn_l2nbperpage; /* number of blks per page */ - int dn_numag; /* total number of ags */ - int dn_maxlevel; /* number of active ags */ - int dn_maxag; /* max active alloc group number */ - int dn_agpref; /* preferred alloc group (hint) */ - int dn_aglevel; /* dmapctl level holding the AG */ - int dn_agheigth; /* height in dmapctl of the AG */ - int dn_agwidth; /* width in dmapctl of the AG */ - int dn_agstart; /* start tree index at AG height */ - int dn_agl2size; /* l2 num of blks per alloc group */ - s64 dn_agfree[MAXAG]; /* per AG free count */ - s64 dn_agsize; /* num of blks per alloc group */ - signed char dn_maxfreebud; /* max free buddy system */ -}; /* - 4096 - */ + s64 dn_mapsize; /* number of blocks in aggregate */ + s64 dn_nfree; /* num free blks in aggregate map */ + int dn_l2nbperpage; /* number of blks per page */ + int dn_numag; /* total number of ags */ + int dn_maxlevel; /* number of active ags */ + int dn_maxag; /* max active alloc group number */ + int dn_agpref; /* preferred alloc group (hint) */ + int dn_aglevel; /* dmapctl level holding the AG */ + int dn_agheigth; /* height in 
dmapctl of the AG */ + int dn_agwidth; /* width in dmapctl of the AG */ + int dn_agstart; /* start tree index at AG height */ + int dn_agl2size; /* l2 num of blks per alloc group */ + s64 dn_agfree[MAXAG]; /* per AG free count */ + s64 dn_agsize; /* num of blks per alloc group */ + signed char dn_maxfreebud; /* max free buddy system */ +}; /* - 4096 - */ /* * in-memory aggregate disk allocation map descriptor. */ diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 6d62f3222892..c14ba3cfa818 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -315,8 +315,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, lv = &llck->lv[llck->index]; /* - * Linelock slot size is twice the size of directory table - * slot size. 512 entries per page. + * Linelock slot size is twice the size of directory table + * slot size. 512 entries per page. */ lv->offset = ((index - 2) & 511) >> 1; lv->length = 1; @@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, btstack->nsplit = 1; /* - * search down tree from root: + * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. @@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, } if (cmp == 0) { /* - * search hit + * search hit */ /* search hit - leaf page: * return the entry found @@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, } /* - * search miss + * search miss * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or (maxindex + 1) index. @@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip, struct lv *lv; /* - * retrieve search result + * retrieve search result * * dtSearch() returns (leaf page pinned, index at which to insert). * n.b. 
dtSearch() may return index of (maxindex + 1) of @@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip, DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); /* - * insert entry for new key + * insert entry for new key */ if (DO_INDEX(ip)) { if (JFS_IP(ip)->next_index == DIREND) { @@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip, data.leaf.ino = *fsn; /* - * leaf page does not have enough room for new entry: + * leaf page does not have enough room for new entry: * - * extend/split the leaf page; + * extend/split the leaf page; * * dtSplitUp() will insert the entry and unpin the leaf page. */ @@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip, } /* - * leaf page does have enough room for new entry: + * leaf page does have enough room for new entry: * - * insert the new data entry into the leaf page; + * insert the new data entry into the leaf page; */ BT_MARK_DIRTY(mp, ip); /* @@ -967,13 +967,13 @@ static int dtSplitUp(tid_t tid, } /* - * split leaf page + * split leaf page * * The split routines insert the new entry, and * acquire txLock as appropriate. */ /* - * split root leaf page: + * split root leaf page: */ if (sp->header.flag & BT_ROOT) { /* @@ -1012,7 +1012,7 @@ static int dtSplitUp(tid_t tid, } /* - * extend first leaf page + * extend first leaf page * * extend the 1st extent if less than buffer page size * (dtExtendPage() reurns leaf page unpinned) @@ -1068,7 +1068,7 @@ static int dtSplitUp(tid_t tid, } /* - * split leaf page <sp> into <sp> and a new right page <rp>. + * split leaf page <sp> into <sp> and a new right page <rp>. 
* * return <rp> pinned and its extent descriptor <rpxd> */ @@ -1433,7 +1433,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, rp->header.freecnt = rp->header.maxslot - fsi; /* - * sequential append at tail: append without split + * sequential append at tail: append without split * * If splitting the last page on a level because of appending * a entry to it (skip is maxentry), it's likely that the access is @@ -1467,7 +1467,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, } /* - * non-sequential insert (at possibly middle page) + * non-sequential insert (at possibly middle page) */ /* @@ -1508,7 +1508,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, left = 0; /* - * compute fill factor for split pages + * compute fill factor for split pages * * <nxt> traces the next entry to move to rp * <off> traces the next entry to stay in sp @@ -1551,7 +1551,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, /* <nxt> poins to the 1st entry to move */ /* - * move entries to right page + * move entries to right page * * dtMoveEntry() initializes rp and reserves entry for insertion * @@ -1677,7 +1677,7 @@ static int dtExtendPage(tid_t tid, return (rc); /* - * extend the extent + * extend the extent */ pxdlist = split->pxdlist; pxd = &pxdlist->pxd[pxdlist->npxd]; @@ -1722,7 +1722,7 @@ static int dtExtendPage(tid_t tid, } /* - * extend the page + * extend the page */ sp->header.self = *pxd; @@ -1739,9 +1739,6 @@ static int dtExtendPage(tid_t tid, /* update buffer extent descriptor of extended page */ xlen = lengthPXD(pxd); xsize = xlen << JFS_SBI(sb)->l2bsize; -#ifdef _STILL_TO_PORT - bmSetXD(smp, xaddr, xsize); -#endif /* _STILL_TO_PORT */ /* * copy old stbl to new stbl at start of extended area @@ -1836,7 +1833,7 @@ static int dtExtendPage(tid_t tid, } /* - * update parent entry on the parent/root page + * update parent entry on the parent/root page */ /* * 
acquire a transaction lock on the parent/root page @@ -1904,7 +1901,7 @@ static int dtSplitRoot(tid_t tid, sp = &JFS_IP(ip)->i_dtroot; /* - * allocate/initialize a single (right) child page + * allocate/initialize a single (right) child page * * N.B. at first split, a one (or two) block to fit new entry * is allocated; at subsequent split, a full page is allocated; @@ -1943,7 +1940,7 @@ static int dtSplitRoot(tid_t tid, rp->header.prev = 0; /* - * move in-line root page into new right page extent + * move in-line root page into new right page extent */ /* linelock header + copied entries + new stbl (1st slot) in new page */ ASSERT(dtlck->index == 0); @@ -2016,7 +2013,7 @@ static int dtSplitRoot(tid_t tid, dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); /* - * reset parent/root page + * reset parent/root page * * set the 1st entry offset to 0, which force the left-most key * at any level of the tree to be less than any search key. @@ -2102,7 +2099,7 @@ int dtDelete(tid_t tid, dtpage_t *np; /* - * search for the entry to delete: + * search for the entry to delete: * * dtSearch() returns (leaf page pinned, index at which to delete). */ @@ -2253,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, int i; /* - * keep the root leaf page which has become empty + * keep the root leaf page which has become empty */ if (BT_IS_ROOT(fmp)) { /* @@ -2269,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, } /* - * free the non-root leaf page + * free the non-root leaf page */ /* * acquire a transaction lock on the page @@ -2299,7 +2296,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, discard_metapage(fmp); /* - * propagate page deletion up the directory tree + * propagate page deletion up the directory tree * * If the delete from the parent page makes it empty, * continue all the way up the tree. 
@@ -2440,10 +2437,10 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, #ifdef _NOTYET /* - * NAME: dtRelocate() + * NAME: dtRelocate() * - * FUNCTION: relocate dtpage (internal or leaf) of directory; - * This function is mainly used by defragfs utility. + * FUNCTION: relocate dtpage (internal or leaf) of directory; + * This function is mainly used by defragfs utility. */ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, s64 nxaddr) @@ -2471,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, xlen); /* - * 1. get the internal parent dtpage covering - * router entry for the tartget page to be relocated; + * 1. get the internal parent dtpage covering + * router entry for the tartget page to be relocated; */ rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); if (rc) @@ -2483,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, jfs_info("dtRelocate: parent router entry validated."); /* - * 2. relocate the target dtpage + * 2. relocate the target dtpage */ /* read in the target page from src extent */ DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); @@ -2581,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, /* update the buffer extent descriptor of the dtpage */ xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; -#ifdef _STILL_TO_PORT - bmSetXD(mp, nxaddr, xsize); -#endif /* _STILL_TO_PORT */ + /* unpin the relocated page */ DT_PUTPAGE(mp); jfs_info("dtRelocate: target dtpage relocated."); @@ -2594,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, */ /* - * 3. acquire maplock for the source extent to be freed; + * 3. acquire maplock for the source extent to be freed; */ /* for dtpage relocation, write a LOG_NOREDOPAGE record * for the source dtpage (logredo() will init NoRedoPage @@ -2609,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, pxdlock->index = 1; /* - * 4. 
update the parent router entry for relocation; + * 4. update the parent router entry for relocation; * * acquire tlck for the parent entry covering the target dtpage; * write LOG_REDOPAGE to apply after image only; @@ -2637,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, * NAME: dtSearchNode() * * FUNCTION: Search for an dtpage containing a specified address - * This function is mainly used by defragfs utility. + * This function is mainly used by defragfs utility. * * NOTE: Search result on stack, the found page is pinned at exit. * The result page must be an internal dtpage. @@ -2660,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, BT_CLR(btstack); /* reset stack */ /* - * descend tree to the level with specified leftmost page + * descend tree to the level with specified leftmost page * * by convention, root bn = 0. */ @@ -2699,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, } /* - * search each page at the current levevl + * search each page at the current levevl */ loop: stbl = DT_GETSTBL(p); @@ -3044,9 +3039,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (DO_INDEX(ip)) { /* * persistent index is stored in directory entries. - * Special cases: 0 = . - * 1 = .. - * -1 = End of directory + * Special cases: 0 = . + * 1 = .. + * -1 = End of directory */ do_index = 1; @@ -3128,10 +3123,10 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." - * pn > 0: Real entries, pn=1 -> leftmost page - * pn = index = -1: No more entries + * pn = index = 0: First entry "." + * pn = 0; index = 1: Second entry ".." 
+ * pn > 0: Real entries, pn=1 -> leftmost page + * pn = index = -1: No more entries */ dtpos = filp->f_pos; if (dtpos == 0) { @@ -3351,7 +3346,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack) BT_CLR(btstack); /* reset stack */ /* - * descend leftmost path of the tree + * descend leftmost path of the tree * * by convention, root bn = 0. */ @@ -4531,7 +4526,7 @@ int dtModify(tid_t tid, struct inode *ip, struct ldtentry *entry; /* - * search for the entry to modify: + * search for the entry to modify: * * dtSearch() returns (leaf page pinned, index at which to modify). */ diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index af8513f78648..8561c6ecece0 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h @@ -35,7 +35,7 @@ typedef union { /* - * entry segment/slot + * entry segment/slot * * an entry consists of type dependent head/only segment/slot and * additional segments/slots linked vi next field; diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index a35bdca6a805..7ae1e3281de9 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *); #endif static s64 extRoundDown(s64 nb); -#define DPD(a) (printk("(a): %d\n",(a))) -#define DPC(a) (printk("(a): %c\n",(a))) +#define DPD(a) (printk("(a): %d\n",(a))) +#define DPC(a) (printk("(a): %c\n",(a))) #define DPL1(a) \ { \ if ((a) >> 32) \ @@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb); printk("(a): %x\n",(a) << 32); \ } -#define DPD1(a) (printk("(a): %d ",(a))) -#define DPX(a) (printk("(a): %08x\n",(a))) -#define DPX1(a) (printk("(a): %08x ",(a))) -#define DPS(a) (printk("%s\n",(a))) -#define DPE(a) (printk("\nENTERING: %s\n",(a))) -#define DPE1(a) (printk("\nENTERING: %s",(a))) -#define DPS1(a) (printk(" %s ",(a))) +#define DPD1(a) (printk("(a): %d ",(a))) +#define DPX(a) (printk("(a): %08x\n",(a))) +#define DPX1(a) (printk("(a): %08x ",(a))) +#define DPS(a) (printk("%s\n",(a))) +#define DPE(a) 
(printk("\nENTERING: %s\n",(a))) +#define DPE1(a) (printk("\nENTERING: %s",(a))) +#define DPS1(a) (printk(" %s ",(a))) /* * NAME: extAlloc() * - * FUNCTION: allocate an extent for a specified page range within a + * FUNCTION: allocate an extent for a specified page range within a * file. * * PARAMETERS: @@ -78,9 +78,9 @@ static s64 extRoundDown(s64 nb); * should be marked as allocated but not recorded. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ int extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) @@ -192,9 +192,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) #ifdef _NOTYET /* - * NAME: extRealloc() + * NAME: extRealloc() * - * FUNCTION: extend the allocation of a file extent containing a + * FUNCTION: extend the allocation of a file extent containing a * partial back last page. * * PARAMETERS: @@ -207,9 +207,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) * should be marked as allocated but not recorded. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) { @@ -345,9 +345,9 @@ exit: /* - * NAME: extHint() + * NAME: extHint() * - * FUNCTION: produce an extent allocation hint for a file offset. + * FUNCTION: produce an extent allocation hint for a file offset. * * PARAMETERS: * ip - the inode of the file. @@ -356,8 +356,8 @@ exit: * the hint. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. + * 0 - success + * -EIO - i/o error. 
*/ int extHint(struct inode *ip, s64 offset, xad_t * xp) { @@ -387,7 +387,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) lxdl.nlxd = 1; lxdl.lxd = &lxd; LXDoffset(&lxd, prev) - LXDlength(&lxd, nbperpage); + LXDlength(&lxd, nbperpage); xadl.maxnxad = 1; xadl.nxad = 0; @@ -397,11 +397,11 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) return (rc); - /* check if not extent exists for the previous page. + /* check if no extent exists for the previous page. * this is possible for sparse files. */ if (xadl.nxad == 0) { -// assert(ISSPARSE(ip)); +// assert(ISSPARSE(ip)); return (0); } @@ -410,28 +410,28 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) */ xp->flag &= XAD_NOTRECORDED; - if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { + if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { jfs_error(ip->i_sb, "extHint: corrupt xtree"); return -EIO; - } + } return (0); } /* - * NAME: extRecord() + * NAME: extRecord() * - * FUNCTION: change a page with a file from not recorded to recorded. + * FUNCTION: change a page with a file from not recorded to recorded. * * PARAMETERS: * ip - inode of the file. * cp - cbuf of the file page. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ int extRecord(struct inode *ip, xad_t * xp) { @@ -451,9 +451,9 @@ int extRecord(struct inode *ip, xad_t * xp) #ifdef _NOTYET /* - * NAME: extFill() + * NAME: extFill() * - * FUNCTION: allocate disk space for a file page that represents + * FUNCTION: allocate disk space for a file page that represents * a file hole. * * PARAMETERS: @@ -461,16 +461,16 @@ int extRecord(struct inode *ip, xad_t * xp) * cp - cbuf of the file page represent the hole. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. 
+ * -ENOSPC - insufficient disk resources. */ int extFill(struct inode *ip, xad_t * xp) { int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; s64 blkno = offsetXAD(xp) >> ip->i_blkbits; -// assert(ISSPARSE(ip)); +// assert(ISSPARSE(ip)); /* initialize the extent allocation hint */ XADaddress(xp, 0); @@ -489,7 +489,7 @@ int extFill(struct inode *ip, xad_t * xp) /* * NAME: extBalloc() * - * FUNCTION: allocate disk blocks to form an extent. + * FUNCTION: allocate disk blocks to form an extent. * * initially, we will try to allocate disk blocks for the * requested size (nblocks). if this fails (nblocks @@ -513,9 +513,9 @@ int extFill(struct inode *ip, xad_t * xp) * allocated block range. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ static int extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) @@ -580,7 +580,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) /* * NAME: extBrealloc() * - * FUNCTION: attempt to extend an extent's allocation. + * FUNCTION: attempt to extend an extent's allocation. * * Initially, we will try to extend the extent's allocation * in place. If this fails, we'll try to move the extent @@ -597,8 +597,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) * * PARAMETERS: * ip - the inode of the file. - * blkno - starting block number of the extents current allocation. - * nblks - number of blocks within the extents current allocation. + * blkno - starting block number of the extents current allocation. + * nblks - number of blocks within the extents current allocation. * newnblks - pointer to a s64 value. on entry, this value is the * the new desired extent size (number of blocks). 
on * successful exit, this value is set to the extent's actual @@ -606,9 +606,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) * newblkno - the starting block number of the extents new allocation. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ static int extBrealloc(struct inode *ip, @@ -634,16 +634,16 @@ extBrealloc(struct inode *ip, /* - * NAME: extRoundDown() + * NAME: extRoundDown() * - * FUNCTION: round down a specified number of blocks to the next + * FUNCTION: round down a specified number of blocks to the next * smallest power of 2 number. * * PARAMETERS: * nb - the inode of the file. * * RETURN VALUES: - * next smallest power of 2 number. + * next smallest power of 2 number. */ static s64 extRoundDown(s64 nb) { diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index 38f70ac03bec..b3f5463fbe52 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -34,9 +34,9 @@ #define JFS_UNICODE 0x00000001 /* unicode name */ /* mount time flags for error handling */ -#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ -#define JFS_ERR_CONTINUE 0x00000004 /* continue */ -#define JFS_ERR_PANIC 0x00000008 /* panic */ +#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ +#define JFS_ERR_CONTINUE 0x00000004 /* continue */ +#define JFS_ERR_PANIC 0x00000008 /* panic */ /* Quota support */ #define JFS_USRQUOTA 0x00000010 @@ -83,7 +83,6 @@ /* case-insensitive name/directory support */ #define JFS_AIX 0x80000000 /* AIX support */ -/* POSIX name/directory support - Never implemented*/ /* * buffer cache configuration @@ -113,10 +112,10 @@ #define IDATASIZE 256 /* inode inline data size */ #define IXATTRSIZE 128 /* inode inline extended attribute size */ -#define XTPAGE_SIZE 4096 -#define log2_PAGESIZE 12 +#define XTPAGE_SIZE 4096 +#define log2_PAGESIZE 12 -#define IAG_SIZE 4096 +#define IAG_SIZE 
4096 #define IAG_EXTENT_SIZE 4096 #define INOSPERIAG 4096 /* number of disk inodes per iag */ #define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index c6530227cda6..3870ba8b9086 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -93,21 +93,21 @@ static int copy_from_dinode(struct dinode *, struct inode *); static void copy_to_dinode(struct dinode *, struct inode *); /* - * NAME: diMount() + * NAME: diMount() * - * FUNCTION: initialize the incore inode map control structures for + * FUNCTION: initialize the incore inode map control structures for * a fileset or aggregate init time. * - * the inode map's control structure (dinomap) is - * brought in from disk and placed in virtual memory. + * the inode map's control structure (dinomap) is + * brought in from disk and placed in virtual memory. * * PARAMETERS: - * ipimap - pointer to inode map inode for the aggregate or fileset. + * ipimap - pointer to inode map inode for the aggregate or fileset. * * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient free virtual memory. - * -EIO - i/o error. + * 0 - success + * -ENOMEM - insufficient free virtual memory. + * -EIO - i/o error. */ int diMount(struct inode *ipimap) { @@ -180,18 +180,18 @@ int diMount(struct inode *ipimap) /* - * NAME: diUnmount() + * NAME: diUnmount() * - * FUNCTION: write to disk the incore inode map control structures for + * FUNCTION: write to disk the incore inode map control structures for * a fileset or aggregate at unmount time. * * PARAMETERS: - * ipimap - pointer to inode map inode for the aggregate or fileset. + * ipimap - pointer to inode map inode for the aggregate or fileset. * * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient free virtual memory. - * -EIO - i/o error. + * 0 - success + * -ENOMEM - insufficient free virtual memory. + * -EIO - i/o error. 
*/ int diUnmount(struct inode *ipimap, int mounterror) { @@ -274,9 +274,9 @@ int diSync(struct inode *ipimap) /* - * NAME: diRead() + * NAME: diRead() * - * FUNCTION: initialize an incore inode from disk. + * FUNCTION: initialize an incore inode from disk. * * on entry, the specifed incore inode should itself * specify the disk inode number corresponding to the @@ -285,7 +285,7 @@ int diSync(struct inode *ipimap) * this routine handles incore inode initialization for * both "special" and "regular" inodes. special inodes * are those required early in the mount process and - * require special handling since much of the file system + * require special handling since much of the file system * is not yet initialized. these "special" inodes are * identified by a NULL inode map inode pointer and are * actually initialized by a call to diReadSpecial(). @@ -298,12 +298,12 @@ int diSync(struct inode *ipimap) * incore inode. * * PARAMETERS: - * ip - pointer to incore inode to be initialized from disk. + * ip - pointer to incore inode to be initialized from disk. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOMEM - insufficient memory + * 0 - success + * -EIO - i/o error. + * -ENOMEM - insufficient memory * */ int diRead(struct inode *ip) @@ -410,26 +410,26 @@ int diRead(struct inode *ip) /* - * NAME: diReadSpecial() + * NAME: diReadSpecial() * - * FUNCTION: initialize a 'special' inode from disk. + * FUNCTION: initialize a 'special' inode from disk. * * this routines handles aggregate level inodes. The * inode cache cannot differentiate between the * aggregate inodes and the filesystem inodes, so we * handle these here. We don't actually use the aggregate - * inode map, since these inodes are at a fixed location + * inode map, since these inodes are at a fixed location * and in some cases the aggregate inode map isn't initialized * yet. 
* * PARAMETERS: - * sb - filesystem superblock + * sb - filesystem superblock * inum - aggregate inode number * secondary - 1 if secondary aggregate inode table * * RETURN VALUES: - * new inode - success - * NULL - i/o error. + * new inode - success + * NULL - i/o error. */ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) { @@ -502,12 +502,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) } /* - * NAME: diWriteSpecial() + * NAME: diWriteSpecial() * - * FUNCTION: Write the special inode to disk + * FUNCTION: Write the special inode to disk * * PARAMETERS: - * ip - special inode + * ip - special inode * secondary - 1 if secondary aggregate inode table * * RETURN VALUES: none @@ -554,9 +554,9 @@ void diWriteSpecial(struct inode *ip, int secondary) } /* - * NAME: diFreeSpecial() + * NAME: diFreeSpecial() * - * FUNCTION: Free allocated space for special inode + * FUNCTION: Free allocated space for special inode */ void diFreeSpecial(struct inode *ip) { @@ -572,9 +572,9 @@ void diFreeSpecial(struct inode *ip) /* - * NAME: diWrite() + * NAME: diWrite() * - * FUNCTION: write the on-disk inode portion of the in-memory inode + * FUNCTION: write the on-disk inode portion of the in-memory inode * to its corresponding on-disk inode. * * on entry, the specifed incore inode should itself @@ -589,11 +589,11 @@ void diFreeSpecial(struct inode *ip) * * PARAMETERS: * tid - transacation id - * ip - pointer to incore inode to be written to the inode extent. + * ip - pointer to incore inode to be written to the inode extent. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. + * 0 - success + * -EIO - i/o error. 
*/ int diWrite(tid_t tid, struct inode *ip) { @@ -730,7 +730,7 @@ int diWrite(tid_t tid, struct inode *ip) ilinelock = (struct linelock *) & tlck->lock; /* - * regular file: 16 byte (XAD slot) granularity + * regular file: 16 byte (XAD slot) granularity */ if (type & tlckXTREE) { xtpage_t *p, *xp; @@ -755,7 +755,7 @@ int diWrite(tid_t tid, struct inode *ip) xad->flag &= ~(XAD_NEW | XAD_EXTENDED); } /* - * directory: 32 byte (directory entry slot) granularity + * directory: 32 byte (directory entry slot) granularity */ else if (type & tlckDTREE) { dtpage_t *p, *xp; @@ -800,9 +800,8 @@ int diWrite(tid_t tid, struct inode *ip) } /* - * lock/copy inode base: 128 byte slot granularity + * lock/copy inode base: 128 byte slot granularity */ -// baseDinode: lv = & dilinelock->lv[dilinelock->index]; lv->offset = dioffset >> L2INODESLOTSIZE; copy_to_dinode(dp, ip); @@ -813,17 +812,6 @@ int diWrite(tid_t tid, struct inode *ip) lv->length = 1; dilinelock->index++; -#ifdef _JFS_FASTDASD - /* - * We aren't logging changes to the DASD used in directory inodes, - * but we need to write them to disk. If we don't unmount cleanly, - * mount will recalculate the DASD used. - */ - if (S_ISDIR(ip->i_mode) - && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED)) - memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd)); -#endif /* _JFS_FASTDASD */ - /* release the buffer holding the updated on-disk inode. * the buffer will be later written by commit processing. */ @@ -834,9 +822,9 @@ int diWrite(tid_t tid, struct inode *ip) /* - * NAME: diFree(ip) + * NAME: diFree(ip) * - * FUNCTION: free a specified inode from the inode working map + * FUNCTION: free a specified inode from the inode working map * for a fileset or aggregate. * * if the inode to be freed represents the first (only) @@ -865,11 +853,11 @@ int diWrite(tid_t tid, struct inode *ip) * any updates and are held until all updates are complete. * * PARAMETERS: - * ip - inode to be freed. + * ip - inode to be freed. 
* * RETURN VALUES: - * 0 - success - * -EIO - i/o error. + * 0 - success + * -EIO - i/o error. */ int diFree(struct inode *ip) { @@ -902,7 +890,8 @@ int diFree(struct inode *ip) * the map. */ if (iagno >= imap->im_nextiag) { - dump_mem("imap", imap, 32); + print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, + imap, 32, 0); jfs_error(ip->i_sb, "diFree: inum = %d, iagno = %d, nextiag = %d", (uint) inum, iagno, imap->im_nextiag); @@ -964,8 +953,8 @@ int diFree(struct inode *ip) return -EIO; } /* - * inode extent still has some inodes or below low water mark: - * keep the inode extent; + * inode extent still has some inodes or below low water mark: + * keep the inode extent; */ if (bitmap || imap->im_agctl[agno].numfree < 96 || @@ -1047,12 +1036,12 @@ int diFree(struct inode *ip) /* - * inode extent has become free and above low water mark: - * free the inode extent; + * inode extent has become free and above low water mark: + * free the inode extent; */ /* - * prepare to update iag list(s) (careful update step 1) + * prepare to update iag list(s) (careful update step 1) */ amp = bmp = cmp = dmp = NULL; fwd = back = -1; @@ -1152,7 +1141,7 @@ int diFree(struct inode *ip) invalidate_pxd_metapages(ip, freepxd); /* - * update iag list(s) (careful update step 2) + * update iag list(s) (careful update step 2) */ /* add the iag to the ag extent free list if this is the * first free extent for the iag. @@ -1338,20 +1327,20 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) /* - * NAME: diAlloc(pip,dir,ip) + * NAME: diAlloc(pip,dir,ip) * - * FUNCTION: allocate a disk inode from the inode working map + * FUNCTION: allocate a disk inode from the inode working map * for a fileset or aggregate. * * PARAMETERS: - * pip - pointer to incore inode for the parent inode. - * dir - 'true' if the new disk inode is for a directory. - * ip - pointer to a new inode + * pip - pointer to incore inode for the parent inode. 
+ * dir - 'true' if the new disk inode is for a directory. + * ip - pointer to a new inode * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ int diAlloc(struct inode *pip, bool dir, struct inode *ip) { @@ -1433,7 +1422,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); /* - * try to allocate from the IAG + * try to allocate from the IAG */ /* check if the inode may be allocated from the iag * (i.e. the inode has free inodes or new extent can be added). @@ -1633,9 +1622,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) /* - * NAME: diAllocAG(imap,agno,dir,ip) + * NAME: diAllocAG(imap,agno,dir,ip) * - * FUNCTION: allocate a disk inode from the allocation group. + * FUNCTION: allocate a disk inode from the allocation group. * * this routine first determines if a new extent of free * inodes should be added for the allocation group, with @@ -1649,17 +1638,17 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) * PRE CONDITION: Already have the AG lock for this AG. * * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group to allocate from. - * dir - 'true' if the new disk inode is for a directory. - * ip - pointer to the new inode to be filled in on successful return + * imap - pointer to inode map control structure. + * agno - allocation group to allocate from. + * dir - 'true' if the new disk inode is for a directory. + * ip - pointer to the new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. 
*/ static int diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) @@ -1709,9 +1698,9 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) /* - * NAME: diAllocAny(imap,agno,dir,iap) + * NAME: diAllocAny(imap,agno,dir,iap) * - * FUNCTION: allocate a disk inode from any other allocation group. + * FUNCTION: allocate a disk inode from any other allocation group. * * this routine is called when an allocation attempt within * the primary allocation group has failed. if attempts to @@ -1719,17 +1708,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) * specified primary group. * * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - primary allocation group (to avoid). - * dir - 'true' if the new disk inode is for a directory. - * ip - pointer to a new inode to be filled in on successful return + * imap - pointer to inode map control structure. + * agno - primary allocation group (to avoid). + * dir - 'true' if the new disk inode is for a directory. + * ip - pointer to a new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) @@ -1772,9 +1761,9 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) /* - * NAME: diAllocIno(imap,agno,ip) + * NAME: diAllocIno(imap,agno,ip) * - * FUNCTION: allocate a disk inode from the allocation group's free + * FUNCTION: allocate a disk inode from the allocation group's free * inode list, returning an error if this free list is * empty (i.e. no iags on the list). 
* @@ -1785,16 +1774,16 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) * PRE CONDITION: Already have AG lock for this AG. * * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group. - * ip - pointer to new inode to be filled in on successful return + * imap - pointer to inode map control structure. + * agno - allocation group. + * ip - pointer to new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) { @@ -1890,7 +1879,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) /* - * NAME: diAllocExt(imap,agno,ip) + * NAME: diAllocExt(imap,agno,ip) * * FUNCTION: add a new extent of free inodes to an iag, allocating * an inode from this extent to satisfy the current allocation @@ -1910,16 +1899,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) * for the purpose of satisfying this request. * * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group number. - * ip - pointer to new inode to be filled in on successful return + * imap - pointer to inode map control structure. + * agno - allocation group number. + * ip - pointer to new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. 
*/ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) { @@ -2010,7 +1999,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) /* - * NAME: diAllocBit(imap,iagp,ino) + * NAME: diAllocBit(imap,iagp,ino) * * FUNCTION: allocate a backed inode from an iag. * @@ -2030,14 +2019,14 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) * this AG. Must have read lock on imap inode. * * PARAMETERS: - * imap - pointer to inode map control structure. - * iagp - pointer to iag. - * ino - inode number to be allocated within the iag. + * imap - pointer to inode map control structure. + * iagp - pointer to iag. + * ino - inode number to be allocated within the iag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) { @@ -2144,11 +2133,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) /* - * NAME: diNewExt(imap,iagp,extno) + * NAME: diNewExt(imap,iagp,extno) * - * FUNCTION: initialize a new extent of inodes for an iag, allocating - * the first inode of the extent for use for the current - * allocation request. + * FUNCTION: initialize a new extent of inodes for an iag, allocating + * the first inode of the extent for use for the current + * allocation request. * * disk resources are allocated for the new extent of inodes * and the inodes themselves are initialized to reflect their @@ -2177,14 +2166,14 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) * this AG. Must have read lock on imap inode. * * PARAMETERS: - * imap - pointer to inode map control structure. - * iagp - pointer to iag. - * extno - extent number. + * imap - pointer to inode map control structure. + * iagp - pointer to iag. + * extno - extent number. * * RETURN VALUES: - * 0 - success. 
- * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) { @@ -2430,7 +2419,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* - * NAME: diNewIAG(imap,iagnop,agno) + * NAME: diNewIAG(imap,iagnop,agno) * * FUNCTION: allocate a new iag for an allocation group. * @@ -2443,16 +2432,16 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) * and returned to satisfy the request. * * PARAMETERS: - * imap - pointer to inode map control structure. - * iagnop - pointer to an iag number set with the number of the + * imap - pointer to inode map control structure. + * iagnop - pointer to an iag number set with the number of the * newly allocated iag upon successful return. - * agno - allocation group number. + * agno - allocation group number. * bpp - Buffer pointer to be filled in with new IAG's buffer * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. * * serialization: * AG lock held on entry/exit; @@ -2461,7 +2450,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) * * note: new iag transaction: * . synchronously write iag; - * . write log of xtree and inode of imap; + * . write log of xtree and inode of imap; * . commit; * . synchronous write of xtree (right to left, bottom to top); * . 
at start of logredo(): init in-memory imap with one additional iag page; @@ -2481,9 +2470,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) s64 xaddr = 0; s64 blkno; tid_t tid; -#ifdef _STILL_TO_PORT - xad_t xad; -#endif /* _STILL_TO_PORT */ struct inode *iplist[1]; /* pick up pointers to the inode map and mount inodes */ @@ -2674,15 +2660,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) } /* - * NAME: diIAGRead() + * NAME: diIAGRead() * - * FUNCTION: get the buffer for the specified iag within a fileset + * FUNCTION: get the buffer for the specified iag within a fileset * or aggregate inode map. * * PARAMETERS: - * imap - pointer to inode map control structure. - * iagno - iag number. - * bpp - point to buffer pointer to be filled in on successful + * imap - pointer to inode map control structure. + * iagno - iag number. + * bpp - point to buffer pointer to be filled in on successful * exit. * * SERIALIZATION: @@ -2691,8 +2677,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) * the read lock is unnecessary.) * * RETURN VALUES: - * 0 - success. - * -EIO - i/o error. + * 0 - success. + * -EIO - i/o error. */ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) { @@ -2712,17 +2698,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) } /* - * NAME: diFindFree() + * NAME: diFindFree() * - * FUNCTION: find the first free bit in a word starting at + * FUNCTION: find the first free bit in a word starting at * the specified bit position. * * PARAMETERS: - * word - word to be examined. - * start - starting bit position. + * word - word to be examined. + * start - starting bit position. * * RETURN VALUES: - * bit position of first free bit in the word or 32 if + * bit position of first free bit in the word or 32 if * no free bits were found. 
*/ static int diFindFree(u32 word, int start) @@ -2897,7 +2883,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) atomic_read(&imap->im_numfree)); /* - * reconstruct imap + * reconstruct imap * * coalesce contiguous k (newAGSize/oldAGSize) AGs; * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; @@ -2913,7 +2899,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) } /* - * process each iag page of the map. + * process each iag page of the map. * * rebuild AG Free Inode List, AG Free Inode Extent List; */ @@ -2932,7 +2918,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) /* leave free iag in the free iag list */ if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { - release_metapage(bp); + release_metapage(bp); continue; } @@ -3063,13 +3049,13 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno, } /* - * NAME: copy_from_dinode() + * NAME: copy_from_dinode() * - * FUNCTION: Copies inode info from disk inode to in-memory inode + * FUNCTION: Copies inode info from disk inode to in-memory inode * * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient memory + * 0 - success + * -ENOMEM - insufficient memory */ static int copy_from_dinode(struct dinode * dip, struct inode *ip) { @@ -3151,9 +3137,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) } /* - * NAME: copy_to_dinode() + * NAME: copy_to_dinode() * - * FUNCTION: Copies inode info from in-memory inode to disk inode + * FUNCTION: Copies inode info from in-memory inode to disk inode */ static void copy_to_dinode(struct dinode * dip, struct inode *ip) { diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h index 4f9c346ed498..610a0e9d8941 100644 --- a/fs/jfs/jfs_imap.h +++ b/fs/jfs/jfs_imap.h @@ -24,17 +24,17 @@ * jfs_imap.h: disk inode manager */ -#define EXTSPERIAG 128 /* number of disk inode extent per iag */ -#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ -#define SMAPSZ 4 /* number of words per summary map */ 
+#define EXTSPERIAG 128 /* number of disk inode extent per iag */ +#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ +#define SMAPSZ 4 /* number of words per summary map */ #define EXTSPERSUM 32 /* number of extents per summary map entry */ #define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ #define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ -#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ -#define MAXAG 128 /* maximum number of allocation groups */ +#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ +#define MAXAG 128 /* maximum number of allocation groups */ -#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ -#define SMAPSIZE 16 /* bytes in the IAG summary maps */ +#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ +#define SMAPSIZE 16 /* bytes in the IAG summary maps */ /* convert inode number to iag number */ #define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) @@ -60,31 +60,31 @@ * inode allocation group page (per 4096 inodes of an AG) */ struct iag { - __le64 agstart; /* 8: starting block of ag */ - __le32 iagnum; /* 4: inode allocation group number */ - __le32 inofreefwd; /* 4: ag inode free list forward */ - __le32 inofreeback; /* 4: ag inode free list back */ - __le32 extfreefwd; /* 4: ag inode extent free list forward */ - __le32 extfreeback; /* 4: ag inode extent free list back */ - __le32 iagfree; /* 4: iag free list */ + __le64 agstart; /* 8: starting block of ag */ + __le32 iagnum; /* 4: inode allocation group number */ + __le32 inofreefwd; /* 4: ag inode free list forward */ + __le32 inofreeback; /* 4: ag inode free list back */ + __le32 extfreefwd; /* 4: ag inode extent free list forward */ + __le32 extfreeback; /* 4: ag inode extent free list back */ + __le32 iagfree; /* 4: iag free list */ /* summary map: 1 bit per inode extent */ __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; - * note: this indicates free and backed - * inodes, if the extent is not backed the - * 
value will be 1. if the extent is - * backed but all inodes are being used the - * value will be 1. if the extent is - * backed but at least one of the inodes is - * free the value will be 0. + * note: this indicates free and backed + * inodes, if the extent is not backed the + * value will be 1. if the extent is + * backed but all inodes are being used the + * value will be 1. if the extent is + * backed but at least one of the inodes is + * free the value will be 0. */ __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ - __le32 nfreeinos; /* 4: number of free inodes */ - __le32 nfreeexts; /* 4: number of free extents */ + __le32 nfreeinos; /* 4: number of free inodes */ + __le32 nfreeexts; /* 4: number of free extents */ /* (72) */ u8 pad[1976]; /* 1976: pad to 2048 bytes */ /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ - __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ + __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ }; /* (4096) */ @@ -93,44 +93,44 @@ struct iag { * per AG control information (in inode map control page) */ struct iagctl_disk { - __le32 inofree; /* 4: free inode list anchor */ - __le32 extfree; /* 4: free extent list anchor */ - __le32 numinos; /* 4: number of backed inodes */ - __le32 numfree; /* 4: number of free inodes */ + __le32 inofree; /* 4: free inode list anchor */ + __le32 extfree; /* 4: free extent list anchor */ + __le32 numinos; /* 4: number of backed inodes */ + __le32 numfree; /* 4: number of free inodes */ }; /* (16) */ struct iagctl { - int inofree; /* free inode list anchor */ - int extfree; /* free extent list anchor */ - int numinos; /* number of backed inodes */ - int numfree; /* number of free inodes */ + int inofree; /* free inode list anchor */ + int extfree; /* free extent list anchor */ + int numinos; /* number of backed inodes */ + int 
numfree; /* number of free inodes */ }; /* * per fileset/aggregate inode map control page */ struct dinomap_disk { - __le32 in_freeiag; /* 4: free iag list anchor */ - __le32 in_nextiag; /* 4: next free iag number */ - __le32 in_numinos; /* 4: num of backed inodes */ + __le32 in_freeiag; /* 4: free iag list anchor */ + __le32 in_nextiag; /* 4: next free iag number */ + __le32 in_numinos; /* 4: num of backed inodes */ __le32 in_numfree; /* 4: num of free backed inodes */ __le32 in_nbperiext; /* 4: num of blocks per inode extent */ - __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ - __le32 in_diskblock; /* 4: for standalone test driver */ - __le32 in_maxag; /* 4: for standalone test driver */ - u8 pad[2016]; /* 2016: pad to 2048 */ + __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ + __le32 in_diskblock; /* 4: for standalone test driver */ + __le32 in_maxag; /* 4: for standalone test driver */ + u8 pad[2016]; /* 2016: pad to 2048 */ struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */ }; /* (4096) */ struct dinomap { - int in_freeiag; /* free iag list anchor */ - int in_nextiag; /* next free iag number */ - int in_numinos; /* num of backed inodes */ - int in_numfree; /* num of free backed inodes */ + int in_freeiag; /* free iag list anchor */ + int in_nextiag; /* next free iag number */ + int in_numinos; /* num of backed inodes */ + int in_numfree; /* num of free backed inodes */ int in_nbperiext; /* num of blocks per inode extent */ - int in_l2nbperiext; /* l2 of in_nbperiext */ - int in_diskblock; /* for standalone test driver */ - int in_maxag; /* for standalone test driver */ + int in_l2nbperiext; /* l2 of in_nbperiext */ + int in_diskblock; /* for standalone test driver */ + int in_maxag; /* for standalone test driver */ struct iagctl in_agctl[MAXAG]; /* AG control information */ }; @@ -139,9 +139,9 @@ struct dinomap { */ struct inomap { struct dinomap im_imap; /* 4096: inode allocation control */ - struct inode *im_ipimap; /* 4: ptr to 
inode for imap */ - struct mutex im_freelock; /* 4: iag free list lock */ - struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ + struct inode *im_ipimap; /* 4: ptr to inode for imap */ + struct mutex im_freelock; /* 4: iag free list lock */ + struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ u32 *im_DBGdimap; atomic_t im_numinos; /* num of backed inodes */ atomic_t im_numfree; /* num of free backed inodes */ diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 8f453eff3c83..cb8f30985ad1 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -40,7 +40,7 @@ struct jfs_inode_info { uint mode2; /* jfs-specific mode */ uint saved_uid; /* saved for uid mount option */ uint saved_gid; /* saved for gid mount option */ - pxd_t ixpxd; /* inode extent descriptor */ + pxd_t ixpxd; /* inode extent descriptor */ dxd_t acl; /* dxd describing acl */ dxd_t ea; /* dxd describing ea */ time_t otime; /* time created */ @@ -190,7 +190,7 @@ struct jfs_sb_info { uint gengen; /* inode generation generator*/ uint inostamp; /* shows inode belongs to fileset*/ - /* Formerly in ipbmap */ + /* Formerly in ipbmap */ struct bmap *bmap; /* incore bmap descriptor */ struct nls_table *nls_tab; /* current codepage */ struct inode *direct_inode; /* metadata inode */ diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 44a2f33cb98d..de3e4a506dbc 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -244,7 +244,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, goto writeRecord; /* - * initialize/update page/transaction recovery lsn + * initialize/update page/transaction recovery lsn */ lsn = log->lsn; @@ -263,7 +263,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * initialize/update lsn of tblock of the page + * initialize/update lsn of tblock of the page * * transaction inherits oldest lsn of pages associated * with allocation/deallocation of resources (their @@ -307,7 +307,7 @@ int lmLog(struct 
jfs_log * log, struct tblock * tblk, struct lrd * lrd, LOGSYNC_UNLOCK(log, flags); /* - * write the log record + * write the log record */ writeRecord: lsn = lmWriteRecord(log, tblk, lrd, tlck); @@ -372,7 +372,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, goto moveLrd; /* - * move log record data + * move log record data */ /* retrieve source meta-data page to log */ if (tlck->flag & tlckPAGELOCK) { @@ -465,7 +465,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * move log record descriptor + * move log record descriptor */ moveLrd: lrd->length = cpu_to_le16(len); @@ -574,7 +574,7 @@ static int lmNextPage(struct jfs_log * log) LOGGC_LOCK(log); /* - * write or queue the full page at the tail of write queue + * write or queue the full page at the tail of write queue */ /* get the tail tblk on commit queue */ if (list_empty(&log->cqueue)) @@ -625,7 +625,7 @@ static int lmNextPage(struct jfs_log * log) LOGGC_UNLOCK(log); /* - * allocate/initialize next page + * allocate/initialize next page */ /* if log wraps, the first data page of log is 2 * (0 never used, 1 is superblock). @@ -953,7 +953,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) } /* - * forward syncpt + * forward syncpt */ /* if last sync is same as last syncpt, * invoke sync point forward processing to update sync. @@ -989,7 +989,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) lsn = log->lsn; /* - * setup next syncpt trigger (SWAG) + * setup next syncpt trigger (SWAG) */ logsize = log->logsize; @@ -1000,11 +1000,11 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) if (more < 2 * LOGPSIZE) { jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); /* - * log wrapping + * log wrapping * * option 1 - panic ? No.! * option 2 - shutdown file systems - * associated with log ? + * associated with log ? * option 3 - extend log ? 
*/ /* @@ -1062,7 +1062,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync) /* * NAME: lmLogOpen() * - * FUNCTION: open the log on first open; + * FUNCTION: open the log on first open; * insert filesystem in the active list of the log. * * PARAMETER: ipmnt - file system mount inode @@ -1113,7 +1113,7 @@ int lmLogOpen(struct super_block *sb) init_waitqueue_head(&log->syncwait); /* - * external log as separate logical volume + * external log as separate logical volume * * file systems to log may have n-to-1 relationship; */ @@ -1155,7 +1155,7 @@ journal_found: return 0; /* - * unwind on error + * unwind on error */ shutdown: /* unwind lbmLogInit() */ list_del(&log->journal_list); @@ -1427,7 +1427,7 @@ int lmLogInit(struct jfs_log * log) return 0; /* - * unwind on error + * unwind on error */ errout30: /* release log page */ log->wqueue = NULL; @@ -1480,7 +1480,7 @@ int lmLogClose(struct super_block *sb) if (test_bit(log_INLINELOG, &log->flag)) { /* - * in-line log in host file system + * in-line log in host file system */ rc = lmLogShutdown(log); kfree(log); @@ -1504,7 +1504,7 @@ int lmLogClose(struct super_block *sb) goto out; /* - * external log as separate logical volume + * external log as separate logical volume */ list_del(&log->journal_list); bdev = log->bdev; @@ -1622,20 +1622,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait) if (!list_empty(&log->synclist)) { struct logsyncblk *lp; + printk(KERN_ERR "jfs_flush_journal: synclist not empty\n"); list_for_each_entry(lp, &log->synclist, synclist) { if (lp->xflag & COMMIT_PAGE) { struct metapage *mp = (struct metapage *)lp; - dump_mem("orphan metapage", lp, - sizeof(struct metapage)); - dump_mem("page", mp->page, sizeof(struct page)); - } - else - dump_mem("orphan tblock", lp, - sizeof(struct tblock)); + print_hex_dump(KERN_ERR, "metapage: ", + DUMP_PREFIX_ADDRESS, 16, 4, + mp, sizeof(struct metapage), 0); + print_hex_dump(KERN_ERR, "page: ", + DUMP_PREFIX_ADDRESS, 16, + sizeof(long), mp->page, + 
sizeof(struct page), 0); + } else + print_hex_dump(KERN_ERR, "tblock:", + DUMP_PREFIX_ADDRESS, 16, 4, + lp, sizeof(struct tblock), 0); } } +#else + WARN_ON(!list_empty(&log->synclist)); #endif - //assert(list_empty(&log->synclist)); clear_bit(log_FLUSH, &log->flag); } @@ -1723,7 +1729,7 @@ int lmLogShutdown(struct jfs_log * log) * * PARAMETE: log - pointer to logs inode. * fsdev - kdev_t of filesystem. - * serial - pointer to returned log serial number + * serial - pointer to returned log serial number * activate - insert/remove device from active list. * * RETURN: 0 - success @@ -1963,7 +1969,7 @@ static void lbmfree(struct lbuf * bp) * FUNCTION: add a log buffer to the log redrive list * * PARAMETER: - * bp - log buffer + * bp - log buffer * * NOTES: * Takes log_redrive_lock. @@ -2054,7 +2060,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, bp->l_flag = flag; /* - * insert bp at tail of write queue associated with log + * insert bp at tail of write queue associated with log * * (request is either for bp already/currently at head of queue * or new bp to be inserted at tail) @@ -2117,7 +2123,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); /* - * initiate pageout of the page + * initiate pageout of the page */ lbmStartIO(bp); } @@ -2128,7 +2134,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) * * FUNCTION: Interface to DD strategy routine * - * RETURN: none + * RETURN: none * * serialization: LCACHE_LOCK() is NOT held during log i/o; */ @@ -2222,7 +2228,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) bio_put(bio); /* - * pagein completion + * pagein completion */ if (bp->l_flag & lbmREAD) { bp->l_flag &= ~lbmREAD; @@ -2236,7 +2242,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) } /* - * pageout completion + * pageout completion * * the bp at the head of write 
queue has completed pageout. * @@ -2302,7 +2308,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) } /* - * synchronous pageout: + * synchronous pageout: * * buffer has not necessarily been removed from write queue * (e.g., synchronous write of partial-page with COMMIT): @@ -2316,7 +2322,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) } /* - * Group Commit pageout: + * Group Commit pageout: */ else if (bp->l_flag & lbmGC) { LCACHE_UNLOCK(flags); @@ -2324,7 +2330,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) } /* - * asynchronous pageout: + * asynchronous pageout: * * buffer must have been removed from write queue: * insert buffer at head of freelist where it can be recycled @@ -2375,7 +2381,7 @@ int jfsIOWait(void *arg) * FUNCTION: format file system log * * PARAMETERS: - * log - volume log + * log - volume log * logAddress - start address of log space in FS block * logSize - length of log space in FS block; * @@ -2407,16 +2413,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) npages = logSize >> sbi->l2nbperpage; /* - * log space: + * log space: * * page 0 - reserved; * page 1 - log superblock; * page 2 - log data page: A SYNC log record is written - * into this page at logform time; + * into this page at logform time; * pages 3-N - log data page: set to empty log data pages; */ /* - * init log superblock: log page 1 + * init log superblock: log page 1 */ logsuper = (struct logsuper *) bp->l_ldata; @@ -2436,7 +2442,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) goto exit; /* - * init pages 2 to npages-1 as log data pages: + * init pages 2 to npages-1 as log data pages: * * log page sequence number (lpsn) initialization: * @@ -2479,7 +2485,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) goto exit; /* - * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) + * initialize succeeding log pages: lpsn = 0, 
1, ..., (N-2) */ for (lspn = 0; lspn < npages - 3; lspn++) { lp->h.page = lp->t.page = cpu_to_le32(lspn); @@ -2495,7 +2501,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) rc = 0; exit: /* - * finalize log + * finalize log */ /* release the buffer */ lbmFree(bp); diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index a53fb17ea219..1f85ef0ec045 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -144,7 +144,7 @@ struct logpage { * * (this comment should be rewritten !) * jfs uses only "after" log records (only a single writer is allowed - * in a page, pages are written to temporary paging space if + * in a page, pages are written to temporary paging space if * if they must be written to disk before commit, and i/o is * scheduled for modified pages to their home location after * the log records containing the after values and the commit @@ -153,7 +153,7 @@ struct logpage { * * a log record consists of a data area of variable length followed by * a descriptor of fixed size LOGRDSIZE bytes. - * the data area is rounded up to an integral number of 4-bytes and + * the data area is rounded up to an integral number of 4-bytes and * must be no longer than LOGPSIZE. * the descriptor is of size of multiple of 4-bytes and aligned on a * 4-byte boundary. @@ -215,13 +215,13 @@ struct lrd { union { /* - * COMMIT: commit + * COMMIT: commit * * transaction commit: no type-dependent information; */ /* - * REDOPAGE: after-image + * REDOPAGE: after-image * * apply after-image; * @@ -236,7 +236,7 @@ struct lrd { } redopage; /* (20) */ /* - * NOREDOPAGE: the page is freed + * NOREDOPAGE: the page is freed * * do not apply after-image records which precede this record * in the log with the same page block number to this page. 
@@ -252,7 +252,7 @@ struct lrd { } noredopage; /* (20) */ /* - * UPDATEMAP: update block allocation map + * UPDATEMAP: update block allocation map * * either in-line PXD, * or out-of-line XADLIST; @@ -268,7 +268,7 @@ struct lrd { } updatemap; /* (20) */ /* - * NOREDOINOEXT: the inode extent is freed + * NOREDOINOEXT: the inode extent is freed * * do not apply after-image records which precede this * record in the log with the any of the 4 page block @@ -286,7 +286,7 @@ struct lrd { } noredoinoext; /* (20) */ /* - * SYNCPT: log sync point + * SYNCPT: log sync point * * replay log upto syncpt address specified; */ @@ -295,13 +295,13 @@ struct lrd { } syncpt; /* - * MOUNT: file system mount + * MOUNT: file system mount * * file system mount: no type-dependent information; */ /* - * ? FREEXTENT: free specified extent(s) + * ? FREEXTENT: free specified extent(s) * * free specified extent(s) from block allocation map * N.B.: nextents should be length of data/sizeof(xad_t) @@ -314,7 +314,7 @@ struct lrd { } freextent; /* - * ? NOREDOFILE: this file is freed + * ? NOREDOFILE: this file is freed * * do not apply records which precede this record in the log * with the same inode number. @@ -330,7 +330,7 @@ struct lrd { } noredofile; /* - * ? NEWPAGE: + * ? NEWPAGE: * * metadata type dependent */ @@ -342,7 +342,7 @@ struct lrd { } newpage; /* - * ? DUMMY: filler + * ? 
DUMMY: filler * * no type-dependent information */ diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 43d4f69afbec..77c7f1129dde 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -472,7 +472,8 @@ add_failed: printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); goto skip; dump_bio: - dump_mem("bio", bio, sizeof(*bio)); + print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16, + 4, bio, sizeof(*bio), 0); skip: bio_put(bio); unlock_page(page); diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 4dd479834897..644429acb8c0 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb); */ int jfs_mount(struct super_block *sb) { - int rc = 0; /* Return code */ + int rc = 0; /* Return code */ struct jfs_sb_info *sbi = JFS_SBI(sb); struct inode *ipaimap = NULL; struct inode *ipaimap2 = NULL; @@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb) sbi->ipaimap2 = NULL; /* - * mount (the only/single) fileset + * mount (the only/single) fileset */ /* * open fileset inode allocation map (aka fileset inode) @@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb) goto out; /* - * unwind on error + * unwind on error */ errout41: /* close fileset inode allocation map inode */ diFreeSpecial(ipimap); diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 25430d0b0d59..7aa1f7004eaf 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -18,7 +18,7 @@ */ /* - * jfs_txnmgr.c: transaction manager + * jfs_txnmgr.c: transaction manager * * notes: * transaction starts with txBegin() and ends with txCommit() @@ -60,7 +60,7 @@ #include "jfs_debug.h" /* - * transaction management structures + * transaction management structures */ static struct { int freetid; /* index of a free tid structure */ @@ -103,19 +103,19 @@ module_param(nTxLock, int, 0); MODULE_PARM_DESC(nTxLock, "Number of transaction locks (max:65536)"); -struct tblock *TxBlock; /* transaction block 
table */ -static int TxLockLWM; /* Low water mark for number of txLocks used */ -static int TxLockHWM; /* High water mark for number of txLocks used */ -static int TxLockVHWM; /* Very High water mark */ -struct tlock *TxLock; /* transaction lock table */ +struct tblock *TxBlock; /* transaction block table */ +static int TxLockLWM; /* Low water mark for number of txLocks used */ +static int TxLockHWM; /* High water mark for number of txLocks used */ +static int TxLockVHWM; /* Very High water mark */ +struct tlock *TxLock; /* transaction lock table */ /* - * transaction management lock + * transaction management lock */ static DEFINE_SPINLOCK(jfsTxnLock); -#define TXN_LOCK() spin_lock(&jfsTxnLock) -#define TXN_UNLOCK() spin_unlock(&jfsTxnLock) +#define TXN_LOCK() spin_lock(&jfsTxnLock) +#define TXN_UNLOCK() spin_unlock(&jfsTxnLock) #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) @@ -148,7 +148,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) #define TXN_WAKEUP(event) wake_up_all(event) /* - * statistics + * statistics */ static struct { tid_t maxtid; /* 4: biggest tid ever used */ @@ -181,8 +181,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, static void LogSyncRelease(struct metapage * mp); /* - * transaction block/lock management - * --------------------------------- + * transaction block/lock management + * --------------------------------- */ /* @@ -227,9 +227,9 @@ static void txLockFree(lid_t lid) } /* - * NAME: txInit() + * NAME: txInit() * - * FUNCTION: initialize transaction management structures + * FUNCTION: initialize transaction management structures * * RETURN: * @@ -333,9 +333,9 @@ int txInit(void) } /* - * NAME: txExit() + * NAME: txExit() * - * FUNCTION: clean up when module is unloaded + * FUNCTION: clean up when module is unloaded */ void txExit(void) { @@ -346,12 +346,12 @@ void txExit(void) } /* - * NAME: 
txBegin() + * NAME: txBegin() * - * FUNCTION: start a transaction. + * FUNCTION: start a transaction. * - * PARAMETER: sb - superblock - * flag - force for nested tx; + * PARAMETER: sb - superblock + * flag - force for nested tx; * * RETURN: tid - transaction id * @@ -447,13 +447,13 @@ tid_t txBegin(struct super_block *sb, int flag) } /* - * NAME: txBeginAnon() + * NAME: txBeginAnon() * - * FUNCTION: start an anonymous transaction. + * FUNCTION: start an anonymous transaction. * Blocks if logsync or available tlocks are low to prevent * anonymous tlocks from depleting supply. * - * PARAMETER: sb - superblock + * PARAMETER: sb - superblock * * RETURN: none */ @@ -489,11 +489,11 @@ void txBeginAnon(struct super_block *sb) } /* - * txEnd() + * txEnd() * * function: free specified transaction block. * - * logsync barrier processing: + * logsync barrier processing: * * serialization: */ @@ -577,13 +577,13 @@ wakeup: } /* - * txLock() + * txLock() * * function: acquire a transaction lock on the specified <mp> * * parameter: * - * return: transaction lock id + * return: transaction lock id * * serialization: */ @@ -829,12 +829,16 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, /* Only locks on ipimap or ipaimap should reach here */ /* assert(jfs_ip->fileset == AGGREGATE_I); */ if (jfs_ip->fileset != AGGREGATE_I) { - jfs_err("txLock: trying to lock locked page!"); - dump_mem("ip", ip, sizeof(struct inode)); - dump_mem("mp", mp, sizeof(struct metapage)); - dump_mem("Locker's tblk", tid_to_tblock(tid), - sizeof(struct tblock)); - dump_mem("Tlock", tlck, sizeof(struct tlock)); + printk(KERN_ERR "txLock: trying to lock locked page!"); + print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, + ip, sizeof(*ip), 0); + print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, + mp, sizeof(*mp), 0); + print_hex_dump(KERN_ERR, "Locker's tblock: ", + DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), + sizeof(struct tblock), 0); + 
print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, + tlck, sizeof(*tlck), 0); BUG(); } INCREMENT(stattx.waitlock); /* statistics */ @@ -857,17 +861,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, } /* - * NAME: txRelease() + * NAME: txRelease() * - * FUNCTION: Release buffers associated with transaction locks, but don't + * FUNCTION: Release buffers associated with transaction locks, but don't * mark homeok yet. The allows other transactions to modify * buffers, but won't let them go to disk until commit record * actually gets written. * * PARAMETER: - * tblk - + * tblk - * - * RETURN: Errors from subroutines. + * RETURN: Errors from subroutines. */ static void txRelease(struct tblock * tblk) { @@ -896,10 +900,10 @@ static void txRelease(struct tblock * tblk) } /* - * NAME: txUnlock() + * NAME: txUnlock() * - * FUNCTION: Initiates pageout of pages modified by tid in journalled - * objects and frees their lockwords. + * FUNCTION: Initiates pageout of pages modified by tid in journalled + * objects and frees their lockwords. */ static void txUnlock(struct tblock * tblk) { @@ -983,10 +987,10 @@ static void txUnlock(struct tblock * tblk) } /* - * txMaplock() + * txMaplock() * * function: allocate a transaction lock for freed page/entry; - * for freed page, maplock is used as xtlock/dtlock type; + * for freed page, maplock is used as xtlock/dtlock type; */ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) { @@ -1057,7 +1061,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) } /* - * txLinelock() + * txLinelock() * * function: allocate a transaction lock for log vector list */ @@ -1092,39 +1096,39 @@ struct linelock *txLinelock(struct linelock * tlock) } /* - * transaction commit management - * ----------------------------- + * transaction commit management + * ----------------------------- */ /* - * NAME: txCommit() - * - * FUNCTION: commit the changes to the objects specified in - * clist. 
For journalled segments only the - * changes of the caller are committed, ie by tid. - * for non-journalled segments the data are flushed to - * disk and then the change to the disk inode and indirect - * blocks committed (so blocks newly allocated to the - * segment will be made a part of the segment atomically). - * - * all of the segments specified in clist must be in - * one file system. no more than 6 segments are needed - * to handle all unix svcs. - * - * if the i_nlink field (i.e. disk inode link count) - * is zero, and the type of inode is a regular file or - * directory, or symbolic link , the inode is truncated - * to zero length. the truncation is committed but the - * VM resources are unaffected until it is closed (see - * iput and iclose). + * NAME: txCommit() + * + * FUNCTION: commit the changes to the objects specified in + * clist. For journalled segments only the + * changes of the caller are committed, ie by tid. + * for non-journalled segments the data are flushed to + * disk and then the change to the disk inode and indirect + * blocks committed (so blocks newly allocated to the + * segment will be made a part of the segment atomically). + * + * all of the segments specified in clist must be in + * one file system. no more than 6 segments are needed + * to handle all unix svcs. + * + * if the i_nlink field (i.e. disk inode link count) + * is zero, and the type of inode is a regular file or + * directory, or symbolic link , the inode is truncated + * to zero length. the truncation is committed but the + * VM resources are unaffected until it is closed (see + * iput and iclose). * * PARAMETER: * * RETURN: * * serialization: - * on entry the inode lock on each segment is assumed - * to be held. + * on entry the inode lock on each segment is assumed + * to be held. 
* * i/o error: */ @@ -1175,7 +1179,7 @@ int txCommit(tid_t tid, /* transaction identifier */ if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) tblk->xflag |= COMMIT_LAZY; /* - * prepare non-journaled objects for commit + * prepare non-journaled objects for commit * * flush data pages of non-journaled file * to prevent the file getting non-initialized disk blocks @@ -1186,7 +1190,7 @@ int txCommit(tid_t tid, /* transaction identifier */ cd.nip = nip; /* - * acquire transaction lock on (on-disk) inodes + * acquire transaction lock on (on-disk) inodes * * update on-disk inode from in-memory inode * acquiring transaction locks for AFTER records @@ -1262,7 +1266,7 @@ int txCommit(tid_t tid, /* transaction identifier */ } /* - * write log records from transaction locks + * write log records from transaction locks * * txUpdateMap() resets XAD_NEW in XAD. */ @@ -1294,7 +1298,7 @@ int txCommit(tid_t tid, /* transaction identifier */ !test_cflag(COMMIT_Nolink, tblk->u.ip))); /* - * write COMMIT log record + * write COMMIT log record */ lrd->type = cpu_to_le16(LOG_COMMIT); lrd->length = 0; @@ -1303,7 +1307,7 @@ int txCommit(tid_t tid, /* transaction identifier */ lmGroupCommit(log, tblk); /* - * - transaction is now committed - + * - transaction is now committed - */ /* @@ -1314,11 +1318,11 @@ int txCommit(tid_t tid, /* transaction identifier */ txForce(tblk); /* - * update allocation map. + * update allocation map. * * update inode allocation map and inode: * free pager lock on memory object of inode if any. - * update block allocation map. + * update block allocation map. * * txUpdateMap() resets XAD_NEW in XAD. 
*/ @@ -1326,7 +1330,7 @@ int txCommit(tid_t tid, /* transaction identifier */ txUpdateMap(tblk); /* - * free transaction locks and pageout/free pages + * free transaction locks and pageout/free pages */ txRelease(tblk); @@ -1335,7 +1339,7 @@ int txCommit(tid_t tid, /* transaction identifier */ /* - * reset in-memory object state + * reset in-memory object state */ for (k = 0; k < cd.nip; k++) { ip = cd.iplist[k]; @@ -1358,11 +1362,11 @@ int txCommit(tid_t tid, /* transaction identifier */ } /* - * NAME: txLog() + * NAME: txLog() * - * FUNCTION: Writes AFTER log records for all lines modified - * by tid for segments specified by inodes in comdata. - * Code assumes only WRITELOCKS are recorded in lockwords. + * FUNCTION: Writes AFTER log records for all lines modified + * by tid for segments specified by inodes in comdata. + * Code assumes only WRITELOCKS are recorded in lockwords. * * PARAMETERS: * @@ -1421,12 +1425,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) } /* - * diLog() + * diLog() * - * function: log inode tlock and format maplock to update bmap; + * function: log inode tlock and format maplock to update bmap; */ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck, struct commit * cd) + struct tlock * tlck, struct commit * cd) { int rc = 0; struct metapage *mp; @@ -1442,7 +1446,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, pxd = &lrd->log.redopage.pxd; /* - * inode after image + * inode after image */ if (tlck->type & tlckENTRY) { /* log after-image for logredo(): */ @@ -1456,7 +1460,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, tlck->flag |= tlckWRITEPAGE; } else if (tlck->type & tlckFREE) { /* - * free inode extent + * free inode extent * * (pages of the freed inode extent have been invalidated and * a maplock for free of the extent has been formatted at @@ -1498,7 +1502,7 @@ static int 
diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, jfs_err("diLog: UFO type tlck:0x%p", tlck); #ifdef _JFS_WIP /* - * alloc/free external EA extent + * alloc/free external EA extent * * a maplock for txUpdateMap() to update bPWMAP for alloc/free * of the extent has been formatted at txLock() time; @@ -1534,9 +1538,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * dataLog() + * dataLog() * - * function: log data tlock + * function: log data tlock */ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) @@ -1580,9 +1584,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * dtLog() + * dtLog() * - * function: log dtree tlock and format maplock to update bmap; + * function: log dtree tlock and format maplock to update bmap; */ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) @@ -1603,10 +1607,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); /* - * page extension via relocation: entry insertion; - * page extension in-place: entry insertion; - * new right page from page split, reinitialized in-line - * root from root page split: entry insertion; + * page extension via relocation: entry insertion; + * page extension in-place: entry insertion; + * new right page from page split, reinitialized in-line + * root from root page split: entry insertion; */ if (tlck->type & (tlckNEW | tlckEXTEND)) { /* log after-image of the new page for logredo(): @@ -1641,8 +1645,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * entry insertion/deletion, - * sibling page link update (old right page before split); + * entry insertion/deletion, + * sibling page link update (old right page before split); */ if (tlck->type & (tlckENTRY | tlckRELINK)) { /* log after-image 
for logredo(): */ @@ -1658,11 +1662,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * page deletion: page has been invalidated - * page relocation: source extent + * page deletion: page has been invalidated + * page relocation: source extent * - * a maplock for free of the page has been formatted - * at txLock() time); + * a maplock for free of the page has been formatted + * at txLock() time); */ if (tlck->type & (tlckFREE | tlckRELOCATE)) { /* log LOG_NOREDOPAGE of the deleted page for logredo() @@ -1683,9 +1687,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * xtLog() + * xtLog() * - * function: log xtree tlock and format maplock to update bmap; + * function: log xtree tlock and format maplock to update bmap; */ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) @@ -1725,8 +1729,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, xadlock = (struct xdlistlock *) maplock; /* - * entry insertion/extension; - * sibling page link update (old right page before split); + * entry insertion/extension; + * sibling page link update (old right page before split); */ if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { /* log after-image for logredo(): @@ -1801,7 +1805,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * page deletion: file deletion/truncation (ref. xtTruncate()) + * page deletion: file deletion/truncation (ref. xtTruncate()) * * (page will be invalidated after log is written and bmap * is updated from the page); @@ -1908,13 +1912,13 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * page/entry truncation: file truncation (ref. xtTruncate()) + * page/entry truncation: file truncation (ref. 
xtTruncate()) * - * |----------+------+------+---------------| - * | | | - * | | hwm - hwm before truncation - * | next - truncation point - * lwm - lwm before truncation + * |----------+------+------+---------------| + * | | | + * | | hwm - hwm before truncation + * | next - truncation point + * lwm - lwm before truncation * header ? */ if (tlck->type & tlckTRUNCATE) { @@ -1937,7 +1941,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, twm = xtlck->twm.offset; /* - * write log records + * write log records */ /* log after-image for logredo(): * @@ -1997,7 +2001,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * format maplock(s) for txUpdateMap() to update bmap + * format maplock(s) for txUpdateMap() to update bmap */ maplock->index = 0; @@ -2069,9 +2073,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * mapLog() + * mapLog() * - * function: log from maplock of freed data extents; + * function: log from maplock of freed data extents; */ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) @@ -2081,7 +2085,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, pxd_t *pxd; /* - * page relocation: free the source page extent + * page relocation: free the source page extent * * a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time saving the src @@ -2155,10 +2159,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * txEA() + * txEA() * - * function: acquire maplock for EA/ACL extents or - * set COMMIT_INLINE flag; + * function: acquire maplock for EA/ACL extents or + * set COMMIT_INLINE flag; */ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) { @@ -2207,10 +2211,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) } /* - * txForce() + * txForce() * * 
function: synchronously write pages locked by transaction - * after txLog() but before txUpdateMap(); + * after txLog() but before txUpdateMap(); */ static void txForce(struct tblock * tblk) { @@ -2273,10 +2277,10 @@ static void txForce(struct tblock * tblk) } /* - * txUpdateMap() + * txUpdateMap() * - * function: update persistent allocation map (and working map - * if appropriate); + * function: update persistent allocation map (and working map + * if appropriate); * * parameter: */ @@ -2298,7 +2302,7 @@ static void txUpdateMap(struct tblock * tblk) /* - * update block allocation map + * update block allocation map * * update allocation state in pmap (and wmap) and * update lsn of the pmap page; @@ -2382,7 +2386,7 @@ static void txUpdateMap(struct tblock * tblk) } } /* - * update inode allocation map + * update inode allocation map * * update allocation state in pmap and * update lsn of the pmap page; @@ -2407,24 +2411,24 @@ static void txUpdateMap(struct tblock * tblk) } /* - * txAllocPMap() + * txAllocPMap() * * function: allocate from persistent map; * * parameter: - * ipbmap - - * malock - - * xad list: - * pxd: - * - * maptype - - * allocate from persistent map; - * free from persistent map; - * (e.g., tmp file - free from working map at releae - * of last reference); - * free from persistent and working map; - * - * lsn - log sequence number; + * ipbmap - + * malock - + * xad list: + * pxd: + * + * maptype - + * allocate from persistent map; + * free from persistent map; + * (e.g., tmp file - free from working map at releae + * of last reference); + * free from persistent and working map; + * + * lsn - log sequence number; */ static void txAllocPMap(struct inode *ip, struct maplock * maplock, struct tblock * tblk) @@ -2478,9 +2482,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock, } /* - * txFreeMap() + * txFreeMap() * - * function: free from persistent and/or working map; + * function: free from persistent and/or working map; * * 
todo: optimization */ @@ -2579,9 +2583,9 @@ void txFreeMap(struct inode *ip, } /* - * txFreelock() + * txFreelock() * - * function: remove tlock from inode anonymous locklist + * function: remove tlock from inode anonymous locklist */ void txFreelock(struct inode *ip) { @@ -2619,7 +2623,7 @@ void txFreelock(struct inode *ip) } /* - * txAbort() + * txAbort() * * function: abort tx before commit; * @@ -2679,7 +2683,7 @@ void txAbort(tid_t tid, int dirty) } /* - * txLazyCommit(void) + * txLazyCommit(void) * * All transactions except those changing ipimap (COMMIT_FORCE) are * processed by this routine. This insures that the inode and block @@ -2728,7 +2732,7 @@ static void txLazyCommit(struct tblock * tblk) } /* - * jfs_lazycommit(void) + * jfs_lazycommit(void) * * To be run as a kernel daemon. If lbmIODone is called in an interrupt * context, or where blocking is not wanted, this routine will process @@ -2913,7 +2917,7 @@ void txResume(struct super_block *sb) } /* - * jfs_sync(void) + * jfs_sync(void) * * To be run as a kernel daemon. This is awakened when tlocks run low. * We write any inodes that have anonymous tlocks so they will become diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index 7863cf21afca..ab7288937019 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h @@ -94,7 +94,7 @@ extern struct tblock *TxBlock; /* transaction block table */ */ struct tlock { lid_t next; /* 2: index next lockword on tid locklist - * next lockword on freelist + * next lockword on freelist */ tid_t tid; /* 2: transaction id holding lock */ diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h index 09b252958687..649f9817accd 100644 --- a/fs/jfs/jfs_types.h +++ b/fs/jfs/jfs_types.h @@ -21,7 +21,7 @@ /* * jfs_types.h: * - * basic type/utility definitions + * basic type/utility definitions * * note: this header file must be the 1st include file * of JFS include list in all JFS .c file. 
@@ -54,8 +54,8 @@ struct timestruc_t { */ #define LEFTMOSTONE 0x80000000 -#define HIGHORDER 0x80000000u /* high order bit on */ -#define ONES 0xffffffffu /* all bit on */ +#define HIGHORDER 0x80000000u /* high order bit on */ +#define ONES 0xffffffffu /* all bit on */ /* * logical xd (lxd) @@ -148,7 +148,7 @@ typedef struct { #define sizeDXD(dxd) le32_to_cpu((dxd)->size) /* - * directory entry argument + * directory entry argument */ struct component_name { int namlen; @@ -160,14 +160,14 @@ struct component_name { * DASD limit information - stored in directory inode */ struct dasd { - u8 thresh; /* Alert Threshold (in percent) */ - u8 delta; /* Alert Threshold delta (in percent) */ + u8 thresh; /* Alert Threshold (in percent) */ + u8 delta; /* Alert Threshold delta (in percent) */ u8 rsrvd1; - u8 limit_hi; /* DASD limit (in logical blocks) */ - __le32 limit_lo; /* DASD limit (in logical blocks) */ + u8 limit_hi; /* DASD limit (in logical blocks) */ + __le32 limit_lo; /* DASD limit (in logical blocks) */ u8 rsrvd2[3]; - u8 used_hi; /* DASD usage (in logical blocks) */ - __le32 used_lo; /* DASD usage (in logical blocks) */ + u8 used_hi; /* DASD usage (in logical blocks) */ + __le32 used_lo; /* DASD usage (in logical blocks) */ }; #define DASDLIMIT(dasdp) \ diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index a386f48c73fc..7971f37534a3 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -60,7 +60,7 @@ int jfs_umount(struct super_block *sb) jfs_info("UnMount JFS: sb:0x%p", sb); /* - * update superblock and close log + * update superblock and close log * * if mounted read-write and log based recovery was enabled */ diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index acc97c46d8a4..1543906a2e0d 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* - * jfs_xtree.c: extent allocation descriptor B+-tree manager + * jfs_xtree.c: extent allocation 
descriptor B+-tree manager */ #include <linux/fs.h> @@ -32,30 +32,30 @@ /* * xtree local flag */ -#define XT_INSERT 0x00000001 +#define XT_INSERT 0x00000001 /* - * xtree key/entry comparison: extent offset + * xtree key/entry comparison: extent offset * * return: - * -1: k < start of extent - * 0: start_of_extent <= k <= end_of_extent - * 1: k > end_of_extent + * -1: k < start of extent + * 0: start_of_extent <= k <= end_of_extent + * 1: k > end_of_extent */ #define XT_CMP(CMP, K, X, OFFSET64)\ {\ - OFFSET64 = offsetXAD(X);\ - (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ - ((K) < OFFSET64) ? -1 : 0;\ + OFFSET64 = offsetXAD(X);\ + (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ + ((K) < OFFSET64) ? -1 : 0;\ } /* write a xad entry */ #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ {\ - (XAD)->flag = (FLAG);\ - XADoffset((XAD), (OFF));\ - XADlength((XAD), (LEN));\ - XADaddress((XAD), (ADDR));\ + (XAD)->flag = (FLAG);\ + XADoffset((XAD), (OFF));\ + XADlength((XAD), (LEN));\ + XADaddress((XAD), (ADDR));\ } #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) @@ -76,13 +76,13 @@ MP = NULL;\ RC = -EIO;\ }\ - }\ + }\ } /* for consistency */ #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) -#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ +#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) /* xtree entry parameter descriptor */ struct xtsplit { @@ -97,7 +97,7 @@ struct xtsplit { /* - * statistics + * statistics */ #ifdef CONFIG_JFS_STATISTICS static struct { @@ -136,7 +136,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); #endif /* _STILL_TO_PORT */ /* - * xtLookup() + * xtLookup() * * function: map a single page into a physical extent; */ @@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart, } /* - * compute the physical extent covering logical extent + * compute the physical extent covering logical extent * * N.B. 
search may have failed (e.g., hole in sparse file), * and returned the index of the next entry. @@ -220,27 +220,27 @@ int xtLookup(struct inode *ip, s64 lstart, /* - * xtLookupList() + * xtLookupList() * * function: map a single logical extent into a list of physical extent; * * parameter: - * struct inode *ip, - * struct lxdlist *lxdlist, lxd list (in) - * struct xadlist *xadlist, xad list (in/out) - * int flag) + * struct inode *ip, + * struct lxdlist *lxdlist, lxd list (in) + * struct xadlist *xadlist, xad list (in/out) + * int flag) * * coverage of lxd by xad under assumption of * . lxd's are ordered and disjoint. * . xad's are ordered and disjoint. * * return: - * 0: success + * 0: success * * note: a page being written (even a single byte) is backed fully, - * except the last page which is only backed with blocks - * required to cover the last byte; - * the extent backing a page is fully contained within an xad; + * except the last page which is only backed with blocks + * required to cover the last byte; + * the extent backing a page is fully contained within an xad; */ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, struct xadlist * xadlist, int flag) @@ -284,7 +284,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, return rc; /* - * compute the physical extent covering logical extent + * compute the physical extent covering logical extent * * N.B. search may have failed (e.g., hole in sparse file), * and returned the index of the next entry. 
@@ -343,7 +343,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, if (lstart >= size) goto mapend; - /* compare with the current xad */ + /* compare with the current xad */ goto compare1; } /* lxd is covered by xad */ @@ -430,7 +430,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, /* * lxd is partially covered by xad */ - else { /* (xend < lend) */ + else { /* (xend < lend) */ /* * get next xad @@ -477,22 +477,22 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, /* - * xtSearch() + * xtSearch() * - * function: search for the xad entry covering specified offset. + * function: search for the xad entry covering specified offset. * * parameters: - * ip - file object; - * xoff - extent offset; - * nextp - address of next extent (if any) for search miss - * cmpp - comparison result: - * btstack - traverse stack; - * flag - search process flag (XT_INSERT); + * ip - file object; + * xoff - extent offset; + * nextp - address of next extent (if any) for search miss + * cmpp - comparison result: + * btstack - traverse stack; + * flag - search process flag (XT_INSERT); * * returns: - * btstack contains (bn, index) of search path traversed to the entry. - * *cmpp is set to result of comparison with the entry returned. - * the page containing the entry is pinned at exit. + * btstack contains (bn, index) of search path traversed to the entry. + * *cmpp is set to result of comparison with the entry returned. + * the page containing the entry is pinned at exit. */ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, int *cmpp, struct btstack * btstack, int flag) @@ -517,7 +517,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, btstack->nsplit = 0; /* - * search down tree from root: + * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. 
@@ -642,7 +642,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, XT_CMP(cmp, xoff, &p->xad[index], t64); if (cmp == 0) { /* - * search hit + * search hit */ /* search hit - leaf page: * return the entry found @@ -692,7 +692,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, } /* - * search miss + * search miss * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or maxentry index. @@ -773,22 +773,22 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, } /* - * xtInsert() + * xtInsert() * * function: * * parameter: - * tid - transaction id; - * ip - file object; - * xflag - extent flag (XAD_NOTRECORDED): - * xoff - extent offset; - * xlen - extent length; - * xaddrp - extent address pointer (in/out): - * if (*xaddrp) - * caller allocated data extent at *xaddrp; - * else - * allocate data extent and return its xaddr; - * flag - + * tid - transaction id; + * ip - file object; + * xflag - extent flag (XAD_NOTRECORDED): + * xoff - extent offset; + * xlen - extent length; + * xaddrp - extent address pointer (in/out): + * if (*xaddrp) + * caller allocated data extent at *xaddrp; + * else + * allocate data extent and return its xaddr; + * flag - * * return: */ @@ -813,7 +813,7 @@ int xtInsert(tid_t tid, /* transaction id */ jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); /* - * search for the entry location at which to insert: + * search for the entry location at which to insert: * * xtFastSearch() and xtSearch() both returns (leaf page * pinned, index at which to insert). 
@@ -853,13 +853,13 @@ int xtInsert(tid_t tid, /* transaction id */ } /* - * insert entry for new extent + * insert entry for new extent */ xflag |= XAD_NEW; /* - * if the leaf page is full, split the page and - * propagate up the router entry for the new page from split + * if the leaf page is full, split the page and + * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. */ @@ -886,7 +886,7 @@ int xtInsert(tid_t tid, /* transaction id */ } /* - * insert the new entry into the leaf page + * insert the new entry into the leaf page */ /* * acquire a transaction lock on the leaf page; @@ -930,16 +930,16 @@ int xtInsert(tid_t tid, /* transaction id */ /* - * xtSplitUp() + * xtSplitUp() * * function: - * split full pages as propagating insertion up the tree + * split full pages as propagating insertion up the tree * * parameter: - * tid - transaction id; - * ip - file object; - * split - entry parameter descriptor; - * btstack - traverse stack from xtSearch() + * tid - transaction id; + * ip - file object; + * split - entry parameter descriptor; + * btstack - traverse stack from xtSearch() * * return: */ @@ -1199,22 +1199,22 @@ xtSplitUp(tid_t tid, /* - * xtSplitPage() + * xtSplitPage() * * function: - * split a full non-root page into - * original/split/left page and new right page - * i.e., the original/split page remains as left page. + * split a full non-root page into + * original/split/left page and new right page + * i.e., the original/split page remains as left page. * * parameter: - * int tid, - * struct inode *ip, - * struct xtsplit *split, - * struct metapage **rmpp, - * u64 *rbnp, + * int tid, + * struct inode *ip, + * struct xtsplit *split, + * struct metapage **rmpp, + * u64 *rbnp, * * return: - * Pointer to page in which to insert or NULL on error. + * Pointer to page in which to insert or NULL on error. 
*/ static int xtSplitPage(tid_t tid, struct inode *ip, @@ -1248,9 +1248,9 @@ xtSplitPage(tid_t tid, struct inode *ip, rbn = addressPXD(pxd); /* Allocate blocks to quota. */ - if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { - rc = -EDQUOT; - goto clean_up; + if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + rc = -EDQUOT; + goto clean_up; } quota_allocation += lengthPXD(pxd); @@ -1304,7 +1304,7 @@ xtSplitPage(tid_t tid, struct inode *ip, skip = split->index; /* - * sequential append at tail (after last entry of last page) + * sequential append at tail (after last entry of last page) * * if splitting the last page on a level because of appending * a entry to it (skip is maxentry), it's likely that the access is @@ -1342,7 +1342,7 @@ xtSplitPage(tid_t tid, struct inode *ip, } /* - * non-sequential insert (at possibly middle page) + * non-sequential insert (at possibly middle page) */ /* @@ -1465,25 +1465,24 @@ xtSplitPage(tid_t tid, struct inode *ip, /* - * xtSplitRoot() + * xtSplitRoot() * * function: - * split the full root page into - * original/root/split page and new right page - * i.e., root remains fixed in tree anchor (inode) and - * the root is copied to a single new right child page - * since root page << non-root page, and - * the split root page contains a single entry for the - * new right child page. + * split the full root page into original/root/split page and new + * right page + * i.e., root remains fixed in tree anchor (inode) and the root is + * copied to a single new right child page since root page << + * non-root page, and the split root page contains a single entry + * for the new right child page. * * parameter: - * int tid, - * struct inode *ip, - * struct xtsplit *split, - * struct metapage **rmpp) + * int tid, + * struct inode *ip, + * struct xtsplit *split, + * struct metapage **rmpp) * * return: - * Pointer to page in which to insert or NULL on error. + * Pointer to page in which to insert or NULL on error. 
*/ static int xtSplitRoot(tid_t tid, @@ -1505,7 +1504,7 @@ xtSplitRoot(tid_t tid, INCREMENT(xtStat.split); /* - * allocate a single (right) child page + * allocate a single (right) child page */ pxdlist = split->pxdlist; pxd = &pxdlist->pxd[pxdlist->npxd]; @@ -1573,7 +1572,7 @@ xtSplitRoot(tid_t tid, } /* - * reset the root + * reset the root * * init root with the single entry for the new right page * set the 1st entry offset to 0, which force the left-most key @@ -1610,7 +1609,7 @@ xtSplitRoot(tid_t tid, /* - * xtExtend() + * xtExtend() * * function: extend in-place; * @@ -1677,7 +1676,7 @@ int xtExtend(tid_t tid, /* transaction id */ goto extendOld; /* - * extent overflow: insert entry for new extent + * extent overflow: insert entry for new extent */ //insertNew: xoff = offsetXAD(xad) + MAXXLEN; @@ -1685,8 +1684,8 @@ int xtExtend(tid_t tid, /* transaction id */ nextindex = le16_to_cpu(p->header.nextindex); /* - * if the leaf page is full, insert the new entry and - * propagate up the router entry for the new page from split + * if the leaf page is full, insert the new entry and + * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. */ @@ -1731,7 +1730,7 @@ int xtExtend(tid_t tid, /* transaction id */ } } /* - * insert the new entry into the leaf page + * insert the new entry into the leaf page */ else { /* insert the new entry: mark the entry NEW */ @@ -1771,11 +1770,11 @@ int xtExtend(tid_t tid, /* transaction id */ #ifdef _NOTYET /* - * xtTailgate() + * xtTailgate() * * function: split existing 'tail' extent - * (split offset >= start offset of tail extent), and - * relocate and extend the split tail half; + * (split offset >= start offset of tail extent), and + * relocate and extend the split tail half; * * note: existing extent may or may not have been committed. 
* caller is responsible for pager buffer cache update, and @@ -1804,7 +1803,7 @@ int xtTailgate(tid_t tid, /* transaction id */ /* printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", - (ulong)xoff, xlen, (ulong)xaddr); + (ulong)xoff, xlen, (ulong)xaddr); */ /* there must exist extent to be tailgated */ @@ -1842,18 +1841,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", xad = &p->xad[index]; /* printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", - (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); + (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); */ if ((llen = xoff - offsetXAD(xad)) == 0) goto updateOld; /* - * partially replace extent: insert entry for new extent + * partially replace extent: insert entry for new extent */ //insertNew: /* - * if the leaf page is full, insert the new entry and - * propagate up the router entry for the new page from split + * if the leaf page is full, insert the new entry and + * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. 
*/ @@ -1898,7 +1897,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", } } /* - * insert the new entry into the leaf page + * insert the new entry into the leaf page */ else { /* insert the new entry: mark the entry NEW */ @@ -1955,17 +1954,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", #endif /* _NOTYET */ /* - * xtUpdate() + * xtUpdate() * * function: update XAD; * - * update extent for allocated_but_not_recorded or - * compressed extent; + * update extent for allocated_but_not_recorded or + * compressed extent; * * parameter: - * nxad - new XAD; - * logical extent of the specified XAD must be completely - * contained by an existing XAD; + * nxad - new XAD; + * logical extent of the specified XAD must be completely + * contained by an existing XAD; */ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) { /* new XAD */ @@ -2416,19 +2415,19 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p); /* - * xtAppend() + * xtAppend() * * function: grow in append mode from contiguous region specified ; * * parameter: - * tid - transaction id; - * ip - file object; - * xflag - extent flag: - * xoff - extent offset; - * maxblocks - max extent length; - * xlen - extent length (in/out); - * xaddrp - extent address pointer (in/out): - * flag - + * tid - transaction id; + * ip - file object; + * xflag - extent flag: + * xoff - extent offset; + * maxblocks - max extent length; + * xlen - extent length (in/out); + * xaddrp - extent address pointer (in/out): + * flag - * * return: */ @@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid, /* transaction id */ (ulong) xoff, maxblocks, xlen, (ulong) xaddr); /* - * search for the entry location at which to insert: + * search for the entry location at which to insert: * * xtFastSearch() and xtSearch() both returns (leaf page * pinned, index at which to insert). 
@@ -2482,13 +2481,13 @@ int xtAppend(tid_t tid, /* transaction id */ xlen = min(xlen, (int)(next - xoff)); //insert: /* - * insert entry for new extent + * insert entry for new extent */ xflag |= XAD_NEW; /* - * if the leaf page is full, split the page and - * propagate up the router entry for the new page from split + * if the leaf page is full, split the page and + * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. */ @@ -2545,7 +2544,7 @@ int xtAppend(tid_t tid, /* transaction id */ return 0; /* - * insert the new entry into the leaf page + * insert the new entry into the leaf page */ insertLeaf: /* @@ -2589,17 +2588,17 @@ int xtAppend(tid_t tid, /* transaction id */ /* - TBD for defragmentaion/reorganization - * - * xtDelete() + * xtDelete() * * function: - * delete the entry with the specified key. + * delete the entry with the specified key. * - * N.B.: whole extent of the entry is assumed to be deleted. + * N.B.: whole extent of the entry is assumed to be deleted. * * parameter: * * return: - * ENOENT: if the entry is not found. + * ENOENT: if the entry is not found. * * exception: */ @@ -2665,10 +2664,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) /* - TBD for defragmentaion/reorganization - * - * xtDeleteUp() + * xtDeleteUp() * * function: - * free empty pages as propagating deletion up the tree + * free empty pages as propagating deletion up the tree * * parameter: * @@ -2815,15 +2814,15 @@ xtDeleteUp(tid_t tid, struct inode *ip, /* - * NAME: xtRelocate() + * NAME: xtRelocate() * - * FUNCTION: relocate xtpage or data extent of regular file; - * This function is mainly used by defragfs utility. + * FUNCTION: relocate xtpage or data extent of regular file; + * This function is mainly used by defragfs utility. * - * NOTE: This routine does not have the logic to handle - * uncommitted allocated extent. 
The caller should call - * txCommit() to commit all the allocation before call - * this routine. + * NOTE: This routine does not have the logic to handle + * uncommitted allocated extent. The caller should call + * txCommit() to commit all the allocation before call + * this routine. */ int xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ @@ -2865,8 +2864,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); /* - * 1. get and validate the parent xtpage/xad entry - * covering the source extent to be relocated; + * 1. get and validate the parent xtpage/xad entry + * covering the source extent to be relocated; */ if (xtype == DATAEXT) { /* search in leaf entry */ @@ -2910,7 +2909,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ jfs_info("xtRelocate: parent xad entry validated."); /* - * 2. relocate the extent + * 2. relocate the extent */ if (xtype == DATAEXT) { /* if the extent is allocated-but-not-recorded @@ -2923,7 +2922,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ XT_PUTPAGE(pmp); /* - * cmRelocate() + * cmRelocate() * * copy target data pages to be relocated; * @@ -2945,8 +2944,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ pno = offset >> CM_L2BSIZE; npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; /* - npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - - (offset >> CM_L2BSIZE) + 1; + npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - + (offset >> CM_L2BSIZE) + 1; */ sxaddr = oxaddr; dxaddr = nxaddr; @@ -2981,7 +2980,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); jfs_info("xtRelocate: target data extent relocated."); - } else { /* (xtype == XTPAGE) */ + } else { /* (xtype == XTPAGE) */ /* * read in the target xtpage from the source extent; @@ -3026,16 +3025,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t 
* oxad, /* old XAD */ */ if (lmp) { BT_MARK_DIRTY(lmp, ip); - tlck = - txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); + tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); lp->header.next = cpu_to_le64(nxaddr); XT_PUTPAGE(lmp); } if (rmp) { BT_MARK_DIRTY(rmp, ip); - tlck = - txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); + tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); rp->header.prev = cpu_to_le64(nxaddr); XT_PUTPAGE(rmp); } @@ -3062,7 +3059,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ * scan may be skipped by commit() and logredo(); */ BT_MARK_DIRTY(mp, ip); - /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ + /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); xtlck = (struct xtlock *) & tlck->lock; @@ -3084,7 +3081,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ } /* - * 3. acquire maplock for the source extent to be freed; + * 3. acquire maplock for the source extent to be freed; * * acquire a maplock saving the src relocated extent address; * to free of the extent at commit time; @@ -3105,7 +3102,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ * is no buffer associated with this lock since the buffer * has been redirected to the target location. */ - else /* (xtype == XTPAGE) */ + else /* (xtype == XTPAGE) */ tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); pxdlock = (struct pxd_lock *) & tlck->lock; @@ -3115,7 +3112,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ pxdlock->index = 1; /* - * 4. update the parent xad entry for relocation; + * 4. 
update the parent xad entry for relocation; * * acquire tlck for the parent entry with XAD_NEW as entry * update which will write LOG_REDOPAGE and update bmap for @@ -3143,22 +3140,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ /* - * xtSearchNode() + * xtSearchNode() * - * function: search for the internal xad entry covering specified extent. - * This function is mainly used by defragfs utility. + * function: search for the internal xad entry covering specified extent. + * This function is mainly used by defragfs utility. * * parameters: - * ip - file object; - * xad - extent to find; - * cmpp - comparison result: - * btstack - traverse stack; - * flag - search process flag; + * ip - file object; + * xad - extent to find; + * cmpp - comparison result: + * btstack - traverse stack; + * flag - search process flag; * * returns: - * btstack contains (bn, index) of search path traversed to the entry. - * *cmpp is set to result of comparison with the entry returned. - * the page containing the entry is pinned at exit. + * btstack contains (bn, index) of search path traversed to the entry. + * *cmpp is set to result of comparison with the entry returned. + * the page containing the entry is pinned at exit. */ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ int *cmpp, struct btstack * btstack, int flag) @@ -3181,7 +3178,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ xaddr = addressXAD(xad); /* - * search down tree from root: + * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. 
@@ -3217,7 +3214,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ XT_CMP(cmp, xoff, &p->xad[index], t64); if (cmp == 0) { /* - * search hit + * search hit * * verify for exact match; */ @@ -3245,7 +3242,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ } /* - * search miss - non-leaf page: + * search miss - non-leaf page: * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or maxentry index. @@ -3268,15 +3265,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ /* - * xtRelink() + * xtRelink() * * function: - * link around a freed page. + * link around a freed page. * * Parameter: - * int tid, - * struct inode *ip, - * xtpage_t *p) + * int tid, + * struct inode *ip, + * xtpage_t *p) * * returns: */ @@ -3338,7 +3335,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) /* - * xtInitRoot() + * xtInitRoot() * * initialize file root (inline in inode) */ @@ -3385,42 +3382,42 @@ void xtInitRoot(tid_t tid, struct inode *ip) #define MAX_TRUNCATE_LEAVES 50 /* - * xtTruncate() + * xtTruncate() * * function: - * traverse for truncation logging backward bottom up; - * terminate at the last extent entry at the current subtree - * root page covering new down size. - * truncation may occur within the last extent entry. + * traverse for truncation logging backward bottom up; + * terminate at the last extent entry at the current subtree + * root page covering new down size. + * truncation may occur within the last extent entry. * * parameter: - * int tid, - * struct inode *ip, - * s64 newsize, - * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} + * int tid, + * struct inode *ip, + * s64 newsize, + * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} * * return: * * note: - * PWMAP: - * 1. truncate (non-COMMIT_NOLINK file) - * by jfs_truncate() or jfs_open(O_TRUNC): - * xtree is updated; + * PWMAP: + * 1. 
truncate (non-COMMIT_NOLINK file) + * by jfs_truncate() or jfs_open(O_TRUNC): + * xtree is updated; * 2. truncate index table of directory when last entry removed - * map update via tlock at commit time; - * PMAP: + * map update via tlock at commit time; + * PMAP: * Call xtTruncate_pmap instead - * WMAP: - * 1. remove (free zero link count) on last reference release - * (pmap has been freed at commit zero link count); - * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): - * xtree is updated; - * map update directly at truncation time; + * WMAP: + * 1. remove (free zero link count) on last reference release + * (pmap has been freed at commit zero link count); + * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): + * xtree is updated; + * map update directly at truncation time; * - * if (DELETE) - * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); - * else if (TRUNCATE) - * must write LOG_NOREDOPAGE for deleted index page; + * if (DELETE) + * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); + * else if (TRUNCATE) + * must write LOG_NOREDOPAGE for deleted index page; * * pages may already have been tlocked by anonymous transactions * during file growth (i.e., write) before truncation; @@ -3493,7 +3490,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) * retained in the new sized file. * if type is PMAP, the data and index pages are NOT * freed, and the data and index blocks are NOT freed - * from working map. + * from working map. * (this will allow continued access of data/index of * temporary file (zerolink count file truncated to zero-length)). 
*/ @@ -3542,7 +3539,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) goto getChild; /* - * leaf page + * leaf page */ freed = 0; @@ -3916,7 +3913,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) } /* - * internal page: go down to child page of current entry + * internal page: go down to child page of current entry */ getChild: /* save current parent entry for the child page */ @@ -3965,7 +3962,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) /* - * xtTruncate_pmap() + * xtTruncate_pmap() * * function: * Perform truncate to zero lenghth for deleted file, leaving the @@ -3974,9 +3971,9 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) * is committed to disk. * * parameter: - * tid_t tid, - * struct inode *ip, - * s64 committed_size) + * tid_t tid, + * struct inode *ip, + * s64 committed_size) * * return: new committed size * @@ -4050,7 +4047,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) } /* - * leaf page + * leaf page */ if (++locked_leaves > MAX_TRUNCATE_LEAVES) { @@ -4062,7 +4059,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) xoff = offsetXAD(xad); xlen = lengthXAD(xad); XT_PUTPAGE(mp); - return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; + return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; } tlck = txLock(tid, ip, mp, tlckXTREE); tlck->type = tlckXTREE | tlckFREE; @@ -4099,8 +4096,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) */ tlck = txLock(tid, ip, mp, tlckXTREE); xtlck = (struct xtlock *) & tlck->lock; - xtlck->hwm.offset = - le16_to_cpu(p->header.nextindex) - 1; + xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; tlck->type = tlckXTREE | tlckFREE; XT_PUTPAGE(mp); @@ -4118,7 +4114,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) else index--; /* - * internal page: go down to child page of current entry + * internal page: go down to child page of current 
entry */ getChild: /* save current parent entry for the child page */ diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h index 164f6f2b1019..70815c8a3d6a 100644 --- a/fs/jfs/jfs_xtree.h +++ b/fs/jfs/jfs_xtree.h @@ -19,14 +19,14 @@ #define _H_JFS_XTREE /* - * jfs_xtree.h: extent allocation descriptor B+-tree manager + * jfs_xtree.h: extent allocation descriptor B+-tree manager */ #include "jfs_btree.h" /* - * extent allocation descriptor (xad) + * extent allocation descriptor (xad) */ typedef struct xad { unsigned flag:8; /* 1: flag */ @@ -38,30 +38,30 @@ typedef struct xad { __le32 addr2; /* 4: address in unit of fsblksize */ } xad_t; /* (16) */ -#define MAXXLEN ((1 << 24) - 1) +#define MAXXLEN ((1 << 24) - 1) -#define XTSLOTSIZE 16 -#define L2XTSLOTSIZE 4 +#define XTSLOTSIZE 16 +#define L2XTSLOTSIZE 4 /* xad_t field construction */ #define XADoffset(xad, offset64)\ {\ - (xad)->off1 = ((u64)offset64) >> 32;\ - (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ + (xad)->off1 = ((u64)offset64) >> 32;\ + (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ } #define XADaddress(xad, address64)\ {\ - (xad)->addr1 = ((u64)address64) >> 32;\ - (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ + (xad)->addr1 = ((u64)address64) >> 32;\ + (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ } -#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) +#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) /* xad_t field extraction */ #define offsetXAD(xad)\ - ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) + ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) #define addressXAD(xad)\ - ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) -#define lengthXAD(xad) __le24_to_cpu((xad)->len) + ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) +#define lengthXAD(xad) __le24_to_cpu((xad)->len) /* xad list */ struct xadlist { @@ -71,22 +71,22 @@ struct xadlist { }; /* xad_t flags */ -#define XAD_NEW 
0x01 /* new */ -#define XAD_EXTENDED 0x02 /* extended */ -#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ +#define XAD_NEW 0x01 /* new */ +#define XAD_EXTENDED 0x02 /* extended */ +#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ #define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ -#define XAD_COW 0x10 /* copy-on-write */ +#define XAD_COW 0x10 /* copy-on-write */ /* possible values for maxentry */ -#define XTROOTINITSLOT_DIR 6 -#define XTROOTINITSLOT 10 -#define XTROOTMAXSLOT 18 -#define XTPAGEMAXSLOT 256 -#define XTENTRYSTART 2 +#define XTROOTINITSLOT_DIR 6 +#define XTROOTINITSLOT 10 +#define XTROOTMAXSLOT 18 +#define XTPAGEMAXSLOT 256 +#define XTENTRYSTART 2 /* - * xtree page: + * xtree page: */ typedef union { struct xtheader { @@ -106,7 +106,7 @@ typedef union { } xtpage_t; /* - * external declaration + * external declaration */ extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, int *pflag, s64 * paddr, int *plen, int flag); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 41c204771262..25161c4121e4 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -328,7 +328,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) * dentry - child directory dentry * * RETURN: -EINVAL - if name is . or .. - * -EINVAL - if . or .. exist but are invalid. + * -EINVAL - if . or .. exist but are invalid. * errors from subroutines * * note: @@ -517,7 +517,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) inode_dec_link_count(ip); /* - * commit zero link count object + * commit zero link count object */ if (ip->i_nlink == 0) { assert(!test_cflag(COMMIT_Nolink, ip)); @@ -596,7 +596,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) /* * NAME: commitZeroLink() * - * FUNCTION: for non-directory, called by jfs_remove(), + * FUNCTION: for non-directory, called by jfs_remove(), * truncate a regular file, directory or symbolic * link to zero length. 
return 0 if type is not * one of these. @@ -676,7 +676,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) /* * NAME: jfs_free_zero_link() * - * FUNCTION: for non-directory, called by iClose(), + * FUNCTION: for non-directory, called by iClose(), * free resources of a file from cache and WORKING map * for a file previously committed with zero link count * while associated with a pager object, @@ -855,12 +855,12 @@ static int jfs_link(struct dentry *old_dentry, * NAME: jfs_symlink(dip, dentry, name) * * FUNCTION: creates a symbolic link to <symlink> by name <name> - * in directory <dip> + * in directory <dip> * - * PARAMETER: dip - parent directory vnode - * dentry - dentry of symbolic link - * name - the path name of the existing object - * that will be the source of the link + * PARAMETER: dip - parent directory vnode + * dentry - dentry of symbolic link + * name - the path name of the existing object + * that will be the source of the link * * RETURN: errors from subroutines * @@ -1052,9 +1052,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, /* - * NAME: jfs_rename + * NAME: jfs_rename * - * FUNCTION: rename a file or directory + * FUNCTION: rename a file or directory */ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) @@ -1331,9 +1331,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* - * NAME: jfs_mknod + * NAME: jfs_mknod * - * FUNCTION: Create a special file (device) + * FUNCTION: Create a special file (device) */ static int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 79d625f3f733..71984ee95346 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -29,17 +29,17 @@ #include "jfs_txnmgr.h" #include "jfs_debug.h" -#define BITSPERPAGE (PSIZE << 3) -#define L2MEGABYTE 20 -#define MEGABYTE (1 << L2MEGABYTE) -#define MEGABYTE32 (MEGABYTE << 5) +#define 
BITSPERPAGE (PSIZE << 3) +#define L2MEGABYTE 20 +#define MEGABYTE (1 << L2MEGABYTE) +#define MEGABYTE32 (MEGABYTE << 5) /* convert block number to bmap file page number */ #define BLKTODMAPN(b)\ - (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) + (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) /* - * jfs_extendfs() + * jfs_extendfs() * * function: extend file system; * @@ -48,9 +48,9 @@ * workspace space * * input: - * new LVSize: in LV blocks (required) - * new LogSize: in LV blocks (optional) - * new FSSize: in LV blocks (optional) + * new LVSize: in LV blocks (required) + * new LogSize: in LV blocks (optional) + * new FSSize: in LV blocks (optional) * * new configuration: * 1. set new LogSize as specified or default from new LVSize; @@ -125,8 +125,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) } /* - * reconfigure LV spaces - * --------------------- + * reconfigure LV spaces + * --------------------- * * validate new size, or, if not specified, determine new size */ @@ -198,7 +198,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) log_formatted = 1; } /* - * quiesce file system + * quiesce file system * * (prepare to move the inline log and to prevent map update) * @@ -270,8 +270,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) } /* - * extend block allocation map - * --------------------------- + * extend block allocation map + * --------------------------- * * extendfs() for new extension, retry after crash recovery; * @@ -283,7 +283,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) * s_size: aggregate size in physical blocks; */ /* - * compute the new block allocation map configuration + * compute the new block allocation map configuration * * map dinode: * di_size: map file size in byte; @@ -301,7 +301,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) newNpages = BLKTODMAPN(t64) + 1; /* - * extend map from current map 
(WITHOUT growing mapfile) + * extend map from current map (WITHOUT growing mapfile) * * map new extension with unmapped part of the last partial * dmap page, if applicable, and extra page(s) allocated @@ -341,8 +341,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) XSize -= nblocks; /* - * grow map file to cover remaining extension - * and/or one extra dmap page for next extendfs(); + * grow map file to cover remaining extension + * and/or one extra dmap page for next extendfs(); * * allocate new map pages and its backing blocks, and * update map file xtree @@ -422,8 +422,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) dbFinalizeBmap(ipbmap); /* - * update inode allocation map - * --------------------------- + * update inode allocation map + * --------------------------- * * move iag lists from old to new iag; * agstart field is not updated for logredo() to reconstruct @@ -442,8 +442,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) } /* - * finalize - * -------- + * finalize + * -------- * * extension is committed when on-disk super block is * updated with new descriptors: logredo will recover @@ -480,7 +480,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) diFreeSpecial(ipbmap2); /* - * update superblock + * update superblock */ if ((rc = readSuper(sb, &bh))) goto error_out; @@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) resume: /* - * resume file system transactions + * resume file system transactions */ txResume(sb); diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index b753ba216450..b2375f0774b7 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -63,9 +63,9 @@ * * On-disk: * - * FEALISTs are stored on disk using blocks allocated by dbAlloc() and - * written directly. An EA list may be in-lined in the inode if there is - * sufficient room available. 
+ * FEALISTs are stored on disk using blocks allocated by dbAlloc() and + * written directly. An EA list may be in-lined in the inode if there is + * sufficient room available. */ struct ea_buffer { @@ -590,7 +590,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) size_check: if (EALIST_SIZE(ea_buf->xattr) != ea_size) { printk(KERN_ERR "ea_get: invalid extended attribute\n"); - dump_mem("xattr", ea_buf->xattr, ea_size); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, + ea_buf->xattr, ea_size, 1); ea_release(inode, ea_buf); rc = -EIO; goto clean_up; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 96070bff93fc..572601e98dcd 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -44,9 +44,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, */ static struct nlm_host * nlm_lookup_host(int server, const struct sockaddr_in *sin, - int proto, int version, - const char *hostname, - int hostname_len) + int proto, int version, const char *hostname, + int hostname_len, const struct sockaddr_in *ssin) { struct hlist_head *chain; struct hlist_node *pos; @@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, struct nsm_handle *nsm = NULL; int hash; - dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n", + dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT + ", p=%d, v=%d, my role=%s, name=%.*s)\n", + NIPQUAD(ssin->sin_addr.s_addr), NIPQUAD(sin->sin_addr.s_addr), proto, version, server? "server" : "client", hostname_len, @@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, continue; if (host->h_server != server) continue; + if (!nlm_cmp_addr(&host->h_saddr, ssin)) + continue; /* Move to head of hash chain. */ hlist_del(&host->h_hash); @@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, host->h_name = nsm->sm_name; host->h_addr = *sin; host->h_addr.sin_port = 0; /* ouch! 
*/ + host->h_saddr = *ssin; host->h_version = version; host->h_proto = proto; host->h_rpcclnt = NULL; @@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host) */ nsm_unmonitor(host); - if ((clnt = host->h_rpcclnt) != NULL) { - if (atomic_read(&clnt->cl_users)) { - printk(KERN_WARNING - "lockd: active RPC handle\n"); - clnt->cl_dead = 1; - } else { - rpc_destroy_client(host->h_rpcclnt); - } - } + clnt = host->h_rpcclnt; + if (clnt != NULL) + rpc_shutdown_client(clnt); kfree(host); } @@ -180,8 +178,10 @@ struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, const char *hostname, int hostname_len) { + struct sockaddr_in ssin = {0}; + return nlm_lookup_host(0, sin, proto, version, - hostname, hostname_len); + hostname, hostname_len, &ssin); } /* @@ -191,9 +191,12 @@ struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *rqstp, const char *hostname, int hostname_len) { + struct sockaddr_in ssin = {0}; + + ssin.sin_addr = rqstp->rq_daddr.addr; return nlm_lookup_host(1, svc_addr_in(rqstp), rqstp->rq_prot, rqstp->rq_vers, - hostname, hostname_len); + hostname, hostname_len, &ssin); } /* @@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host) { struct rpc_clnt *clnt; - dprintk("lockd: nlm_bind_host(%08x)\n", - (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); + dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n", + NIPQUAD(host->h_saddr.sin_addr), + NIPQUAD(host->h_addr.sin_addr)); /* Lock host handle */ mutex_lock(&host->h_mutex); @@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host) .protocol = host->h_proto, .address = (struct sockaddr *)&host->h_addr, .addrsize = sizeof(host->h_addr), + .saddress = (struct sockaddr *)&host->h_saddr, .timeout = &timeparms, .servername = host->h_name, .program = &nlm_program, diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 2102e2d0134d..3353ed8421a7 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res 
*res) status); else status = 0; + rpc_shutdown_client(clnt); out: return status; } @@ -138,7 +139,6 @@ nsm_create(void) .program = &nsm_program, .version = SM_VERSION, .authflavor = RPC_AUTH_NULL, - .flags = (RPC_CLNT_CREATE_ONESHOT), }; return rpc_create(&args); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 126b1bf02c0e..26809325469c 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp) /* Process request with signals blocked, but allow SIGKILL. */ allow_signal(SIGKILL); - /* kick rpciod */ - rpciod_up(); - dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); if (!nlm_timeout) @@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp) /* Exit the RPC thread */ svc_exit_thread(rqstp); - /* release rpciod */ - rpciod_down(); - /* Release module */ unlock_kernel(); module_put_and_exit(0); diff --git a/fs/minix/file.c b/fs/minix/file.c index f92baa1d7570..17765f697e50 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = minix_sync_file, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; const struct inode_operations minix_file_inode_operations = { diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index f4580b44eef4..b55cb236cf74 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ pagelist.o proc.o read.o symlink.o unlink.o \ - write.o namespace.o -nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o + write.o namespace.o mount_clnt.o +nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 881fa4900923..ccb455053ee4 100644 --- a/fs/nfs/client.c +++ 
b/fs/nfs/client.c @@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, int nfsversion) { struct nfs_client *clp; - int error; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto error_1; - } - __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); - if (nfsversion == 4) { if (nfs_callback_up() < 0) goto error_2; @@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, #ifdef CONFIG_NFS_V4 init_rwsem(&clp->cl_sem); INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); @@ -154,9 +143,6 @@ error_3: if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); error_2: - rpciod_down(); - __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); -error_1: kfree(clp); error_0: return NULL; @@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) #ifdef CONFIG_NFS_V4 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) nfs4_kill_renewd(clp); - while (!list_empty(&clp->cl_unused)) { - struct nfs4_state_owner *sp; - - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } - BUG_ON(!list_empty(&clp->cl_state_owners)); + BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); #endif @@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp) if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); - if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) - rpciod_down(); - kfree(clp->cl_hostname); kfree(clp); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 
7f37d1bea83f..20ac403469a0 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation) kfree(delegation); } +static void nfs_free_delegation_callback(struct rcu_head *head) +{ + struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); + + nfs_free_delegation(delegation); +} + static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) { struct inode *inode = state->inode; @@ -57,7 +64,7 @@ out_err: return status; } -static void nfs_delegation_claim_opens(struct inode *inode) +static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; @@ -72,9 +79,11 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; + if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); - err = nfs4_open_delegation_recall(ctx->dentry, state); + err = nfs4_open_delegation_recall(ctx, state, stateid); if (err >= 0) err = nfs_delegation_claim_locks(ctx, state); put_nfs_open_context(ctx); @@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct struct nfs_delegation *delegation; int status = 0; - /* Ensure we first revalidate the attributes and page cache! 
*/ - if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))) - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); if (delegation == NULL) return -ENOMEM; @@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation->inode = inode; spin_lock(&clp->cl_lock); - if (nfsi->delegation == NULL) { - list_add(&delegation->super_list, &clp->cl_delegations); - nfsi->delegation = delegation; + if (rcu_dereference(nfsi->delegation) == NULL) { + list_add_rcu(&delegation->super_list, &clp->cl_delegations); nfsi->delegation_state = delegation->type; + rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; } else { if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, @@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct status = -EIO; } } + + /* Ensure we revalidate the attributes and page cache! */ + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_REVAL_FORCED; + spin_unlock(&inode->i_lock); + spin_unlock(&clp->cl_lock); kfree(delegation); return status; @@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * int res = 0; res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); - nfs_free_delegation(delegation); + call_rcu(&delegation->rcu, nfs_free_delegation_callback); return res; } @@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode) /* * Basic procedure for returning a delegation to the server */ -int __nfs_inode_return_delegation(struct inode *inode) +static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_delegation *delegation; - int res = 0; nfs_msync_inode(inode); down_read(&clp->cl_sem); /* Guard against new delegated open calls */ 
down_write(&nfsi->rwsem); - spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; - if (delegation != NULL) { - list_del_init(&delegation->super_list); - nfsi->delegation = NULL; - nfsi->delegation_state = 0; - } - spin_unlock(&clp->cl_lock); - nfs_delegation_claim_opens(inode); + nfs_delegation_claim_opens(inode, &delegation->stateid); up_write(&nfsi->rwsem); up_read(&clp->cl_sem); nfs_msync_inode(inode); - if (delegation != NULL) - res = nfs_do_return_delegation(inode, delegation); - return res; + return nfs_do_return_delegation(inode, delegation); +} + +static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) +{ + struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); + + if (delegation == NULL) + goto nomatch; + if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) + goto nomatch; + list_del_rcu(&delegation->super_list); + nfsi->delegation_state = 0; + rcu_assign_pointer(nfsi->delegation, NULL); + return delegation; +nomatch: + return NULL; +} + +int nfs_inode_return_delegation(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int err = 0; + + if (rcu_dereference(nfsi->delegation) != NULL) { + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(nfsi, NULL); + spin_unlock(&clp->cl_lock); + if (delegation != NULL) + err = __nfs_inode_return_delegation(inode, delegation); + } + return err; } /* @@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb) if (clp == NULL) return; restart: - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if (delegation->inode->i_sb != sb) continue; inode = igrab(delegation->inode); if (inode == NULL) continue; + 
spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + rcu_read_unlock(); + if (delegation != NULL) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } static int nfs_do_expire_all_delegations(void *ptr) @@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr) allow_signal(SIGKILL); restart: - spin_lock(&clp->cl_lock); if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) goto out; if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) goto out; - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { inode = igrab(delegation->inode); if (inode == NULL) continue; + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + rcu_read_unlock(); + if (delegation) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } + rcu_read_unlock(); out: - spin_unlock(&clp->cl_lock); nfs_put_client(clp); module_put_and_exit(0); } @@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp) if (clp == NULL) return; restart: - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { inode = igrab(delegation->inode); if (inode == NULL) continue; + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); - nfs_inode_return_delegation(inode); + rcu_read_unlock(); + if (delegation != NULL) + __nfs_inode_return_delegation(inode, delegation); iput(inode); goto restart; } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } struct recall_threadargs { 
@@ -316,21 +361,14 @@ static int recall_thread(void *data) down_read(&clp->cl_sem); down_write(&nfsi->rwsem); spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; - if (delegation != NULL && memcmp(delegation->stateid.data, - args->stateid->data, - sizeof(delegation->stateid.data)) == 0) { - list_del_init(&delegation->super_list); - nfsi->delegation = NULL; - nfsi->delegation_state = 0; + delegation = nfs_detach_delegation_locked(nfsi, args->stateid); + if (delegation != NULL) args->result = 0; - } else { - delegation = NULL; + else args->result = -ENOENT; - } spin_unlock(&clp->cl_lock); complete(&args->started); - nfs_delegation_claim_opens(inode); + nfs_delegation_claim_opens(inode, args->stateid); up_write(&nfsi->rwsem); up_read(&clp->cl_sem); nfs_msync_inode(inode); @@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs { struct nfs_delegation *delegation; struct inode *res = NULL; - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { res = igrab(delegation->inode); break; } } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); return res; } @@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs void nfs_delegation_mark_reclaim(struct nfs_client *clp) { struct nfs_delegation *delegation; - spin_lock(&clp->cl_lock); - list_for_each_entry(delegation, &clp->cl_delegations, super_list) + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); } /* @@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp) */ void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { - struct nfs_delegation *delegation, *n; - 
LIST_HEAD(head); - spin_lock(&clp->cl_lock); - list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) { + struct nfs_delegation *delegation; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) continue; - list_move(&delegation->super_list, &head); - NFS_I(delegation->inode)->delegation = NULL; - NFS_I(delegation->inode)->delegation_state = 0; - } - spin_unlock(&clp->cl_lock); - while(!list_empty(&head)) { - delegation = list_entry(head.next, struct nfs_delegation, super_list); - list_del(&delegation->super_list); - nfs_free_delegation(delegation); + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL); + spin_unlock(&clp->cl_lock); + rcu_read_unlock(); + if (delegation != NULL) + call_rcu(&delegation->rcu, nfs_free_delegation_callback); + goto restart; } + rcu_read_unlock(); } int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - int res = 0; + int ret = 0; - if (nfsi->delegation_state == 0) - return 0; - spin_lock(&clp->cl_lock); - delegation = nfsi->delegation; + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); if (delegation != NULL) { memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); - res = 1; + ret = 1; } - spin_unlock(&clp->cl_lock); - return res; + rcu_read_unlock(); + return ret; } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 2cfd4b24c7fe..5874ce7fdbae 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -22,11 +22,12 @@ struct nfs_delegation { long flags; loff_t maxsize; __u64 change_attr; + struct rcu_head rcu; }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, 
struct rpc_cred *cred, struct nfs_openres *res); -int __nfs_inode_return_delegation(struct inode *inode); +int nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); @@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); -int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); static inline int nfs_have_delegation(struct inode *inode, int flags) { + struct nfs_delegation *delegation; + int ret = 0; + flags &= FMODE_READ|FMODE_WRITE; - smp_rmb(); - if ((NFS_I(inode)->delegation_state & flags) == flags) - return 1; - return 0; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL && (delegation->type & flags) == flags) + ret = 1; + rcu_read_unlock(); + return ret; } -static inline int nfs_inode_return_delegation(struct inode *inode) -{ - int err = 0; - - if (NFS_I(inode)->delegation != NULL) - err = __nfs_inode_return_delegation(inode); - return err; -} #else static inline int nfs_have_delegation(struct inode *inode, int flags) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c27258b5d3e1..322141f4ab48 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) return (nd->intent.open.flags & O_EXCL) != 0; } -static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, - struct nfs_fh *fh, struct 
nfs_fattr *fattr) +static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) - /* Revalidate fsid on root dir */ - return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); + /* Revalidate fsid using the parent directory */ + return __nfs_revalidate_inode(server, dir); return 0; } @@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); + error = nfs_reval_fsid(dir, &fattr); if (error < 0) { res = ERR_PTR(error); goto out_unlock; @@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd && (nd->flags & LOOKUP_CREATE)) + if ((nd->flags & LOOKUP_CREATE) != 0) open_flags = nd->intent.open.flags; lock_kernel(); @@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym lock_kernel(); - page = alloc_page(GFP_KERNEL); + page = alloc_page(GFP_HIGHUSER); if (!page) { unlock_kernel(); return -ENOMEM; @@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) struct nfs_inode *nfsi; struct nfs_access_entry *cache; - spin_lock(&nfs_access_lru_lock); restart: + spin_lock(&nfs_access_lru_lock); list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; @@ -1770,6 +1769,7 @@ remove_lru_entry: clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); } spin_unlock(&inode->i_lock); + spin_unlock(&nfs_access_lru_lock); iput(inode); goto restart; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 00eee87510fe..a5c82b6f3b45 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = { static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long 
user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; size_t rsize = NFS_SERVER(inode)->rsize; unsigned int pgbase; int result; @@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo break; } if ((unsigned)result < data->npages) { - nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_release(data); - break; + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_readdata_release(data); + break; + } + bytes -= pgbase; + data->npages = result; } get_dreq(dreq); @@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = { static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int pgbase; int result; @@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l break; } if ((unsigned)result < data->npages) { - nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_release(data); - break; + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_writedata_release(data); + break; + } + bytes -= pgbase; + data->npages = result; } get_dreq(dreq); @@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, (unsigned long) count, (long long) pos); if (nr_segs != 1) - return -EINVAL; - - if (count < 0) goto out; + retval = -EFAULT; if (!access_ok(VERIFY_WRITE, buf, count)) goto out; @@ -814,7 +822,7 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - ssize_t 
retval; + ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; /* XXX: temporary */ @@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, (unsigned long) count, (long long) pos); if (nr_segs != 1) - return -EINVAL; + goto out; retval = generic_write_checks(file, &pos, &count, 0); if (retval) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9eb8eb4e4a08..8689b736fdd9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *); static int nfs_file_release(struct inode *, struct file *); static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); static int nfs_file_mmap(struct file *, struct vm_area_struct *); -static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); +static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t count, unsigned int flags); static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, @@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = { .fsync = nfs_fsync, .lock = nfs_lock, .flock = nfs_flock, - .sendfile = nfs_file_sendfile, + .splice_read = nfs_file_splice_read, .check_flags = nfs_check_flags, }; @@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, } static ssize_t -nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, - read_actor_t actor, void *target) +nfs_file_splice_read(struct file *filp, loff_t *ppos, + struct pipe_inode_info *pipe, size_t count, + unsigned int flags) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; ssize_t res; - dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n", + dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n", dentry->d_parent->d_name.name, 
dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) - res = generic_file_sendfile(filp, ppos, count, actor, target); + res = generic_file_splice_read(filp, ppos, pipe, count, flags); return res; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd9f5a836592..3d9fccf4ef93 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - atomic_set(&ctx->count, 1); - ctx->dentry = dget(dentry); - ctx->vfsmnt = mntget(mnt); + ctx->path.dentry = dget(dentry); + ctx->path.mnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; ctx->dir_cookie = 0; + kref_init(&ctx->kref); } return ctx; } @@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->count); + kref_get(&ctx->kref); return ctx; } -void put_nfs_open_context(struct nfs_open_context *ctx) +static void nfs_free_open_context(struct kref *kref) { - if (atomic_dec_and_test(&ctx->count)) { - if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->dentry->d_inode; - spin_lock(&inode->i_lock); - list_del(&ctx->list); - spin_unlock(&inode->i_lock); - } - if (ctx->state != NULL) - nfs4_close_state(ctx->state, ctx->mode); - if (ctx->cred != NULL) - put_rpccred(ctx->cred); - dput(ctx->dentry); - mntput(ctx->vfsmnt); - kfree(ctx); + struct nfs_open_context *ctx = container_of(kref, + struct nfs_open_context, kref); + + if (!list_empty(&ctx->list)) { + struct inode *inode = ctx->path.dentry->d_inode; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); } + if (ctx->state != NULL) + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); + if 
(ctx->cred != NULL) + put_rpccred(ctx->cred); + dput(ctx->path.dentry); + mntput(ctx->path.mnt); + kfree(ctx); +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + kref_put(&ctx->kref, nfs_free_open_context); } /* @@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) goto out_changed; server = NFS_SERVER(inode); - /* Update the fsid if and only if this is the root directory */ - if (inode == inode->i_sb->s_root->d_inode + /* Update the fsid? */ + if (S_ISDIR(inode->i_mode) && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) server->fsid = fattr->fsid; @@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid &= ~NFS_INO_INVALID_DATA; if (data_stable) invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); - if (!nfs_have_delegation(inode, FMODE_READ)) + if (!nfs_have_delegation(inode, FMODE_READ) || + (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; + nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; return 0; out_changed: @@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ void nfs4_clear_inode(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - /* If we are holding a delegation, return it! 
*/ nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); - /* Now clear out any remaining state */ - while (!list_empty(&nfsi->open_states)) { - struct nfs4_state *state; - - state = list_entry(nfsi->open_states.next, - struct nfs4_state, - inode_states); - dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", - __FUNCTION__, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - state); - BUG_ON(atomic_read(&state->count) != 1); - nfs4_close_state(state, state->state); - } } #endif @@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag struct nfs_inode *nfsi = (struct nfs_inode *) foo; inode_init_once(&nfsi->vfs_inode); - spin_lock_init(&nfsi->req_lock); - INIT_LIST_HEAD(&nfsi->dirty); - INIT_LIST_HEAD(&nfsi->commit); INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); - nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; nfs4_init_once(nfsi); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ad2b40db1e65..76cf55d57101 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) /* * Calculate the number of 512byte blocks used. */ -static inline unsigned long nfs_calc_block_size(u64 tsize) +static inline blkcnt_t nfs_calc_block_size(u64 tsize) { - loff_t used = (tsize + 511) >> 9; + blkcnt_t used = (tsize + 511) >> 9; return (used > ULONG_MAX) ? ULONG_MAX : used; } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index ca5a266a3140..8afd9f7e7a97 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -1,7 +1,5 @@ /* - * linux/fs/nfs/mount_clnt.c - * - * MOUNT client to support NFSroot. 
+ * In-kernel MOUNT protocol client * * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de> */ @@ -18,33 +16,31 @@ #include <linux/nfs_fs.h> #ifdef RPC_DEBUG -# define NFSDBG_FACILITY NFSDBG_ROOT +# define NFSDBG_FACILITY NFSDBG_MOUNT #endif -/* -#define MOUNT_PROGRAM 100005 -#define MOUNT_VERSION 1 -#define MOUNT_MNT 1 -#define MOUNT_UMNT 3 - */ - -static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *, - int, int); static struct rpc_program mnt_program; struct mnt_fhstatus { - unsigned int status; - struct nfs_fh * fh; + u32 status; + struct nfs_fh *fh; }; -/* - * Obtain an NFS file handle for the given host and path +/** + * nfs_mount - Obtain an NFS file handle for the given host and path + * @addr: pointer to server's address + * @len: size of server's address + * @hostname: name of server host, or NULL + * @path: pointer to string containing export path to mount + * @version: mount version to use for this request + * @protocol: transport protocol to use for this request + * @fh: pointer to location to place returned file handle + * + * Uses default timeout parameters specified by underlying transport. 
*/ -int -nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, - int version, int protocol) +int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, + int version, int protocol, struct nfs_fh *fh) { - struct rpc_clnt *mnt_clnt; struct mnt_fhstatus result = { .fh = fh }; @@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, .rpc_argp = path, .rpc_resp = &result, }; - char hostname[32]; + struct rpc_create_args args = { + .protocol = protocol, + .address = addr, + .addrsize = len, + .servername = hostname, + .program = &mnt_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_INTR, + }; + struct rpc_clnt *mnt_clnt; int status; - dprintk("NFS: nfs_mount(%08x:%s)\n", - (unsigned)ntohl(addr->sin_addr.s_addr), path); + dprintk("NFS: sending MNT request for %s:%s\n", + (hostname ? hostname : "server"), path); - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); - mnt_clnt = mnt_create(hostname, addr, version, protocol); + mnt_clnt = rpc_create(&args); if (IS_ERR(mnt_clnt)) - return PTR_ERR(mnt_clnt); + goto out_clnt_err; if (version == NFS_MNT3_VERSION) msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; @@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; status = rpc_call_sync(mnt_clnt, &msg, 0); - return status < 0? status : (result.status? 
-EACCES : 0); -} + rpc_shutdown_client(mnt_clnt); -static struct rpc_clnt * -mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, - int protocol) -{ - struct rpc_create_args args = { - .protocol = protocol, - .address = (struct sockaddr *)srvaddr, - .addrsize = sizeof(*srvaddr), - .servername = hostname, - .program = &mnt_program, - .version = version, - .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_INTR), - }; + if (status < 0) + goto out_call_err; + if (result.status != 0) + goto out_mnt_err; + + dprintk("NFS: MNT request succeeded\n"); + status = 0; + +out: + return status; + +out_clnt_err: + status = PTR_ERR(mnt_clnt); + dprintk("NFS: failed to create RPC client, status=%d\n", status); + goto out; + +out_call_err: + dprintk("NFS: failed to start MNT request, status=%d\n", status); + goto out; - return rpc_create(&args); +out_mnt_err: + dprintk("NFS: MNT server returned result %d\n", result.status); + status = -EACCES; + goto out; } /* * XDR encode/decode functions for MOUNT */ -static int -xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) +static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, + const char *path) { p = xdr_encode_string(p, path); @@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path) return 0; } -static int -xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) +static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, + struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; @@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) return 0; } -static int -xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) +static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, + struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; @@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus 
*res) #define MNT_fhstatus_sz (1 + 8) #define MNT_fhstatus3_sz (1 + 16) -static struct rpc_procinfo mnt_procedures[] = { -[MNTPROC_MNT] = { - .p_proc = MNTPROC_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus_sz, - .p_statidx = MNTPROC_MNT, - .p_name = "MOUNT", +static struct rpc_procinfo mnt_procedures[] = { + [MNTPROC_MNT] = { + .p_proc = MNTPROC_MNT, + .p_encode = (kxdrproc_t) xdr_encode_dirpath, + .p_decode = (kxdrproc_t) xdr_decode_fhstatus, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus_sz, + .p_statidx = MNTPROC_MNT, + .p_name = "MOUNT", }, }; static struct rpc_procinfo mnt3_procedures[] = { -[MOUNTPROC3_MNT] = { - .p_proc = MOUNTPROC3_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus3_sz, - .p_statidx = MOUNTPROC3_MNT, - .p_name = "MOUNT", + [MOUNTPROC3_MNT] = { + .p_proc = MOUNTPROC3_MNT, + .p_encode = (kxdrproc_t) xdr_encode_dirpath, + .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus3_sz, + .p_statidx = MOUNTPROC3_MNT, + .p_name = "MOUNT", }, }; -static struct rpc_version mnt_version1 = { - .number = 1, - .nrprocs = 2, - .procs = mnt_procedures +static struct rpc_version mnt_version1 = { + .number = 1, + .nrprocs = 2, + .procs = mnt_procedures, }; -static struct rpc_version mnt_version3 = { - .number = 3, - .nrprocs = 2, - .procs = mnt3_procedures +static struct rpc_version mnt_version3 = { + .number = 3, + .nrprocs = 2, + .procs = mnt3_procedures, }; -static struct rpc_version * mnt_version[] = { +static struct rpc_version *mnt_version[] = { NULL, &mnt_version1, NULL, &mnt_version3, }; -static struct rpc_stat mnt_stats; +static struct rpc_stat mnt_stats; -static struct rpc_program mnt_program = { +static struct rpc_program mnt_program = { .name = "mount", .number = 
NFS_MNT_PROGRAM, .nrvers = ARRAY_SIZE(mnt_version), diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index cd3ca7b5d3db..7fcc78f2aa71 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args) static int nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 offset = (u32)args->offset; u32 count = args->count; @@ -380,7 +380,7 @@ static int nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) { struct rpc_task *task = req->rq_task; - struct rpc_auth *auth = task->tk_auth; + struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res) static int nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 45268d6def2e..814d886b6aa4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -335,9 +335,7 @@ again: * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ status = nfs3_proc_setattr(dentry, &fattr, sattr); - if (status == 0) - nfs_setattr_update_inode(dentry->d_inode, sattr); - nfs_refresh_inode(dentry->d_inode, &fattr); + nfs_post_op_update_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } if (status != 0) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b51df8eb9f01..b4647a22f349 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct 
nfs3_accessargs *arg static int nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args) static int nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -643,7 +643,7 @@ static int nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, struct nfs3_getaclargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); @@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) static int nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cf3a17eb5c09..6c028e734fe6 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status seqid->flags |= NFS_SEQID_CONFIRMED; } +struct nfs_unique_id { + struct rb_node rb_node; + __u64 id; +}; + /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only * semantics by allowing the server to identify replayed requests. 
*/ struct nfs4_state_owner { - spinlock_t so_lock; - struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs_unique_id so_owner_id; struct nfs_client *so_client; - u32 so_id; /* 32-bit identifier, unique */ - atomic_t so_count; + struct nfs_server *so_server; + struct rb_node so_client_node; struct rpc_cred *so_cred; /* Associated cred */ + + spinlock_t so_lock; + atomic_t so_count; struct list_head so_states; struct list_head so_delegations; struct nfs_seqid_counter so_seqid; @@ -108,7 +115,7 @@ struct nfs4_lock_state { #define NFS_LOCK_INITIALIZED 1 int ls_flags; struct nfs_seqid_counter ls_seqid; - u32 ls_id; + struct nfs_unique_id ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; }; @@ -116,7 +123,10 @@ struct nfs4_lock_state { /* bits for nfs4_state->flags */ enum { LK_STATE_IN_USE, - NFS_DELEGATED_STATE, + NFS_DELEGATED_STATE, /* Current stateid is delegation */ + NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ + NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ + NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ }; struct nfs4_state { @@ -130,11 +140,14 @@ struct nfs4_state { unsigned long flags; /* Do we hold any locks? 
*/ spinlock_t state_lock; /* Protects the lock_states list */ - nfs4_stateid stateid; + seqlock_t seqlock; /* Protects the stateid/open_stateid */ + nfs4_stateid stateid; /* Current stateid: may be delegation */ + nfs4_stateid open_stateid; /* OPEN stateid */ - unsigned int n_rdonly; - unsigned int n_wronly; - unsigned int n_rdwr; + /* The following 3 fields are protected by owner->so_lock */ + unsigned int n_rdonly; /* Number of read-only references */ + unsigned int n_wronly; /* Number of write-only references */ + unsigned int n_rdwr; /* Number of read/write references */ int state; /* State on the server (R,W, or RW) */ atomic_t count; }; @@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); +extern int nfs4_do_close(struct path *path, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); @@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); -extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern void nfs4_drop_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void 
nfs4_close_state(struct nfs4_state *, mode_t); +extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); @@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1; #else -#define nfs4_close_state(a, b) do { } while (0) +#define nfs4_close_state(a, b, c) do { } while (0) #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 648e0ac0f90e..fee2da856c95 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *) static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) @@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) } struct nfs4_opendata { - atomic_t count; + struct kref kref; struct nfs_openargs o_arg; struct nfs_openres o_res; struct nfs_open_confirmargs c_arg; struct nfs_open_confirmres c_res; struct nfs_fattr f_attr; struct nfs_fattr dir_attr; - struct dentry *dentry; + struct path path; struct dentry *dir; struct nfs4_state_owner *owner; + struct nfs4_state *state; struct iattr attrs; unsigned long timestamp; + unsigned int rpc_done : 1; int rpc_status; int cancelled; }; -static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, + +static void nfs4_init_opendata_res(struct nfs4_opendata *p) +{ + p->o_res.f_attr = &p->f_attr; + 
p->o_res.dir_attr = &p->dir_attr; + p->o_res.server = p->o_arg.server; + nfs_fattr_init(&p->f_attr); + nfs_fattr_init(&p->dir_attr); +} + +static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, struct nfs4_state_owner *sp, int flags, const struct iattr *attrs) { - struct dentry *parent = dget_parent(dentry); + struct dentry *parent = dget_parent(path->dentry); struct inode *dir = parent->d_inode; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *p; @@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (p->o_arg.seqid == NULL) goto err_free; - atomic_set(&p->count, 1); - p->dentry = dget(dentry); + p->path.mnt = mntget(path->mnt); + p->path.dentry = dget(path->dentry); p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags, p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_id; - p->o_arg.name = &dentry->d_name; + p->o_arg.id = sp->so_owner_id.id; + p->o_arg.name = &p->path.dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; - p->o_res.f_attr = &p->f_attr; - p->o_res.dir_attr = &p->dir_attr; - p->o_res.server = server; - nfs_fattr_init(&p->f_attr); - nfs_fattr_init(&p->dir_attr); if (flags & O_EXCL) { u32 *s = (u32 *) p->o_arg.u.verifier.data; s[0] = jiffies; @@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; p->c_arg.seqid = p->o_arg.seqid; + nfs4_init_opendata_res(p); + kref_init(&p->kref); return p; err_free: kfree(p); @@ -282,27 +292,25 @@ err: return NULL; } -static void nfs4_opendata_free(struct nfs4_opendata *p) +static void nfs4_opendata_free(struct kref *kref) { - if (p != NULL && atomic_dec_and_test(&p->count)) { - nfs_free_seqid(p->o_arg.seqid); - nfs4_put_state_owner(p->owner); 
- dput(p->dir); - dput(p->dentry); - kfree(p); - } + struct nfs4_opendata *p = container_of(kref, + struct nfs4_opendata, kref); + + nfs_free_seqid(p->o_arg.seqid); + if (p->state != NULL) + nfs4_put_open_state(p->state); + nfs4_put_state_owner(p->owner); + dput(p->dir); + dput(p->path.dentry); + mntput(p->path.mnt); + kfree(p); } -/* Helper for asynchronous RPC calls */ -static int nfs4_call_async(struct rpc_clnt *clnt, - const struct rpc_call_ops *tk_ops, void *calldata) +static void nfs4_opendata_put(struct nfs4_opendata *p) { - struct rpc_task *task; - - if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) - return -ENOMEM; - rpc_execute(task); - return 0; + if (p != NULL) + kref_put(&p->kref, nfs4_opendata_free); } static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) @@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) return ret; } -static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) +static int can_open_cached(struct nfs4_state *state, int mode) +{ + int ret = 0; + switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) { + case FMODE_READ: + ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + break; + case FMODE_WRITE: + ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + break; + case FMODE_READ|FMODE_WRITE: + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + } + return ret; +} + +static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags) +{ + if ((delegation->type & open_flags) != open_flags) + return 0; + if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) + return 0; + return 1; +} + +static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) { switch (open_flags) { case FMODE_WRITE: @@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t 
open_ case FMODE_READ|FMODE_WRITE: state->n_rdwr++; } + nfs4_state_set_mode_locked(state, state->state | open_flags); } -static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) { - struct inode *inode = state->inode; + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); + memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); + switch (open_flags) { + case FMODE_READ: + set_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_WRITE: + set_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case FMODE_READ|FMODE_WRITE: + set_bit(NFS_O_RDWR_STATE, &state->flags); + } +} + +static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +{ + write_seqlock(&state->seqlock); + nfs_set_open_stateid_locked(state, stateid, open_flags); + write_sequnlock(&state->seqlock); +} +static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags) +{ open_flags &= (FMODE_READ|FMODE_WRITE); - /* Protect against nfs4_find_state_byowner() */ + /* + * Protect the call to nfs4_state_set_mode_locked and + * serialise the stateid update + */ + write_seqlock(&state->seqlock); + if (deleg_stateid != NULL) { + memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + } + if (open_stateid != NULL) + nfs_set_open_stateid_locked(state, open_stateid, open_flags); + write_sequnlock(&state->seqlock); spin_lock(&state->owner->so_lock); - spin_lock(&inode->i_lock); - memcpy(&state->stateid, stateid, sizeof(state->stateid)); update_open_stateflags(state, open_flags); - nfs4_state_set_mode_locked(state, state->state | open_flags); - spin_unlock(&inode->i_lock); 
spin_unlock(&state->owner->so_lock); } +static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL || (delegation->type & open_flags) == open_flags) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + nfs_inode_return_delegation(inode); +} + +static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) +{ + struct nfs4_state *state = opendata->state; + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_delegation *delegation; + int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL); + nfs4_stateid stateid; + int ret = -EAGAIN; + + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + for (;;) { + if (can_open_cached(state, open_mode)) { + spin_lock(&state->owner->so_lock); + if (can_open_cached(state, open_mode)) { + update_open_stateflags(state, open_mode); + spin_unlock(&state->owner->so_lock); + rcu_read_unlock(); + goto out_return_state; + } + spin_unlock(&state->owner->so_lock); + } + if (delegation == NULL) + break; + if (!can_open_delegated(delegation, open_mode)) + break; + /* Save the delegation */ + memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); + rcu_read_unlock(); + lock_kernel(); + ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); + unlock_kernel(); + if (ret != 0) + goto out; + ret = -EAGAIN; + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + /* If no delegation, try a cached open */ + if (delegation == NULL) + continue; + /* Is the delegation still valid? 
*/ + if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0) + continue; + rcu_read_unlock(); + update_open_stateid(state, NULL, &stateid, open_mode); + goto out_return_state; + } + rcu_read_unlock(); +out: + return ERR_PTR(ret); +out_return_state: + atomic_inc(&state->count); + return state; +} + static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) { struct inode *inode; struct nfs4_state *state = NULL; + struct nfs_delegation *delegation; + nfs4_stateid *deleg_stateid = NULL; + int ret; - if (!(data->f_attr.valid & NFS_ATTR_FATTR)) + if (!data->rpc_done) { + state = nfs4_try_open_cached(data); goto out; + } + + ret = -EAGAIN; + if (!(data->f_attr.valid & NFS_ATTR_FATTR)) + goto err; inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); + ret = PTR_ERR(inode); if (IS_ERR(inode)) - goto out; + goto err; + ret = -ENOMEM; state = nfs4_get_open_state(inode, data->owner); if (state == NULL) - goto put_inode; - update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags); -put_inode: + goto err_put_inode; + if (data->o_res.delegation_type != 0) { + int delegation_flags = 0; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation) + delegation_flags = delegation->flags; + rcu_read_unlock(); + if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) + nfs_inode_set_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + else + nfs_inode_reclaim_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + } + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) + deleg_stateid = &delegation->stateid; + update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags); + rcu_read_unlock(); iput(inode); out: return state; +err_put_inode: + iput(inode); +err: + return ERR_PTR(ret); } static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) @@ 
-382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state * return ERR_PTR(-ENOENT); } -static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid) +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res) { + struct nfs4_state *newstate; int ret; opendata->o_arg.open_flags = openflags; + memset(&opendata->o_res, 0, sizeof(opendata->o_res)); + memset(&opendata->c_res, 0, sizeof(opendata->c_res)); + nfs4_init_opendata_res(opendata); ret = _nfs4_proc_open(opendata); if (ret != 0) return ret; - memcpy(stateid->data, opendata->o_res.stateid.data, - sizeof(stateid->data)); + newstate = nfs4_opendata_to_nfs4_state(opendata); + if (IS_ERR(newstate)) + return PTR_ERR(newstate); + nfs4_close_state(&opendata->path, newstate, openflags); + *res = newstate; return 0; } static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) { - nfs4_stateid stateid; struct nfs4_state *newstate; - int mode = 0; - int delegation = 0; int ret; /* memory barrier prior to reading state->n_* */ + clear_bit(NFS_DELEGATED_STATE, &state->flags); smp_rmb(); if (state->n_rdwr != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); if (ret != 0) return ret; - mode |= FMODE_READ|FMODE_WRITE; - if (opendata->o_res.delegation_type != 0) - delegation = opendata->o_res.delegation_type; - smp_rmb(); + if (newstate != state) + return -ESTALE; } if (state->n_wronly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); if (ret != 0) return ret; - mode |= FMODE_WRITE; - if (opendata->o_res.delegation_type != 0) - delegation = opendata->o_res.delegation_type; - smp_rmb(); + if (newstate != state) + return -ESTALE; } if (state->n_rdonly != 0) { - ret = 
nfs4_open_recover_helper(opendata, FMODE_READ, &stateid); + ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); if (ret != 0) return ret; - mode |= FMODE_READ; + if (newstate != state) + return -ESTALE; } - clear_bit(NFS_DELEGATED_STATE, &state->flags); - if (mode == 0) - return 0; - if (opendata->o_res.delegation_type == 0) - opendata->o_res.delegation_type = delegation; - opendata->o_arg.open_flags |= mode; - newstate = nfs4_opendata_to_nfs4_state(opendata); - if (newstate != NULL) { - if (opendata->o_res.delegation_type != 0) { - struct nfs_inode *nfsi = NFS_I(newstate->inode); - int delegation_flags = 0; - if (nfsi->delegation) - delegation_flags = nfsi->delegation->flags; - if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) - nfs_inode_set_delegation(newstate->inode, - opendata->owner->so_cred, - &opendata->o_res); - else - nfs_inode_reclaim_delegation(newstate->inode, - opendata->owner->so_cred, - &opendata->o_res); - } - nfs4_close_state(newstate, opendata->o_arg.open_flags); + /* + * We may have performed cached opens for all three recoveries. + * Check if we need to update the current stateid. + */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && + memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { + write_seqlock(&state->seqlock); + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); + write_sequnlock(&state->seqlock); } - if (newstate != state) - return -ESTALE; return 0; } @@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * * OPEN_RECLAIM: * reclaim state on the server after a reboot. 
*/ -static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; + struct nfs_delegation *delegation; struct nfs4_opendata *opendata; int delegation_type = 0; int status; - if (delegation != NULL) { - if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - memcpy(&state->stateid, &delegation->stateid, - sizeof(state->stateid)); - set_bit(NFS_DELEGATED_STATE, &state->flags); - return 0; - } - delegation_type = delegation->type; - } - opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; opendata->o_arg.fh = NFS_FH(state->inode); nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(state->inode)->delegation); + if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) + delegation_type = delegation->flags; + rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return status; } -static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_do_open_reclaim(sp, state, dentry); + err = _nfs4_do_open_reclaim(ctx, state); if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if 
(IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); + ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; } -static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs4_state_owner *sp = state->owner; struct nfs4_opendata *opendata; int ret; - if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) - return 0; - opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; - memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, + memcpy(opendata->o_arg.u.delegation.data, stateid->data, sizeof(opendata->o_arg.u.delegation.data)); ret = nfs4_open_recover(opendata, state); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return ret; } -int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs4_exception exception = { }; - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(state->inode); int err; do { - err = _nfs4_open_delegation_recall(dentry, state); + err = _nfs4_open_delegation_recall(ctx, state, stateid); switch (err) { case 0: return err; @@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) memcpy(data->o_res.stateid.data, data->c_res.stateid.data, sizeof(data->o_res.stateid.data)); renew_lease(data->o_res.server, data->timestamp); + data->rpc_done = 1; } - nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); + nfs_increment_open_seqid(data->rpc_status, 
data->c_arg.seqid); } static void nfs4_open_confirm_release(void *calldata) @@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata) if (data->cancelled == 0) goto out_free; /* In case of error, no cleanup! */ - if (data->rpc_status != 0) + if (!data->rpc_done) goto out_free; nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); - if (state != NULL) - nfs4_close_state(state, data->o_arg.open_flags); + if (!IS_ERR(state)) + nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: - nfs4_opendata_free(data); + nfs4_opendata_put(data); } static const struct rpc_call_ops nfs4_open_confirm_ops = { @@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) struct rpc_task *task; int status; - atomic_inc(&data->count); - /* - * If rpc_run_task() ends up calling ->rpc_release(), we - * want to ensure that it takes the 'error' code path. - */ - data->rpc_status = -ENOMEM; + kref_get(&data->kref); + data->rpc_done = 0; + data->rpc_status = 0; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); if (IS_ERR(task)) return PTR_ERR(task); @@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) return; + /* + * Check if we still need to send an OPEN call, or if we can use + * a delegation instead. + */ + if (data->state != NULL) { + struct nfs_delegation *delegation; + + if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL))) + goto out_no_action; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); + if (delegation != NULL && + (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) { + rcu_read_unlock(); + goto out_no_action; + } + rcu_read_unlock(); + } /* Update sequence id. 
*/ - data->o_arg.id = sp->so_id; + data->o_arg.id = sp->so_owner_id.id; data->o_arg.clientid = sp->so_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; data->timestamp = jiffies; rpc_call_setup(task, &msg, 0); + return; +out_no_action: + task->tk_action = NULL; + } static void nfs4_open_done(struct rpc_task *task, void *calldata) @@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) data->rpc_status = -ENOTDIR; } renew_lease(data->o_res.server, data->timestamp); + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) + nfs_confirm_seqid(&data->owner->so_seqid, 0); } nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); + data->rpc_done = 1; } static void nfs4_open_release(void *calldata) @@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata) if (data->cancelled == 0) goto out_free; /* In case of error, no cleanup! */ - if (data->rpc_status != 0) + if (data->rpc_status != 0 || !data->rpc_done) goto out_free; /* In case we need an open_confirm, no cleanup! */ if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) goto out_free; nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); - if (state != NULL) - nfs4_close_state(state, data->o_arg.open_flags); + if (!IS_ERR(state)) + nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: - nfs4_opendata_free(data); + nfs4_opendata_put(data); } static const struct rpc_call_ops nfs4_open_ops = { @@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) struct rpc_task *task; int status; - atomic_inc(&data->count); - /* - * If rpc_run_task() ends up calling ->rpc_release(), we - * want to ensure that it takes the 'error' code path. 
- */ - data->rpc_status = -ENOMEM; + kref_get(&data->kref); + data->rpc_done = 0; + data->rpc_status = 0; + data->cancelled = 0; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); if (IS_ERR(task)) return PTR_ERR(task); @@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) } else status = data->rpc_status; rpc_put_task(task); - if (status != 0) + if (status != 0 || !data->rpc_done) return status; if (o_arg->open_flags & O_CREAT) { @@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) if (status != 0) return status; } - nfs_confirm_seqid(&data->owner->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); return 0; @@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf mask |= MAY_READ; if (openflags & FMODE_WRITE) mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) goto out; @@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) * reclaim state on the server after a network partition. 
* Assumes caller holds the appropriate lock */ -static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct inode *inode = state->inode; - struct nfs_delegation *delegation = NFS_I(inode)->delegation; struct nfs4_opendata *opendata; - int openflags = state->state & (FMODE_READ|FMODE_WRITE); int ret; - if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - ret = _nfs4_do_access(inode, sp->so_cred, openflags); - if (ret < 0) - return ret; - memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); - set_bit(NFS_DELEGATED_STATE, &state->flags); - return 0; - } - opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) { /* Invalidate the state owner so we don't ever use it again */ - nfs4_drop_state_owner(sp); - d_drop(dentry); + nfs4_drop_state_owner(state->owner); + d_drop(ctx->path.dentry); } - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return ret; } -static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_open_expired(sp, state, dentry); + err = _nfs4_open_expired(ctx, state); if (err == -NFS4ERR_DELAY) nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return PTR_ERR(ctx); 
- ret = nfs4_do_open_expired(sp, state, ctx->dentry); + ret = nfs4_do_open_expired(ctx, state); put_nfs_open_context(ctx); return ret; } /* - * Returns a referenced nfs4_state if there is an open delegation on the file + * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* + * fields corresponding to attributes that were used to store the verifier. + * Make sure we clobber those fields in the later setattr call */ -static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) -{ - struct nfs_delegation *delegation; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs_client; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs4_state_owner *sp = NULL; - struct nfs4_state *state = NULL; - int open_flags = flags & (FMODE_READ|FMODE_WRITE); - int err; - - err = -ENOMEM; - if (!(sp = nfs4_get_state_owner(server, cred))) { - dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); - return err; - } - err = nfs4_recover_expired_lease(server); - if (err != 0) - goto out_put_state_owner; - /* Protect against reboot recovery - NOTE ORDER! 
*/ - down_read(&clp->cl_sem); - /* Protect against delegation recall */ - down_read(&nfsi->rwsem); - delegation = NFS_I(inode)->delegation; - err = -ENOENT; - if (delegation == NULL || (delegation->type & open_flags) != open_flags) - goto out_err; - err = -ENOMEM; - state = nfs4_get_open_state(inode, sp); - if (state == NULL) - goto out_err; - - err = -ENOENT; - if ((state->state & open_flags) == open_flags) { - spin_lock(&inode->i_lock); - update_open_stateflags(state, open_flags); - spin_unlock(&inode->i_lock); - goto out_ok; - } else if (state->state != 0) - goto out_put_open_state; - - lock_kernel(); - err = _nfs4_do_access(inode, cred, open_flags); - unlock_kernel(); - if (err != 0) - goto out_put_open_state; - set_bit(NFS_DELEGATED_STATE, &state->flags); - update_open_stateid(state, &delegation->stateid, open_flags); -out_ok: - nfs4_put_state_owner(sp); - up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); - *res = state; - return 0; -out_put_open_state: - nfs4_put_open_state(state); -out_err: - up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); - if (err != -EACCES) - nfs_inode_return_delegation(inode); -out_put_state_owner: - nfs4_put_state_owner(sp); - return err; -} - -static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) +static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr) { - struct nfs4_exception exception = { }; - struct nfs4_state *res = ERR_PTR(-EIO); - int err; + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) && + !(sattr->ia_valid & ATTR_ATIME_SET)) + sattr->ia_valid |= ATTR_ATIME; - do { - err = _nfs4_open_delegated(inode, flags, cred, &res); - if (err == 0) - break; - res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode), - err, &exception)); - } while (exception.retry); - return res; + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) && + !(sattr->ia_valid & ATTR_MTIME_SET)) + sattr->ia_valid |= ATTR_MTIME; } /* * Returns a referenced 
nfs4_state */ -static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs_client *clp = server->nfs_client; struct nfs4_opendata *opendata; - int status; + int status; /* Protect against reboot recovery conflicts */ status = -ENOMEM; @@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st status = nfs4_recover_expired_lease(server); if (status != 0) goto err_put_state_owner; + if (path->dentry->d_inode != NULL) + nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE)); down_read(&clp->cl_sem); status = -ENOMEM; - opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); + opendata = nfs4_opendata_alloc(path, sp, flags, sattr); if (opendata == NULL) goto err_release_rwsem; + if (path->dentry->d_inode != NULL) + opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); + status = _nfs4_proc_open(opendata); if (status != 0) - goto err_opendata_free; + goto err_opendata_put; + + if (opendata->o_arg.open_flags & O_EXCL) + nfs4_exclusive_attrset(opendata, sattr); - status = -ENOMEM; state = nfs4_opendata_to_nfs4_state(opendata); - if (state == NULL) - goto err_opendata_free; - if (opendata->o_res.delegation_type != 0) - nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); - nfs4_opendata_free(opendata); + status = PTR_ERR(state); + if (IS_ERR(state)) + goto err_opendata_put; + nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; return 0; -err_opendata_free: - nfs4_opendata_free(opendata); +err_opendata_put: + nfs4_opendata_put(opendata); err_release_rwsem: up_read(&clp->cl_sem); 
err_put_state_owner: @@ -1006,14 +1105,14 @@ out_err: } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, path, flags, sattr, cred, &res); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * the user though... */ if (status == -NFS4ERR_BAD_SEQID) { - printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + printk(KERN_WARNING "NFS: v4 server %s " + " returned a bad sequence-id error!\n", + NFS_SERVER(dir)->nfs_client->cl_hostname); exception.retry = 1; continue; } @@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, exception.retry = 1; continue; } + if (status == -EAGAIN) { + /* We must have found a delegation */ + exception.retry = 1; + continue; + } res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), status, &exception)); } while (exception.retry); @@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, } struct nfs4_closedata { + struct path path; struct inode *inode; struct nfs4_state *state; struct nfs_closeargs arg; @@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); + dput(calldata->path.dentry); + mntput(calldata->path.mnt); kfree(calldata); } @@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch 
(task->tk_status) { case 0: - memcpy(&state->stateid, &calldata->res.stateid, - sizeof(state->stateid)); + nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags); renew_lease(server, calldata->timestamp); break; case -NFS4ERR_STALE_STATEID: @@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) .rpc_resp = &calldata->res, .rpc_cred = state->owner->so_cred, }; - int mode = 0, old_mode; + int clear_rd, clear_wr, clear_rdwr; + int mode; if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; - /* Recalculate the new open mode in case someone reopened the file - * while we were waiting in line to be scheduled. - */ + + mode = FMODE_READ|FMODE_WRITE; + clear_rd = clear_wr = clear_rdwr = 0; spin_lock(&state->owner->so_lock); - spin_lock(&calldata->inode->i_lock); - mode = old_mode = state->state; + /* Calculate the change in open mode */ if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) + if (state->n_rdonly == 0) { mode &= ~FMODE_READ; - if (state->n_wronly == 0) + clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (state->n_wronly == 0) { mode &= ~FMODE_WRITE; + clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); + } } - nfs4_state_set_mode_locked(state, mode); - spin_unlock(&calldata->inode->i_lock); spin_unlock(&state->owner->so_lock); - if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { + if (!clear_rd && !clear_wr && !clear_rdwr) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; return; @@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner semaphore! 
*/ -int nfs4_do_close(struct inode *inode, struct nfs4_state *state) +int nfs4_do_close(struct path *path, struct nfs4_state *state) { - struct nfs_server *server = NFS_SERVER(inode); + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; + struct nfs4_state_owner *sp = state->owner; + struct rpc_task *task; int status = -ENOMEM; calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) goto out; - calldata->inode = inode; + calldata->inode = state->inode; calldata->state = state; - calldata->arg.fh = NFS_FH(inode); - calldata->arg.stateid = &state->stateid; + calldata->arg.fh = NFS_FH(state->inode); + calldata->arg.stateid = &state->open_stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); if (calldata->arg.seqid == NULL) @@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state) calldata->arg.bitmask = server->attr_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.server = server; + calldata->path.mnt = mntget(path->mnt); + calldata->path.dentry = dget(path->dentry); - status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); - if (status == 0) - goto out; - - nfs_free_seqid(calldata->arg.seqid); + task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; out_free_calldata: kfree(calldata); out: + nfs4_put_open_state(state); + nfs4_put_state_owner(sp); return status; } -static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) { struct file *filp; + int ret; - filp = lookup_instantiate_filp(nd, dentry, NULL); + /* If the open_intent is for execute, we have an extra check to make */ + if (nd->intent.open.flags & FMODE_EXEC) { + ret = _nfs4_do_access(state->inode, 
+ state->owner->so_cred, + nd->intent.open.flags); + if (ret < 0) + goto out_close; + } + filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; ctx = (struct nfs_open_context *)filp->private_data; ctx->state = state; return 0; } - nfs4_close_state(state, nd->intent.open.flags); - return PTR_ERR(filp); + ret = PTR_ERR(filp); +out_close: + nfs4_close_state(path, state, nd->intent.open.flags); + return ret; } struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; @@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return (struct dentry *)cred; - state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) @@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) dentry = res; - nfs4_intent_set_file(nd, dentry, state); + nfs4_intent_set_file(nd, &path, state); return res; } int nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct rpc_cred *cred; struct nfs4_state *state; cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); - state = nfs4_open_delegated(dentry->d_inode, openflags, cred); - if (IS_ERR(state)) - state = nfs4_do_open(dir, dentry, openflags, NULL, cred); + state = nfs4_do_open(dir, &path, openflags, NULL, cred); put_rpccred(cred); if (IS_ERR(state)) { switch (PTR_ERR(state)) { @@ 
-1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st } } if (state->inode == dentry->d_inode) { - nfs4_intent_set_file(nd, dentry, state); + nfs4_intent_set_file(nd, &path, state); return 1; } - nfs4_close_state(state, openflags); + nfs4_close_state(&path, state, openflags); out_drop: d_drop(dentry); return 0; @@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, dprintk("NFS call lookupfh %s\n", name->name); status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply lookupfh: %d\n", status); - if (status == -NFS4ERR_MOVED) - status = -EREMOTE; return status; } @@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_proc_lookupfh(server, dirfh, name, - fhandle, fattr), - &exception); + err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); + /* FIXME: !!!! 
*/ + if (err == -NFS4ERR_MOVED) { + err = -EREMOTE; + break; + } + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - int status; - struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_lookup_arg args = { - .bitmask = server->attr_bitmask, - .dir_fh = NFS_FH(dir), - .name = name, - }; - struct nfs4_lookup_res res = { - .server = server, - .fattr = fattr, - .fh = fhandle, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], - .rpc_argp = &args, - .rpc_resp = &res, - }; - - nfs_fattr_init(fattr); + int status; dprintk("NFS call lookup %s\n", name->name); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); if (status == -NFS4ERR_MOVED) status = nfs4_get_referral(dir, name, fattr, fhandle); dprintk("NFS reply lookup: %d\n", status); @@ -1752,6 +1870,10 @@ static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct nfs4_state *state; struct rpc_cred *cred; int status = 0; @@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = PTR_ERR(cred); goto out; } - state = nfs4_do_open(dir, dentry, flags, sattr, cred); + state = nfs4_do_open(dir, &path, flags, sattr, cred); put_rpccred(cred); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = nfs4_do_setattr(state->inode, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); + nfs_post_op_update_inode(state->inode, 
&fattr); } - if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) - status = nfs4_intent_set_file(nd, dentry, state); + if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) + status = nfs4_intent_set_file(nd, &path, state); else - nfs4_close_state(state, flags); + nfs4_close_state(&path, state, flags); out: return status; } @@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - arg.lock_owner.id = lsp->ls_id; + arg.lock_owner.id = lsp->ls_id.id; status = rpc_call_sync(server->client, &msg, 0); switch (status) { case 0: @@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, { struct nfs4_unlockdata *data; + /* Ensure this is an unlock - when canceling a lock, the + * canceled lock is passed in, and it won't be an unlock. + */ + fl->fl_type = F_UNLCK; + data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); if (data == NULL) { nfs_free_seqid(seqid); @@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, goto out_free; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; - p->arg.lock_owner.id = lsp->ls_id; + p->arg.lock_owner.id = lsp->ls_id.id; p->lsp = lsp; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); @@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, sizeof(data->lsp->ls_stateid.data)); data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; - renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); + renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); } nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); out: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8ed79d5c54f9..e9662ba81d86 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -38,12 +38,14 @@ * subsequent patch. 
*/ +#include <linux/kernel.h> #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> #include <linux/kthread.h> #include <linux/module.h> +#include <linux/random.h> #include <linux/workqueue.h> #include <linux/bitops.h> @@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) return status; } -u32 -nfs4_alloc_lockowner_id(struct nfs_client *clp) -{ - return clp->cl_lockowner_id ++; -} - -static struct nfs4_state_owner * -nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred) -{ - struct nfs4_state_owner *sp = NULL; - - if (!list_empty(&clp->cl_unused)) { - sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list); - atomic_inc(&sp->so_count); - sp->so_cred = cred; - list_move(&sp->so_list, &clp->cl_state_owners); - clp->cl_nunused--; - } - return sp; -} - struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; struct rpc_cred *cred = NULL; - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); if (list_empty(&sp->so_states)) continue; cred = get_rpccred(sp->so_cred); @@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; - if (!list_empty(&clp->cl_state_owners)) { - sp = list_entry(clp->cl_state_owners.next, - struct nfs4_state_owner, so_list); + pos = rb_first(&clp->cl_state_owners); + if (pos != NULL) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); return get_rpccred(sp->so_cred); } return NULL; } +static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new, + __u64 minval, int maxbits) +{ + struct rb_node **p, *parent; + struct nfs_unique_id *pos; + 
__u64 mask = ~0ULL; + + if (maxbits < 64) + mask = (1ULL << maxbits) - 1ULL; + + /* Ensure distribution is more or less flat */ + get_random_bytes(&new->id, sizeof(new->id)); + new->id &= mask; + if (new->id < minval) + new->id += minval; +retry: + p = &root->rb_node; + parent = NULL; + + while (*p != NULL) { + parent = *p; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + + if (new->id < pos->id) + p = &(*p)->rb_left; + else if (new->id > pos->id) + p = &(*p)->rb_right; + else + goto id_exists; + } + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, root); + return; +id_exists: + for (;;) { + new->id++; + if (new->id < minval || (new->id & mask) != new->id) { + new->id = minval; + break; + } + parent = rb_next(parent); + if (parent == NULL) + break; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + if (new->id < pos->id) + break; + } + goto retry; +} + +static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) +{ + rb_erase(&id->rb_node, root); +} + static struct nfs4_state_owner * -nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) +nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred) { + struct nfs_client *clp = server->nfs_client; + struct rb_node **p = &clp->cl_state_owners.rb_node, + *parent = NULL; struct nfs4_state_owner *sp, *res = NULL; - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { - if (sp->so_cred != cred) + while (*p != NULL) { + parent = *p; + sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + + if (server < sp->so_server) { + p = &parent->rb_left; continue; - atomic_inc(&sp->so_count); - /* Move to the head of the list */ - list_move(&sp->so_list, &clp->cl_state_owners); - res = sp; - break; + } + if (server > sp->so_server) { + p = &parent->rb_right; + continue; + } + if (cred < sp->so_cred) + p = &parent->rb_left; + else if (cred > sp->so_cred) + p = &parent->rb_right; + else { + atomic_inc(&sp->so_count); + res = sp; + 
break; + } } return res; } +static struct nfs4_state_owner * +nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new) +{ + struct rb_node **p = &clp->cl_state_owners.rb_node, + *parent = NULL; + struct nfs4_state_owner *sp; + + while (*p != NULL) { + parent = *p; + sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + + if (new->so_server < sp->so_server) { + p = &parent->rb_left; + continue; + } + if (new->so_server > sp->so_server) { + p = &parent->rb_right; + continue; + } + if (new->so_cred < sp->so_cred) + p = &parent->rb_left; + else if (new->so_cred > sp->so_cred) + p = &parent->rb_right; + else { + atomic_inc(&sp->so_count); + return sp; + } + } + nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64); + rb_link_node(&new->so_client_node, parent, p); + rb_insert_color(&new->so_client_node, &clp->cl_state_owners); + return new; +} + +static void +nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp) +{ + if (!RB_EMPTY_NODE(&sp->so_client_node)) + rb_erase(&sp->so_client_node, &clp->cl_state_owners); + nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id); +} + /* * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to * create a new state_owner. 
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void) void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - struct nfs_client *clp = sp->so_client; - spin_lock(&clp->cl_lock); - list_del_init(&sp->so_list); - spin_unlock(&clp->cl_lock); + if (!RB_EMPTY_NODE(&sp->so_client_node)) { + struct nfs_client *clp = sp->so_client; + + spin_lock(&clp->cl_lock); + rb_erase(&sp->so_client_node, &clp->cl_state_owners); + RB_CLEAR_NODE(&sp->so_client_node); + spin_unlock(&clp->cl_lock); + } } /* @@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; - get_rpccred(cred); - new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); - sp = nfs4_find_state_owner(clp, cred); - if (sp == NULL) - sp = nfs4_client_grab_unused(clp, cred); - if (sp == NULL && new != NULL) { - list_add(&new->so_list, &clp->cl_state_owners); - new->so_client = clp; - new->so_id = nfs4_alloc_lockowner_id(clp); - new->so_cred = cred; - sp = new; - new = NULL; - } + sp = nfs4_find_state_owner(server, cred); spin_unlock(&clp->cl_lock); - kfree(new); if (sp != NULL) return sp; - put_rpccred(cred); - return NULL; + new = nfs4_alloc_state_owner(); + if (new == NULL) + return NULL; + new->so_client = clp; + new->so_server = server; + new->so_cred = cred; + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner(clp, new); + spin_unlock(&clp->cl_lock); + if (sp == new) + get_rpccred(cred); + else + kfree(new); + return sp; } /* @@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) return; - if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) - goto out_free; - if (list_empty(&sp->so_list)) - goto out_free; - list_move(&sp->so_list, &clp->cl_unused); - clp->cl_nunused++; - spin_unlock(&clp->cl_lock); - put_rpccred(cred); - cred = NULL; - return; -out_free: - list_del(&sp->so_list); + nfs4_remove_state_owner(clp, sp); 
spin_unlock(&clp->cl_lock); put_rpccred(cred); kfree(sp); @@ -236,6 +325,7 @@ nfs4_alloc_open_state(void) atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); + seqlock_init(&state->seqlock); return state; } @@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) struct nfs4_state *state; list_for_each_entry(state, &nfsi->open_states, inode_states) { - /* Is this in the process of being freed? */ - if (state->state == 0) + if (state->owner != owner) continue; - if (state->owner == owner) { - atomic_inc(&state->count); + if (atomic_inc_not_zero(&state->count)) return state; - } } return NULL; } @@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Close the current file. */ -void nfs4_close_state(struct nfs4_state *state, mode_t mode) +void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) { - struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - int oldstate, newstate = 0; + int call_close = 0; + int newstate; atomic_inc(&owner->so_count); /* Protect against nfs4_find_state() */ spin_lock(&owner->so_lock); - spin_lock(&inode->i_lock); switch (mode & (FMODE_READ | FMODE_WRITE)) { case FMODE_READ: state->n_rdonly--; @@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) case FMODE_READ|FMODE_WRITE: state->n_rdwr--; } - oldstate = newstate = state->state; + newstate = FMODE_READ|FMODE_WRITE; if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) + if (state->n_rdonly == 0) { newstate &= ~FMODE_READ; - if (state->n_wronly == 0) + call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (state->n_wronly == 0) { newstate &= ~FMODE_WRITE; + call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + } + if (newstate == 0) + 
clear_bit(NFS_DELEGATED_STATE, &state->flags); } - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - nfs4_state_set_mode_locked(state, newstate); - oldstate = newstate; - } - spin_unlock(&inode->i_lock); + nfs4_state_set_mode_locked(state, newstate); spin_unlock(&owner->so_lock); - if (oldstate != newstate && nfs4_do_close(inode, state) == 0) - return; - nfs4_put_open_state(state); - nfs4_put_state_owner(owner); + if (!call_close) { + nfs4_put_open_state(state); + nfs4_put_state_owner(owner); + } else + nfs4_do_close(path, state); } /* @@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; spin_lock(&clp->cl_lock); - lsp->ls_id = nfs4_alloc_lockowner_id(clp); + nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); spin_unlock(&clp->cl_lock); INIT_LIST_HEAD(&lsp->ls_locks); return lsp; } +static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) +{ + struct nfs_client *clp = lsp->ls_state->owner->so_client; + + spin_lock(&clp->cl_lock); + nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); + spin_unlock(&clp->cl_lock); + kfree(lsp); +} + /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. 
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ return NULL; } spin_unlock(&state->state_lock); - kfree(new); + if (new != NULL) + nfs4_free_lock_state(new); return lsp; } @@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - kfree(lsp); + nfs4_free_lock_state(lsp); } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) @@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; + int seq; - memcpy(dst, &state->stateid, sizeof(*dst)); + do { + seq = read_seqbegin(&state->seqlock); + memcpy(dst, &state->stateid, sizeof(*dst)); + } while (read_seqretry(&state->seqlock, seq)); if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) return; @@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid) * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() */ -static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) +static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { switch (status) { case 0: break; case -NFS4ERR_BAD_SEQID: + if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) + return; + printk(KERN_WARNING "NFS: v4 server returned a bad" + "sequence-id error on an" + "unconfirmed sequence %p!\n", + seqid->sequence); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: @@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) struct nfs4_state_owner, so_seqid); nfs4_drop_state_owner(sp); } - return nfs_increment_seqid(status, seqid); + nfs_increment_seqid(status, seqid); } /* @@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) */ void 
nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) { - return nfs_increment_seqid(status, seqid); + nfs_increment_seqid(status, seqid); } int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) @@ -748,15 +860,21 @@ out_err: static void nfs4_state_mark_reclaim(struct nfs_client *clp) { struct nfs4_state_owner *sp; + struct rb_node *pos; struct nfs4_state *state; struct nfs4_lock_state *lock; /* Reset all sequence ids to zero */ - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); sp->so_seqid.counter = 0; sp->so_seqid.flags = 0; spin_lock(&sp->so_lock); list_for_each_entry(state, &sp->so_states, open_states) { + clear_bit(NFS_DELEGATED_STATE, &state->flags); + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); list_for_each_entry(lock, &state->lock_states, ls_locks) { lock->ls_seqid.counter = 0; lock->ls_seqid.flags = 0; @@ -771,6 +889,7 @@ static int reclaimer(void *ptr) { struct nfs_client *clp = ptr; struct nfs4_state_owner *sp; + struct rb_node *pos; struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; int status = 0; @@ -816,7 +935,8 @@ restart_loop: /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); /* Note: list is protected by exclusive lock on cl->cl_sem */ - list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); status = nfs4_reclaim_open_state(ops, sp); if (status < 0) { if (status == -NFS4ERR_NO_GRACE) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8003c91ccb9a..c08738441f73 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int); #endif /* lock,open owner 
id: - * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) + * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ -#define owner_id_maxsz (1 + 1) +#define open_owner_id_maxsz (1 + 4) +#define lock_owner_id_maxsz (1 + 4) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define op_encode_hdr_maxsz (1) @@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int); #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) +#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) /* This is based on getfattr, which uses the most attributes: */ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ - 3 + 3 + 3 + 2 * nfs4_name_maxsz)) + 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ nfs4_fattr_value_maxsz) #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) @@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int); 3 + (NFS4_VERIFIER_SIZE >> 2)) #define decode_setclientid_confirm_maxsz \ (op_decode_hdr_maxsz) -#define encode_lookup_maxsz (op_encode_hdr_maxsz + \ - 1 + ((3 + NFS4_FHSIZE) >> 2)) +#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) +#define decode_lookup_maxsz (op_decode_hdr_maxsz) +#define encode_share_access_maxsz \ + (2) +#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz) +#define encode_opentype_maxsz (1 + encode_createmode_maxsz) +#define encode_claim_null_maxsz (1 + nfs4_name_maxsz) +#define encode_open_maxsz (op_encode_hdr_maxsz + \ + 2 + encode_share_access_maxsz + 2 + \ + open_owner_id_maxsz + \ + encode_opentype_maxsz + \ + encode_claim_null_maxsz) +#define decode_ace_maxsz (3 + nfs4_owner_maxsz) +#define decode_delegation_maxsz (1 + 
XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \ + decode_ace_maxsz) +#define decode_change_info_maxsz (5) +#define decode_open_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ + decode_change_info_maxsz + 1 + \ + nfs4_fattr_bitmap_maxsz + \ + decode_delegation_maxsz) #define encode_remove_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) #define encode_rename_maxsz (op_encode_hdr_maxsz + \ @@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int); #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ nfs4_fattr_maxsz) -#define decode_create_maxsz (op_decode_hdr_maxsz + 8) +#define decode_create_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz + \ + nfs4_fattr_bitmap_maxsz) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) +#define encode_fs_locations_maxsz \ + (encode_getattr_maxsz) +#define decode_fs_locations_maxsz \ + (0) #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ #define NFS4_dec_compound_sz (1024) /* XXX: large enough? 
*/ #define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ @@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int); op_decode_hdr_maxsz + 2 + \ decode_getattr_maxsz) #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ - encode_putfh_maxsz + \ - op_encode_hdr_maxsz + \ - 13 + 3 + 2 + 64 + \ - encode_getattr_maxsz + \ - encode_getfh_maxsz) + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_open_maxsz + \ + encode_getfh_maxsz + \ + encode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ - decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \ - decode_getattr_maxsz + \ - decode_getfh_maxsz) + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_open_maxsz + \ + decode_getfh_maxsz + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int); op_decode_hdr_maxsz + 4) #define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + \ - 11) + encode_open_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ - 4 + 5 + 2 + 3) + decode_open_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int); op_encode_hdr_maxsz + \ 1 + 1 + 2 + 2 + \ 1 + 4 + 1 + 2 + \ - owner_id_maxsz) + lock_owner_id_maxsz) #define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz + \ op_decode_hdr_maxsz + \ 2 + 2 + 1 + 2 + \ - owner_id_maxsz) + lock_owner_id_maxsz) #define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz + \ op_encode_hdr_maxsz + \ 1 + 2 + 2 + 2 + \ - owner_id_maxsz) + 
lock_owner_id_maxsz) #define NFS4_dec_lockt_sz (NFS4_dec_lock_sz) #define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int); encode_getfh_maxsz) #define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ + decode_lookup_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ @@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_fs_locations_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_getattr_maxsz) + encode_lookup_maxsz + \ + encode_fs_locations_maxsz) #define NFS4_dec_fs_locations_sz \ (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + \ - nfs4_fattr_bitmap_maxsz) + decode_lookup_maxsz + \ + decode_fs_locations_maxsz) static struct { unsigned int mode; @@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) WRITE64(nfs4_lock_length(args->fl)); WRITE32(args->new_lock_owner); if (args->new_lock_owner){ - RESERVE_SPACE(4+NFS4_STATEID_SIZE+20); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+32); WRITE32(args->open_seqid->sequence->counter); WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); WRITE32(args->lock_seqid->sequence->counter); WRITE64(args->lock_owner.clientid); - WRITE32(4); - WRITE32(args->lock_owner.id); + WRITE32(16); + WRITEMEM("lock id:", 8); + WRITE64(args->lock_owner.id); } else { RESERVE_SPACE(NFS4_STATEID_SIZE+4); @@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg { __be32 *p; - RESERVE_SPACE(40); + RESERVE_SPACE(52); WRITE32(OP_LOCKT); WRITE32(nfs4_lock_type(args->fl, 0)); WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); WRITE64(args->lock_owner.clientid); - WRITE32(4); - WRITE32(args->lock_owner.id); + WRITE32(16); + WRITEMEM("lock id:", 8); + WRITE64(args->lock_owner.id); return 0; } 
@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena WRITE32(OP_OPEN); WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); - RESERVE_SPACE(16); + RESERVE_SPACE(28); WRITE64(arg->clientid); - WRITE32(4); - WRITE32(arg->id); + WRITE32(16); + WRITEMEM("open id:", 8); + WRITE64(arg->id); } static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) @@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; uint32_t attrs[2] = { FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, FATTR4_WORD1_MOUNTED_ON_FILEID, @@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; __be32 *p; @@ -1735,7 +1772,7 @@ out: */ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, struct nfs_getaclargs *args) { struct xdr_stream xdr; - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct compound_hdr hdr = { .nops = 2, }; @@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs struct compound_hdr hdr = { .nops = 3, }; - struct 
rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; int replen; int status; @@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) { __be32 *p; - uint32_t bmlen; + uint32_t savewords, bmlen, i; int status; status = decode_op_hdr(xdr, OP_OPEN); @@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) goto xdr_error; READ_BUF(bmlen << 2); - p += bmlen; + savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); + for (i = 0; i < savewords; ++i) + READ32(res->attrset[i]); + for (; i < NFS4_BITMAP_SIZE; i++) + res->attrset[i] = 0; + return decode_delegation(xdr, res); xdr_error: dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen); diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 49d1008ce1d7..3490322d1145 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto) printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", program, version, NIPQUAD(servaddr)); set_sockaddr(&sin, servaddr, 0); - return rpcb_getport_external(&sin, program, version, proto); + return rpcb_getport_sync(&sin, program, version, proto); } @@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void) NFS_MNT3_VERSION : NFS_MNT_VERSION; set_sockaddr(&sin, servaddr, htons(mount_port)); - status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol); + status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL, + nfs_path, version, protocol, &fh); if (status < 0) printk(KERN_ERR "Root-NFS: Server returned error %d " "while mounting %s\n", status, nfs_path); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c5bb51a29e80..f56dae5216f4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode 
*inode, req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; - atomic_set(&req->wb_count, 1); req->wb_context = get_nfs_open_context(ctx); - + kref_init(&req->wb_kref); return req; } @@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req) } /** - * nfs_set_page_writeback_locked - Lock a request for writeback + * nfs_set_page_tag_locked - Tag a request as locked * @req: */ -int nfs_set_page_writeback_locked(struct nfs_page *req) +static int nfs_set_page_tag_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); if (!nfs_lock_request(req)) return 0; - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } /** - * nfs_clear_page_writeback - Unlock request and wake up sleepers + * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers */ -void nfs_clear_page_writeback(struct nfs_page *req) +void nfs_clear_page_tag_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); if (req->wb_page != NULL) { - spin_lock(&nfsi->req_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); - spin_unlock(&nfsi->req_lock); + spin_lock(&inode->i_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + spin_unlock(&inode->i_lock); } nfs_unlock_request(req); } @@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req) * * Note: Should never be called with the spinlock held! 
*/ -void -nfs_release_request(struct nfs_page *req) +static void nfs_free_request(struct kref *kref) { - if (!atomic_dec_and_test(&req->wb_count)) - return; + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); /* Release struct file or cached credential */ nfs_clear_request(req); @@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req) nfs_page_free(req); } +void nfs_release_request(struct nfs_page *req) +{ + kref_put(&req->wb_kref, nfs_free_request); +} + static int nfs_wait_bit_interruptible(void *word) { int ret = 0; @@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word) int nfs_wait_on_request(struct nfs_page *req) { - struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode); sigset_t oldmask; int ret = 0; @@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) /** * nfs_scan_list - Scan a list for matching requests * @nfsi: NFS inode - * @head: One of the NFS inode request lists * @dst: Destination list * @idx_start: lower bound of page->index to scan * @npages: idx_start + npages sets the upper bound to scan. + * @tag: tag to scan for * * Moves elements from one of the inode request lists. * If the number of requests is set to 0, the entire address_space * starting at index idx_start, is scanned. * The requests are *not* checked to ensure that they form a contiguous set. 
- * You must be holding the inode's req_lock when calling this function + * You must be holding the inode's i_lock when calling this function */ -int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, +int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, - unsigned int npages) + unsigned int npages, int tag) { struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; struct nfs_page *req; @@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, idx_end = idx_start + npages - 1; for (;;) { - found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&pgvec[0], idx_start, - NFS_SCAN_MAXENTRIES); + NFS_SCAN_MAXENTRIES, tag); if (found <= 0) break; for (i = 0; i < found; i++) { @@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, if (req->wb_index > idx_end) goto out; idx_start = req->wb_index + 1; - if (req->wb_list_head != head) - continue; - if (nfs_set_page_writeback_locked(req)) { + if (nfs_set_page_tag_locked(req)) { nfs_list_remove_request(req); + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, tag); nfs_list_add_request(req, dst); res++; + if (res == INT_MAX) + goto out; } } - + /* for latency reduction */ + cond_resched_lock(&nfsi->vfs_inode.i_lock); } out: return res; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7bd7cb95c034..6ae2e58ed05a 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req) unlock_page(req->wb_page); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); nfs_clear_request(req); @@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct 
nfs_read_data *data, int flags; data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; + data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); @@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page) */ error = nfs_wb_page(inode, page); if (error) - goto out_error; + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; error = -ESTALE; if (NFS_STALE(inode)) - goto out_error; + goto out_unlock; if (file == NULL) { error = -EBADF; ctx = nfs_find_open_context(inode, NULL, FMODE_READ); if (ctx == NULL) - goto out_error; + goto out_unlock; } else ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); @@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page) put_nfs_open_context(ctx); return error; - -out_error: +out_unlock: unlock_page(page); return error; } @@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page) struct inode *inode = page->mapping->host; struct nfs_page *new; unsigned int len; + int error; + + error = nfs_wb_page(inode, page); + if (error) + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; - nfs_wb_page(inode, page); len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); + new = nfs_create_request(desc->ctx, inode, page, 0, len); - if (IS_ERR(new)) { - SetPageError(page); - unlock_page(page); - return PTR_ERR(new); - } + if (IS_ERR(new)) + goto out_error; + if (len < PAGE_CACHE_SIZE) zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); nfs_pageio_add_request(desc->pgio, new); return 0; +out_error: + error = PTR_ERR(new); + SetPageError(page); +out_unlock: + unlock_page(page); + return error; } int nfs_readpages(struct file *filp, struct address_space *mapping, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ca20d3cc2609..a2b1af89ca1a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -45,6 +45,7 @@ #include <linux/inet.h> 
#include <linux/nfs_xdr.h> #include <linux/magic.h> +#include <linux/parser.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -57,6 +58,167 @@ #define NFSDBG_FACILITY NFSDBG_VFS + +struct nfs_parsed_mount_data { + int flags; + int rsize, wsize; + int timeo, retrans; + int acregmin, acregmax, + acdirmin, acdirmax; + int namlen; + unsigned int bsize; + unsigned int auth_flavor_len; + rpc_authflavor_t auth_flavors[1]; + char *client_address; + + struct { + struct sockaddr_in address; + unsigned int program; + unsigned int version; + unsigned short port; + int protocol; + } mount_server; + + struct { + struct sockaddr_in address; + char *hostname; + char *export_path; + unsigned int program; + int protocol; + } nfs_server; +}; + +enum { + /* Mount options that take no arguments */ + Opt_soft, Opt_hard, + Opt_intr, Opt_nointr, + Opt_posix, Opt_noposix, + Opt_cto, Opt_nocto, + Opt_ac, Opt_noac, + Opt_lock, Opt_nolock, + Opt_v2, Opt_v3, + Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, + Opt_rdirplus, Opt_nordirplus, + Opt_sharecache, Opt_nosharecache, + + /* Mount options that take integer arguments */ + Opt_port, + Opt_rsize, Opt_wsize, Opt_bsize, + Opt_timeo, Opt_retrans, + Opt_acregmin, Opt_acregmax, + Opt_acdirmin, Opt_acdirmax, + Opt_actimeo, + Opt_namelen, + Opt_mountport, + Opt_mountprog, Opt_mountvers, + Opt_nfsprog, Opt_nfsvers, + + /* Mount options that take string arguments */ + Opt_sec, Opt_proto, Opt_mountproto, + Opt_addr, Opt_mounthost, Opt_clientaddr, + + /* Mount options that are ignored */ + Opt_userspace, Opt_deprecated, + + Opt_err +}; + +static match_table_t nfs_mount_option_tokens = { + { Opt_userspace, "bg" }, + { Opt_userspace, "fg" }, + { Opt_soft, "soft" }, + { Opt_hard, "hard" }, + { Opt_intr, "intr" }, + { Opt_nointr, "nointr" }, + { Opt_posix, "posix" }, + { Opt_noposix, "noposix" }, + { Opt_cto, "cto" }, + { Opt_nocto, "nocto" }, + { Opt_ac, "ac" }, + { Opt_noac, "noac" }, + { Opt_lock, "lock" }, + { Opt_nolock, "nolock" }, + { Opt_v2, "v2" }, 
+ { Opt_v3, "v3" }, + { Opt_udp, "udp" }, + { Opt_tcp, "tcp" }, + { Opt_acl, "acl" }, + { Opt_noacl, "noacl" }, + { Opt_rdirplus, "rdirplus" }, + { Opt_nordirplus, "nordirplus" }, + { Opt_sharecache, "sharecache" }, + { Opt_nosharecache, "nosharecache" }, + + { Opt_port, "port=%u" }, + { Opt_rsize, "rsize=%u" }, + { Opt_wsize, "wsize=%u" }, + { Opt_bsize, "bsize=%u" }, + { Opt_timeo, "timeo=%u" }, + { Opt_retrans, "retrans=%u" }, + { Opt_acregmin, "acregmin=%u" }, + { Opt_acregmax, "acregmax=%u" }, + { Opt_acdirmin, "acdirmin=%u" }, + { Opt_acdirmax, "acdirmax=%u" }, + { Opt_actimeo, "actimeo=%u" }, + { Opt_userspace, "retry=%u" }, + { Opt_namelen, "namlen=%u" }, + { Opt_mountport, "mountport=%u" }, + { Opt_mountprog, "mountprog=%u" }, + { Opt_mountvers, "mountvers=%u" }, + { Opt_nfsprog, "nfsprog=%u" }, + { Opt_nfsvers, "nfsvers=%u" }, + { Opt_nfsvers, "vers=%u" }, + + { Opt_sec, "sec=%s" }, + { Opt_proto, "proto=%s" }, + { Opt_mountproto, "mountproto=%s" }, + { Opt_addr, "addr=%s" }, + { Opt_clientaddr, "clientaddr=%s" }, + { Opt_mounthost, "mounthost=%s" }, + + { Opt_err, NULL } +}; + +enum { + Opt_xprt_udp, Opt_xprt_tcp, + + Opt_xprt_err +}; + +static match_table_t nfs_xprt_protocol_tokens = { + { Opt_xprt_udp, "udp" }, + { Opt_xprt_tcp, "tcp" }, + + { Opt_xprt_err, NULL } +}; + +enum { + Opt_sec_none, Opt_sec_sys, + Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, + Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, + Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, + + Opt_sec_err +}; + +static match_table_t nfs_secflavor_tokens = { + { Opt_sec_none, "none" }, + { Opt_sec_none, "null" }, + { Opt_sec_sys, "sys" }, + + { Opt_sec_krb5, "krb5" }, + { Opt_sec_krb5i, "krb5i" }, + { Opt_sec_krb5p, "krb5p" }, + + { Opt_sec_lkey, "lkey" }, + { Opt_sec_lkeyi, "lkeyi" }, + { Opt_sec_lkeyp, "lkeyp" }, + + { Opt_sec_err, NULL } +}; + + static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file 
*, struct vfsmount *); @@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) { RPC_AUTH_GSS_SPKM, "spkm" }, { RPC_AUTH_GSS_SPKMI, "spkmi" }, { RPC_AUTH_GSS_SPKMP, "spkmp" }, - { -1, "unknown" } + { UINT_MAX, "unknown" } }; int i; - for (i=0; sec_flavours[i].flavour != -1; i++) { + for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) { if (sec_flavours[i].flavour == flavour) break; } @@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; @@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) */ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { + struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); + struct rpc_clnt *rpc; + shrink_submounts(vfsmnt, &nfs_automount_list); + + if (!(flags & MNT_FORCE)) + return; + /* -EIO all pending I/O */ + rpc = server->client_acl; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = server->client; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); } /* - * Validate the NFS2/NFS3 mount data - * - fills in the mount root filehandle + * Sanity-check a server address provided by the mount command */ -static int nfs_validate_mount_data(struct nfs_mount_data *data, - struct nfs_fh *mntfh) +static int nfs_verify_server_address(struct sockaddr *addr) { - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return -EINVAL; + switch (addr->sa_family) { + case AF_INET: { + struct sockaddr_in *sa = (struct sockaddr_in *) addr; + if (sa->sin_addr.s_addr != INADDR_ANY) + return 1; + break; + } } - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return -EINVAL; + return 0; +} + +/* + * Error-check and 
convert a string of mount options from user space into + * a data structure + */ +static int nfs_parse_mount_options(char *raw, + struct nfs_parsed_mount_data *mnt) +{ + char *p, *string; + + if (!raw) { + dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); + return 1; } + dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - return -EINVAL; + while ((p = strsep(&raw, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int option, token; + + if (!*p) + continue; + + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); + + token = match_token(p, nfs_mount_option_tokens, args); + switch (token) { + case Opt_soft: + mnt->flags |= NFS_MOUNT_SOFT; + break; + case Opt_hard: + mnt->flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_intr: + mnt->flags |= NFS_MOUNT_INTR; + break; + case Opt_nointr: + mnt->flags &= ~NFS_MOUNT_INTR; + break; + case Opt_posix: + mnt->flags |= NFS_MOUNT_POSIX; + break; + case Opt_noposix: + mnt->flags &= ~NFS_MOUNT_POSIX; + break; + case Opt_cto: + mnt->flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_nocto: + mnt->flags |= NFS_MOUNT_NOCTO; + break; + case Opt_ac: + mnt->flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_noac: + mnt->flags |= NFS_MOUNT_NOAC; + break; + case Opt_lock: + mnt->flags &= ~NFS_MOUNT_NONLM; + break; + case Opt_nolock: + mnt->flags |= NFS_MOUNT_NONLM; + break; + case Opt_v2: + mnt->flags &= ~NFS_MOUNT_VER3; + break; + case Opt_v3: + mnt->flags |= NFS_MOUNT_VER3; + break; + case Opt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_UDP; + mnt->timeo = 7; + mnt->retrans = 5; + break; + case Opt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_TCP; + mnt->timeo = 600; + mnt->retrans = 2; + break; + case Opt_acl: + mnt->flags &= 
~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + mnt->flags |= NFS_MOUNT_NOACL; + break; + case Opt_rdirplus: + mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; + break; + case Opt_nordirplus: + mnt->flags |= NFS_MOUNT_NORDIRPLUS; + break; + case Opt_sharecache: + mnt->flags &= ~NFS_MOUNT_UNSHARED; + break; + case Opt_nosharecache: + mnt->flags |= NFS_MOUNT_UNSHARED; + break; + + case Opt_port: + if (match_int(args, &option)) + return 0; + if (option < 0 || option > 65535) + return 0; + mnt->nfs_server.address.sin_port = htons(option); + break; + case Opt_rsize: + if (match_int(args, &mnt->rsize)) + return 0; + break; + case Opt_wsize: + if (match_int(args, &mnt->wsize)) + return 0; + break; + case Opt_bsize: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->bsize = option; + break; + case Opt_timeo: + if (match_int(args, &mnt->timeo)) + return 0; + break; + case Opt_retrans: + if (match_int(args, &mnt->retrans)) + return 0; + break; + case Opt_acregmin: + if (match_int(args, &mnt->acregmin)) + return 0; + break; + case Opt_acregmax: + if (match_int(args, &mnt->acregmax)) + return 0; + break; + case Opt_acdirmin: + if (match_int(args, &mnt->acdirmin)) + return 0; + break; + case Opt_acdirmax: + if (match_int(args, &mnt->acdirmax)) + return 0; + break; + case Opt_actimeo: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->acregmin = + mnt->acregmax = + mnt->acdirmin = + mnt->acdirmax = option; + break; + case Opt_namelen: + if (match_int(args, &mnt->namlen)) + return 0; + break; + case Opt_mountport: + if (match_int(args, &option)) + return 0; + if (option < 0 || option > 65535) + return 0; + mnt->mount_server.port = option; + break; + case Opt_mountprog: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->mount_server.program = option; + break; + case Opt_mountvers: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->mount_server.version = option; + break; + 
case Opt_nfsprog: + if (match_int(args, &option)) + return 0; + if (option < 0) + return 0; + mnt->nfs_server.program = option; + break; + case Opt_nfsvers: + if (match_int(args, &option)) + return 0; + switch (option) { + case 2: + mnt->flags &= ~NFS_MOUNT_VER3; + break; + case 3: + mnt->flags |= NFS_MOUNT_VER3; + break; + default: + goto out_unrec_vers; } - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - return -EINVAL; + break; + + case Opt_sec: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, nfs_secflavor_tokens, args); + kfree(string); + + /* + * The flags setting is for v2/v3. The flavor_len + * setting is for v4. v2/v3 also need to know the + * difference between NULL and UNIX. + */ + switch (token) { + case Opt_sec_none: + mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + mnt->flags 
|= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; + break; + default: + goto out_unrec_sec; } - case 5: - memset(data->context, 0, sizeof(data->context)); - } + break; + case Opt_proto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); + + switch (token) { + case Opt_xprt_udp: + mnt->flags &= ~NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_UDP; + mnt->timeo = 7; + mnt->retrans = 5; + break; + case Opt_xprt_tcp: + mnt->flags |= NFS_MOUNT_TCP; + mnt->nfs_server.protocol = IPPROTO_TCP; + mnt->timeo = 600; + mnt->retrans = 2; + break; + default: + goto out_unrec_xprt; + } + break; + case Opt_mountproto: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_xprt_protocol_tokens, args); + kfree(string); + + switch (token) { + case Opt_xprt_udp: + mnt->mount_server.protocol = IPPROTO_UDP; + break; + case Opt_xprt_tcp: + mnt->mount_server.protocol = IPPROTO_TCP; + break; + default: + goto out_unrec_xprt; + } + break; + case Opt_addr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->nfs_server.address.sin_family = AF_INET; + mnt->nfs_server.address.sin_addr.s_addr = + in_aton(string); + kfree(string); + break; + case Opt_clientaddr: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + mnt->client_address = string; + break; + case Opt_mounthost: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + 
mnt->mount_server.address.sin_family = AF_INET; + mnt->mount_server.address.sin_addr.s_addr = + in_aton(string); + kfree(string); + break; - /* Set the pseudoflavor */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; + case Opt_userspace: + case Opt_deprecated: + break; -#ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - return -EPROTONOSUPPORT; + default: + goto out_unknown; + } } -#endif /* CONFIG_NFS_V3 */ - /* We now require that the mount process passes the remote address */ - if (data->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - return -EINVAL; + return 1; + +out_nomem: + printk(KERN_INFO "NFS: not enough memory to parse option\n"); + return 0; + +out_unrec_vers: + printk(KERN_INFO "NFS: unrecognized NFS version number\n"); + return 0; + +out_unrec_xprt: + printk(KERN_INFO "NFS: unrecognized transport protocol\n"); + return 0; + +out_unrec_sec: + printk(KERN_INFO "NFS: unrecognized security flavor\n"); + return 0; + +out_unknown: + printk(KERN_INFO "NFS: unknown mount option: %s\n", p); + return 0; +} + +/* + * Use the remote server's MOUNT service to request the NFS file handle + * corresponding to the provided path. + */ +static int nfs_try_mount(struct nfs_parsed_mount_data *args, + struct nfs_fh *root_fh) +{ + struct sockaddr_in sin; + int status; + + if (args->mount_server.version == 0) { + if (args->flags & NFS_MOUNT_VER3) + args->mount_server.version = NFS_MNT3_VERSION; + else + args->mount_server.version = NFS_MNT_VERSION; } - /* Prepare the root filehandle */ - if (data->flags & NFS_MOUNT_VER3) - mntfh->size = data->root.size; + /* + * Construct the mount server's address. 
+ */ + if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY) + sin = args->mount_server.address; else - mntfh->size = NFS2_FHSIZE; + sin = args->nfs_server.address; + if (args->mount_server.port == 0) { + status = rpcb_getport_sync(&sin, + args->mount_server.program, + args->mount_server.version, + args->mount_server.protocol); + if (status < 0) + goto out_err; + sin.sin_port = htons(status); + } else + sin.sin_port = htons(args->mount_server.port); + + /* + * Now ask the mount server to map our export path + * to a file handle. + */ + status = nfs_mount((struct sockaddr *) &sin, + sizeof(sin), + args->nfs_server.hostname, + args->nfs_server.export_path, + args->mount_server.version, + args->mount_server.protocol, + root_fh); + if (status < 0) + goto out_err; + + return status; - if (mntfh->size > sizeof(mntfh->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - return -EINVAL; +out_err: + dfprintk(MOUNT, "NFS: unable to contact server on host " + NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr)); + return status; +} + +/* + * Validate the NFS2/NFS3 mount data + * - fills in the mount root filehandle + * + * For option strings, user space handles the following behaviors: + * + * + DNS: mapping server host name to IP address ("addr=" option) + * + * + failure mode: how to behave if a mount request can't be handled + * immediately ("fg/bg" option) + * + * + retry: how often to retry a mount request ("retry=" option) + * + * + breaking back: trying proto=udp after proto=tcp, v2 after v3, + * mountproto=tcp after mountproto=udp, and so on + * + * XXX: as far as I can tell, changing the NFS program number is not + * supported in the NFS client. 
+ */ +static int nfs_validate_mount_data(struct nfs_mount_data **options, + struct nfs_fh *mntfh, + const char *dev_name) +{ + struct nfs_mount_data *data = *options; + + if (data == NULL) + goto out_no_data; + + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) + goto out_no_v3; + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) + goto out_no_sec; + case 5: + memset(data->context, 0, sizeof(data->context)); + case 6: + if (data->flags & NFS_MOUNT_VER3) + mntfh->size = data->root.size; + else + mntfh->size = NFS2_FHSIZE; + + if (mntfh->size > sizeof(mntfh->data)) + goto out_invalid_fh; + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); + break; + default: { + unsigned int len; + char *c; + int status; + struct nfs_parsed_mount_data args = { + .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP), + .rsize = NFS_MAX_FILE_IO_SIZE, + .wsize = NFS_MAX_FILE_IO_SIZE, + .timeo = 600, + .retrans = 2, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .mount_server.protocol = IPPROTO_UDP, + .mount_server.program = NFS_MNT_PROGRAM, + .nfs_server.protocol = IPPROTO_TCP, + .nfs_server.program = NFS_PROGRAM, + }; + + if (nfs_parse_mount_options((char *) *options, &args) == 0) + return -EINVAL; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + /* + * NB: after this point, caller will free "data" + * if we return an error + */ + *options = data; + + c = strchr(dev_name, ':'); + if (c == NULL) + return -EINVAL; + len = c - dev_name; + if (len >= sizeof(data->hostname)) + return -EINVAL; + strncpy(data->hostname, dev_name, len); + args.nfs_server.hostname = data->hostname; + + c++; + if (strlen(c) > NFS_MAXPATHLEN) + return -EINVAL; + 
args.nfs_server.export_path = c; + + status = nfs_try_mount(&args, mntfh); + if (status) + return -EINVAL; + + /* + * Translate to nfs_mount_data, which nfs_fill_super + * can deal with. + */ + data->version = 6; + data->flags = args.flags; + data->rsize = args.rsize; + data->wsize = args.wsize; + data->timeo = args.timeo; + data->retrans = args.retrans; + data->acregmin = args.acregmin; + data->acregmax = args.acregmax; + data->acdirmin = args.acdirmin; + data->acdirmax = args.acdirmax; + data->addr = args.nfs_server.address; + data->namlen = args.namlen; + data->bsize = args.bsize; + data->pseudoflavor = args.auth_flavors[0]; + + break; + } } - memcpy(mntfh->data, data->root.data, mntfh->size); - if (mntfh->size < sizeof(mntfh->data)) - memset(mntfh->data + mntfh->size, 0, - sizeof(mntfh->data) - mntfh->size); + if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) + data->pseudoflavor = RPC_AUTH_UNIX; + +#ifndef CONFIG_NFS_V3 + if (data->flags & NFS_MOUNT_VER3) + goto out_v3_not_compiled; +#endif /* !CONFIG_NFS_V3 */ + + if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) + goto out_no_address; return 0; + +out_no_data: + dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_no_v3: + dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", + data->version); + return -EINVAL; + +out_no_sec: + dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); + return -EINVAL; + +#ifndef CONFIG_NFS_V3 +out_v3_not_compiled: + dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); + return -EPROTONOSUPPORT; +#endif /* !CONFIG_NFS_V3 */ + +out_no_address: + dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); + return -EINVAL; + +out_invalid_fh: + dfprintk(MOUNT, "NFS: invalid root filehandle\n"); + return -EINVAL; } /* @@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data) { struct nfs_server *server = data, *old = NFS_SB(sb); - if (old->nfs_client != 
server->nfs_client) + if (memcmp(&old->nfs_client->cl_addr, + &server->nfs_client->cl_addr, + sizeof(old->nfs_client->cl_addr)) != 0) + return 0; + /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ + if (old->flags & NFS_MOUNT_UNSHARED) return 0; if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) return 0; return 1; } +#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) + +static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) +{ + const struct nfs_server *a = s->s_fs_info; + const struct rpc_clnt *clnt_a = a->client; + const struct rpc_clnt *clnt_b = b->client; + + if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) + goto Ebusy; + if (a->nfs_client != b->nfs_client) + goto Ebusy; + if (a->flags != b->flags) + goto Ebusy; + if (a->wsize != b->wsize) + goto Ebusy; + if (a->rsize != b->rsize) + goto Ebusy; + if (a->acregmin != b->acregmin) + goto Ebusy; + if (a->acregmax != b->acregmax) + goto Ebusy; + if (a->acdirmin != b->acdirmin) + goto Ebusy; + if (a->acdirmax != b->acdirmax) + goto Ebusy; + if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + goto Ebusy; + return 0; +Ebusy: + return -EBUSY; +} + static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { @@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type, struct nfs_fh mntfh; struct nfs_mount_data *data = raw_data; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; /* Validate the mount data */ - error = nfs_validate_mount_data(data, &mntfh); + error = nfs_validate_mount_data(&data, &mntfh, dev_name); if (error < 0) - return error; + goto out; /* Get a volume representation */ server = nfs_create_server(data, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); - goto out_err_noserver; + goto out; } + if (server->flags & NFS_MOUNT_UNSHARED) + 
compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; } if (s->s_fs_info != server) { + error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; + if (error < 0) + goto error_splat_super; } if (!s->s_root) { @@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; mnt->mnt_sb = s; mnt->mnt_root = mntroot; - return 0; + error = 0; + +out: + if (data != raw_data) + kfree(data); + return error; out_err_nosb: nfs_free_server(server); -out_err_noserver: - return error; + goto out; error_splat_super: up_write(&s->s_umount); deactivate_super(s); - return error; + goto out; } /* @@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, struct super_block *s; struct nfs_server *server; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs_xdev_get_sb()\n"); @@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; } if (s->s_fs_info != server) { + error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; + if (error < 0) + goto error_splat_super; } if (!s->s_root) { @@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb) nfs_initialise_sb(sb); } -static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int 
maxlen) +/* + * Validate NFSv4 mount options + */ +static int nfs4_validate_mount_data(struct nfs4_mount_data **options, + const char *dev_name, + struct sockaddr_in *addr, + rpc_authflavor_t *authflavour, + char **hostname, + char **mntpath, + char **ip_addr) { - void *p = NULL; - - if (!src->len) - return ERR_PTR(-EINVAL); - if (src->len < maxlen) - maxlen = src->len; - if (dst == NULL) { - p = dst = kmalloc(maxlen + 1, GFP_KERNEL); - if (p == NULL) - return ERR_PTR(-ENOMEM); - } - if (copy_from_user(dst, src->data, maxlen)) { - kfree(p); - return ERR_PTR(-EFAULT); + struct nfs4_mount_data *data = *options; + char *c; + + if (data == NULL) + goto out_no_data; + + switch (data->version) { + case 1: + if (data->host_addrlen != sizeof(*addr)) + goto out_no_address; + if (copy_from_user(addr, data->host_addr, sizeof(*addr))) + return -EFAULT; + if (addr->sin_port == 0) + addr->sin_port = htons(NFS_PORT); + if (!nfs_verify_server_address((struct sockaddr *) addr)) + goto out_no_address; + + switch (data->auth_flavourlen) { + case 0: + *authflavour = RPC_AUTH_UNIX; + break; + case 1: + if (copy_from_user(authflavour, data->auth_flavours, + sizeof(*authflavour))) + return -EFAULT; + break; + default: + goto out_inval_auth; + } + + c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + *hostname = c; + + c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + *mntpath = c; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath); + + c = strndup_user(data->client_addr.data, 16); + if (IS_ERR(c)) + return PTR_ERR(c); + *ip_addr = c; + + break; + default: { + unsigned int len; + struct nfs_parsed_mount_data args = { + .rsize = NFS_MAX_FILE_IO_SIZE, + .wsize = NFS_MAX_FILE_IO_SIZE, + .timeo = 600, + .retrans = 2, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .nfs_server.protocol = IPPROTO_TCP, + }; + + if (nfs_parse_mount_options((char *) *options, &args) == 0) + return 
-EINVAL; + + if (!nfs_verify_server_address((struct sockaddr *) + &args.nfs_server.address)) + return -EINVAL; + *addr = args.nfs_server.address; + + switch (args.auth_flavor_len) { + case 0: + *authflavour = RPC_AUTH_UNIX; + break; + case 1: + *authflavour = (rpc_authflavor_t) args.auth_flavors[0]; + break; + default: + goto out_inval_auth; + } + + /* + * Translate to nfs4_mount_data, which nfs4_fill_super + * can deal with. + */ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + *options = data; + + data->version = 1; + data->flags = args.flags & NFS4_MOUNT_FLAGMASK; + data->rsize = args.rsize; + data->wsize = args.wsize; + data->timeo = args.timeo; + data->retrans = args.retrans; + data->acregmin = args.acregmin; + data->acregmax = args.acregmax; + data->acdirmin = args.acdirmin; + data->acdirmax = args.acdirmax; + data->proto = args.nfs_server.protocol; + + /* + * Split "dev_name" into "hostname:mntpath". + */ + c = strchr(dev_name, ':'); + if (c == NULL) + return -EINVAL; + /* len is the hostname length, not counting the ':' */ + len = c - dev_name; + if (len > NFS4_MAXNAMLEN) + return -EINVAL; + *hostname = kzalloc(len + 1, GFP_KERNEL); + if (*hostname == NULL) + return -ENOMEM; + strncpy(*hostname, dev_name, len); + + c++; /* step over the ':' */ + len = strlen(c); + if (len > NFS4_MAXPATHLEN) + return -EINVAL; + *mntpath = kzalloc(len + 1, GFP_KERNEL); + if (*mntpath == NULL) + return -ENOMEM; + strncpy(*mntpath, c, len); + + dprintk("MNTPATH: %s\n", *mntpath); + + *ip_addr = args.client_address; + + break; + } } - dst[maxlen] = '\0'; - return dst; + + return 0; + +out_no_data: + dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); + return -EINVAL; + +out_inval_auth: + dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", + data->auth_flavourlen); + return -EINVAL; + +out_no_address: + dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); + return -EINVAL; } /* @@ -806,81 +1718,29 @@ 
static int nfs4_get_sb(struct file_system_type *fs_type, rpc_authflavor_t authflavour; struct nfs_fh mntfh; struct dentry *mntroot; - char *mntpath = NULL, *hostname = NULL, ip_addr[16]; - void *p; + char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return -EINVAL; - } - if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return -EINVAL; - } - - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(addr)) - return -EINVAL; - - if (copy_from_user(&addr, data->host_addr, sizeof(addr))) - return -EFAULT; - - if (addr.sin_family != AF_INET || - addr.sin_addr.s_addr == INADDR_ANY - ) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - return -EINVAL; - } - /* RFC3530: The default port for NFS is 2049 */ - if (addr.sin_port == 0) - addr.sin_port = htons(NFS_PORT); - - /* Grab the authentication type */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - error = -EINVAL; - goto out_err_noserver; - } - - if (copy_from_user(&authflavour, data->auth_flavours, - sizeof(authflavour))) { - error = -EFAULT; - goto out_err_noserver; - } - } - - p = nfs_copy_user_string(NULL, &data->hostname, 256); - if (IS_ERR(p)) - goto out_err; - hostname = p; - - p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); - if (IS_ERR(p)) - goto out_err; - mntpath = p; - - dprintk("MNTPATH: %s\n", mntpath); - - p = nfs_copy_user_string(ip_addr, &data->client_addr, - sizeof(ip_addr) - 1); - if (IS_ERR(p)) - goto out_err; + /* Validate the mount data */ + error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour, + 
&hostname, &mntpath, &ip_addr); + if (error < 0) + goto out; /* Get a volume representation */ server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, authflavour, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); - goto out_err_noserver; + goto out; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; @@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; mnt->mnt_sb = s; mnt->mnt_root = mntroot; + error = 0; + +out: + kfree(ip_addr); kfree(mntpath); kfree(hostname); - return 0; - -out_err: - error = PTR_ERR(p); - goto out_err_noserver; + return error; out_free: nfs_free_server(server); -out_err_noserver: - kfree(mntpath); - kfree(hostname); - return error; + goto out; error_splat_super: up_write(&s->s_umount); deactivate_super(s); - goto out_err_noserver; + goto out; } static void nfs4_kill_super(struct super_block *sb) @@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, struct super_block *s; struct nfs_server *server; struct dentry *mntroot; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs4_xdev_get_sb()\n"); @@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct 
file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot; struct nfs_fh mntfh; + int (*compare_super)(struct super_block *, void *) = nfs_compare_super; int error; dprintk("--> nfs4_referral_get_sb()\n"); @@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af344a158e01..73ac992ece85 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page) if (PagePrivate(page)) { req = (struct nfs_page *)page_private(page); if (req != NULL) - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); } return req; } static struct nfs_page *nfs_page_find_request(struct page *page) { + struct inode *inode = page->mapping->host; struct nfs_page *req = NULL; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; - spin_lock(req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return req; } @@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, } /* Update file length */ nfs_grow_file(page, offset, count); - /* Set the PG_uptodate flag? 
*/ - nfs_mark_uptodate(page, offset, count); nfs_unlock_request(req); return 0; } @@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page) static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) { + struct inode *inode = page->mapping->host; + struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - spinlock_t *req_lock = &nfsi->req_lock; int ret; - spin_lock(req_lock); + spin_lock(&inode->i_lock); for(;;) { req = nfs_page_find_request_locked(page); if (req == NULL) { - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return 1; } if (nfs_lock_request_dontget(req)) @@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). */ - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret != 0) return ret; - spin_lock(req_lock); + spin_lock(&inode->i_lock); } if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { /* This request is marked for commit */ - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_unlock_request(req); nfs_pageio_complete(pgio); return 1; } if (nfs_set_page_writeback(page) != 0) { - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); BUG(); } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, - NFS_PAGE_TAG_WRITEBACK); + NFS_PAGE_TAG_LOCKED); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_pageio_add_request(pgio, req); return ret; } @@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (PageDirty(req->wb_page)) set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); return 0; } @@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page 
*req) */ static void nfs_inode_remove_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); BUG_ON (!NFS_WBACK_BUSY(req)); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); @@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_end_data_update(inode); iput(inode); } else - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_clear_request(req); nfs_release_request(req); } @@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req) static void nfs_mark_request_commit(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&nfsi->req_lock); - nfs_list_add_request(req, &nfsi->commit); + spin_lock(&inode->i_lock); nfsi->ncommit++; set_bit(PG_NEED_COMMIT, &(req)->wb_flags); - spin_unlock(&nfsi->req_lock); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, + NFS_PAGE_TAG_COMMIT); + spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } @@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u idx_end = idx_start + npages - 1; next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { if (req->wb_index > idx_end) break; next = req->wb_index + 1; BUG_ON(!NFS_WBACK_BUSY(req)); - 
atomic_inc(&req->wb_count); - spin_unlock(&nfsi->req_lock); + kref_get(&req->wb_kref); + spin_unlock(&inode->i_lock); error = nfs_wait_on_request(req); nfs_release_request(req); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); if (error < 0) return error; res++; @@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u int res = 0; if (nfsi->ncommit != 0) { - res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); + res = nfs_scan_list(nfsi, dst, idx_start, npages, + NFS_PAGE_TAG_COMMIT); nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); } return res; } @@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req, *new = NULL; pgoff_t rqend, end; @@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, /* Loop over all inode entries and see if we find * A request for the page we wish to update */ - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req) { if (!nfs_lock_request_dontget(req)) { int error; - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); error = nfs_wait_on_request(req); nfs_release_request(req); if (error < 0) { @@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, } continue; } - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); if (new) nfs_release_request(new); break; @@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, nfs_lock_request_dontget(new); error = nfs_inode_add_request(inode, new); if (error) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_unlock_request(new); return ERR_PTR(error); } 
- spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); return new; } - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); new = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(new)) @@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { + if (PageError(req->wb_page)) { + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else if (!nfs_reschedule_unstable_write(req)) { + /* Set the PG_uptodate flag */ + nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); } else nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } static inline int flush_task_priority(int how) @@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, * NB: take care not to mess about with data->commit et al. 
*/ data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; + data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); @@ -885,7 +888,7 @@ out_bad: } nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); return -ENOMEM; } @@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i nfs_list_remove_request(req); nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) struct page *page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) } if (nfs_write_need_commit(data)) { - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + struct inode *inode = page->mapping->host; - spin_lock(req_lock); + spin_lock(&inode->i_lock); if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { /* Do nothing we need to resend the writes */ } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { @@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) clear_bit(PG_NEED_COMMIT, &req->wb_flags); dprintk(" server reboot detected\n"); } - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); } else dprintk(" OK\n"); @@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - 
req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) dprintk(" marked for commit\n"); goto next; } + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); dprintk(" OK\n"); remove_request: nfs_end_page_writeback(page); nfs_inode_remove_request(req); next: - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } } @@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - inode = first->wb_context->dentry->d_inode; + inode = first->wb_context->path.dentry->d_inode; data->inode = inode; data->cred = first->wb_context->cred; @@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_list_remove_request(req); nfs_mark_request_commit(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dprintk("NFS: commit (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { @@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) * returned by the server against all stored verfs. 
*/ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { /* We have a match */ + /* Set the PG_uptodate flag */ + nfs_mark_uptodate(req->wb_page, req->wb_pgbase, + req->wb_bytes); nfs_inode_remove_request(req); dprintk(" OK\n"); goto next; @@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) dprintk(" mismatch\n"); nfs_redirty_request(req); next: - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } } @@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = { int nfs_commit_inode(struct inode *inode, int how) { - struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); int res; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); if (res) { int error = nfs_commit_list(inode, &head, how); if (error < 0) @@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) { struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); pgoff_t idx_start, idx_end; unsigned int npages = 0; LIST_HEAD(head); @@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr } } how &= ~FLUSH_NOCOMMIT; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); do { ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) @@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr if (pages == 0) break; if (how & FLUSH_INVALIDATE) { - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_cancel_commit_list(&head); ret = pages; - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); continue; } pages += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); ret = 
nfs_commit_list(inode, &head, how); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); + } while (ret >= 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); return ret; } @@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; - spinlock_t *req_lock; struct nfs_page *req; int ret; @@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page) inode = mapping->host; if (!inode) goto out_raced; - req_lock = &NFS_I(inode)->req_lock; - spin_lock(req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req != NULL) { /* Mark any existing write requests for flushing */ ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); nfs_release_request(req); return ret; } ret = __set_page_dirty_nobuffers(page); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return ret; out_raced: return !TestSetPageDirty(page); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 864090edc28b..5443c52b57aa 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; - char clientname[16]; int status; if (atomic_read(&cb->cb_set)) @@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) memset(program->stats, 0, sizeof(cb->cb_stat)); program->stats->program = program; - /* Just here to make some printk's more useful: */ - snprintf(clientname, sizeof(clientname), - "%u.%u.%u.%u", NIPQUAD(addr.sin_addr)); - args.servername = clientname; - /* Create RPC client */ cb->cb_client = rpc_create(&args); if (IS_ERR(cb->cb_client)) { @@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp) goto out_err; } - /* Kick rpciod, put the call on the wire. 
*/ - if (rpciod_up() != 0) - goto out_clnt; - /* the task holds a reference to the nfs4_client struct */ atomic_inc(&clp->cl_count); msg.rpc_cred = nfsd4_lookupcred(clp,0); if (IS_ERR(msg.rpc_cred)) - goto out_rpciod; + goto out_release_clp; status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); put_rpccred(msg.rpc_cred); if (status != 0) { dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); - goto out_rpciod; + goto out_release_clp; } return; -out_rpciod: +out_release_clp: atomic_dec(&clp->cl_count); - rpciod_down(); -out_clnt: rpc_shutdown_client(cb->cb_client); out_err: cb->cb_client = NULL; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3cc8ce422ab1..8c52913d7cb6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp) if (clnt) { clp->cl_callback.cb_client = NULL; rpc_shutdown_client(clnt); - rpciod_down(); } } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7e6aa245b5d5..945b1cedde2b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -23,7 +23,7 @@ #include <linux/file.h> #include <linux/mount.h> #include <linux/major.h> -#include <linux/ext2_fs.h> +#include <linux/splice.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/fcntl.h> @@ -801,26 +801,32 @@ found: } /* - * Grab and keep cached pages assosiated with a file in the svc_rqst - * so that they can be passed to the netowork sendmsg/sendpage routines - * directrly. They will be released after the sending has completed. + * Grab and keep cached pages associated with a file in the svc_rqst + * so that they can be passed to the network sendmsg/sendpage routines + * directly. They will be released after the sending has completed. 
*/ static int -nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size) +nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) { - unsigned long count = desc->count; - struct svc_rqst *rqstp = desc->arg.data; + struct svc_rqst *rqstp = sd->u.data; struct page **pp = rqstp->rq_respages + rqstp->rq_resused; + struct page *page = buf->page; + size_t size; + int ret; + + ret = buf->ops->confirm(pipe, buf); + if (unlikely(ret)) + return ret; - if (size > count) - size = count; + size = sd->len; if (rqstp->rq_res.page_len == 0) { get_page(page); put_page(*pp); *pp = page; rqstp->rq_resused++; - rqstp->rq_res.page_base = offset; + rqstp->rq_res.page_base = buf->offset; rqstp->rq_res.page_len = size; } else if (page != pp[-1]) { get_page(page); @@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset } else rqstp->rq_res.page_len += size; - desc->count = count - size; - desc->written += size; return size; } +static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, + struct splice_desc *sd) +{ + return __splice_from_pipe(pipe, sd, nfsd_splice_actor); +} + static __be32 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) @@ -861,10 +871,16 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (ra && ra->p_set) file->f_ra = ra->p_ra; - if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { + if (file->f_op->splice_read && rqstp->rq_splice_ok) { + struct splice_desc sd = { + .len = 0, + .total_len = *count, + .pos = offset, + .u.data = rqstp, + }; + rqstp->rq_resused = 1; - host_err = file->f_op->sendfile(file, &offset, *count, - nfsd_read_actor, rqstp); + host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); } else { oldfs = get_fs(); set_fs(KERNEL_DS); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c 
index 7ed56390b582..ffcc504a1667 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = { mounted filesystem. */ .mmap = generic_file_mmap, /* Mmap file. */ .open = ntfs_file_open, /* Open file. */ - .sendfile = generic_file_sendfile, /* Zero-copy data send with + .splice_read = generic_file_splice_read /* Zero-copy data send with the data source being on the ntfs partition. We do not need to care about the diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 074791ce4ab2..b532a730cec2 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -140,7 +140,7 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) if (!ni->name) return -ENOMEM; memcpy(ni->name, na->name, i); - ni->name[i] = 0; + ni->name[na->name_len] = 0; } return 0; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0023b31e48a8..a480b09c79b9 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -798,6 +798,11 @@ int ocfs2_map_and_write_splice_data(struct inode *inode, } to = from + bytes; + BUG_ON(from > PAGE_CACHE_SIZE); + BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from < cluster_start); + BUG_ON(to > cluster_end); + if (wc->w_this_page_new) ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, cluster_start, cluster_end, 1); @@ -809,11 +814,6 @@ int ocfs2_map_and_write_splice_data(struct inode *inode, goto out; } - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); - BUG_ON(from > osb->s_clustersize); - BUG_ON(to > osb->s_clustersize); - src = buf->ops->map(sp->s_pipe, buf, 1); dst = kmap_atomic(wc->w_this_page, KM_USER1); memcpy(dst + from, src + src_from, bytes); @@ -890,6 +890,11 @@ int ocfs2_map_and_write_user_data(struct inode *inode, to = from + bytes; + BUG_ON(from > PAGE_CACHE_SIZE); + BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from < cluster_start); + BUG_ON(to > cluster_end); + if (wc->w_this_page_new) ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, cluster_start, cluster_end, 1); @@ 
-901,11 +906,6 @@ int ocfs2_map_and_write_user_data(struct inode *inode, goto out; } - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); - BUG_ON(from > osb->s_clustersize); - BUG_ON(to > osb->s_clustersize); - dst = kmap(wc->w_this_page); memcpy(dst + from, bp->b_src_buf + src_from, bytes); kunmap(wc->w_this_page); diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index a93620ce4aca..e9e042b93dbf 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c @@ -74,7 +74,6 @@ struct mlog_attribute { #define define_mask(_name) { \ .attr = { \ .name = #_name, \ - .owner = THIS_MODULE, \ .mode = S_IRUGO | S_IWUSR, \ }, \ .mask = ML_##_name, \ @@ -144,8 +143,7 @@ static struct kobj_type mlog_ktype = { }; static struct kset mlog_kset = { - .kobj = {.name = "logmask"}, - .ktype = &mlog_ktype + .kobj = {.name = "logmask", .ktype = &mlog_ktype}, }; int mlog_sys_init(struct kset *o2cb_subsys) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ac6c96431bbc..4979b6675717 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -31,7 +31,7 @@ #include <linux/pagemap.h> #include <linux/uio.h> #include <linux/sched.h> -#include <linux/pipe_fs_i.h> +#include <linux/splice.h> #include <linux/mount.h> #include <linux/writeback.h> @@ -1583,7 +1583,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe, ssize_t copied = 0; struct ocfs2_splice_write_priv sp; - ret = buf->ops->pin(pipe, buf); + ret = buf->ops->confirm(pipe, buf); if (ret) goto out; @@ -1604,7 +1604,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe, * might enter ocfs2_buffered_write_cluster() more * than once, so keep track of our progress here. 
*/ - copied = ocfs2_buffered_write_cluster(sd->file, + copied = ocfs2_buffered_write_cluster(sd->u.file, (loff_t)sd->pos + total, count, ocfs2_map_and_write_splice_data, @@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, int ret, err; struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; - - ret = __splice_from_pipe(pipe, out, ppos, len, flags, - ocfs2_splice_write_actor); + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; + + ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor); if (ret > 0) { *ppos += ret; @@ -1817,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = { const struct file_operations ocfs2_fops = { .read = do_sync_read, .write = do_sync_write, - .sendfile = generic_file_sendfile, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 9a3a058f3553..98e0b85a9bb2 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -397,7 +397,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, static struct attribute addpartattr = { .name = "whole_disk", .mode = S_IRUSR | S_IRGRP | S_IROTH, - .owner = THIS_MODULE, }; sysfs_create_file(&p->kobj, &addpartattr); diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 9f7ad4244f63..1e064c4a4f86 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -45,7 +45,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) { int blocksize, offset, size,res; loff_t i_size; - dasd_information_t *info; + dasd_information2_t *info; struct hd_geometry *geo; char type[5] = {0,}; char name[7] = {0,}; @@ -64,14 +64,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) if (i_size == 0) goto out_exit; - if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL) + info = 
kmalloc(sizeof(dasd_information2_t), GFP_KERNEL); + if (info == NULL) goto out_exit; - if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL) + geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL); + if (geo == NULL) goto out_nogeo; - if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL) + label = kmalloc(sizeof(union label_t), GFP_KERNEL); + if (label == NULL) goto out_nolab; - if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 || + if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 || ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) goto out_freeall; @@ -96,84 +99,108 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) res = 1; /* - * Three different types: CMS1, VOL1 and LNX1/unlabeled + * Three different formats: LDL, CDL and unformated disk + * + * identified by info->format + * + * unformated disks we do not have to care about */ - if (strncmp(type, "CMS1", 4) == 0) { - /* - * VM style CMS1 labeled disk - */ - if (label->cms.disk_offset != 0) { - printk("CMS1/%8s(MDSK):", name); - /* disk is reserved minidisk */ - blocksize = label->cms.block_size; - offset = label->cms.disk_offset; - size = (label->cms.block_count - 1) * (blocksize >> 9); + if (info->format == DASD_FORMAT_LDL) { + if (strncmp(type, "CMS1", 4) == 0) { + /* + * VM style CMS1 labeled disk + */ + if (label->cms.disk_offset != 0) { + printk("CMS1/%8s(MDSK):", name); + /* disk is reserved minidisk */ + blocksize = label->cms.block_size; + offset = label->cms.disk_offset; + size = (label->cms.block_count - 1) + * (blocksize >> 9); + } else { + printk("CMS1/%8s:", name); + offset = (info->label_block + 1); + size = i_size >> 9; + } } else { - printk("CMS1/%8s:", name); + /* + * Old style LNX1 or unlabeled disk + */ + if (strncmp(type, "LNX1", 4) == 0) + printk ("LNX1/%8s:", name); + else + printk("(nonl)"); offset = (info->label_block + 1); size = i_size >> 9; } put_partition(state, 1, offset*(blocksize >> 9), - 
size-offset*(blocksize >> 9)); - } else if ((strncmp(type, "VOL1", 4) == 0) && - (!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) { + size-offset*(blocksize >> 9)); + } else if (info->format == DASD_FORMAT_CDL) { /* - * New style VOL1 labeled disk + * New style CDL formatted disk */ unsigned int blk; int counter; - printk("VOL1/%8s:", name); - - /* get block number and read then go through format1 labels */ - blk = cchhb2blk(&label->vol.vtoc, geo) + 1; - counter = 0; - while ((data = read_dev_sector(bdev, blk*(blocksize/512), - &sect)) != NULL) { - struct vtoc_format1_label f1; - - memcpy(&f1, data, sizeof(struct vtoc_format1_label)); - put_dev_sector(sect); - - /* skip FMT4 / FMT5 / FMT7 labels */ - if (f1.DS1FMTID == _ascebc['4'] - || f1.DS1FMTID == _ascebc['5'] - || f1.DS1FMTID == _ascebc['7']) { - blk++; - continue; - } - - /* only FMT1 valid at this point */ - if (f1.DS1FMTID != _ascebc['1']) - break; - - /* OK, we got valid partition data */ - offset = cchh2blk(&f1.DS1EXT1.llimit, geo); - size = cchh2blk(&f1.DS1EXT1.ulimit, geo) - - offset + geo->sectors; - if (counter >= state->limit) - break; - put_partition(state, counter + 1, - offset * (blocksize >> 9), - size * (blocksize >> 9)); - counter++; - blk++; - } - if (!data) - /* Are we not supposed to report this ? 
*/ - goto out_readerr; - } else { /* - * Old style LNX1 or unlabeled disk + * check if VOL1 label is available + * if not, something is wrong, skipping partition detection */ - if (strncmp(type, "LNX1", 4) == 0) - printk ("LNX1/%8s:", name); - else - printk("(nonl)/%8s:", name); - offset = (info->label_block + 1); - size = i_size >> 9; - put_partition(state, 1, offset*(blocksize >> 9), - size-offset*(blocksize >> 9)); + if (strncmp(type, "VOL1", 4) == 0) { + printk("VOL1/%8s:", name); + /* + * get block number and read then go through format1 + * labels + */ + blk = cchhb2blk(&label->vol.vtoc, geo) + 1; + counter = 0; + data = read_dev_sector(bdev, blk * (blocksize/512), + &sect); + while (data != NULL) { + struct vtoc_format1_label f1; + + memcpy(&f1, data, + sizeof(struct vtoc_format1_label)); + put_dev_sector(sect); + + /* skip FMT4 / FMT5 / FMT7 labels */ + if (f1.DS1FMTID == _ascebc['4'] + || f1.DS1FMTID == _ascebc['5'] + || f1.DS1FMTID == _ascebc['7']) { + blk++; + data = read_dev_sector(bdev, blk * + (blocksize/512), + &sect); + continue; + } + + /* only FMT1 valid at this point */ + if (f1.DS1FMTID != _ascebc['1']) + break; + + /* OK, we got valid partition data */ + offset = cchh2blk(&f1.DS1EXT1.llimit, geo); + size = cchh2blk(&f1.DS1EXT1.ulimit, geo) - + offset + geo->sectors; + if (counter >= state->limit) + break; + put_partition(state, counter + 1, + offset * (blocksize >> 9), + size * (blocksize >> 9)); + counter++; + blk++; + data = read_dev_sector(bdev, + blk * (blocksize/512), + &sect); + } + + if (!data) + /* Are we not supposed to report this ? 
*/ + goto out_readerr; + } else + printk(KERN_WARNING "Warning, expected Label VOL1 not " + "found, treating as CDL formated Disk"); + } printk("\n"); diff --git a/fs/pipe.c b/fs/pipe.c index 3a89592bdf57..d007830d9c87 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -164,6 +164,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, page_cache_release(page); } +/** + * generic_pipe_buf_map - virtually map a pipe buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer that should be mapped + * @atomic: whether to use an atomic map + * + * Description: + * This function returns a kernel virtual address mapping for the + * passed in @pipe_buffer. If @atomic is set, an atomic map is provided + * and the caller has to be careful not to fault before calling + * the unmap function. + * + * Note that this function occupies KM_USER0 if @atomic != 0. + */ void *generic_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *buf, int atomic) { @@ -175,6 +189,15 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe, return kmap(buf->page); } +/** + * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer that should be unmapped + * @map_data: the data that the mapping function returned + * + * Description: + * This function undoes the mapping that ->map() provided. + */ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *buf, void *map_data) { @@ -185,11 +208,28 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, kunmap(buf->page); } +/** + * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to attempt to steal + * + * Description: + * This function attempts to steal the @struct page attached to + * @buf. If successful, this function returns 0 and returns with + * the page locked. 
The caller may then reuse the page for whatever + * he wishes, the typical use is insertion into a different file + * page cache. + */ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; + /* + * A reference of one is golden, that means that the owner of this + * page is the only one holding a reference to it. lock the page + * and return OK. + */ if (page_count(page) == 1) { lock_page(page); return 0; @@ -198,12 +238,32 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, return 1; } -void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf) +/** + * generic_pipe_buf_get - get a reference to a @struct pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to get a reference to + * + * Description: + * This function grabs an extra reference to @buf. It's used in + * in the tee() system call, when we duplicate the buffers in one + * pipe into another. + */ +void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { page_cache_get(buf->page); } -int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf) +/** + * generic_pipe_buf_confirm - verify contents of the pipe buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to confirm + * + * Description: + * This function does nothing, because the generic pipe code uses + * pages that are always good when inserted into the pipe. 
+ */ +int generic_pipe_buf_confirm(struct pipe_inode_info *info, + struct pipe_buffer *buf) { return 0; } @@ -212,7 +272,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, - .pin = generic_pipe_buf_pin, + .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = generic_pipe_buf_steal, .get = generic_pipe_buf_get, @@ -252,7 +312,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, if (chars > total_len) chars = total_len; - error = ops->pin(pipe, buf); + error = ops->confirm(pipe, buf); if (error) { if (!ret) error = ret; @@ -373,7 +433,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, int error, atomic = 1; void *addr; - error = ops->pin(pipe, buf); + error = ops->confirm(pipe, buf); if (error) goto out; diff --git a/fs/proc/array.c b/fs/proc/array.c index 74f30e0c0381..98e78e2f18d6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer) rcu_read_lock(); buffer += sprintf(buffer, "State:\t%s\n" - "SleepAVG:\t%lu%%\n" "Tgid:\t%d\n" "Pid:\t%d\n" "PPid:\t%d\n" @@ -173,7 +172,6 @@ static inline char * task_state(struct task_struct *p, char *buffer) "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), - (p->sleep_avg/1024)*100/(1020000000/1024), p->tgid, p->pid, pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, pid_alive(p) && p->ptrace ? 
rcu_dereference(p->parent)->pid : 0, @@ -312,6 +310,41 @@ int proc_pid_status(struct task_struct *task, char * buffer) return buffer - orig; } +static clock_t task_utime(struct task_struct *p) +{ + clock_t utime = cputime_to_clock_t(p->utime), + total = utime + cputime_to_clock_t(p->stime); + u64 temp; + + /* + * Use CFS's precise accounting: + */ + temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); + + if (total) { + temp *= utime; + do_div(temp, total); + } + utime = (clock_t)temp; + + return utime; +} + +static clock_t task_stime(struct task_struct *p) +{ + clock_t stime = cputime_to_clock_t(p->stime); + + /* + * Use CFS's precise accounting. (we subtract utime from + * the total, to make sure the total observed by userspace + * grows monotonically - apps rely on that): + */ + stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p); + + return stime; +} + + static int do_task_stat(struct task_struct *task, char * buffer, int whole) { unsigned long vsize, eip, esp, wchan = ~0UL; @@ -326,7 +359,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) unsigned long long start_time; unsigned long cmin_flt = 0, cmaj_flt = 0; unsigned long min_flt = 0, maj_flt = 0; - cputime_t cutime, cstime, utime, stime; + cputime_t cutime, cstime; + clock_t utime, stime; unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; unsigned long flags; @@ -344,7 +378,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = utime = stime = cputime_zero; + cutime = cstime = cputime_zero; + utime = stime = 0; rcu_read_lock(); if (lock_task_sighand(task, &flags)) { @@ -370,15 +405,15 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) do { min_flt += t->min_flt; maj_flt += t->maj_flt; - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); + utime += task_utime(t); + stime += task_stime(t); t = next_thread(t); } while 
(t != task); min_flt += sig->min_flt; maj_flt += sig->maj_flt; - utime = cputime_add(utime, sig->utime); - stime = cputime_add(stime, sig->stime); + utime += cputime_to_clock_t(sig->utime); + stime += cputime_to_clock_t(sig->stime); } sid = signal_session(sig); @@ -394,8 +429,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; - utime = task->utime; - stime = task->stime; + utime = task_utime(task); + stime = task_stime(task); } /* scale priority and nice values from timeslices to -20..20 */ @@ -426,8 +461,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) cmin_flt, maj_flt, cmaj_flt, - cputime_to_clock_t(utime), - cputime_to_clock_t(stime), + utime, + stime, cputime_to_clock_t(cutime), cputime_to_clock_t(cstime), priority, diff --git a/fs/proc/base.c b/fs/proc/base.c index a5fa1fdafc4e..46ea5d56e1bb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -296,7 +296,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) */ static int proc_pid_schedstat(struct task_struct *task, char *buffer) { - return sprintf(buffer, "%lu %lu %lu\n", + return sprintf(buffer, "%llu %llu %lu\n", task->sched_info.cpu_time, task->sched_info.run_delay, task->sched_info.pcnt); @@ -929,6 +929,69 @@ static const struct file_operations proc_fault_inject_operations = { }; #endif +#ifdef CONFIG_SCHED_DEBUG +/* + * Print out various scheduling related per-task fields: + */ +static int sched_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + WARN_ON(!inode); + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + proc_sched_show_task(p, m); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct task_struct *p; + + WARN_ON(!inode); + + p = 
get_proc_task(inode); + if (!p) + return -ESRCH; + proc_sched_set_task(p); + + put_task_struct(p); + + return count; +} + +static int sched_open(struct inode *inode, struct file *filp) +{ + int ret; + + ret = single_open(filp, sched_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_pid_sched_operations = { + .open = sched_open, + .read = seq_read, + .write = sched_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +#endif + static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; @@ -1963,6 +2026,9 @@ static const struct pid_entry tgid_base_stuff[] = { INF("environ", S_IRUSR, pid_environ), INF("auxv", S_IRUSR, pid_auxv), INF("status", S_IRUGO, pid_status), +#ifdef CONFIG_SCHED_DEBUG + REG("sched", S_IRUGO|S_IWUSR, pid_sched), +#endif INF("cmdline", S_IRUGO, pid_cmdline), INF("stat", S_IRUGO, tgid_stat), INF("statm", S_IRUGO, pid_statm), @@ -2247,6 +2313,9 @@ static const struct pid_entry tid_base_stuff[] = { INF("environ", S_IRUSR, pid_environ), INF("auxv", S_IRUSR, pid_auxv), INF("status", S_IRUGO, pid_status), +#ifdef CONFIG_SCHED_DEBUG + REG("sched", S_IRUGO|S_IWUSR, pid_sched), +#endif INF("cmdline", S_IRUGO, pid_cmdline), INF("stat", S_IRUGO, tid_stat), INF("statm", S_IRUGO, pid_statm), diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index 44649981bbc8..867f42b02035 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations = .read = do_sync_read, .aio_read = generic_file_aio_read, .mmap = generic_file_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, #ifdef CONFIG_QNX4FS_RW .write = do_sync_write, .aio_write = generic_file_aio_write, diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 2f14774a124f..97bdc0b2f9d2 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -41,7 
+41,7 @@ const struct file_operations ramfs_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = simple_sync_file, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, .llseek = generic_file_llseek, }; diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 9345a46ffb32..cad2b7ace630 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .fsync = simple_sync_file, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, .llseek = generic_file_llseek, }; @@ -195,6 +195,11 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) unsigned int old_ia_valid = ia->ia_valid; int ret = 0; + /* POSIX UID/GID verification for setting inode attributes */ + ret = inode_change_ok(inode, ia); + if (ret) + return ret; + /* by providing our own setattr() method, we skip this quotaism */ if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid)) diff --git a/fs/read_write.c b/fs/read_write.c index 4d03008f015b..507ddff48a9a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/syscalls.h> #include <linux/pagemap.h> +#include <linux/splice.h> #include "read_write.h" #include <asm/uaccess.h> @@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = { .read = do_sync_read, .aio_read = generic_file_aio_read, .mmap = generic_file_readonly_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; EXPORT_SYMBOL(generic_ro_fops); @@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, struct inode * in_inode, * out_inode; loff_t pos; ssize_t retval; - int fput_needed_in, fput_needed_out; + int fput_needed_in, fput_needed_out, fl; /* * Get input file, and 
verify that it is ok.. @@ -723,7 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, in_inode = in_file->f_path.dentry->d_inode; if (!in_inode) goto fput_in; - if (!in_file->f_op || !in_file->f_op->sendfile) + if (!in_file->f_op || !in_file->f_op->splice_read) goto fput_in; retval = -ESPIPE; if (!ppos) @@ -776,7 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, count = max - pos; } - retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + fl = 0; +#if 0 + /* + * We need to debate whether we can enable this or not. The + * man page documents EAGAIN return for the output at least, + * and the application is arguably buggy if it doesn't expect + * EAGAIN on a non-blocking file descriptor. + */ + if (in_file->f_flags & O_NONBLOCK) + fl = SPLICE_F_NONBLOCK; +#endif + retval = do_splice_direct(in_file, ppos, out_file, count, fl); if (retval > 0) { add_rchar(current, retval); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 9e451a68580f..30eebfb1b2d8 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1531,7 +1531,6 @@ const struct file_operations reiserfs_file_operations = { .open = generic_file_open, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, - .sendfile = generic_file_sendfile, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, diff --git a/fs/seq_file.c b/fs/seq_file.c index 0ac22af7afe5..49194a4e6b91 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -447,3 +447,37 @@ int seq_puts(struct seq_file *m, const char *s) return -1; } EXPORT_SYMBOL(seq_puts); + +struct list_head *seq_list_start(struct list_head *head, loff_t pos) +{ + struct list_head *lh; + + list_for_each(lh, head) + if (pos-- == 0) + return lh; + + return NULL; +} + +EXPORT_SYMBOL(seq_list_start); + +struct list_head *seq_list_start_head(struct list_head *head, loff_t pos) +{ + if (!pos) + return head; + + return 
seq_list_start(head, pos - 1); +} + +EXPORT_SYMBOL(seq_list_start_head); + +struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos) +{ + struct list_head *lh; + + lh = ((struct list_head *)v)->next; + ++*ppos; + return lh == head ? NULL : lh; +} + +EXPORT_SYMBOL(seq_list_next); diff --git a/fs/signalfd.c b/fs/signalfd.c index f1da89203a9a..3b07f26d984d 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -133,7 +133,8 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait) * the peer disconnects. */ if (signalfd_lock(ctx, &lk)) { - if (next_signal(&lk.tsk->pending, &ctx->sigmask) > 0 || + if ((lk.tsk == current && + next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) || next_signal(&lk.tsk->signal->shared_pending, &ctx->sigmask) > 0) events |= POLLIN; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index aea3f8aa54c0..c5d78a7e492b 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -262,8 +262,9 @@ out: } static ssize_t -smb_file_sendfile(struct file *file, loff_t *ppos, - size_t count, read_actor_t actor, void *target) +smb_file_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t count, + unsigned int flags) { struct dentry *dentry = file->f_path.dentry; ssize_t status; @@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos, DENTRY_PATH(dentry), status); goto out; } - status = generic_file_sendfile(file, ppos, count, actor, target); + status = generic_file_splice_read(file, ppos, pipe, count, flags); out: return status; } @@ -416,7 +417,7 @@ const struct file_operations smb_file_operations = .open = smb_file_open, .release = smb_file_release, .fsync = smb_fsync, - .sendfile = smb_file_sendfile, + .splice_read = smb_file_splice_read, }; const struct inode_operations smb_file_inode_operations = diff --git a/fs/splice.c b/fs/splice.c index 12f28281d2b1..6c9828651e6f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -20,7 +20,7 @@ #include <linux/fs.h> #include <linux/file.h> #include 
<linux/pagemap.h> -#include <linux/pipe_fs_i.h> +#include <linux/splice.h> #include <linux/mm_inline.h> #include <linux/swap.h> #include <linux/writeback.h> @@ -28,22 +28,7 @@ #include <linux/module.h> #include <linux/syscalls.h> #include <linux/uio.h> - -struct partial_page { - unsigned int offset; - unsigned int len; -}; - -/* - * Passed to splice_to_pipe - */ -struct splice_pipe_desc { - struct page **pages; /* page map */ - struct partial_page *partial; /* pages[] may not be contig */ - int nr_pages; /* number of pages in map */ - unsigned int flags; /* splice flags */ - const struct pipe_buf_operations *ops;/* ops associated with output pipe */ -}; +#include <linux/security.h> /* * Attempt to steal a page from a pipe buffer. This should perhaps go into @@ -101,8 +86,12 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, buf->flags &= ~PIPE_BUF_FLAG_LRU; } -static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +/* + * Check whether the contents of buf is OK to access. Since the content + * is a page cache page, IO may be in flight. + */ +static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { struct page *page = buf->page; int err; @@ -143,7 +132,7 @@ static const struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, - .pin = page_cache_pipe_buf_pin, + .confirm = page_cache_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = page_cache_pipe_buf_steal, .get = generic_pipe_buf_get, @@ -163,19 +152,27 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, - .pin = generic_pipe_buf_pin, + .confirm = generic_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = user_page_pipe_buf_steal, .get = generic_pipe_buf_get, }; -/* - * Pipe output worker. 
This sets up our pipe format with the page cache - * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). +/** + * splice_to_pipe - fill passed data into a pipe + * @pipe: pipe to fill + * @spd: data to fill + * + * Description: + * @spd contains a map of pages and len/offset tupples, a long with + * the struct pipe_buf_operations associated with these pages. This + * function will link that data to the pipe. + * */ -static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) +ssize_t splice_to_pipe(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) { + unsigned int spd_pages = spd->nr_pages; int ret, do_wakeup, page_nr; ret = 0; @@ -200,6 +197,7 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, buf->page = spd->pages[page_nr]; buf->offset = spd->partial[page_nr].offset; buf->len = spd->partial[page_nr].len; + buf->private = spd->partial[page_nr].private; buf->ops = spd->ops; if (spd->flags & SPLICE_F_GIFT) buf->flags |= PIPE_BUF_FLAG_GIFT; @@ -244,17 +242,18 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, pipe->waiting_writers--; } - if (pipe->inode) + if (pipe->inode) { mutex_unlock(&pipe->inode->i_mutex); - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + } } - while (page_nr < spd->nr_pages) + while (page_nr < spd_pages) page_cache_release(spd->pages[page_nr++]); return ret; @@ -272,7 +271,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, struct page *page; pgoff_t index, end_index; loff_t isize; - size_t total_len; int error, page_nr; struct splice_pipe_desc spd = { .pages = pages, @@ -295,20 +293,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, 
page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); /* - * Now fill in the holes: - */ - error = 0; - total_len = 0; - - /* * Lookup the (hopefully) full range of pages we need. */ spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); /* * If find_get_pages_contig() returned fewer pages than we needed, - * allocate the rest. + * allocate the rest and fill in the holes. */ + error = 0; index += spd.nr_pages; while (spd.nr_pages < nr_pages) { /* @@ -415,43 +408,47 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; } + } +fill_it: + /* + * i_size must be checked after PageUptodate. + */ + isize = i_size_read(mapping->host); + end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!isize || index > end_index)) + break; + + /* + * if this is the last page, see if we need to shrink + * the length and stop + */ + if (end_index == index) { + unsigned int plen; /* - * i_size must be checked after ->readpage(). + * max good bytes in this page */ - isize = i_size_read(mapping->host); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; - if (unlikely(!isize || index > end_index)) + plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + if (plen <= loff) break; /* - * if this is the last page, see if we need to shrink - * the length and stop + * force quit after adding this page */ - if (end_index == index) { - loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); - if (total_len + loff > isize) - break; - /* - * force quit after adding this page - */ - len = this_len; - this_len = min(this_len, loff); - loff = 0; - } + this_len = min(this_len, plen - loff); + len = this_len; } -fill_it: + partial[page_nr].offset = loff; partial[page_nr].len = this_len; len -= this_len; - total_len += this_len; loff = 0; spd.nr_pages++; index++; } /* - * Release any pages at the end, if we quit early. 'i' is how far + * Release any pages at the end, if we quit early. 'page_nr' is how far * we got, 'nr_pages' is how many pages are in the map. 
*/ while (page_nr < nr_pages) @@ -466,11 +463,16 @@ fill_it: /** * generic_file_splice_read - splice data from file to a pipe * @in: file to splice from + * @ppos: position in @in * @pipe: pipe to splice to * @len: number of bytes to splice * @flags: splice modifier flags * - * Will read pages from given file and fill them into a pipe. + * Description: + * Will read pages from given file and fill them into a pipe. Can be + * used as long as the address_space operations for the source implements + * a readpage() hook. + * */ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, @@ -478,11 +480,19 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, { ssize_t spliced; int ret; + loff_t isize, left; + + isize = i_size_read(in->f_mapping->host); + if (unlikely(*ppos >= isize)) + return 0; + + left = isize - *ppos; + if (unlikely(left < len)) + len = left; ret = 0; spliced = 0; - - while (len) { + while (len && !spliced) { ret = __generic_file_splice_read(in, ppos, pipe, len, flags); if (ret < 0) @@ -516,11 +526,11 @@ EXPORT_SYMBOL(generic_file_splice_read); static int pipe_to_sendpage(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { - struct file *file = sd->file; + struct file *file = sd->u.file; loff_t pos = sd->pos; int ret, more; - ret = buf->ops->pin(pipe, buf); + ret = buf->ops->confirm(pipe, buf); if (!ret) { more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; @@ -554,7 +564,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { - struct file *file = sd->file; + struct file *file = sd->u.file; struct address_space *mapping = file->f_mapping; unsigned int offset, this_len; struct page *page; @@ -564,7 +574,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, /* * make sure the data in this buffer is uptodate */ - 
ret = buf->ops->pin(pipe, buf); + ret = buf->ops->confirm(pipe, buf); if (unlikely(ret)) return ret; @@ -644,7 +654,6 @@ find_page: * accessed, we are now done! */ mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); out: page_cache_release(page); unlock_page(page); @@ -652,36 +661,37 @@ out_ret: return ret; } -/* - * Pipe input worker. Most of this logic works like a regular pipe, the - * key here is the 'actor' worker passed in that actually moves the data - * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. +/** + * __splice_from_pipe - splice data from a pipe to given actor + * @pipe: pipe to splice from + * @sd: information to @actor + * @actor: handler that splices the data + * + * Description: + * This function does little more than loop over the pipe and call + * @actor to do the actual moving of a single struct pipe_buffer to + * the desired destination. See pipe_to_file, pipe_to_sendpage, or + * pipe_to_user. + * */ -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, - struct file *out, loff_t *ppos, size_t len, - unsigned int flags, splice_actor *actor) +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) { int ret, do_wakeup, err; - struct splice_desc sd; ret = 0; do_wakeup = 0; - sd.total_len = len; - sd.flags = flags; - sd.file = out; - sd.pos = *ppos; - for (;;) { if (pipe->nrbufs) { struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; const struct pipe_buf_operations *ops = buf->ops; - sd.len = buf->len; - if (sd.len > sd.total_len) - sd.len = sd.total_len; + sd->len = buf->len; + if (sd->len > sd->total_len) + sd->len = sd->total_len; - err = actor(pipe, buf, &sd); + err = actor(pipe, buf, sd); if (err <= 0) { if (!ret && err != -ENODATA) ret = err; @@ -693,10 +703,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, buf->offset += err; buf->len -= err; - sd.len -= err; - sd.pos += err; - sd.total_len -= err; - if (sd.len) + sd->len -= err; + 
sd->pos += err; + sd->total_len -= err; + if (sd->len) continue; if (!buf->len) { @@ -708,7 +718,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, do_wakeup = 1; } - if (!sd.total_len) + if (!sd->total_len) break; } @@ -721,7 +731,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, break; } - if (flags & SPLICE_F_NONBLOCK) { + if (sd->flags & SPLICE_F_NONBLOCK) { if (!ret) ret = -EAGAIN; break; @@ -755,12 +765,32 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, } EXPORT_SYMBOL(__splice_from_pipe); +/** + * splice_from_pipe - splice data from a pipe to a file + * @pipe: pipe to splice from + * @out: file to splice to + * @ppos: position in @out + * @len: how many bytes to splice + * @flags: splice modifier flags + * @actor: handler that splices the data + * + * Description: + * See __splice_from_pipe. This function locks the input and output inodes, + * otherwise it's identical to __splice_from_pipe(). + * + */ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags, splice_actor *actor) { ssize_t ret; struct inode *inode = out->f_mapping->host; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; /* * The actor worker might be calling ->prepare_write and @@ -769,7 +799,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, * pipe->inode, we have to order lock acquiry here. 
*/ inode_double_lock(inode, pipe->inode); - ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); + ret = __splice_from_pipe(pipe, &sd, actor); inode_double_unlock(inode, pipe->inode); return ret; @@ -779,12 +809,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, * generic_file_splice_write_nolock - generic_file_splice_write without mutexes * @pipe: pipe info * @out: file to write to + * @ppos: position in @out * @len: number of bytes to splice * @flags: splice modifier flags * - * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. The caller is responsible - * for acquiring i_mutex on both inodes. + * Description: + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. The caller is responsible + * for acquiring i_mutex on both inodes. * */ ssize_t @@ -793,6 +825,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; ssize_t ret; int err; @@ -800,9 +838,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, if (unlikely(err)) return err; - ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + ret = __splice_from_pipe(pipe, &sd, pipe_to_file); if (ret > 0) { + unsigned long nr_pages; + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* * If file or inode is SYNC and we actually wrote some data, @@ -815,6 +856,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, if (err) ret = err; } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); } return ret; @@ -826,11 +868,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock); * generic_file_splice_write - splice data from a pipe to a file * 
@pipe: pipe info * @out: file to write to + * @ppos: position in @out * @len: number of bytes to splice * @flags: splice modifier flags * - * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. + * Description: + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. * */ ssize_t @@ -853,7 +897,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { + unsigned long nr_pages; + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* * If file or inode is SYNC and we actually wrote some data, @@ -868,6 +915,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, if (err) ret = err; } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); } return ret; @@ -877,13 +925,15 @@ EXPORT_SYMBOL(generic_file_splice_write); /** * generic_splice_sendpage - splice data from a pipe to a socket - * @inode: pipe inode + * @pipe: pipe to splice from * @out: socket to write to + * @ppos: position in @out * @len: number of bytes to splice * @flags: splice modifier flags * - * Will send @len bytes from the pipe to a network socket. No data copying - * is involved. + * Description: + * Will send @len bytes from the pipe to a network socket. No data copying + * is involved. 
* */ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, @@ -912,6 +962,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, if (unlikely(ret < 0)) return ret; + ret = security_file_permission(out, MAY_WRITE); + if (unlikely(ret < 0)) + return ret; + return out->f_op->splice_write(pipe, out, ppos, len, flags); } @@ -922,7 +976,6 @@ static long do_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - loff_t isize, left; int ret; if (unlikely(!in->f_op || !in->f_op->splice_read)) @@ -935,25 +988,34 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - isize = i_size_read(in->f_mapping->host); - if (unlikely(*ppos >= isize)) - return 0; - - left = isize - *ppos; - if (unlikely(left < len)) - len = left; + ret = security_file_permission(in, MAY_READ); + if (unlikely(ret < 0)) + return ret; return in->f_op->splice_read(in, ppos, pipe, len, flags); } -long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - size_t len, unsigned int flags) +/** + * splice_direct_to_actor - splices data directly between two non-pipes + * @in: file to splice from + * @sd: actor information on where to splice to + * @actor: handles the data splicing + * + * Description: + * This is a special case helper to splice directly between two + * points, without requiring an explicit pipe. Internally an allocated + * pipe is cached in the process, and reused during the life time of + * that process. 
+ * + */ +ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, + splice_direct_actor *actor) { struct pipe_inode_info *pipe; long ret, bytes; - loff_t out_off; umode_t i_mode; - int i; + size_t len; + int i, flags; /* * We require the input being a regular file, as we don't want to @@ -989,49 +1051,41 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, */ ret = 0; bytes = 0; - out_off = 0; + len = sd->total_len; + flags = sd->flags; - while (len) { - size_t read_len, max_read_len; + /* + * Don't block on output, we have to drain the direct pipe. + */ + sd->flags &= ~SPLICE_F_NONBLOCK; - /* - * Do at most PIPE_BUFFERS pages worth of transfer: - */ - max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); + while (len) { + size_t read_len; - ret = do_splice_to(in, ppos, pipe, max_read_len, flags); - if (unlikely(ret < 0)) + ret = do_splice_to(in, &sd->pos, pipe, len, flags); + if (unlikely(ret <= 0)) goto out_release; read_len = ret; + sd->total_len = read_len; /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we * could get stuck data in the internal pipe: */ - ret = do_splice_from(pipe, out, &out_off, read_len, - flags & ~SPLICE_F_NONBLOCK); - if (unlikely(ret < 0)) + ret = actor(pipe, sd); + if (unlikely(ret <= 0)) goto out_release; bytes += ret; len -= ret; - /* - * In nonblocking mode, if we got back a short read then - * that was due to either an IO error or due to the - * pagecache entry not being there. 
In the IO error case - * the _next_ splice attempt will produce a clean IO error - * return value (not a short read), so in both cases it's - * correct to break out of the loop here: - */ - if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len)) - break; + if (ret < read_len) + goto out_release; } pipe->nrbufs = pipe->curbuf = 0; - return bytes; out_release: @@ -1056,9 +1110,51 @@ out_release: return bytes; return ret; + } +EXPORT_SYMBOL(splice_direct_to_actor); -EXPORT_SYMBOL(do_splice_direct); +static int direct_splice_actor(struct pipe_inode_info *pipe, + struct splice_desc *sd) +{ + struct file *file = sd->u.file; + + return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); +} + +/** + * do_splice_direct - splices data directly between two files + * @in: file to splice from + * @ppos: input file offset + * @out: file to splice to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Description: + * For use by do_sendfile(). splice can easily emulate sendfile, but + * doing it in the application would incur an extra system call + * (splice in + splice out, as compared to just sendfile()). So this helper + * can splice directly through a process-private pipe. 
+ * + */ +long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + size_t len, unsigned int flags) +{ + struct splice_desc sd = { + .len = len, + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; + long ret; + + ret = splice_direct_to_actor(in, &sd, direct_splice_actor); + if (ret > 0) + *ppos += ret; + + return ret; +} /* * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same @@ -1240,28 +1336,131 @@ static int get_iovec_page_array(const struct iovec __user *iov, return error; } +static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) +{ + char *src; + int ret; + + ret = buf->ops->confirm(pipe, buf); + if (unlikely(ret)) + return ret; + + /* + * See if we can use the atomic maps, by prefaulting in the + * pages and doing an atomic copy + */ + if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { + src = buf->ops->map(pipe, buf, 1); + ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, + sd->len); + buf->ops->unmap(pipe, buf, src); + if (!ret) { + ret = sd->len; + goto out; + } + } + + /* + * No dice, use slow non-atomic map and copy + */ + src = buf->ops->map(pipe, buf, 0); + + ret = sd->len; + if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) + ret = -EFAULT; + +out: + if (ret > 0) + sd->u.userptr += ret; + buf->ops->unmap(pipe, buf, src); + return ret; +} + +/* + * For lack of a better implementation, implement vmsplice() to userspace + * as a simple copy of the pipes pages to the user iov. 
+ */ +static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) +{ + struct pipe_inode_info *pipe; + struct splice_desc sd; + ssize_t size; + int error; + long ret; + + pipe = pipe_info(file->f_path.dentry->d_inode); + if (!pipe) + return -EBADF; + + if (pipe->inode) + mutex_lock(&pipe->inode->i_mutex); + + error = ret = 0; + while (nr_segs) { + void __user *base; + size_t len; + + /* + * Get user address base and length for this iovec. + */ + error = get_user(base, &iov->iov_base); + if (unlikely(error)) + break; + error = get_user(len, &iov->iov_len); + if (unlikely(error)) + break; + + /* + * Sanity check this iovec. 0 read succeeds. + */ + if (unlikely(!len)) + break; + if (unlikely(!base)) { + error = -EFAULT; + break; + } + + sd.len = 0; + sd.total_len = len; + sd.flags = flags; + sd.u.userptr = base; + sd.pos = 0; + + size = __splice_from_pipe(pipe, &sd, pipe_to_user); + if (size < 0) { + if (!ret) + ret = size; + + break; + } + + ret += size; + + if (size < len) + break; + + nr_segs--; + iov++; + } + + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); + + if (!ret) + ret = error; + + return ret; +} + /* * vmsplice splices a user address range into a pipe. It can be thought of * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. - * - * Note that vmsplice only supports splicing _from_ user memory to a pipe, - * not the other way around. Splicing from user memory is a simple operation - * that can be supported without any funky alignment restrictions or nasty - * vm tricks. We simply map in the user memory and fill them into a pipe. - * The reverse isn't quite as easy, though. There are two possible solutions - * for that: - * - * - memcpy() the data internally, at which point we might as well just - * do a regular read() on the buffer anyway. 
- * - Lots of nasty vm tricks, that are neither fast nor flexible (it - * has restriction limitations on both ends of the pipe). - * - * Alas, it isn't here. - * */ -static long do_vmsplice(struct file *file, const struct iovec __user *iov, - unsigned long nr_segs, unsigned int flags) +static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) { struct pipe_inode_info *pipe; struct page *pages[PIPE_BUFFERS]; @@ -1276,10 +1475,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, pipe = pipe_info(file->f_path.dentry->d_inode); if (!pipe) return -EBADF; - if (unlikely(nr_segs > UIO_MAXIOV)) - return -EINVAL; - else if (unlikely(!nr_segs)) - return 0; spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, flags & SPLICE_F_GIFT); @@ -1289,6 +1484,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, return splice_to_pipe(pipe, &spd); } +/* + * Note that vmsplice only really supports true splicing _from_ user memory + * to a pipe, not the other way around. Splicing from user memory is a simple + * operation that can be supported without any funky alignment restrictions + * or nasty vm tricks. We simply map in the user memory and fill them into + * a pipe. The reverse isn't quite as easy, though. There are two possible + * solutions for that: + * + * - memcpy() the data internally, at which point we might as well just + * do a regular read() on the buffer anyway. + * - Lots of nasty vm tricks, that are neither fast nor flexible (it + * has restriction limitations on both ends of the pipe). + * + * Currently we punt and implement it as a normal copy, see pipe_to_user(). 
+ * + */ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) { @@ -1296,11 +1507,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, long error; int fput; + if (unlikely(nr_segs > UIO_MAXIOV)) + return -EINVAL; + else if (unlikely(!nr_segs)) + return 0; + error = -EBADF; file = fget_light(fd, &fput); if (file) { if (file->f_mode & FMODE_WRITE) - error = do_vmsplice(file, iov, nr_segs, flags); + error = vmsplice_to_pipe(file, iov, nr_segs, flags); + else if (file->f_mode & FMODE_READ) + error = vmsplice_to_user(file, iov, nr_segs, flags); fput_light(file, fput); } diff --git a/fs/sync.c b/fs/sync.c index 2f97576355b8..7cd005ea7639 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -236,6 +236,14 @@ out: return ret; } +/* It would be nice if people remember that not all the world's an i386 + when they introduce new system calls */ +asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, + loff_t offset, loff_t nbytes) +{ + return sys_sync_file_range(fd, offset, nbytes, flags); +} + /* * `endbyte' is inclusive */ diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index d3b9f5f07db1..135353f8a296 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -20,29 +20,41 @@ #include "sysfs.h" +struct bin_buffer { + struct mutex mutex; + void *buffer; + int mmapped; +}; + static int fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) { - struct bin_attribute * attr = to_bin_attr(dentry); - struct kobject * kobj = to_kobj(dentry->d_parent); + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + int rc; + + /* need attr_sd for attr, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; - if (!attr->read) - return -EIO; + rc = -EIO; + if (attr->read) + rc = attr->read(kobj, attr, buffer, off, count); - return attr->read(kobj, buffer, off, 
count); + sysfs_put_active_two(attr_sd); + + return rc; } static ssize_t -read(struct file * file, char __user * userbuf, size_t count, loff_t * off) +read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) { - char *buffer = file->private_data; + struct bin_buffer *bb = file->private_data; struct dentry *dentry = file->f_path.dentry; int size = dentry->d_inode->i_size; loff_t offs = *off; - int ret; - - if (count > PAGE_SIZE) - count = PAGE_SIZE; + int count = min_t(size_t, bytes, PAGE_SIZE); if (size) { if (offs > size) @@ -51,43 +63,56 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off) count = size - offs; } - ret = fill_read(dentry, buffer, offs, count); - if (ret < 0) - return ret; - count = ret; + mutex_lock(&bb->mutex); + + count = fill_read(dentry, bb->buffer, offs, count); + if (count < 0) + goto out_unlock; - if (copy_to_user(userbuf, buffer, count)) - return -EFAULT; + if (copy_to_user(userbuf, bb->buffer, count)) { + count = -EFAULT; + goto out_unlock; + } - pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); + pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); *off = offs + count; + out_unlock: + mutex_unlock(&bb->mutex); return count; } static int flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) { - struct bin_attribute *attr = to_bin_attr(dentry); - struct kobject *kobj = to_kobj(dentry->d_parent); + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + int rc; + + /* need attr_sd for attr, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; + + rc = -EIO; + if (attr->write) + rc = attr->write(kobj, attr, buffer, offset, count); - if (!attr->write) - return -EIO; + sysfs_put_active_two(attr_sd); - return attr->write(kobj, buffer, offset, count); + return rc; } -static ssize_t write(struct 
file * file, const char __user * userbuf, - size_t count, loff_t * off) +static ssize_t write(struct file *file, const char __user *userbuf, + size_t bytes, loff_t *off) { - char *buffer = file->private_data; + struct bin_buffer *bb = file->private_data; struct dentry *dentry = file->f_path.dentry; int size = dentry->d_inode->i_size; loff_t offs = *off; + int count = min_t(size_t, bytes, PAGE_SIZE); - if (count > PAGE_SIZE) - count = PAGE_SIZE; if (size) { if (offs > size) return 0; @@ -95,72 +120,100 @@ static ssize_t write(struct file * file, const char __user * userbuf, count = size - offs; } - if (copy_from_user(buffer, userbuf, count)) - return -EFAULT; + mutex_lock(&bb->mutex); - count = flush_write(dentry, buffer, offs, count); + if (copy_from_user(bb->buffer, userbuf, count)) { + count = -EFAULT; + goto out_unlock; + } + + count = flush_write(dentry, bb->buffer, offs, count); if (count > 0) *off = offs + count; + + out_unlock: + mutex_unlock(&bb->mutex); return count; } static int mmap(struct file *file, struct vm_area_struct *vma) { - struct dentry *dentry = file->f_path.dentry; - struct bin_attribute *attr = to_bin_attr(dentry); - struct kobject *kobj = to_kobj(dentry->d_parent); + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + int rc; + + mutex_lock(&bb->mutex); + + /* need attr_sd for attr, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; - if (!attr->mmap) - return -EINVAL; + rc = -EINVAL; + if (attr->mmap) + rc = attr->mmap(kobj, attr, vma); - return attr->mmap(kobj, attr, vma); + if (rc == 0 && !bb->mmapped) + bb->mmapped = 1; + else + sysfs_put_active_two(attr_sd); + + mutex_unlock(&bb->mutex); + + return rc; } static int open(struct inode * inode, struct file * file) { - struct kobject *kobj = 
sysfs_get_kobject(file->f_path.dentry->d_parent); - struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); - int error = -EINVAL; - - if (!kobj || !attr) - goto Done; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; + struct bin_buffer *bb = NULL; + int error; - /* Grab the module reference for this attribute if we have one */ - error = -ENODEV; - if (!try_module_get(attr->attr.owner)) - goto Done; + /* need attr_sd for attr */ + if (!sysfs_get_active(attr_sd)) + return -ENODEV; error = -EACCES; if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) - goto Error; + goto err_out; if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) - goto Error; + goto err_out; error = -ENOMEM; - file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!file->private_data) - goto Error; - - error = 0; - goto Done; - - Error: - module_put(attr->attr.owner); - Done: - if (error) - kobject_put(kobj); + bb = kzalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) + goto err_out; + + bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!bb->buffer) + goto err_out; + + mutex_init(&bb->mutex); + file->private_data = bb; + + /* open succeeded, put active reference and pin attr_sd */ + sysfs_put_active(attr_sd); + sysfs_get(attr_sd); + return 0; + + err_out: + sysfs_put_active(attr_sd); + kfree(bb); return error; } static int release(struct inode * inode, struct file * file) { - struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent); - struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); - u8 * buffer = file->private_data; - - kobject_put(kobj); - module_put(attr->attr.owner); - kfree(buffer); + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_buffer *bb = file->private_data; + + if (bb->mmapped) + sysfs_put_active_two(attr_sd); + sysfs_put(attr_sd); + kfree(bb->buffer); + kfree(bb); return 0; } @@ -181,9 +234,9 @@ const struct file_operations 
bin_fops = { int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - BUG_ON(!kobj || !kobj->dentry || !attr); + BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR); + return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); } @@ -195,7 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) { + if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) { printk(KERN_ERR "%s: " "bad dentry or inode or no such file: \"%s\"\n", __FUNCTION__, attr->attr.name); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 85a668680f82..aee966c44aac 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -9,18 +9,346 @@ #include <linux/module.h> #include <linux/kobject.h> #include <linux/namei.h> +#include <linux/idr.h> +#include <linux/completion.h> #include <asm/semaphore.h> #include "sysfs.h" -DECLARE_RWSEM(sysfs_rename_sem); +DEFINE_MUTEX(sysfs_mutex); +spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED; + +static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_IDA(sysfs_ino_ida); + +/** + * sysfs_link_sibling - link sysfs_dirent into sibling list + * @sd: sysfs_dirent of interest + * + * Link @sd into its sibling list which starts from + * sd->s_parent->s_children. + * + * Locking: + * mutex_lock(sysfs_mutex) + */ +void sysfs_link_sibling(struct sysfs_dirent *sd) +{ + struct sysfs_dirent *parent_sd = sd->s_parent; + + BUG_ON(sd->s_sibling); + sd->s_sibling = parent_sd->s_children; + parent_sd->s_children = sd; +} + +/** + * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list + * @sd: sysfs_dirent of interest + * + * Unlink @sd from its sibling list which starts from + * sd->s_parent->s_children. 
+ * + * Locking: + * mutex_lock(sysfs_mutex) + */ +void sysfs_unlink_sibling(struct sysfs_dirent *sd) +{ + struct sysfs_dirent **pos; + + for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) { + if (*pos == sd) { + *pos = sd->s_sibling; + sd->s_sibling = NULL; + break; + } + } +} + +/** + * sysfs_get_dentry - get dentry for the given sysfs_dirent + * @sd: sysfs_dirent of interest + * + * Get dentry for @sd. Dentry is looked up if currently not + * present. This function climbs sysfs_dirent tree till it + * reaches a sysfs_dirent with valid dentry attached and descends + * down from there looking up dentry for each step. + * + * LOCKING: + * Kernel thread context (may sleep) + * + * RETURNS: + * Pointer to found dentry on success, ERR_PTR() value on error. + */ +struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) +{ + struct sysfs_dirent *cur; + struct dentry *parent_dentry, *dentry; + int i, depth; + + /* Find the first parent which has valid s_dentry and get the + * dentry. + */ + mutex_lock(&sysfs_mutex); + restart0: + spin_lock(&sysfs_assoc_lock); + restart1: + spin_lock(&dcache_lock); + + dentry = NULL; + depth = 0; + cur = sd; + while (!cur->s_dentry || !cur->s_dentry->d_inode) { + if (cur->s_flags & SYSFS_FLAG_REMOVED) { + dentry = ERR_PTR(-ENOENT); + depth = 0; + break; + } + cur = cur->s_parent; + depth++; + } + if (!IS_ERR(dentry)) + dentry = dget_locked(cur->s_dentry); + + spin_unlock(&dcache_lock); + spin_unlock(&sysfs_assoc_lock); + + /* from the found dentry, look up depth times */ + while (depth--) { + /* find and get depth'th ancestor */ + for (cur = sd, i = 0; cur && i < depth; i++) + cur = cur->s_parent; + + /* This can happen if tree structure was modified due + * to move/rename. Restart. 
+ */ + if (i != depth) { + dput(dentry); + goto restart0; + } + + sysfs_get(cur); + + mutex_unlock(&sysfs_mutex); + + /* look it up */ + parent_dentry = dentry; + dentry = lookup_one_len_kern(cur->s_name, parent_dentry, + strlen(cur->s_name)); + dput(parent_dentry); + + if (IS_ERR(dentry)) { + sysfs_put(cur); + return dentry; + } + + mutex_lock(&sysfs_mutex); + spin_lock(&sysfs_assoc_lock); + + /* This, again, can happen if tree structure has + * changed and we looked up the wrong thing. Restart. + */ + if (cur->s_dentry != dentry) { + dput(dentry); + sysfs_put(cur); + goto restart1; + } + + spin_unlock(&sysfs_assoc_lock); + + sysfs_put(cur); + } + + mutex_unlock(&sysfs_mutex); + return dentry; +} + +/** + * sysfs_get_active - get an active reference to sysfs_dirent + * @sd: sysfs_dirent to get an active reference to + * + * Get an active reference of @sd. This function is noop if @sd + * is NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) +{ + if (unlikely(!sd)) + return NULL; + + while (1) { + int v, t; + + v = atomic_read(&sd->s_active); + if (unlikely(v < 0)) + return NULL; + + t = atomic_cmpxchg(&sd->s_active, v, v + 1); + if (likely(t == v)) + return sd; + if (t < 0) + return NULL; + + cpu_relax(); + } +} + +/** + * sysfs_put_active - put an active reference to sysfs_dirent + * @sd: sysfs_dirent to put an active reference to + * + * Put an active reference to @sd. This function is noop if @sd + * is NULL. + */ +void sysfs_put_active(struct sysfs_dirent *sd) +{ + struct completion *cmpl; + int v; + + if (unlikely(!sd)) + return; + + v = atomic_dec_return(&sd->s_active); + if (likely(v != SD_DEACTIVATED_BIAS)) + return; + + /* atomic_dec_return() is a mb(), we'll always see the updated + * sd->s_sibling. 
+ */ + cmpl = (void *)sd->s_sibling; + complete(cmpl); +} + +/** + * sysfs_get_active_two - get active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Get active reference to @sd and its parent. Parent's active + * reference is grabbed first. This function is noop if @sd is + * NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd) +{ + if (sd) { + if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent))) + return NULL; + if (unlikely(!sysfs_get_active(sd))) { + sysfs_put_active(sd->s_parent); + return NULL; + } + } + return sd; +} + +/** + * sysfs_put_active_two - put active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Put active references to @sd and its parent. This function is + * noop if @sd is NULL. + */ +void sysfs_put_active_two(struct sysfs_dirent *sd) +{ + if (sd) { + sysfs_put_active(sd); + sysfs_put_active(sd->s_parent); + } +} + +/** + * sysfs_deactivate - deactivate sysfs_dirent + * @sd: sysfs_dirent to deactivate + * + * Deny new active references and drain existing ones. + */ +static void sysfs_deactivate(struct sysfs_dirent *sd) +{ + DECLARE_COMPLETION_ONSTACK(wait); + int v; + + BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); + sd->s_sibling = (void *)&wait; + + /* atomic_add_return() is a mb(), put_active() will always see + * the updated sd->s_sibling. 
+ */ + v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); + + if (v != SD_DEACTIVATED_BIAS) + wait_for_completion(&wait); + + sd->s_sibling = NULL; +} + +static int sysfs_alloc_ino(ino_t *pino) +{ + int ino, rc; + + retry: + spin_lock(&sysfs_ino_lock); + rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); + spin_unlock(&sysfs_ino_lock); + + if (rc == -EAGAIN) { + if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) + goto retry; + rc = -ENOMEM; + } + + *pino = ino; + return rc; +} + +static void sysfs_free_ino(ino_t ino) +{ + spin_lock(&sysfs_ino_lock); + ida_remove(&sysfs_ino_ida, ino); + spin_unlock(&sysfs_ino_lock); +} + +void release_sysfs_dirent(struct sysfs_dirent * sd) +{ + struct sysfs_dirent *parent_sd; + + repeat: + /* Moving/renaming is always done while holding reference. + * sd->s_parent won't change beneath us. + */ + parent_sd = sd->s_parent; + + if (sysfs_type(sd) == SYSFS_KOBJ_LINK) + sysfs_put(sd->s_elem.symlink.target_sd); + if (sysfs_type(sd) & SYSFS_COPY_NAME) + kfree(sd->s_name); + kfree(sd->s_iattr); + sysfs_free_ino(sd->s_ino); + kmem_cache_free(sysfs_dir_cachep, sd); + + sd = parent_sd; + if (sd && atomic_dec_and_test(&sd->s_count)) + goto repeat; +} static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) { struct sysfs_dirent * sd = dentry->d_fsdata; if (sd) { - BUG_ON(sd->s_dentry != dentry); - sd->s_dentry = NULL; + /* sd->s_dentry is protected with sysfs_assoc_lock. + * This allows sysfs_drop_dentry() to dereference it. + */ + spin_lock(&sysfs_assoc_lock); + + /* The dentry might have been deleted or another + * lookup could have happened updating sd->s_dentry to + * point the new dentry. Ignore if it isn't pointing + * to this dentry. 
+ */ + if (sd->s_dentry == dentry) + sd->s_dentry = NULL; + spin_unlock(&sysfs_assoc_lock); sysfs_put(sd); } iput(inode); @@ -30,245 +358,402 @@ static struct dentry_operations sysfs_dentry_ops = { .d_iput = sysfs_d_iput, }; -/* - * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent - */ -static struct sysfs_dirent * __sysfs_new_dirent(void * element) +struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) { - struct sysfs_dirent * sd; + char *dup_name = NULL; + struct sysfs_dirent *sd = NULL; + + if (type & SYSFS_COPY_NAME) { + name = dup_name = kstrdup(name, GFP_KERNEL); + if (!name) + goto err_out; + } sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); if (!sd) - return NULL; + goto err_out; + + if (sysfs_alloc_ino(&sd->s_ino)) + goto err_out; atomic_set(&sd->s_count, 1); + atomic_set(&sd->s_active, 0); atomic_set(&sd->s_event, 1); - INIT_LIST_HEAD(&sd->s_children); - INIT_LIST_HEAD(&sd->s_sibling); - sd->s_element = element; + + sd->s_name = name; + sd->s_mode = mode; + sd->s_flags = type; return sd; + + err_out: + kfree(dup_name); + kmem_cache_free(sysfs_dir_cachep, sd); + return NULL; } -static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd, - struct sysfs_dirent *sd) +/** + * sysfs_attach_dentry - associate sysfs_dirent with dentry + * @sd: target sysfs_dirent + * @dentry: dentry to associate + * + * Associate @sd with @dentry. This is protected by + * sysfs_assoc_lock to avoid race with sysfs_d_iput(). 
+ * + * LOCKING: + * mutex_lock(sysfs_mutex) + */ +static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry) { - if (sd) - list_add(&sd->s_sibling, &parent_sd->s_children); + dentry->d_op = &sysfs_dentry_ops; + dentry->d_fsdata = sysfs_get(sd); + + /* protect sd->s_dentry against sysfs_d_iput */ + spin_lock(&sysfs_assoc_lock); + sd->s_dentry = dentry; + spin_unlock(&sysfs_assoc_lock); + + d_rehash(dentry); } -static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd, - void * element) +static int sysfs_ilookup_test(struct inode *inode, void *arg) { - struct sysfs_dirent *sd; - sd = __sysfs_new_dirent(element); - __sysfs_list_dirent(parent_sd, sd); - return sd; + struct sysfs_dirent *sd = arg; + return inode->i_ino == sd->s_ino; } -/* +/** + * sysfs_addrm_start - prepare for sysfs_dirent add/remove + * @acxt: pointer to sysfs_addrm_cxt to be used + * @parent_sd: parent sysfs_dirent * - * Return -EEXIST if there is already a sysfs element with the same name for - * the same parent. + * This function is called when the caller is about to add or + * remove sysfs_dirent under @parent_sd. This function acquires + * sysfs_mutex, grabs inode for @parent_sd if available and lock + * i_mutex of it. @acxt is used to keep and pass context to + * other addrm functions. * - * called with parent inode's i_mutex held + * LOCKING: + * Kernel thread context (may sleep). sysfs_mutex is locked on + * return. i_mutex of parent inode is locked on return if + * available. 
*/ -int sysfs_dirent_exist(struct sysfs_dirent *parent_sd, - const unsigned char *new) +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *parent_sd) { - struct sysfs_dirent * sd; + struct inode *inode; - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (sd->s_element) { - const unsigned char *existing = sysfs_get_name(sd); - if (strcmp(existing, new)) - continue; - else - return -EEXIST; - } - } + memset(acxt, 0, sizeof(*acxt)); + acxt->parent_sd = parent_sd; - return 0; + /* Lookup parent inode. inode initialization and I_NEW + * clearing are protected by sysfs_mutex. By grabbing it and + * looking up with _nowait variant, inode state can be + * determined reliably. + */ + mutex_lock(&sysfs_mutex); + + inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, + parent_sd); + + if (inode && !(inode->i_state & I_NEW)) { + /* parent inode available */ + acxt->parent_inode = inode; + + /* sysfs_mutex is below i_mutex in lock hierarchy. + * First, trylock i_mutex. If fails, unlock + * sysfs_mutex and lock them in order. + */ + if (!mutex_trylock(&inode->i_mutex)) { + mutex_unlock(&sysfs_mutex); + mutex_lock(&inode->i_mutex); + mutex_lock(&sysfs_mutex); + } + } else + iput(inode); } +/** + * sysfs_add_one - add sysfs_dirent to parent + * @acxt: addrm context to use + * @sd: sysfs_dirent to be added + * + * Get @acxt->parent_sd and set sd->s_parent to it and increment + * nlink of parent inode if @sd is a directory. @sd is NOT + * linked into the children list of the parent. The caller + * should invoke sysfs_link_sibling() after this function + * completes if @sd needs to be on the children list. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). 
+ */ +void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +{ + sd->s_parent = sysfs_get(acxt->parent_sd); + + if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) + inc_nlink(acxt->parent_inode); + + acxt->cnt++; +} -static struct sysfs_dirent * -__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type) +/** + * sysfs_remove_one - remove sysfs_dirent from parent + * @acxt: addrm context to use + * @sd: sysfs_dirent to be added + * + * Mark @sd removed and drop nlink of parent inode if @sd is a + * directory. @sd is NOT unlinked from the children list of the + * parent. The caller is repsonsible for removing @sd from the + * children list before calling this function. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). + */ +void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { - struct sysfs_dirent * sd; + BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED)); - sd = __sysfs_new_dirent(element); - if (!sd) - goto out; + sd->s_flags |= SYSFS_FLAG_REMOVED; + sd->s_sibling = acxt->removed; + acxt->removed = sd; - sd->s_mode = mode; - sd->s_type = type; - sd->s_dentry = dentry; - if (dentry) { - dentry->d_fsdata = sysfs_get(sd); - dentry->d_op = &sysfs_dentry_ops; - } + if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) + drop_nlink(acxt->parent_inode); -out: - return sd; + acxt->cnt++; } -int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, - void * element, umode_t mode, int type) +/** + * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent + * @sd: target sysfs_dirent + * + * Drop dentry for @sd. @sd must have been unlinked from its + * parent on entry to this function such that it can't be looked + * up anymore. 
+ * + * @sd->s_dentry which is protected with sysfs_assoc_lock points + * to the currently associated dentry but we're not holding a + * reference to it and racing with dput(). Grab dcache_lock and + * verify dentry before dropping it. If @sd->s_dentry is NULL or + * dput() beats us, no need to bother. + */ +static void sysfs_drop_dentry(struct sysfs_dirent *sd) { - struct sysfs_dirent *sd; + struct dentry *dentry = NULL; + struct inode *inode; + + /* We're not holding a reference to ->s_dentry dentry but the + * field will stay valid as long as sysfs_assoc_lock is held. + */ + spin_lock(&sysfs_assoc_lock); + spin_lock(&dcache_lock); + + /* drop dentry if it's there and dput() didn't kill it yet */ + if (sd->s_dentry && sd->s_dentry->d_inode) { + dentry = dget_locked(sd->s_dentry); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + } - sd = __sysfs_make_dirent(dentry, element, mode, type); - __sysfs_list_dirent(parent_sd, sd); + spin_unlock(&dcache_lock); + spin_unlock(&sysfs_assoc_lock); - return sd ? 0 : -ENOMEM; + /* dentries for shadowed inodes are pinned, unpin */ + if (dentry && sysfs_is_shadowed_inode(dentry->d_inode)) + dput(dentry); + dput(dentry); + + /* adjust nlink and update timestamp */ + inode = ilookup(sysfs_sb, sd->s_ino); + if (inode) { + mutex_lock(&inode->i_mutex); + + inode->i_ctime = CURRENT_TIME; + drop_nlink(inode); + if (sysfs_type(sd) == SYSFS_DIR) + drop_nlink(inode); + + mutex_unlock(&inode->i_mutex); + iput(inode); + } } -static int init_dir(struct inode * inode) +/** + * sysfs_addrm_finish - finish up sysfs_dirent add/remove + * @acxt: addrm context to finish up + * + * Finish up sysfs_dirent add/remove. Resources acquired by + * sysfs_addrm_start() are released and removed sysfs_dirents are + * cleaned up. Timestamps on the parent inode are updated. + * + * LOCKING: + * All mutexes acquired by sysfs_addrm_start() are released. 
+ * + * RETURNS: + * Number of added/removed sysfs_dirents since sysfs_addrm_start(). + */ +int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) { - inode->i_op = &sysfs_dir_inode_operations; - inode->i_fop = &sysfs_dir_operations; + /* release resources acquired by sysfs_addrm_start() */ + mutex_unlock(&sysfs_mutex); + if (acxt->parent_inode) { + struct inode *inode = acxt->parent_inode; - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); - return 0; + /* if added/removed, update timestamps on the parent */ + if (acxt->cnt) + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + + mutex_unlock(&inode->i_mutex); + iput(inode); + } + + /* kill removed sysfs_dirents */ + while (acxt->removed) { + struct sysfs_dirent *sd = acxt->removed; + + acxt->removed = sd->s_sibling; + sd->s_sibling = NULL; + + sysfs_drop_dentry(sd); + sysfs_deactivate(sd); + sysfs_put(sd); + } + + return acxt->cnt; } -static int init_file(struct inode * inode) +/** + * sysfs_find_dirent - find sysfs_dirent with the given name + * @parent_sd: sysfs_dirent to search under + * @name: name to look for + * + * Look for sysfs_dirent with name @name under @parent_sd. + * + * LOCKING: + * mutex_lock(sysfs_mutex) + * + * RETURNS: + * Pointer to sysfs_dirent if found, NULL if not. + */ +struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name) { - inode->i_size = PAGE_SIZE; - inode->i_fop = &sysfs_file_operations; - return 0; + struct sysfs_dirent *sd; + + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) + if (sysfs_type(sd) && !strcmp(sd->s_name, name)) + return sd; + return NULL; } -static int init_symlink(struct inode * inode) +/** + * sysfs_get_dirent - find and get sysfs_dirent with the given name + * @parent_sd: sysfs_dirent to search under + * @name: name to look for + * + * Look for sysfs_dirent with name @name under @parent_sd and get + * it if found. + * + * LOCKING: + * Kernel thread context (may sleep). 
Grabs sysfs_mutex. + * + * RETURNS: + * Pointer to sysfs_dirent if found, NULL if not. + */ +struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name) { - inode->i_op = &sysfs_symlink_inode_operations; - return 0; + struct sysfs_dirent *sd; + + mutex_lock(&sysfs_mutex); + sd = sysfs_find_dirent(parent_sd, name); + sysfs_get(sd); + mutex_unlock(&sysfs_mutex); + + return sd; } -static int create_dir(struct kobject * k, struct dentry * p, - const char * n, struct dentry ** d) +static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, + const char *name, struct sysfs_dirent **p_sd) { - int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; - mutex_lock(&p->d_inode->i_mutex); - *d = lookup_one_len(n, p, strlen(n)); - if (!IS_ERR(*d)) { - if (sysfs_dirent_exist(p->d_fsdata, n)) - error = -EEXIST; - else - error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, - SYSFS_DIR); - if (!error) { - error = sysfs_create(*d, mode, init_dir); - if (!error) { - inc_nlink(p->d_inode); - (*d)->d_op = &sysfs_dentry_ops; - d_rehash(*d); - } - } - if (error && (error != -EEXIST)) { - struct sysfs_dirent *sd = (*d)->d_fsdata; - if (sd) { - list_del_init(&sd->s_sibling); - sysfs_put(sd); - } - d_drop(*d); - } - dput(*d); - } else - error = PTR_ERR(*d); - mutex_unlock(&p->d_inode->i_mutex); - return error; -} + /* allocate */ + sd = sysfs_new_dirent(name, mode, SYSFS_DIR); + if (!sd) + return -ENOMEM; + sd->s_elem.dir.kobj = kobj; + /* link in */ + sysfs_addrm_start(&acxt, parent_sd); + if (!sysfs_find_dirent(parent_sd, name)) { + sysfs_add_one(&acxt, sd); + sysfs_link_sibling(sd); + } + if (sysfs_addrm_finish(&acxt)) { + *p_sd = sd; + return 0; + } -int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d) + sysfs_put(sd); + return -EEXIST; +} + +int sysfs_create_subdir(struct kobject *kobj, const char *name, + struct sysfs_dirent **p_sd) { - 
return create_dir(k,k->dentry,n,d); + return create_dir(kobj, kobj->sd, name, p_sd); } /** * sysfs_create_dir - create a directory for an object. * @kobj: object we're creating directory for. - * @shadow_parent: parent parent object. + * @shadow_parent: parent object. */ - -int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent) +int sysfs_create_dir(struct kobject *kobj, + struct sysfs_dirent *shadow_parent_sd) { - struct dentry * dentry = NULL; - struct dentry * parent; + struct sysfs_dirent *parent_sd, *sd; int error = 0; BUG_ON(!kobj); - if (shadow_parent) - parent = shadow_parent; + if (shadow_parent_sd) + parent_sd = shadow_parent_sd; else if (kobj->parent) - parent = kobj->parent->dentry; + parent_sd = kobj->parent->sd; else if (sysfs_mount && sysfs_mount->mnt_sb) - parent = sysfs_mount->mnt_sb->s_root; + parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; else return -EFAULT; - error = create_dir(kobj,parent,kobject_name(kobj),&dentry); + error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); if (!error) - kobj->dentry = dentry; + kobj->sd = sd; return error; } -/* attaches attribute's sysfs_dirent to the dentry corresponding to the - * attribute file - */ -static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry) +static int sysfs_count_nlink(struct sysfs_dirent *sd) { - struct attribute * attr = NULL; - struct bin_attribute * bin_attr = NULL; - int (* init) (struct inode *) = NULL; - int error = 0; + struct sysfs_dirent *child; + int nr = 0; - if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) { - bin_attr = sd->s_element; - attr = &bin_attr->attr; - } else { - attr = sd->s_element; - init = init_file; - } - - dentry->d_fsdata = sysfs_get(sd); - sd->s_dentry = dentry; - error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init); - if (error) { - sysfs_put(sd); - return error; - } - - if (bin_attr) { - dentry->d_inode->i_size = bin_attr->size; - dentry->d_inode->i_fop = &bin_fops; - } - dentry->d_op = 
&sysfs_dentry_ops; - d_rehash(dentry); - - return 0; -} - -static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry) -{ - int err = 0; - - dentry->d_fsdata = sysfs_get(sd); - sd->s_dentry = dentry; - err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink); - if (!err) { - dentry->d_op = &sysfs_dentry_ops; - d_rehash(dentry); - } else - sysfs_put(sd); - - return err; + for (child = sd->s_children; child; child = child->s_sibling) + if (sysfs_type(child) == SYSFS_DIR) + nr++; + return nr + 2; } static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, @@ -276,24 +761,60 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, { struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; struct sysfs_dirent * sd; - int err = 0; + struct bin_attribute *bin_attr; + struct inode *inode; + int found = 0; - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (sd->s_type & SYSFS_NOT_PINNED) { - const unsigned char * name = sysfs_get_name(sd); + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { + if (sysfs_type(sd) && + !strcmp(sd->s_name, dentry->d_name.name)) { + found = 1; + break; + } + } + + /* no such entry */ + if (!found) + return NULL; - if (strcmp(name, dentry->d_name.name)) - continue; + /* attach dentry and inode */ + inode = sysfs_get_inode(sd); + if (!inode) + return ERR_PTR(-ENOMEM); - if (sd->s_type & SYSFS_KOBJ_LINK) - err = sysfs_attach_link(sd, dentry); - else - err = sysfs_attach_attr(sd, dentry); + mutex_lock(&sysfs_mutex); + + if (inode->i_state & I_NEW) { + /* initialize inode according to type */ + switch (sysfs_type(sd)) { + case SYSFS_DIR: + inode->i_op = &sysfs_dir_inode_operations; + inode->i_fop = &sysfs_dir_operations; + inode->i_nlink = sysfs_count_nlink(sd); break; + case SYSFS_KOBJ_ATTR: + inode->i_size = PAGE_SIZE; + inode->i_fop = &sysfs_file_operations; + break; + case SYSFS_KOBJ_BIN_ATTR: + bin_attr = sd->s_elem.bin_attr.bin_attr; + inode->i_size 
= bin_attr->size; + inode->i_fop = &bin_fops; + break; + case SYSFS_KOBJ_LINK: + inode->i_op = &sysfs_symlink_inode_operations; + break; + default: + BUG(); } } - return ERR_PTR(err); + sysfs_instantiate(dentry, inode); + sysfs_attach_dentry(sd, dentry); + + mutex_unlock(&sysfs_mutex); + + return NULL; } const struct inode_operations sysfs_dir_inode_operations = { @@ -301,58 +822,46 @@ const struct inode_operations sysfs_dir_inode_operations = { .setattr = sysfs_setattr, }; -static void remove_dir(struct dentry * d) +static void remove_dir(struct sysfs_dirent *sd) { - struct dentry * parent = dget(d->d_parent); - struct sysfs_dirent * sd; - - mutex_lock(&parent->d_inode->i_mutex); - d_delete(d); - sd = d->d_fsdata; - list_del_init(&sd->s_sibling); - sysfs_put(sd); - if (d->d_inode) - simple_rmdir(parent->d_inode,d); + struct sysfs_addrm_cxt acxt; - pr_debug(" o %s removing done (%d)\n",d->d_name.name, - atomic_read(&d->d_count)); - - mutex_unlock(&parent->d_inode->i_mutex); - dput(parent); + sysfs_addrm_start(&acxt, sd->s_parent); + sysfs_unlink_sibling(sd); + sysfs_remove_one(&acxt, sd); + sysfs_addrm_finish(&acxt); } -void sysfs_remove_subdir(struct dentry * d) +void sysfs_remove_subdir(struct sysfs_dirent *sd) { - remove_dir(d); + remove_dir(sd); } -static void __sysfs_remove_dir(struct dentry *dentry) +static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) { - struct sysfs_dirent * parent_sd; - struct sysfs_dirent * sd, * tmp; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent **pos; - dget(dentry); - if (!dentry) + if (!dir_sd) return; - pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); - mutex_lock(&dentry->d_inode->i_mutex); - parent_sd = dentry->d_fsdata; - list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { - if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) - continue; - list_del_init(&sd->s_sibling); - sysfs_drop_dentry(sd, dentry); - sysfs_put(sd); + pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); + 
sysfs_addrm_start(&acxt, dir_sd); + pos = &dir_sd->s_children; + while (*pos) { + struct sysfs_dirent *sd = *pos; + + if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) { + *pos = sd->s_sibling; + sd->s_sibling = NULL; + sysfs_remove_one(&acxt, sd); + } else + pos = &(*pos)->s_sibling; } - mutex_unlock(&dentry->d_inode->i_mutex); + sysfs_addrm_finish(&acxt); - remove_dir(dentry); - /** - * Drop reference from dget() on entrance. - */ - dput(dentry); + remove_dir(dir_sd); } /** @@ -366,102 +875,166 @@ static void __sysfs_remove_dir(struct dentry *dentry) void sysfs_remove_dir(struct kobject * kobj) { - __sysfs_remove_dir(kobj->dentry); - kobj->dentry = NULL; + struct sysfs_dirent *sd = kobj->sd; + + spin_lock(&sysfs_assoc_lock); + kobj->sd = NULL; + spin_unlock(&sysfs_assoc_lock); + + __sysfs_remove_dir(sd); } -int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent, +int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd, const char *new_name) { - int error = 0; - struct dentry * new_dentry; + struct sysfs_dirent *sd = kobj->sd; + struct dentry *new_parent = NULL; + struct dentry *old_dentry = NULL, *new_dentry = NULL; + const char *dup_name = NULL; + int error; - if (!new_parent) - return -EFAULT; + /* get dentries */ + old_dentry = sysfs_get_dentry(sd); + if (IS_ERR(old_dentry)) { + error = PTR_ERR(old_dentry); + goto out_dput; + } + + new_parent = sysfs_get_dentry(new_parent_sd); + if (IS_ERR(new_parent)) { + error = PTR_ERR(new_parent); + goto out_dput; + } - down_write(&sysfs_rename_sem); + /* lock new_parent and get dentry for new name */ mutex_lock(&new_parent->d_inode->i_mutex); new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); - if (!IS_ERR(new_dentry)) { - /* By allowing two different directories with the - * same d_parent we allow this routine to move - * between different shadows of the same directory - */ - if (kobj->dentry->d_parent->d_inode != new_parent->d_inode) - return -EINVAL; - else if 
(new_dentry->d_parent->d_inode != new_parent->d_inode) - error = -EINVAL; - else if (new_dentry == kobj->dentry) - error = -EINVAL; - else if (!new_dentry->d_inode) { - error = kobject_set_name(kobj, "%s", new_name); - if (!error) { - struct sysfs_dirent *sd, *parent_sd; - - d_add(new_dentry, NULL); - d_move(kobj->dentry, new_dentry); - - sd = kobj->dentry->d_fsdata; - parent_sd = new_parent->d_fsdata; - - list_del_init(&sd->s_sibling); - list_add(&sd->s_sibling, &parent_sd->s_children); - } - else - d_drop(new_dentry); - } else - error = -EEXIST; - dput(new_dentry); + if (IS_ERR(new_dentry)) { + error = PTR_ERR(new_dentry); + goto out_unlock; } - mutex_unlock(&new_parent->d_inode->i_mutex); - up_write(&sysfs_rename_sem); + /* By allowing two different directories with the same + * d_parent we allow this routine to move between different + * shadows of the same directory + */ + error = -EINVAL; + if (old_dentry->d_parent->d_inode != new_parent->d_inode || + new_dentry->d_parent->d_inode != new_parent->d_inode || + old_dentry == new_dentry) + goto out_unlock; + + error = -EEXIST; + if (new_dentry->d_inode) + goto out_unlock; + + /* rename kobject and sysfs_dirent */ + error = -ENOMEM; + new_name = dup_name = kstrdup(new_name, GFP_KERNEL); + if (!new_name) + goto out_drop; + + error = kobject_set_name(kobj, "%s", new_name); + if (error) + goto out_drop; + + dup_name = sd->s_name; + sd->s_name = new_name; + + /* move under the new parent */ + d_add(new_dentry, NULL); + d_move(sd->s_dentry, new_dentry); + + mutex_lock(&sysfs_mutex); + + sysfs_unlink_sibling(sd); + sysfs_get(new_parent_sd); + sysfs_put(sd->s_parent); + sd->s_parent = new_parent_sd; + sysfs_link_sibling(sd); + + mutex_unlock(&sysfs_mutex); + + error = 0; + goto out_unlock; + + out_drop: + d_drop(new_dentry); + out_unlock: + mutex_unlock(&new_parent->d_inode->i_mutex); + out_dput: + kfree(dup_name); + dput(new_parent); + dput(old_dentry); + dput(new_dentry); return error; } -int sysfs_move_dir(struct 
kobject *kobj, struct kobject *new_parent) +int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) { - struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry; - struct sysfs_dirent *new_parent_sd, *sd; + struct sysfs_dirent *sd = kobj->sd; + struct sysfs_dirent *new_parent_sd; + struct dentry *old_parent, *new_parent = NULL; + struct dentry *old_dentry = NULL, *new_dentry = NULL; int error; - old_parent_dentry = kobj->parent ? - kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; - new_parent_dentry = new_parent ? - new_parent->dentry : sysfs_mount->mnt_sb->s_root; + BUG_ON(!sd->s_parent); + new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; + + /* get dentries */ + old_dentry = sysfs_get_dentry(sd); + if (IS_ERR(old_dentry)) { + error = PTR_ERR(old_dentry); + goto out_dput; + } + old_parent = sd->s_parent->s_dentry; + + new_parent = sysfs_get_dentry(new_parent_sd); + if (IS_ERR(new_parent)) { + error = PTR_ERR(new_parent); + goto out_dput; + } - if (old_parent_dentry->d_inode == new_parent_dentry->d_inode) - return 0; /* nothing to move */ + if (old_parent->d_inode == new_parent->d_inode) { + error = 0; + goto out_dput; /* nothing to move */ + } again: - mutex_lock(&old_parent_dentry->d_inode->i_mutex); - if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) { - mutex_unlock(&old_parent_dentry->d_inode->i_mutex); + mutex_lock(&old_parent->d_inode->i_mutex); + if (!mutex_trylock(&new_parent->d_inode->i_mutex)) { + mutex_unlock(&old_parent->d_inode->i_mutex); goto again; } - new_parent_sd = new_parent_dentry->d_fsdata; - sd = kobj->dentry->d_fsdata; - - new_dentry = lookup_one_len(kobj->name, new_parent_dentry, - strlen(kobj->name)); + new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name)); if (IS_ERR(new_dentry)) { error = PTR_ERR(new_dentry); - goto out; + goto out_unlock; } else error = 0; d_add(new_dentry, NULL); - d_move(kobj->dentry, new_dentry); + d_move(sd->s_dentry, new_dentry); 
dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. */ - list_del_init(&sd->s_sibling); - list_add(&sd->s_sibling, &new_parent_sd->s_children); + mutex_lock(&sysfs_mutex); -out: - mutex_unlock(&new_parent_dentry->d_inode->i_mutex); - mutex_unlock(&old_parent_dentry->d_inode->i_mutex); + sysfs_unlink_sibling(sd); + sysfs_get(new_parent_sd); + sysfs_put(sd->s_parent); + sd->s_parent = new_parent_sd; + sysfs_link_sibling(sd); + mutex_unlock(&sysfs_mutex); + + out_unlock: + mutex_unlock(&new_parent->d_inode->i_mutex); + mutex_unlock(&old_parent->d_inode->i_mutex); + out_dput: + dput(new_parent); + dput(old_dentry); + dput(new_dentry); return error; } @@ -469,23 +1042,27 @@ static int sysfs_dir_open(struct inode *inode, struct file *file) { struct dentry * dentry = file->f_path.dentry; struct sysfs_dirent * parent_sd = dentry->d_fsdata; + struct sysfs_dirent * sd; - mutex_lock(&dentry->d_inode->i_mutex); - file->private_data = sysfs_new_dirent(parent_sd, NULL); - mutex_unlock(&dentry->d_inode->i_mutex); - - return file->private_data ? 0 : -ENOMEM; + sd = sysfs_new_dirent("_DIR_", 0, 0); + if (sd) { + mutex_lock(&sysfs_mutex); + sd->s_parent = sysfs_get(parent_sd); + sysfs_link_sibling(sd); + mutex_unlock(&sysfs_mutex); + } + file->private_data = sd; + return sd ? 
0 : -ENOMEM; } static int sysfs_dir_close(struct inode *inode, struct file *file) { - struct dentry * dentry = file->f_path.dentry; struct sysfs_dirent * cursor = file->private_data; - mutex_lock(&dentry->d_inode->i_mutex); - list_del_init(&cursor->s_sibling); - mutex_unlock(&dentry->d_inode->i_mutex); + mutex_lock(&sysfs_mutex); + sysfs_unlink_sibling(cursor); + mutex_unlock(&sysfs_mutex); release_sysfs_dirent(cursor); @@ -503,54 +1080,65 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) struct dentry *dentry = filp->f_path.dentry; struct sysfs_dirent * parent_sd = dentry->d_fsdata; struct sysfs_dirent *cursor = filp->private_data; - struct list_head *p, *q = &cursor->s_sibling; + struct sysfs_dirent **pos; ino_t ino; int i = filp->f_pos; switch (i) { case 0: - ino = dentry->d_inode->i_ino; + ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) break; filp->f_pos++; i++; /* fallthrough */ case 1: - ino = parent_ino(dentry); + if (parent_sd->s_parent) + ino = parent_sd->s_parent->s_ino; + else + ino = parent_sd->s_ino; if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) break; filp->f_pos++; i++; /* fallthrough */ default: + mutex_lock(&sysfs_mutex); + + pos = &parent_sd->s_children; + while (*pos != cursor) + pos = &(*pos)->s_sibling; + + /* unlink cursor */ + *pos = cursor->s_sibling; + if (filp->f_pos == 2) - list_move(q, &parent_sd->s_children); + pos = &parent_sd->s_children; - for (p=q->next; p!= &parent_sd->s_children; p=p->next) { - struct sysfs_dirent *next; + for ( ; *pos; pos = &(*pos)->s_sibling) { + struct sysfs_dirent *next = *pos; const char * name; int len; - next = list_entry(p, struct sysfs_dirent, - s_sibling); - if (!next->s_element) + if (!sysfs_type(next)) continue; - name = sysfs_get_name(next); + name = next->s_name; len = strlen(name); - if (next->s_dentry) - ino = next->s_dentry->d_inode->i_ino; - else - ino = iunique(sysfs_sb, 2); + ino = next->s_ino; if (filldir(dirent, name, len, 
filp->f_pos, ino, dt_type(next)) < 0) - return 0; + break; - list_move(q, p); - p = q; filp->f_pos++; } + + /* put cursor back in */ + cursor->s_sibling = *pos; + *pos = cursor; + + mutex_unlock(&sysfs_mutex); } return 0; } @@ -559,7 +1147,6 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) { struct dentry * dentry = file->f_path.dentry; - mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -567,31 +1154,35 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) if (offset >= 0) break; default: - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { + mutex_lock(&sysfs_mutex); + file->f_pos = offset; if (file->f_pos >= 2) { struct sysfs_dirent *sd = dentry->d_fsdata; struct sysfs_dirent *cursor = file->private_data; - struct list_head *p; + struct sysfs_dirent **pos; loff_t n = file->f_pos - 2; - list_del(&cursor->s_sibling); - p = sd->s_children.next; - while (n && p != &sd->s_children) { - struct sysfs_dirent *next; - next = list_entry(p, struct sysfs_dirent, - s_sibling); - if (next->s_element) + sysfs_unlink_sibling(cursor); + + pos = &sd->s_children; + while (n && *pos) { + struct sysfs_dirent *next = *pos; + if (sysfs_type(next)) n--; - p = p->next; + pos = &(*pos)->s_sibling; } - list_add_tail(&cursor->s_sibling, p); + + cursor->s_sibling = *pos; + *pos = cursor; } + + mutex_unlock(&sysfs_mutex); } - mutex_unlock(&dentry->d_inode->i_mutex); + return offset; } @@ -604,12 +1195,20 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) int sysfs_make_shadowed_dir(struct kobject *kobj, void * (*follow_link)(struct dentry *, struct nameidata *)) { + struct dentry *dentry; struct inode *inode; struct inode_operations *i_op; - inode = kobj->dentry->d_inode; - if (inode->i_op != &sysfs_dir_inode_operations) + /* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */ + dentry = 
sysfs_get_dentry(kobj->sd); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + inode = dentry->d_inode; + if (inode->i_op != &sysfs_dir_inode_operations) { + dput(dentry); return -EINVAL; + } i_op = kmalloc(sizeof(*i_op), GFP_KERNEL); if (!i_op) @@ -634,54 +1233,72 @@ int sysfs_make_shadowed_dir(struct kobject *kobj, * directory. */ -struct dentry *sysfs_create_shadow_dir(struct kobject *kobj) +struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj) { - struct sysfs_dirent *sd; - struct dentry *parent, *dir, *shadow; + struct sysfs_dirent *parent_sd = kobj->sd->s_parent; + struct dentry *dir, *parent, *shadow; struct inode *inode; + struct sysfs_dirent *sd; + struct sysfs_addrm_cxt acxt; - dir = kobj->dentry; - inode = dir->d_inode; + dir = sysfs_get_dentry(kobj->sd); + if (IS_ERR(dir)) { + sd = (void *)dir; + goto out; + } parent = dir->d_parent; - shadow = ERR_PTR(-EINVAL); + + inode = dir->d_inode; + sd = ERR_PTR(-EINVAL); if (!sysfs_is_shadowed_inode(inode)) - goto out; + goto out_dput; shadow = d_alloc(parent, &dir->d_name); if (!shadow) goto nomem; - sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR); + sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR); if (!sd) goto nomem; + sd->s_elem.dir.kobj = kobj; + sysfs_addrm_start(&acxt, parent_sd); + + /* add but don't link into children list */ + sysfs_add_one(&acxt, sd); + + /* attach and instantiate dentry */ + sysfs_attach_dentry(sd, shadow); d_instantiate(shadow, igrab(inode)); - inc_nlink(inode); - inc_nlink(parent->d_inode); - shadow->d_op = &sysfs_dentry_ops; + inc_nlink(inode); /* tj: synchronization? */ + + sysfs_addrm_finish(&acxt); dget(shadow); /* Extra count - pin the dentry in core */ -out: - return shadow; -nomem: + goto out_dput; + + nomem: dput(shadow); - shadow = ERR_PTR(-ENOMEM); - goto out; + sd = ERR_PTR(-ENOMEM); + out_dput: + dput(dir); + out: + return sd; } /** * sysfs_remove_shadow_dir - remove an object's directory. 
- * @shadow: dentry of shadow directory + * @shadow_sd: sysfs_dirent of shadow directory * * The only thing special about this is that we remove any files in * the directory before we remove the directory, and we've inlined * what used to be sysfs_rmdir() below, instead of calling separately. */ -void sysfs_remove_shadow_dir(struct dentry *shadow) +void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd) { - __sysfs_remove_dir(shadow); + __sysfs_remove_dir(shadow_sd); } const struct file_operations sysfs_dir_operations = { diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b502c7197ec0..cc497994b2a8 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -50,29 +50,15 @@ static struct sysfs_ops subsys_sysfs_ops = { .store = subsys_attr_store, }; -/** - * add_to_collection - add buffer to a collection - * @buffer: buffer to be added - * @node: inode of set to add to - */ - -static inline void -add_to_collection(struct sysfs_buffer *buffer, struct inode *node) -{ - struct sysfs_buffer_collection *set = node->i_private; - - mutex_lock(&node->i_mutex); - list_add(&buffer->associates, &set->associates); - mutex_unlock(&node->i_mutex); -} - -static inline void -remove_from_collection(struct sysfs_buffer *buffer, struct inode *node) -{ - mutex_lock(&node->i_mutex); - list_del(&buffer->associates); - mutex_unlock(&node->i_mutex); -} +struct sysfs_buffer { + size_t count; + loff_t pos; + char * page; + struct sysfs_ops * ops; + struct semaphore sem; + int needs_read_fill; + int event; +}; /** * fill_read_buffer - allocate and fill buffer from object. 
@@ -87,9 +73,8 @@ remove_from_collection(struct sysfs_buffer *buffer, struct inode *node) */ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) { - struct sysfs_dirent * sd = dentry->d_fsdata; - struct attribute * attr = to_attr(dentry); - struct kobject * kobj = to_kobj(dentry->d_parent); + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; struct sysfs_ops * ops = buffer->ops; int ret = 0; ssize_t count; @@ -99,8 +84,15 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer if (!buffer->page) return -ENOMEM; - buffer->event = atomic_read(&sd->s_event); - count = ops->show(kobj,attr,buffer->page); + /* need attr_sd for attr and ops, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; + + buffer->event = atomic_read(&attr_sd->s_event); + count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page); + + sysfs_put_active_two(attr_sd); + BUG_ON(count > (ssize_t)PAGE_SIZE); if (count >= 0) { buffer->needs_read_fill = 0; @@ -138,10 +130,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) down(&buffer->sem); if (buffer->needs_read_fill) { - if (buffer->orphaned) - retval = -ENODEV; - else - retval = fill_read_buffer(file->f_path.dentry,buffer); + retval = fill_read_buffer(file->f_path.dentry,buffer); if (retval) goto out; } @@ -196,14 +185,23 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t * passing the buffer that we acquired in fill_write_buffer(). 
*/ -static int +static int flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count) { - struct attribute * attr = to_attr(dentry); - struct kobject * kobj = to_kobj(dentry->d_parent); + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; struct sysfs_ops * ops = buffer->ops; + int rc; + + /* need attr_sd for attr and ops, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; + + rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count); - return ops->store(kobj,attr,buffer->page,count); + sysfs_put_active_two(attr_sd); + + return rc; } @@ -231,37 +229,26 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t ssize_t len; down(&buffer->sem); - if (buffer->orphaned) { - len = -ENODEV; - goto out; - } len = fill_write_buffer(buffer, buf, count); if (len > 0) len = flush_write_buffer(file->f_path.dentry, buffer, len); if (len > 0) *ppos += len; -out: up(&buffer->sem); return len; } static int sysfs_open_file(struct inode *inode, struct file *file) { - struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); - struct attribute * attr = to_attr(file->f_path.dentry); - struct sysfs_buffer_collection *set; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; struct sysfs_buffer * buffer; struct sysfs_ops * ops = NULL; - int error = 0; - - if (!kobj || !attr) - goto Einval; + int error; - /* Grab the module reference for this attribute if we have one */ - if (!try_module_get(attr->owner)) { - error = -ENODEV; - goto Done; - } + /* need attr_sd for attr and ops, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; /* if the kobject has no ktype, then we assume that it is a subsystem * itself, and use ops for it. 
@@ -273,33 +260,21 @@ static int sysfs_open_file(struct inode *inode, struct file *file) else ops = &subsys_sysfs_ops; + error = -EACCES; + /* No sysfs operations, either from having no subsystem, * or the subsystem have no operations. */ if (!ops) - goto Eaccess; - - /* make sure we have a collection to add our buffers to */ - mutex_lock(&inode->i_mutex); - if (!(set = inode->i_private)) { - if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) { - error = -ENOMEM; - goto Done; - } else { - INIT_LIST_HEAD(&set->associates); - } - } - mutex_unlock(&inode->i_mutex); + goto err_out; /* File needs write support. * The inode's perms must say it's ok, * and we must have a store method. */ if (file->f_mode & FMODE_WRITE) { - if (!(inode->i_mode & S_IWUGO) || !ops->store) - goto Eaccess; - + goto err_out; } /* File needs read support. @@ -308,48 +283,38 @@ static int sysfs_open_file(struct inode *inode, struct file *file) */ if (file->f_mode & FMODE_READ) { if (!(inode->i_mode & S_IRUGO) || !ops->show) - goto Eaccess; + goto err_out; } /* No error? Great, allocate a buffer for the file, and store it * it in file->private_data for easy access. 
*/ + error = -ENOMEM; buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); - if (buffer) { - INIT_LIST_HEAD(&buffer->associates); - init_MUTEX(&buffer->sem); - buffer->needs_read_fill = 1; - buffer->ops = ops; - add_to_collection(buffer, inode); - file->private_data = buffer; - } else - error = -ENOMEM; - goto Done; - - Einval: - error = -EINVAL; - goto Done; - Eaccess: - error = -EACCES; - module_put(attr->owner); - Done: - if (error) - kobject_put(kobj); + if (!buffer) + goto err_out; + + init_MUTEX(&buffer->sem); + buffer->needs_read_fill = 1; + buffer->ops = ops; + file->private_data = buffer; + + /* open succeeded, put active references and pin attr_sd */ + sysfs_put_active_two(attr_sd); + sysfs_get(attr_sd); + return 0; + + err_out: + sysfs_put_active_two(attr_sd); return error; } static int sysfs_release(struct inode * inode, struct file * filp) { - struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); - struct attribute * attr = to_attr(filp->f_path.dentry); - struct module * owner = attr->owner; - struct sysfs_buffer * buffer = filp->private_data; + struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; + struct sysfs_buffer *buffer = filp->private_data; - if (buffer) - remove_from_collection(buffer, inode); - kobject_put(kobj); - /* After this point, attr should not be accessed. 
*/ - module_put(owner); + sysfs_put(attr_sd); if (buffer) { if (buffer->page) @@ -376,57 +341,43 @@ static int sysfs_release(struct inode * inode, struct file * filp) static unsigned int sysfs_poll(struct file *filp, poll_table *wait) { struct sysfs_buffer * buffer = filp->private_data; - struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); - struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata; - int res = 0; + struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + + /* need parent for the kobj, grab both */ + if (!sysfs_get_active_two(attr_sd)) + goto trigger; poll_wait(filp, &kobj->poll, wait); - if (buffer->event != atomic_read(&sd->s_event)) { - res = POLLERR|POLLPRI; - buffer->needs_read_fill = 1; - } + sysfs_put_active_two(attr_sd); - return res; -} + if (buffer->event != atomic_read(&attr_sd->s_event)) + goto trigger; + return 0; -static struct dentry *step_down(struct dentry *dir, const char * name) -{ - struct dentry * de; - - if (dir == NULL || dir->d_inode == NULL) - return NULL; - - mutex_lock(&dir->d_inode->i_mutex); - de = lookup_one_len(name, dir, strlen(name)); - mutex_unlock(&dir->d_inode->i_mutex); - dput(dir); - if (IS_ERR(de)) - return NULL; - if (de->d_inode == NULL) { - dput(de); - return NULL; - } - return de; + trigger: + buffer->needs_read_fill = 1; + return POLLERR|POLLPRI; } -void sysfs_notify(struct kobject * k, char *dir, char *attr) +void sysfs_notify(struct kobject *k, char *dir, char *attr) { - struct dentry *de = k->dentry; - if (de) - dget(de); - if (de && dir) - de = step_down(de, dir); - if (de && attr) - de = step_down(de, attr); - if (de) { - struct sysfs_dirent * sd = de->d_fsdata; - if (sd) - atomic_inc(&sd->s_event); + struct sysfs_dirent *sd = k->sd; + + mutex_lock(&sysfs_mutex); + + if (sd && dir) + sd = sysfs_find_dirent(sd, dir); + if (sd && attr) + sd = sysfs_find_dirent(sd, attr); + if (sd) { + atomic_inc(&sd->s_event); 
wake_up_interruptible(&k->poll); - dput(de); } + + mutex_unlock(&sysfs_mutex); } EXPORT_SYMBOL_GPL(sysfs_notify); @@ -440,19 +391,30 @@ const struct file_operations sysfs_file_operations = { }; -int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) +int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, + int type) { - struct sysfs_dirent * parent_sd = dir->d_fsdata; umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; - int error = -EEXIST; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; - mutex_lock(&dir->d_inode->i_mutex); - if (!sysfs_dirent_exist(parent_sd, attr->name)) - error = sysfs_make_dirent(parent_sd, NULL, (void *)attr, - mode, type); - mutex_unlock(&dir->d_inode->i_mutex); + sd = sysfs_new_dirent(attr->name, mode, type); + if (!sd) + return -ENOMEM; + sd->s_elem.attr.attr = (void *)attr; - return error; + sysfs_addrm_start(&acxt, dir_sd); + + if (!sysfs_find_dirent(dir_sd, attr->name)) { + sysfs_add_one(&acxt, sd); + sysfs_link_sibling(sd); + } + + if (sysfs_addrm_finish(&acxt)) + return 0; + + sysfs_put(sd); + return -EEXIST; } @@ -464,9 +426,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) { - BUG_ON(!kobj || !kobj->dentry || !attr); + BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR); + return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR); } @@ -480,16 +442,16 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { - struct dentry *dir; + struct sysfs_dirent *dir_sd; int error; - dir = lookup_one_len(group, kobj->dentry, strlen(group)); - if (IS_ERR(dir)) - error = PTR_ERR(dir); - else { - error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR); - dput(dir); - } + dir_sd = sysfs_get_dirent(kobj->sd, group); + if 
(!dir_sd) + return -ENOENT; + + error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR); + sysfs_put(dir_sd); + return error; } EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); @@ -502,30 +464,31 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); */ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) { - struct dentry * dir = kobj->dentry; - struct dentry * victim; - int res = -ENOENT; - - mutex_lock(&dir->d_inode->i_mutex); - victim = lookup_one_len(attr->name, dir, strlen(attr->name)); - if (!IS_ERR(victim)) { - /* make sure dentry is really there */ - if (victim->d_inode && - (victim->d_parent->d_inode == dir->d_inode)) { - victim->d_inode->i_mtime = CURRENT_TIME; - fsnotify_modify(victim); - res = 0; - } else - d_drop(victim); - - /** - * Drop the reference acquired from lookup_one_len() above. - */ - dput(victim); + struct sysfs_dirent *victim_sd = NULL; + struct dentry *victim = NULL; + int rc; + + rc = -ENOENT; + victim_sd = sysfs_get_dirent(kobj->sd, attr->name); + if (!victim_sd) + goto out; + + victim = sysfs_get_dentry(victim_sd); + if (IS_ERR(victim)) { + rc = PTR_ERR(victim); + victim = NULL; + goto out; } - mutex_unlock(&dir->d_inode->i_mutex); - return res; + mutex_lock(&victim->d_inode->i_mutex); + victim->d_inode->i_mtime = CURRENT_TIME; + fsnotify_modify(victim); + mutex_unlock(&victim->d_inode->i_mutex); + rc = 0; + out: + dput(victim); + sysfs_put(victim_sd); + return rc; } @@ -538,30 +501,34 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) */ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) { - struct dentry *dir = kobj->dentry; - struct dentry *victim; + struct sysfs_dirent *victim_sd = NULL; + struct dentry *victim = NULL; struct inode * inode; struct iattr newattrs; - int res = -ENOENT; - - mutex_lock(&dir->d_inode->i_mutex); - victim = lookup_one_len(attr->name, dir, strlen(attr->name)); - if (!IS_ERR(victim)) { - if (victim->d_inode && - (victim->d_parent->d_inode == 
dir->d_inode)) { - inode = victim->d_inode; - mutex_lock(&inode->i_mutex); - newattrs.ia_mode = (mode & S_IALLUGO) | - (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - res = notify_change(victim, &newattrs); - mutex_unlock(&inode->i_mutex); - } - dput(victim); + int rc; + + rc = -ENOENT; + victim_sd = sysfs_get_dirent(kobj->sd, attr->name); + if (!victim_sd) + goto out; + + victim = sysfs_get_dentry(victim_sd); + if (IS_ERR(victim)) { + rc = PTR_ERR(victim); + victim = NULL; + goto out; } - mutex_unlock(&dir->d_inode->i_mutex); - return res; + inode = victim->d_inode; + mutex_lock(&inode->i_mutex); + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + rc = notify_change(victim, &newattrs); + mutex_unlock(&inode->i_mutex); + out: + dput(victim); + sysfs_put(victim_sd); + return rc; } EXPORT_SYMBOL_GPL(sysfs_chmod_file); @@ -576,7 +543,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file); void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) { - sysfs_hash_and_remove(kobj->dentry, attr->name); + sysfs_hash_and_remove(kobj->sd, attr->name); } @@ -589,12 +556,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group) { - struct dentry *dir; + struct sysfs_dirent *dir_sd; - dir = lookup_one_len(group, kobj->dentry, strlen(group)); - if (!IS_ERR(dir)) { - sysfs_hash_and_remove(dir, attr->name); - dput(dir); + dir_sd = sysfs_get_dirent(kobj->sd, group); + if (dir_sd) { + sysfs_hash_and_remove(dir_sd, attr->name); + sysfs_put(dir_sd); } } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 52eed2a7a5ef..f318b73c790c 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -18,26 +18,25 @@ #include "sysfs.h" -static void remove_files(struct dentry * dir, - const struct attribute_group * grp) 
+static void remove_files(struct sysfs_dirent *dir_sd, + const struct attribute_group *grp) { struct attribute *const* attr; for (attr = grp->attrs; *attr; attr++) - sysfs_hash_and_remove(dir,(*attr)->name); + sysfs_hash_and_remove(dir_sd, (*attr)->name); } -static int create_files(struct dentry * dir, - const struct attribute_group * grp) +static int create_files(struct sysfs_dirent *dir_sd, + const struct attribute_group *grp) { struct attribute *const* attr; int error = 0; - for (attr = grp->attrs; *attr && !error; attr++) { - error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR); - } + for (attr = grp->attrs; *attr && !error; attr++) + error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); if (error) - remove_files(dir,grp); + remove_files(dir_sd, grp); return error; } @@ -45,44 +44,44 @@ static int create_files(struct dentry * dir, int sysfs_create_group(struct kobject * kobj, const struct attribute_group * grp) { - struct dentry * dir; + struct sysfs_dirent *sd; int error; - BUG_ON(!kobj || !kobj->dentry); + BUG_ON(!kobj || !kobj->sd); if (grp->name) { - error = sysfs_create_subdir(kobj,grp->name,&dir); + error = sysfs_create_subdir(kobj, grp->name, &sd); if (error) return error; } else - dir = kobj->dentry; - dir = dget(dir); - if ((error = create_files(dir,grp))) { + sd = kobj->sd; + sysfs_get(sd); + error = create_files(sd, grp); + if (error) { if (grp->name) - sysfs_remove_subdir(dir); + sysfs_remove_subdir(sd); } - dput(dir); + sysfs_put(sd); return error; } void sysfs_remove_group(struct kobject * kobj, const struct attribute_group * grp) { - struct dentry * dir; + struct sysfs_dirent *dir_sd = kobj->sd; + struct sysfs_dirent *sd; if (grp->name) { - dir = lookup_one_len_kern(grp->name, kobj->dentry, - strlen(grp->name)); - BUG_ON(IS_ERR(dir)); - } - else - dir = dget(kobj->dentry); + sd = sysfs_get_dirent(dir_sd, grp->name); + BUG_ON(!sd); + } else + sd = sysfs_get(dir_sd); - remove_files(dir,grp); + remove_files(sd, grp); if (grp->name) - 
sysfs_remove_subdir(dir); - /* release the ref. taken in this routine */ - dput(dir); + sysfs_remove_subdir(sd); + + sysfs_put(sd); } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index bdd30e74de6b..3756e152285a 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -133,170 +133,94 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) */ static struct lock_class_key sysfs_inode_imutex_key; -struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) +void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) { - struct inode * inode = new_inode(sysfs_sb); - if (inode) { - inode->i_blocks = 0; - inode->i_mapping->a_ops = &sysfs_aops; - inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; - inode->i_op = &sysfs_inode_operations; - lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); - - if (sd->s_iattr) { - /* sysfs_dirent has non-default attributes - * get them for the new inode from persistent copy - * in sysfs_dirent - */ - set_inode_attr(inode, sd->s_iattr); - } else - set_default_inode_attr(inode, mode); - } - return inode; -} - -int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) -{ - int error = 0; - struct inode * inode = NULL; - if (dentry) { - if (!dentry->d_inode) { - struct sysfs_dirent * sd = dentry->d_fsdata; - if ((inode = sysfs_new_inode(mode, sd))) { - if (dentry->d_parent && dentry->d_parent->d_inode) { - struct inode *p_inode = dentry->d_parent->d_inode; - p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; - } - goto Proceed; - } - else - error = -ENOMEM; - } else - error = -EEXIST; - } else - error = -ENOENT; - goto Done; - - Proceed: - if (init) - error = init(inode); - if (!error) { - d_instantiate(dentry, inode); - if (S_ISDIR(mode)) - dget(dentry); /* pin only directory dentry in core */ + inode->i_blocks = 0; + inode->i_mapping->a_ops = &sysfs_aops; + inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; + inode->i_op = 
&sysfs_inode_operations; + inode->i_ino = sd->s_ino; + lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); + + if (sd->s_iattr) { + /* sysfs_dirent has non-default attributes + * get them for the new inode from persistent copy + * in sysfs_dirent + */ + set_inode_attr(inode, sd->s_iattr); } else - iput(inode); - Done: - return error; + set_default_inode_attr(inode, sd->s_mode); } -/* - * Get the name for corresponding element represented by the given sysfs_dirent +/** + * sysfs_get_inode - get inode for sysfs_dirent + * @sd: sysfs_dirent to allocate inode for + * + * Get inode for @sd. If such inode doesn't exist, a new inode + * is allocated and basics are initialized. New inode is + * returned locked. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * Pointer to allocated inode on success, NULL on failure. */ -const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) +struct inode * sysfs_get_inode(struct sysfs_dirent *sd) { - struct attribute * attr; - struct bin_attribute * bin_attr; - struct sysfs_symlink * sl; - - BUG_ON(!sd || !sd->s_element); - - switch (sd->s_type) { - case SYSFS_DIR: - /* Always have a dentry so use that */ - return sd->s_dentry->d_name.name; - - case SYSFS_KOBJ_ATTR: - attr = sd->s_element; - return attr->name; + struct inode *inode; - case SYSFS_KOBJ_BIN_ATTR: - bin_attr = sd->s_element; - return bin_attr->attr.name; + inode = iget_locked(sysfs_sb, sd->s_ino); + if (inode && (inode->i_state & I_NEW)) + sysfs_init_inode(sd, inode); - case SYSFS_KOBJ_LINK: - sl = sd->s_element; - return sl->link_name; - } - return NULL; -} - -static inline void orphan_all_buffers(struct inode *node) -{ - struct sysfs_buffer_collection *set; - struct sysfs_buffer *buf; - - mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD); - set = node->i_private; - if (set) { - list_for_each_entry(buf, &set->associates, associates) { - down(&buf->sem); - buf->orphaned = 1; - up(&buf->sem); - } - } - mutex_unlock(&node->i_mutex); + 
return inode; } - -/* - * Unhashes the dentry corresponding to given sysfs_dirent - * Called with parent inode's i_mutex held. +/** + * sysfs_instantiate - instantiate dentry + * @dentry: dentry to be instantiated + * @inode: inode associated with @sd + * + * Unlock @inode if locked and instantiate @dentry with @inode. + * + * LOCKING: + * None. */ -void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) +void sysfs_instantiate(struct dentry *dentry, struct inode *inode) { - struct dentry * dentry = sd->s_dentry; - struct inode *inode; + BUG_ON(!dentry || dentry->d_inode); - if (dentry) { - spin_lock(&dcache_lock); - spin_lock(&dentry->d_lock); - if (!(d_unhashed(dentry) && dentry->d_inode)) { - inode = dentry->d_inode; - spin_lock(&inode->i_lock); - __iget(inode); - spin_unlock(&inode->i_lock); - dget_locked(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - simple_unlink(parent->d_inode, dentry); - orphan_all_buffers(inode); - iput(inode); - } else { - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - } - } + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + d_instantiate(dentry, inode); } -int sysfs_hash_and_remove(struct dentry * dir, const char * name) +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) { - struct sysfs_dirent * sd; - struct sysfs_dirent * parent_sd; - int found = 0; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent **pos, *sd; - if (!dir) + if (!dir_sd) return -ENOENT; - if (dir->d_inode == NULL) - /* no inode means this hasn't been made visible yet */ - return -ENOENT; + sysfs_addrm_start(&acxt, dir_sd); + + for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) { + sd = *pos; - parent_sd = dir->d_fsdata; - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (!sd->s_element) + if (!sysfs_type(sd)) continue; - if (!strcmp(sysfs_get_name(sd), name)) { - 
list_del_init(&sd->s_sibling); - sysfs_drop_dentry(sd, dir); - sysfs_put(sd); - found = 1; + if (!strcmp(sd->s_name, name)) { + *pos = sd->s_sibling; + sd->s_sibling = NULL; + sysfs_remove_one(&acxt, sd); break; } } - mutex_unlock(&dir->d_inode->i_mutex); - return found ? 0 : -ENOENT; + if (sysfs_addrm_finish(&acxt)) + return 0; + return -ENOENT; } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 23a48a38e6af..402cc356203c 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -19,27 +19,18 @@ struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; struct kmem_cache *sysfs_dir_cachep; -static void sysfs_clear_inode(struct inode *inode); - static const struct super_operations sysfs_ops = { .statfs = simple_statfs, .drop_inode = sysfs_delete_inode, - .clear_inode = sysfs_clear_inode, }; -static struct sysfs_dirent sysfs_root = { - .s_sibling = LIST_HEAD_INIT(sysfs_root.s_sibling), - .s_children = LIST_HEAD_INIT(sysfs_root.s_children), - .s_element = NULL, - .s_type = SYSFS_ROOT, - .s_iattr = NULL, +struct sysfs_dirent sysfs_root = { + .s_count = ATOMIC_INIT(1), + .s_flags = SYSFS_ROOT, + .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + .s_ino = 1, }; -static void sysfs_clear_inode(struct inode *inode) -{ - kfree(inode->i_private); -} - static int sysfs_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; @@ -52,24 +43,26 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sysfs_sb = sb; - inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, - &sysfs_root); - if (inode) { - inode->i_op = &sysfs_dir_inode_operations; - inode->i_fop = &sysfs_dir_operations; - /* directory inodes start off with i_nlink == 2 (for "." 
entry) */ - inc_nlink(inode); - } else { + inode = new_inode(sysfs_sb); + if (!inode) { pr_debug("sysfs: could not get root inode\n"); return -ENOMEM; } + sysfs_init_inode(&sysfs_root, inode); + + inode->i_op = &sysfs_dir_inode_operations; + inode->i_fop = &sysfs_dir_operations; + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); + root = d_alloc_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n",__FUNCTION__); iput(inode); return -ENOMEM; } + sysfs_root.s_dentry = root; root->d_fsdata = &sysfs_root; sb->s_root = root; return 0; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 7b9c5bfde920..2f86e0422290 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -11,71 +11,39 @@ #include "sysfs.h" -static int object_depth(struct kobject * kobj) +static int object_depth(struct sysfs_dirent *sd) { - struct kobject * p = kobj; int depth = 0; - do { depth++; } while ((p = p->parent)); + + for (; sd->s_parent; sd = sd->s_parent) + depth++; + return depth; } -static int object_path_length(struct kobject * kobj) +static int object_path_length(struct sysfs_dirent * sd) { - struct kobject * p = kobj; int length = 1; - do { - length += strlen(kobject_name(p)) + 1; - p = p->parent; - } while (p); + + for (; sd->s_parent; sd = sd->s_parent) + length += strlen(sd->s_name) + 1; + return length; } -static void fill_object_path(struct kobject * kobj, char * buffer, int length) +static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length) { - struct kobject * p; - --length; - for (p = kobj; p; p = p->parent) { - int cur = strlen(kobject_name(p)); + for (; sd->s_parent; sd = sd->s_parent) { + int cur = strlen(sd->s_name); /* back up enough to print this bus id with '/' */ length -= cur; - strncpy(buffer + length,kobject_name(p),cur); + strncpy(buffer + length, sd->s_name, cur); *(buffer + --length) = '/'; } } -static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * 
target) -{ - struct sysfs_dirent * parent_sd = parent->d_fsdata; - struct sysfs_symlink * sl; - int error = 0; - - error = -ENOMEM; - sl = kmalloc(sizeof(*sl), GFP_KERNEL); - if (!sl) - goto exit1; - - sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL); - if (!sl->link_name) - goto exit2; - - strcpy(sl->link_name, name); - sl->target_kobj = kobject_get(target); - - error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO, - SYSFS_KOBJ_LINK); - if (!error) - return 0; - - kobject_put(target); - kfree(sl->link_name); -exit2: - kfree(sl); -exit1: - return error; -} - /** * sysfs_create_link - create symlink between two objects. * @kobj: object whose directory we're creating the link in. @@ -84,24 +52,57 @@ exit1: */ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) { - struct dentry *dentry = NULL; - int error = -EEXIST; + struct sysfs_dirent *parent_sd = NULL; + struct sysfs_dirent *target_sd = NULL; + struct sysfs_dirent *sd = NULL; + struct sysfs_addrm_cxt acxt; + int error; BUG_ON(!name); if (!kobj) { if (sysfs_mount && sysfs_mount->mnt_sb) - dentry = sysfs_mount->mnt_sb->s_root; + parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; } else - dentry = kobj->dentry; + parent_sd = kobj->sd; + + error = -EFAULT; + if (!parent_sd) + goto out_put; + + /* target->sd can go away beneath us but is protected with + * sysfs_assoc_lock. Fetch target_sd from it. 
+ */ + spin_lock(&sysfs_assoc_lock); + if (target->sd) + target_sd = sysfs_get(target->sd); + spin_unlock(&sysfs_assoc_lock); + + error = -ENOENT; + if (!target_sd) + goto out_put; + + error = -ENOMEM; + sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK); + if (!sd) + goto out_put; + sd->s_elem.symlink.target_sd = target_sd; - if (!dentry) - return -EFAULT; + sysfs_addrm_start(&acxt, parent_sd); - mutex_lock(&dentry->d_inode->i_mutex); - if (!sysfs_dirent_exist(dentry->d_fsdata, name)) - error = sysfs_add_link(dentry, name, target); - mutex_unlock(&dentry->d_inode->i_mutex); + if (!sysfs_find_dirent(parent_sd, name)) { + sysfs_add_one(&acxt, sd); + sysfs_link_sibling(sd); + } + + if (sysfs_addrm_finish(&acxt)) + return 0; + + error = -EEXIST; + /* fall through */ + out_put: + sysfs_put(target_sd); + sysfs_put(sd); return error; } @@ -114,17 +115,17 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char void sysfs_remove_link(struct kobject * kobj, const char * name) { - sysfs_hash_and_remove(kobj->dentry,name); + sysfs_hash_and_remove(kobj->sd, name); } -static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, - char *path) +static int sysfs_get_target_path(struct sysfs_dirent * parent_sd, + struct sysfs_dirent * target_sd, char *path) { char * s; int depth, size; - depth = object_depth(kobj); - size = object_path_length(target) + depth * 3 - 1; + depth = object_depth(parent_sd); + size = object_path_length(target_sd) + depth * 3 - 1; if (size > PATH_MAX) return -ENAMETOOLONG; @@ -133,7 +134,7 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, for (s = path; depth--; s += 3) strcpy(s,"../"); - fill_object_path(target, path, size); + fill_object_path(target_sd, path, size); pr_debug("%s: path = '%s'\n", __FUNCTION__, path); return 0; @@ -141,27 +142,16 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, static int sysfs_getlink(struct 
dentry *dentry, char * path) { - struct kobject *kobj, *target_kobj; - int error = 0; + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_dirent *parent_sd = sd->s_parent; + struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd; + int error; - kobj = sysfs_get_kobject(dentry->d_parent); - if (!kobj) - return -EINVAL; + mutex_lock(&sysfs_mutex); + error = sysfs_get_target_path(parent_sd, target_sd, path); + mutex_unlock(&sysfs_mutex); - target_kobj = sysfs_get_kobject(dentry); - if (!target_kobj) { - kobject_put(kobj); - return -EINVAL; - } - - down_read(&sysfs_rename_sem); - error = sysfs_get_target_path(kobj, target_kobj, path); - up_read(&sysfs_rename_sem); - - kobject_put(kobj); - kobject_put(target_kobj); return error; - } static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index a77c57e5a6d5..6a37f2386a8d 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -1,38 +1,101 @@ +struct sysfs_elem_dir { + struct kobject * kobj; +}; + +struct sysfs_elem_symlink { + struct sysfs_dirent * target_sd; +}; + +struct sysfs_elem_attr { + struct attribute * attr; +}; + +struct sysfs_elem_bin_attr { + struct bin_attribute * bin_attr; +}; + +/* + * As long as s_count reference is held, the sysfs_dirent itself is + * accessible. Dereferencing s_elem or any other outer entity + * requires s_active reference. 
+ */ struct sysfs_dirent { atomic_t s_count; - struct list_head s_sibling; - struct list_head s_children; - void * s_element; - int s_type; + atomic_t s_active; + struct sysfs_dirent * s_parent; + struct sysfs_dirent * s_sibling; + struct sysfs_dirent * s_children; + const char * s_name; + + union { + struct sysfs_elem_dir dir; + struct sysfs_elem_symlink symlink; + struct sysfs_elem_attr attr; + struct sysfs_elem_bin_attr bin_attr; + } s_elem; + + unsigned int s_flags; umode_t s_mode; + ino_t s_ino; struct dentry * s_dentry; struct iattr * s_iattr; atomic_t s_event; }; +#define SD_DEACTIVATED_BIAS INT_MIN + +struct sysfs_addrm_cxt { + struct sysfs_dirent *parent_sd; + struct inode *parent_inode; + struct sysfs_dirent *removed; + int cnt; +}; + extern struct vfsmount * sysfs_mount; +extern struct sysfs_dirent sysfs_root; extern struct kmem_cache *sysfs_dir_cachep; -extern void sysfs_delete_inode(struct inode *inode); -extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); -extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); +extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); +extern void sysfs_link_sibling(struct sysfs_dirent *sd); +extern void sysfs_unlink_sibling(struct sysfs_dirent *sd); +extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); +extern void sysfs_put_active(struct sysfs_dirent *sd); +extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd); +extern void sysfs_put_active_two(struct sysfs_dirent *sd); +extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *parent_sd); +extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd); +extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd); +extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); -extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *); -extern int sysfs_make_dirent(struct sysfs_dirent *, 
struct dentry *, void *, - umode_t, int); - -extern int sysfs_add_file(struct dentry *, const struct attribute *, int); -extern int sysfs_hash_and_remove(struct dentry * dir, const char * name); +extern void sysfs_delete_inode(struct inode *inode); +extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode); +extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd); +extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode); + +extern void release_sysfs_dirent(struct sysfs_dirent * sd); +extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name); +extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name); +extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, + int type); + +extern int sysfs_add_file(struct sysfs_dirent *dir_sd, + const struct attribute *attr, int type); +extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); -extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); -extern void sysfs_remove_subdir(struct dentry *); +extern int sysfs_create_subdir(struct kobject *kobj, const char *name, + struct sysfs_dirent **p_sd); +extern void sysfs_remove_subdir(struct sysfs_dirent *sd); -extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd); -extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent); extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); -extern struct rw_semaphore sysfs_rename_sem; +extern spinlock_t sysfs_assoc_lock; +extern struct mutex sysfs_mutex; extern struct super_block * sysfs_sb; extern const struct file_operations sysfs_dir_operations; extern const struct file_operations sysfs_file_operations; @@ -40,73 +103,9 @@ extern const struct file_operations bin_fops; extern const struct inode_operations 
sysfs_dir_inode_operations; extern const struct inode_operations sysfs_symlink_inode_operations; -struct sysfs_symlink { - char * link_name; - struct kobject * target_kobj; -}; - -struct sysfs_buffer { - struct list_head associates; - size_t count; - loff_t pos; - char * page; - struct sysfs_ops * ops; - struct semaphore sem; - int orphaned; - int needs_read_fill; - int event; -}; - -struct sysfs_buffer_collection { - struct list_head associates; -}; - -static inline struct kobject * to_kobj(struct dentry * dentry) -{ - struct sysfs_dirent * sd = dentry->d_fsdata; - return ((struct kobject *) sd->s_element); -} - -static inline struct attribute * to_attr(struct dentry * dentry) +static inline unsigned int sysfs_type(struct sysfs_dirent *sd) { - struct sysfs_dirent * sd = dentry->d_fsdata; - return ((struct attribute *) sd->s_element); -} - -static inline struct bin_attribute * to_bin_attr(struct dentry * dentry) -{ - struct sysfs_dirent * sd = dentry->d_fsdata; - return ((struct bin_attribute *) sd->s_element); -} - -static inline struct kobject *sysfs_get_kobject(struct dentry *dentry) -{ - struct kobject * kobj = NULL; - - spin_lock(&dcache_lock); - if (!d_unhashed(dentry)) { - struct sysfs_dirent * sd = dentry->d_fsdata; - if (sd->s_type & SYSFS_KOBJ_LINK) { - struct sysfs_symlink * sl = sd->s_element; - kobj = kobject_get(sl->target_kobj); - } else - kobj = kobject_get(sd->s_element); - } - spin_unlock(&dcache_lock); - - return kobj; -} - -static inline void release_sysfs_dirent(struct sysfs_dirent * sd) -{ - if (sd->s_type & SYSFS_KOBJ_LINK) { - struct sysfs_symlink * sl = sd->s_element; - kfree(sl->link_name); - kobject_put(sl->target_kobj); - kfree(sl); - } - kfree(sd->s_iattr); - kmem_cache_free(sysfs_dir_cachep, sd); + return sd->s_flags & SYSFS_TYPE_MASK; } static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) @@ -120,7 +119,7 @@ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) static inline void sysfs_put(struct 
sysfs_dirent * sd) { - if (atomic_dec_and_test(&sd->s_count)) + if (sd && atomic_dec_and_test(&sd->s_count)) release_sysfs_dirent(sd); } diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 0732ddb9020b..589be21d884e 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = sysv_sync_file, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; const struct inode_operations sysv_file_inode_operations = { diff --git a/fs/udf/file.c b/fs/udf/file.c index 51b5764685e7..df070bee8d4f 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = { .aio_write = udf_file_aio_write, .release = udf_release_file, .fsync = udf_fsync_file, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; const struct inode_operations udf_file_inode_operations = { diff --git a/fs/udf/inode.c b/fs/udf/inode.c index c8461551e108..bf7de0bdbab3 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -100,14 +100,23 @@ no_delete: clear_inode(inode); } +/* + * If we are going to release inode from memory, we discard preallocation and + * truncate last inode extent to proper length. We could use drop_inode() but + * it's called under inode_lock and thus we cannot mark inode dirty there. We + * use clear_inode() but we have to make sure to write inode as it's not written + * automatically. + */ void udf_clear_inode(struct inode *inode) { if (!(inode->i_sb->s_flags & MS_RDONLY)) { lock_kernel(); + /* Discard preallocation for directories, symlinks, etc. 
*/ udf_discard_prealloc(inode); + udf_truncate_tail_extent(inode); unlock_kernel(); + write_inode_now(inode, 1); } - kfree(UDF_I_DATA(inode)); UDF_I_DATA(inode) = NULL; } @@ -460,8 +469,8 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, kernel_long_ad laarr[EXTENT_MERGE_SIZE]; struct extent_position prev_epos, cur_epos, next_epos; int count = 0, startnum = 0, endnum = 0; - uint32_t elen = 0; - kernel_lb_addr eloc; + uint32_t elen = 0, tmpelen; + kernel_lb_addr eloc, tmpeloc; int c = 1; loff_t lbcount = 0, b_off = 0; uint32_t newblocknum, newblock; @@ -520,8 +529,12 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, b_off -= lbcount; offset = b_off >> inode->i_sb->s_blocksize_bits; - /* Move into indirect extent if we are at a pointer to it */ - udf_next_aext(inode, &prev_epos, &eloc, &elen, 0); + /* + * Move prev_epos and cur_epos into indirect extent if we are at + * the pointer to it + */ + udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, 0); + udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, 0); /* if the extent is allocated and recorded, return the block if the extent is not a multiple of the blocksize, round up */ diff --git a/fs/udf/super.c b/fs/udf/super.c index 3a743d854c17..6658afb41cc7 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1351,7 +1351,7 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) for (i=0; i<UDF_SB_NUMPARTS(sb); i++) { - switch UDF_SB_PARTTYPE(sb, i) + switch (UDF_SB_PARTTYPE(sb, i)) { case UDF_VIRTUAL_MAP15: case UDF_VIRTUAL_MAP20: diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 77975ae291a5..60d277644248 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -61,7 +61,11 @@ static void extent_trunc(struct inode * inode, struct extent_position *epos, } } -void udf_discard_prealloc(struct inode * inode) +/* + * Truncate the last extent to match i_size. This function assumes + * that preallocation extent is already truncated. 
+ */ +void udf_truncate_tail_extent(struct inode *inode) { struct extent_position epos = { NULL, 0, {0, 0}}; kernel_lb_addr eloc; @@ -71,7 +75,10 @@ void udf_discard_prealloc(struct inode * inode) int adsize; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || - inode->i_size == UDF_I_LENEXTENTS(inode)) + inode->i_size == UDF_I_LENEXTENTS(inode)) + return; + /* Are we going to delete the file anyway? */ + if (inode->i_nlink == 0) return; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) @@ -79,36 +86,76 @@ void udf_discard_prealloc(struct inode * inode) else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else - adsize = 0; - - epos.block = UDF_I_LOCATION(inode); + BUG(); /* Find the last extent in the file */ while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { etype = netype; lbcount += elen; - if (lbcount > inode->i_size && lbcount - elen < inode->i_size) - { - WARN_ON(lbcount - inode->i_size >= inode->i_sb->s_blocksize); + if (lbcount > inode->i_size) { + if (lbcount - inode->i_size >= inode->i_sb->s_blocksize) + printk(KERN_WARNING + "udf_truncate_tail_extent(): Too long " + "extent after EOF in inode %u: i_size: " + "%Ld lbcount: %Ld extent %u+%u\n", + (unsigned)inode->i_ino, + (long long)inode->i_size, + (long long)lbcount, + (unsigned)eloc.logicalBlockNum, + (unsigned)elen); nelen = elen - (lbcount - inode->i_size); epos.offset -= adsize; extent_trunc(inode, &epos, eloc, etype, elen, nelen); epos.offset += adsize; - lbcount = inode->i_size; + if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) + printk(KERN_ERR "udf_truncate_tail_extent(): " + "Extent after EOF in inode %u.\n", + (unsigned)inode->i_ino); + break; } } + /* This inode entry is in-memory only and thus we don't have to mark + * the inode dirty */ + UDF_I_LENEXTENTS(inode) = inode->i_size; + brelse(epos.bh); +} + +void udf_discard_prealloc(struct inode *inode) +{ + struct extent_position epos = { NULL, 0, {0, 0}}; + kernel_lb_addr eloc; + 
uint32_t elen; + uint64_t lbcount = 0; + int8_t etype = -1, netype; + int adsize; + + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || + inode->i_size == UDF_I_LENEXTENTS(inode)) + return; + + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + adsize = sizeof(short_ad); + else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + adsize = sizeof(long_ad); + else + adsize = 0; + + epos.block = UDF_I_LOCATION(inode); + + /* Find the last extent in the file */ + while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { + etype = netype; + lbcount += elen; + } if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { epos.offset -= adsize; lbcount -= elen; extent_trunc(inode, &epos, eloc, etype, elen, 0); - if (!epos.bh) - { + if (!epos.bh) { UDF_I_LENALLOC(inode) = epos.offset - udf_file_entry_alloc_offset(inode); mark_inode_dirty(inode); - } - else - { + } else { struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); aed->lengthAllocDescs = cpu_to_le32(epos.offset - sizeof(struct allocExtDesc)); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) @@ -118,9 +165,9 @@ void udf_discard_prealloc(struct inode * inode) mark_buffer_dirty_inode(epos.bh, inode); } } + /* This inode entry is in-memory only and thus we don't have to mark + * the inode dirty */ UDF_I_LENEXTENTS(inode) = lbcount; - - WARN_ON(lbcount != inode->i_size); brelse(epos.bh); } diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 67ded289497c..f581f2f69c0f 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -146,6 +146,7 @@ extern void udf_free_inode(struct inode *); extern struct inode * udf_new_inode (struct inode *, int, int *); /* truncate.c */ +extern void udf_truncate_tail_extent(struct inode *); extern void udf_discard_prealloc(struct inode *); extern void udf_truncate_extents(struct inode *); diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 1e096323bad4..6705d74c6d2d 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -60,5 
+60,5 @@ const struct file_operations ufs_file_operations = { .mmap = generic_file_mmap, .open = generic_file_open, .fsync = ufs_sync_file, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; diff --git a/fs/utimes.c b/fs/utimes.c index 480f7c8c29da..b3c88952465f 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -106,9 +106,16 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags if (IS_IMMUTABLE(inode)) goto dput_and_out; - if (current->fsuid != inode->i_uid && - (error = vfs_permission(&nd, MAY_WRITE)) != 0) - goto dput_and_out; + if (current->fsuid != inode->i_uid) { + if (f) { + if (!(f->f_mode & FMODE_WRITE)) + goto dput_and_out; + } else { + error = vfs_permission(&nd, MAY_WRITE); + if (error) + goto dput_and_out; + } + } } mutex_lock(&inode->i_mutex); error = notify_change(dentry, &newattrs); diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index b49989bb89ad..e7a9a83f0087 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -64,6 +64,7 @@ xfs-y += xfs_alloc.o \ xfs_dir2_sf.o \ xfs_error.o \ xfs_extfree_item.o \ + xfs_filestream.o \ xfs_fsops.o \ xfs_ialloc.o \ xfs_ialloc_btree.o \ @@ -77,6 +78,7 @@ xfs-y += xfs_alloc.o \ xfs_log.o \ xfs_log_recover.o \ xfs_mount.o \ + xfs_mru_cache.o \ xfs_rename.o \ xfs_trans.o \ xfs_trans_ail.o \ diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 9ebabdf7829c..4b6470cf87f0 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -100,25 +100,6 @@ kmem_zone_destroy(kmem_zone_t *zone) extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -/* - * Low memory cache shrinkers - */ - -typedef struct shrinker *kmem_shaker_t; -typedef int (*kmem_shake_func_t)(int, gfp_t); - -static inline kmem_shaker_t -kmem_shake_register(kmem_shake_func_t sfunc) -{ - return set_shrinker(DEFAULT_SEEKS, sfunc); -} - -static inline void 
-kmem_shake_deregister(kmem_shaker_t shrinker) -{ - remove_shrinker(shrinker); -} - static inline int kmem_shake_allow(gfp_t gfp_mask) { diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 7361861e3aac..fd4105d662e0 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -108,14 +108,19 @@ xfs_page_trace( /* * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. + * the final hold on this ioend. If we are asked to wait, + * flush the workqueue. */ STATIC void xfs_finish_ioend( - xfs_ioend_t *ioend) + xfs_ioend_t *ioend, + int wait) { - if (atomic_dec_and_test(&ioend->io_remaining)) + if (atomic_dec_and_test(&ioend->io_remaining)) { queue_work(xfsdatad_workqueue, &ioend->io_work); + if (wait) + flush_workqueue(xfsdatad_workqueue); + } } /* @@ -156,6 +161,8 @@ xfs_setfilesize( xfs_fsize_t bsize; ip = xfs_vtoi(ioend->io_vnode); + if (!ip) + return; ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); ASSERT(ioend->io_type != IOMAP_READ); @@ -334,7 +341,7 @@ xfs_end_bio( bio->bi_end_io = NULL; bio_put(bio); - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, 0); return 0; } @@ -470,7 +477,7 @@ xfs_submit_ioend( } if (bio) xfs_submit_ioend_bio(ioend, bio); - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, 0); } while ((ioend = next) != NULL); } @@ -1003,6 +1010,8 @@ xfs_page_state_convert( if (buffer_unwritten(bh) || buffer_delay(bh) || ((buffer_uptodate(bh) || PageUptodate(page)) && !buffer_mapped(bh) && (unmapped || startio))) { + int new_ioend = 0; + /* * Make sure we don't use a read-only iomap */ @@ -1021,6 +1030,15 @@ xfs_page_state_convert( } if (!iomap_valid) { + /* + * if we didn't have a valid mapping then we + * need to ensure that we put the new mapping + * in a new ioend structure. This needs to be + * done to ensure that the ioends correctly + * reflect the block mappings at io completion + * for unwritten extent conversion. 
+ */ + new_ioend = 1; if (type == IOMAP_NEW) { size = xfs_probe_cluster(inode, page, bh, head, 0); @@ -1040,7 +1058,7 @@ xfs_page_state_convert( if (startio) { xfs_add_to_ioend(inode, bh, offset, type, &ioend, - !iomap_valid); + new_ioend); } else { set_buffer_dirty(bh); unlock_buffer(bh); @@ -1416,6 +1434,13 @@ xfs_end_io_direct( * This is not necessary for synchronous direct I/O, but we do * it anyway to keep the code uniform and simpler. * + * Well, if only it were that simple. Because synchronous direct I/O + * requires extent conversion to occur *before* we return to userspace, + * we have to wait for extent conversion to complete. Look at the + * iocb that has been passed to us to determine if this is AIO or + * not. If it is synchronous, tell xfs_finish_ioend() to kick the + * workqueue and wait for it to complete. + * * The core direct I/O code might be changed to always call the * completion handler in the future, in which case all this can * go away. @@ -1423,9 +1448,9 @@ xfs_end_io_direct( ioend->io_offset = offset; ioend->io_size = size; if (ioend->io_type == IOMAP_READ) { - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, 0); } else if (private && size > 0) { - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); } else { /* * A direct I/O write ioend starts it's life in unwritten @@ -1434,7 +1459,7 @@ xfs_end_io_direct( * handler. 
*/ INIT_WORK(&ioend->io_work, xfs_end_bio_written); - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, 0); } /* diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index fe4f66a5af14..2df63622354e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -35,7 +35,7 @@ #include <linux/freezer.h> static kmem_zone_t *xfs_buf_zone; -static kmem_shaker_t xfs_buf_shake; +static struct shrinker *xfs_buf_shake; STATIC int xfsbufd(void *); STATIC int xfsbufd_wakeup(int, gfp_t); STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); @@ -314,7 +314,7 @@ xfs_buf_free( ASSERT(list_empty(&bp->b_hash_list)); - if (bp->b_flags & _XBF_PAGE_CACHE) { + if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) @@ -323,18 +323,11 @@ xfs_buf_free( for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; - ASSERT(!PagePrivate(page)); + if (bp->b_flags & _XBF_PAGE_CACHE) + ASSERT(!PagePrivate(page)); page_cache_release(page); } _xfs_buf_free_pages(bp); - } else if (bp->b_flags & _XBF_KMEM_ALLOC) { - /* - * XXX(hch): bp->b_count_desired might be incorrect (see - * xfs_buf_associate_memory for details), but fortunately - * the Linux version of kmem_free ignores the len argument.. - */ - kmem_free(bp->b_addr, bp->b_count_desired); - _xfs_buf_free_pages(bp); } xfs_buf_deallocate(bp); @@ -764,43 +757,44 @@ xfs_buf_get_noaddr( size_t len, xfs_buftarg_t *target) { - size_t malloc_len = len; + unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; + int error, i; xfs_buf_t *bp; - void *data; - int error; bp = xfs_buf_allocate(0); if (unlikely(bp == NULL)) goto fail; _xfs_buf_initialize(bp, target, 0, len, 0); - try_again: - data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE); - if (unlikely(data == NULL)) + error = _xfs_buf_get_pages(bp, page_count, 0); + if (error) goto fail_free_buf; - /* check whether alignment matches.. 
*/ - if ((__psunsigned_t)data != - ((__psunsigned_t)data & ~target->bt_smask)) { - /* .. else double the size and try again */ - kmem_free(data, malloc_len); - malloc_len <<= 1; - goto try_again; + for (i = 0; i < page_count; i++) { + bp->b_pages[i] = alloc_page(GFP_KERNEL); + if (!bp->b_pages[i]) + goto fail_free_mem; } + bp->b_flags |= _XBF_PAGES; - error = xfs_buf_associate_memory(bp, data, len); - if (error) + error = _xfs_buf_map_pages(bp, XBF_MAPPED); + if (unlikely(error)) { + printk(KERN_WARNING "%s: failed to map pages\n", + __FUNCTION__); goto fail_free_mem; - bp->b_flags |= _XBF_KMEM_ALLOC; + } xfs_buf_unlock(bp); - XB_TRACE(bp, "no_daddr", data); + XB_TRACE(bp, "no_daddr", len); return bp; + fail_free_mem: - kmem_free(data, malloc_len); + while (--i >= 0) + __free_page(bp->b_pages[i]); + _xfs_buf_free_pages(bp); fail_free_buf: - xfs_buf_free(bp); + xfs_buf_deallocate(bp); fail: return NULL; } @@ -1453,6 +1447,7 @@ xfs_free_buftarg( int external) { xfs_flush_buftarg(btp, 1); + xfs_blkdev_issue_flush(btp); if (external) xfs_blkdev_put(btp->bt_bdev); xfs_free_bufhash(btp); @@ -1837,7 +1832,7 @@ xfs_buf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup); + xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup); if (!xfs_buf_shake) goto out_destroy_xfsdatad_workqueue; @@ -1859,7 +1854,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - kmem_shake_deregister(xfs_buf_shake); + remove_shrinker(xfs_buf_shake); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index b6241f6201a5..b5908a34b15d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -63,7 +63,7 @@ typedef enum { /* flags used only internally */ _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ - _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ + _XBF_PAGES 
= (1 << 18), /* backed by refcounted pages */ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ } xfs_buf_flags_t; diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index cb51dc961355..cbcd40c8c2a0 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -124,30 +124,6 @@ xfs_file_aio_write_invis( } STATIC ssize_t -xfs_file_sendfile( - struct file *filp, - loff_t *pos, - size_t count, - read_actor_t actor, - void *target) -{ - return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode), - filp, pos, 0, count, actor, target, NULL); -} - -STATIC ssize_t -xfs_file_sendfile_invis( - struct file *filp, - loff_t *pos, - size_t count, - read_actor_t actor, - void *target) -{ - return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode), - filp, pos, IO_INVIS, count, actor, target, NULL); -} - -STATIC ssize_t xfs_file_splice_read( struct file *infilp, loff_t *ppos, @@ -208,15 +184,6 @@ xfs_file_open( } STATIC int -xfs_file_close( - struct file *filp, - fl_owner_t id) -{ - return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0, - file_count(filp) > 1 ? 
L_FALSE : L_TRUE, NULL); -} - -STATIC int xfs_file_release( struct inode *inode, struct file *filp) @@ -452,7 +419,6 @@ const struct file_operations xfs_file_operations = { .write = do_sync_write, .aio_read = xfs_file_aio_read, .aio_write = xfs_file_aio_write, - .sendfile = xfs_file_sendfile, .splice_read = xfs_file_splice_read, .splice_write = xfs_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, @@ -461,7 +427,6 @@ const struct file_operations xfs_file_operations = { #endif .mmap = xfs_file_mmap, .open = xfs_file_open, - .flush = xfs_file_close, .release = xfs_file_release, .fsync = xfs_file_fsync, #ifdef HAVE_FOP_OPEN_EXEC @@ -475,7 +440,6 @@ const struct file_operations xfs_invis_file_operations = { .write = do_sync_write, .aio_read = xfs_file_aio_read_invis, .aio_write = xfs_file_aio_write_invis, - .sendfile = xfs_file_sendfile_invis, .splice_read = xfs_file_splice_read_invis, .splice_write = xfs_file_splice_write_invis, .unlocked_ioctl = xfs_file_ioctl_invis, @@ -484,7 +448,6 @@ const struct file_operations xfs_invis_file_operations = { #endif .mmap = xfs_file_mmap, .open = xfs_file_open, - .flush = xfs_file_close, .release = xfs_file_release, .fsync = xfs_file_fsync, }; diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index ed3a5e1b4b67..bb72c3d4141f 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -46,6 +46,7 @@ xfs_param_t xfs_params = { .inherit_nosym = { 0, 0, 1 }, .rotorstep = { 1, 1, 255 }, .inherit_nodfrg = { 0, 1, 1 }, + .fstrm_timer = { 1, 50, 3600*100}, }; /* diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index ff5c41ff8d40..5917808abbd6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1019,7 +1019,7 @@ xfs_ioc_bulkstat( if (cmd == XFS_IOC_FSINUMBERS) error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer); + bulkreq.ubuffer, xfs_inumbers_fmt); else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) error = xfs_bulkstat_single(mp, 
&inlast, bulkreq.ubuffer, &done); diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b83cebc165f1..141cf15067c2 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -23,10 +23,25 @@ #include <linux/fs.h> #include <asm/uaccess.h> #include "xfs.h" -#include "xfs_types.h" #include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir2_sf.h" #include "xfs_vfs.h" #include "xfs_vnode.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_error.h" #include "xfs_dfrag.h" #define _NATIVE_IOC(cmd, type) \ @@ -34,6 +49,7 @@ #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) #define BROKEN_X86_ALIGNMENT +#define _PACKED __attribute__((packed)) /* on ia32 l_start is on a 32-bit boundary */ typedef struct xfs_flock64_32 { __s16 l_type; @@ -75,35 +91,276 @@ xfs_ioctl32_flock( return (unsigned long)p; } +typedef struct compat_xfs_fsop_geom_v1 { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version 
flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; + +#define XFS_IOC_FSGEOMETRY_V1_32 \ + _IOR ('X', 100, struct compat_xfs_fsop_geom_v1) + +STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) +{ + compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; + xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p)); + + if (copy_in_user(p, p32, sizeof(*p32))) + return -EFAULT; + return (unsigned long)p; +} + +typedef struct compat_xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} __attribute__((packed)) compat_xfs_inogrp_t; + +STATIC int xfs_inumbers_fmt_compat( + void __user *ubuffer, + const xfs_inogrp_t *buffer, + long count, + long *written) +{ + compat_xfs_inogrp_t *p32 = ubuffer; + long i; + + for (i = 0; i < count; i++) { + if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || + put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || + put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) + return -EFAULT; + } + *written = count * sizeof(*p32); + return 0; +} + #else -typedef struct xfs_fsop_bulkreq32 { +#define xfs_inumbers_fmt_compat xfs_inumbers_fmt +#define _PACKED + +#endif + +/* XFS_IOC_FSBULKSTAT and friends */ + +typedef struct compat_xfs_bstime { + __s32 tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} compat_xfs_bstime_t; + +STATIC int xfs_bstime_store_compat( + compat_xfs_bstime_t __user *p32, + const xfs_bstime_t *p) +{ + __s32 sec32; + + sec32 = p->tv_sec; + if (put_user(sec32, &p32->tv_sec) || + put_user(p->tv_nsec, &p32->tv_nsec)) + return -EFAULT; + return 0; +} + +typedef struct compat_xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + 
__u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + compat_xfs_bstime_t bs_atime; /* access time */ + compat_xfs_bstime_t bs_mtime; /* modify time */ + compat_xfs_bstime_t bs_ctime; /* inode change time */ + int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16 bs_projid; /* project id */ + unsigned char bs_pad[14]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} _PACKED compat_xfs_bstat_t; + +STATIC int xfs_bulkstat_one_fmt_compat( + void __user *ubuffer, + const xfs_bstat_t *buffer) +{ + compat_xfs_bstat_t __user *p32 = ubuffer; + + if (put_user(buffer->bs_ino, &p32->bs_ino) || + put_user(buffer->bs_mode, &p32->bs_mode) || + put_user(buffer->bs_nlink, &p32->bs_nlink) || + put_user(buffer->bs_uid, &p32->bs_uid) || + put_user(buffer->bs_gid, &p32->bs_gid) || + put_user(buffer->bs_rdev, &p32->bs_rdev) || + put_user(buffer->bs_blksize, &p32->bs_blksize) || + put_user(buffer->bs_size, &p32->bs_size) || + xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || + xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || + xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || + put_user(buffer->bs_blocks, &p32->bs_blocks) || + put_user(buffer->bs_xflags, &p32->bs_xflags) || + put_user(buffer->bs_extsize, &p32->bs_extsize) || + put_user(buffer->bs_extents, &p32->bs_extents) || + put_user(buffer->bs_gen, &p32->bs_gen) || + put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || + put_user(buffer->bs_dmstate, &p32->bs_dmstate) || + put_user(buffer->bs_aextents, &p32->bs_aextents)) + return -EFAULT; + return sizeof(*p32); +} + + 
+ +typedef struct compat_xfs_fsop_bulkreq { compat_uptr_t lastip; /* last inode # pointer */ __s32 icount; /* count of entries in buffer */ compat_uptr_t ubuffer; /* user buffer for inode desc. */ - __s32 ocount; /* output count pointer */ -} xfs_fsop_bulkreq32_t; + compat_uptr_t ocount; /* output count pointer */ +} compat_xfs_fsop_bulkreq_t; -STATIC unsigned long -xfs_ioctl32_bulkstat( - unsigned long arg) +#define XFS_IOC_FSBULKSTAT_32 \ + _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ + _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS_32 \ + _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) + +/* copied from xfs_ioctl.c */ +STATIC int +xfs_ioc_bulkstat_compat( + xfs_mount_t *mp, + unsigned int cmd, + void __user *arg) { - xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg; - xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p)); + compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg; u32 addr; + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (get_user(addr, &p32->lastip)) + return -EFAULT; + bulkreq.lastip = compat_ptr(addr); + if (get_user(bulkreq.icount, &p32->icount) || + get_user(addr, &p32->ubuffer)) + return -EFAULT; + bulkreq.ubuffer = compat_ptr(addr); + if (get_user(addr, &p32->ocount)) + return -EFAULT; + bulkreq.ocount = compat_ptr(addr); + + if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + return -XFS_ERROR(EFAULT); + + if ((count = bulkreq.icount) <= 0) + return -XFS_ERROR(EINVAL); + + if (cmd == XFS_IOC_FSINUMBERS) + error = xfs_inumbers(mp, &inlast, &count, + bulkreq.ubuffer, xfs_inumbers_fmt_compat); + else { + /* 
declare a var to get a warning in case the type changes */ + bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat; + error = xfs_bulkstat(mp, &inlast, &count, + xfs_bulkstat_one, formatter, + sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, + BULKSTAT_FG_QUICK, &done); + } + if (error) + return -error; + + if (bulkreq.ocount != NULL) { + if (copy_to_user(bulkreq.lastip, &inlast, + sizeof(xfs_ino_t))) + return -XFS_ERROR(EFAULT); + + if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) + return -XFS_ERROR(EFAULT); + } + + return 0; +} + + + +typedef struct compat_xfs_fsop_handlereq { + __u32 fd; /* fd for FD_TO_HANDLE */ + compat_uptr_t path; /* user pathname */ + __u32 oflags; /* open flags */ + compat_uptr_t ihandle; /* user supplied handle */ + __u32 ihandlen; /* user supplied length */ + compat_uptr_t ohandle; /* user buffer for handle */ + compat_uptr_t ohandlen; /* user buffer length */ +} compat_xfs_fsop_handlereq_t; + +#define XFS_IOC_PATH_TO_FSHANDLE_32 \ + _IOWR('X', 104, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_PATH_TO_HANDLE_32 \ + _IOWR('X', 105, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_FD_TO_HANDLE_32 \ + _IOWR('X', 106, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_OPEN_BY_HANDLE_32 \ + _IOWR('X', 107, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_READLINK_BY_HANDLE_32 \ + _IOWR('X', 108, struct compat_xfs_fsop_handlereq) + +STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg) +{ + compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg; + xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p)); + u32 addr; - if (get_user(addr, &p32->lastip) || - put_user(compat_ptr(addr), &p->lastip) || - copy_in_user(&p->icount, &p32->icount, sizeof(s32)) || - get_user(addr, &p32->ubuffer) || - put_user(compat_ptr(addr), &p->ubuffer) || - get_user(addr, &p32->ocount) || - put_user(compat_ptr(addr), &p->ocount)) + if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) || + get_user(addr, &p32->path) || + 
put_user(compat_ptr(addr), &p->path) || + copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) || + get_user(addr, &p32->ihandle) || + put_user(compat_ptr(addr), &p->ihandle) || + copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) || + get_user(addr, &p32->ohandle) || + put_user(compat_ptr(addr), &p->ohandle) || + get_user(addr, &p32->ohandlen) || + put_user(compat_ptr(addr), &p->ohandlen)) return -EFAULT; return (unsigned long)p; } -#endif + STATIC long xfs_compat_ioctl( @@ -118,7 +375,6 @@ xfs_compat_ioctl( switch (cmd) { case XFS_IOC_DIOINFO: - case XFS_IOC_FSGEOMETRY_V1: case XFS_IOC_FSGEOMETRY: case XFS_IOC_GETVERSION: case XFS_IOC_GETXFLAGS: @@ -131,12 +387,7 @@ xfs_compat_ioctl( case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: /* not handled - case XFS_IOC_FD_TO_HANDLE: - case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_FSHANDLE: - case XFS_IOC_OPEN_BY_HANDLE: case XFS_IOC_FSSETDM_BY_HANDLE: - case XFS_IOC_READLINK_BY_HANDLE: case XFS_IOC_ATTRLIST_BY_HANDLE: case XFS_IOC_ATTRMULTI_BY_HANDLE: */ @@ -166,6 +417,10 @@ xfs_compat_ioctl( arg = xfs_ioctl32_flock(arg); cmd = _NATIVE_IOC(cmd, struct xfs_flock64); break; + case XFS_IOC_FSGEOMETRY_V1_32: + arg = xfs_ioctl32_geom_v1(arg); + cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1); + break; #else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: @@ -176,18 +431,28 @@ xfs_compat_ioctl( case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: + case XFS_IOC_FSGEOMETRY_V1: break; /* xfs_bstat_t still has wrong u32 vs u64 alignment */ case XFS_IOC_SWAPEXT: break; - case XFS_IOC_FSBULKSTAT_SINGLE: - case XFS_IOC_FSBULKSTAT: - case XFS_IOC_FSINUMBERS: - arg = xfs_ioctl32_bulkstat(arg); - break; #endif + case XFS_IOC_FSBULKSTAT_32: + case XFS_IOC_FSBULKSTAT_SINGLE_32: + case XFS_IOC_FSINUMBERS_32: + cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq); + return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount, + cmd, (void*)arg); + case XFS_IOC_FD_TO_HANDLE_32: + case 
XFS_IOC_PATH_TO_HANDLE_32: + case XFS_IOC_PATH_TO_FSHANDLE_32: + case XFS_IOC_OPEN_BY_HANDLE_32: + case XFS_IOC_READLINK_BY_HANDLE_32: + arg = xfs_ioctl32_fshandle(arg); + cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); + break; default: return -ENOIOCTLCMD; } diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 715adad7dd4d..330c4ba9d404 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -101,7 +101,6 @@ * Feature macros (disable/enable) */ #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ -#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ #define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ #ifdef CONFIG_SMP #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ @@ -124,6 +123,7 @@ #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val #define xfs_rotorstep xfs_params.rotorstep.val #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val +#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val #define current_cpu() (raw_smp_processor_id()) #define current_pid() (current->pid) diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 86fb671a8bcc..765ec16a6e39 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -159,7 +159,7 @@ xfs_iozero( if (status) goto unlock; - memclear_highpage_flush(page, offset, bytes); + zero_user_page(page, offset, bytes, KM_USER0); status = mapping->a_ops->commit_write(NULL, page, offset, offset + bytes); @@ -287,50 +287,6 @@ xfs_read( } ssize_t -xfs_sendfile( - bhv_desc_t *bdp, - struct file *filp, - loff_t *offset, - int ioflags, - size_t count, - read_actor_t actor, - void *target, - cred_t *credp) -{ - xfs_inode_t *ip = XFS_BHVTOI(bdp); - xfs_mount_t *mp = ip->i_mount; - ssize_t ret; - - XFS_STATS_INC(xs_read_calls); - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_SHARED); - - if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, 
DM_EVENT_READ) && - (!(ioflags & IO_INVIS))) { - bhv_vrwlock_t locktype = VRWLOCK_READ; - int error; - - error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), - *offset, count, - FILP_DELAY_FLAG(filp), &locktype); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return -error; - } - } - xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore, - (void *)(unsigned long)target, count, *offset, ioflags); - ret = generic_file_sendfile(filp, offset, count, actor, target); - if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); - - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return ret; -} - -ssize_t xfs_splice_read( bhv_desc_t *bdp, struct file *infilp, diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index 7ac51b1d2161..7c60a1eed88b 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *, const struct iovec *, unsigned int, loff_t *, int, struct cred *); -extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, - loff_t *, int, size_t, read_actor_t, - void *, struct cred *); extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *, struct pipe_inode_info *, size_t, int, int, struct cred *); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bf9a9d5909be..06894cf00b12 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -547,7 +547,8 @@ vfs_sync_worker( if (!(vfsp->vfs_flag & VFS_RDONLY)) error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \ - SYNC_ATTR | SYNC_REFCACHE, NULL); + SYNC_ATTR | SYNC_REFCACHE | SYNC_SUPER, + NULL); vfsp->vfs_sync_seq++; wake_up(&vfsp->vfs_wait_single_sync_task); } @@ -663,7 +664,7 @@ xfs_fs_sync_super( * occur here so don't bother flushing the buftarg (i.e * SYNC_QUIESCE) because it'll just get dirty again. 
*/ - flags = SYNC_FSDATA | SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT; + flags = SYNC_DATA_QUIESCE; } else flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0); diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index cd6eaa44aa2b..bb997d75c05c 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -210,6 +210,17 @@ static ctl_table xfs_table[] = { .extra1 = &xfs_params.inherit_nodfrg.min, .extra2 = &xfs_params.inherit_nodfrg.max }, + { + .ctl_name = XFS_FILESTREAM_TIMER, + .procname = "filestream_centisecs", + .data = &xfs_params.fstrm_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xfs_params.fstrm_timer.min, + .extra2 = &xfs_params.fstrm_timer.max, + }, /* please keep this the last entry */ #ifdef CONFIG_PROC_FS { diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h index a631fb8cc5ac..98b97e399d6f 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ b/fs/xfs/linux-2.6/xfs_sysctl.h @@ -47,6 +47,7 @@ typedef struct xfs_param { xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ + xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. 
*/ } xfs_param_t; /* @@ -86,6 +87,7 @@ enum { XFS_INHERIT_NOSYM = 19, XFS_ROTORSTEP = 20, XFS_INHERIT_NODFRG = 21, + XFS_FILESTREAM_TIMER = 22, }; extern xfs_param_t xfs_params; diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index e2c2ce98ab5b..dca3481aaafa 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -92,6 +92,21 @@ typedef enum { #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ #define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ +#define SYNC_SUPER 0x0200 /* flush superblock to disk */ + +/* + * When remounting a filesystem read-only or freezing the filesystem, + * we have two phases to execute. The first phase is syncing the data + * before we quiesce the filesystem, and the second is flushing all the + * inodes out after we've waited for all the transactions created by + * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE + * to ensure that the inodes are written to their location on disk + * rather than just existing in transactions in the log. This means + * after a quiesce there is no log replay required to write the inodes + * to disk (this is the main difference between a sync and a quiesce). 
+ */ +#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) +#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) #define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ #define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index d1b2d01843d1..5742d65f0785 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -129,19 +129,13 @@ typedef enum bhv_vchange { VCHANGE_FLAGS_IOEXCL_COUNT = 4 } bhv_vchange_t; -typedef enum { L_FALSE, L_TRUE } lastclose_t; - typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); -typedef int (*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *); typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, const struct iovec *, unsigned int, loff_t *, int, struct cred *); typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *, const struct iovec *, unsigned int, loff_t *, int, struct cred *); -typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, - loff_t *, int, size_t, read_actor_t, - void *, struct cred *); typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *, struct pipe_inode_info *, size_t, int, int, struct cred *); @@ -203,10 +197,8 @@ typedef int (*vop_iflush_t)(bhv_desc_t *, int); typedef struct bhv_vnodeops { bhv_position_t vn_position; /* position within behavior chain */ vop_open_t vop_open; - vop_close_t vop_close; vop_read_t vop_read; vop_write_t vop_write; - vop_sendfile_t vop_sendfile; vop_splice_read_t vop_splice_read; vop_splice_write_t vop_splice_write; vop_ioctl_t vop_ioctl; @@ -249,13 +241,10 @@ typedef struct bhv_vnodeops { #define VNHEAD(vp) ((vp)->v_bh.bh_first) #define VOP(op, vp) (*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op) #define bhv_vop_open(vp, cr) VOP(vop_open, vp)(VNHEAD(vp),cr) -#define bhv_vop_close(vp, f,last,cr) VOP(vop_close, vp)(VNHEAD(vp),f,last,cr) #define 
bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr) \ VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) #define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \ VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) -#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr) \ - VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr) #define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \ VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr) #define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \ diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 3e4a8ad8a34c..7def4c699343 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -62,10 +62,9 @@ uint ndquot; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; -static kmem_shaker_t xfs_qm_shaker; +static struct shrinker *xfs_qm_shaker; static cred_t xfs_zerocr; -static xfs_inode_t xfs_zeroino; STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); @@ -150,7 +149,7 @@ xfs_Gqm_init(void) } else xqm->qm_dqzone = qm_dqzone; - xfs_qm_shaker = kmem_shake_register(xfs_qm_shake); + xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake); /* * The t_dqinfo portion of transactions. 
@@ -182,7 +181,7 @@ xfs_qm_destroy( ASSERT(xqm != NULL); ASSERT(xqm->qm_nrefs == 0); - kmem_shake_deregister(xfs_qm_shaker); + remove_shrinker(xfs_qm_shaker); hsize = xqm->qm_dqhashmask + 1; for (i = 0; i < hsize; i++) { xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); @@ -1415,7 +1414,7 @@ xfs_qm_qino_alloc( return error; } - if ((error = xfs_dir_ialloc(&tp, &xfs_zeroino, S_IFREG, 1, 0, + if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, &xfs_zerocr, 0, 1, ip, &committed))) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index bf0a12040b13..b5a7d92c6843 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -38,6 +38,7 @@ #define XFS_RW_TRACE 1 #define XFS_BUF_TRACE 1 #define XFS_VNODE_TRACE 1 +#define XFS_FILESTREAMS_TRACE 1 #endif #include <linux-2.6/xfs_linux.h> diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 9ece7f87ec5b..51c09c114a20 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -68,6 +68,7 @@ typedef struct xfs_agf { __be32 agf_flcount; /* count of blocks in freelist */ __be32 agf_freeblks; /* total free blocks */ __be32 agf_longest; /* longest free space */ + __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ } xfs_agf_t; #define XFS_AGF_MAGICNUM 0x00000001 @@ -81,7 +82,8 @@ typedef struct xfs_agf { #define XFS_AGF_FLCOUNT 0x00000100 #define XFS_AGF_FREEBLKS 0x00000200 #define XFS_AGF_LONGEST 0x00000400 -#define XFS_AGF_NUM_BITS 11 +#define XFS_AGF_BTREEBLKS 0x00000800 +#define XFS_AGF_NUM_BITS 12 #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) /* disk block (xfs_daddr_t) in the AG */ @@ -186,12 +188,15 @@ typedef struct xfs_perag __uint32_t pagf_flcount; /* count of blocks in freelist */ xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_longest; /* longest free space */ + __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ xfs_agino_t pagi_freecount; /* number of free inodes */ + xfs_agino_t pagi_count; /* number of allocated inodes */ + 
int pagb_count; /* pagb slots in use */ #ifdef __KERNEL__ lock_t pagb_lock; /* lock for pagb_list */ #endif - int pagb_count; /* pagb slots in use */ xfs_perag_busy_t *pagb_list; /* unstable blocks */ + atomic_t pagf_fstrms; /* # of filestreams active in this AG */ } xfs_perag_t; #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 8e9a40aa0cd3..012a649a19c3 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -55,17 +55,17 @@ xfs_alloc_search_busy(xfs_trans_t *tp, ktrace_t *xfs_alloc_trace_buf; #define TRACE_ALLOC(s,a) \ - xfs_alloc_trace_alloc(fname, s, a, __LINE__) + xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__) #define TRACE_FREE(s,a,b,x,f) \ - xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__) + xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__) #define TRACE_MODAGF(s,a,f) \ - xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__) -#define TRACE_BUSY(fname,s,ag,agb,l,sl,tp) \ - xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) -#define TRACE_UNBUSY(fname,s,ag,sl,tp) \ - xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) -#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) \ - xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) + xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__) +#define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp) \ + xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) +#define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp) \ + xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) +#define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \ + xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) #else #define TRACE_ALLOC(s,a) #define TRACE_FREE(s,a,b,x,f) @@ -420,7 +420,7 @@ xfs_alloc_read_agfl( */ STATIC void 
xfs_alloc_trace_alloc( - char *name, /* function tag string */ + const char *name, /* function tag string */ char *str, /* additional string */ xfs_alloc_arg_t *args, /* allocation argument structure */ int line) /* source line number */ @@ -453,7 +453,7 @@ xfs_alloc_trace_alloc( */ STATIC void xfs_alloc_trace_free( - char *name, /* function tag string */ + const char *name, /* function tag string */ char *str, /* additional string */ xfs_mount_t *mp, /* file system mount point */ xfs_agnumber_t agno, /* allocation group number */ @@ -479,7 +479,7 @@ xfs_alloc_trace_free( */ STATIC void xfs_alloc_trace_modagf( - char *name, /* function tag string */ + const char *name, /* function tag string */ char *str, /* additional string */ xfs_mount_t *mp, /* file system mount point */ xfs_agf_t *agf, /* new agf value */ @@ -507,7 +507,7 @@ xfs_alloc_trace_modagf( STATIC void xfs_alloc_trace_busy( - char *name, /* function tag string */ + const char *name, /* function tag string */ char *str, /* additional string */ xfs_mount_t *mp, /* file system mount point */ xfs_agnumber_t agno, /* allocation group number */ @@ -549,9 +549,6 @@ xfs_alloc_ag_vextent( xfs_alloc_arg_t *args) /* argument structure for allocation */ { int error=0; -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_alloc_ag_vextent"; -#endif ASSERT(args->minlen > 0); ASSERT(args->maxlen > 0); @@ -635,9 +632,6 @@ xfs_alloc_ag_vextent_exact( xfs_agblock_t fbno; /* start block of found extent */ xfs_agblock_t fend; /* end block of found extent */ xfs_extlen_t flen; /* length of found extent */ -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_alloc_ag_vextent_exact"; -#endif int i; /* success/failure of operation */ xfs_agblock_t maxend; /* end of maximal extent */ xfs_agblock_t minend; /* end of minimal extent */ @@ -737,9 +731,6 @@ xfs_alloc_ag_vextent_near( xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ xfs_btree_cur_t 
*cnt_cur; /* cursor for count btree */ -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_alloc_ag_vextent_near"; -#endif xfs_agblock_t gtbno; /* start bno of right side entry */ xfs_agblock_t gtbnoa; /* aligned ... */ xfs_extlen_t gtdiff; /* difference to right side entry */ @@ -1270,9 +1261,6 @@ xfs_alloc_ag_vextent_size( int error; /* error result */ xfs_agblock_t fbno; /* start of found freespace */ xfs_extlen_t flen; /* length of found freespace */ -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_alloc_ag_vextent_size"; -#endif int i; /* temp status variable */ xfs_agblock_t rbno; /* returned block number */ xfs_extlen_t rlen; /* length of returned extent */ @@ -1427,9 +1415,6 @@ xfs_alloc_ag_vextent_small( int error; xfs_agblock_t fbno; xfs_extlen_t flen; -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_alloc_ag_vextent_small"; -#endif int i; if ((error = xfs_alloc_decrement(ccur, 0, &i))) @@ -1447,7 +1432,8 @@ xfs_alloc_ag_vextent_small( else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) > args->minleft)) { - if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno))) + error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); + if (error) goto error0; if (fbno != NULLAGBLOCK) { if (args->userdata) { @@ -1515,9 +1501,6 @@ xfs_free_ag_extent( xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ int error; /* error return value */ -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_free_ag_extent"; -#endif xfs_agblock_t gtbno; /* start of right neighbor block */ xfs_extlen_t gtlen; /* length of right neighbor block */ int haveleft; /* have a left neighbor block */ @@ -1923,7 +1906,8 @@ xfs_alloc_fix_freelist( while (be32_to_cpu(agf->agf_flcount) > need) { xfs_buf_t *bp; - if ((error = xfs_alloc_get_freelist(tp, agbp, &bno))) + error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); + if (error) return error; 
if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) return error; @@ -1973,8 +1957,9 @@ xfs_alloc_fix_freelist( * Put each allocated block on the list. */ for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { - if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp, - bno))) + error = xfs_alloc_put_freelist(tp, agbp, + agflbp, bno, 0); + if (error) return error; } } @@ -1991,16 +1976,15 @@ int /* error */ xfs_alloc_get_freelist( xfs_trans_t *tp, /* transaction pointer */ xfs_buf_t *agbp, /* buffer containing the agf structure */ - xfs_agblock_t *bnop) /* block address retrieved from freelist */ + xfs_agblock_t *bnop, /* block address retrieved from freelist */ + int btreeblk) /* destination is a AGF btree */ { xfs_agf_t *agf; /* a.g. freespace structure */ xfs_agfl_t *agfl; /* a.g. freelist structure */ xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ xfs_agblock_t bno; /* block number returned */ int error; -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_alloc_get_freelist"; -#endif + int logflags; xfs_mount_t *mp; /* mount structure */ xfs_perag_t *pag; /* per allocation group data */ @@ -2032,8 +2016,16 @@ xfs_alloc_get_freelist( be32_add(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; - TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); - xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); + + logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; + if (btreeblk) { + be32_add(&agf->agf_btreeblks, 1); + pag->pagf_btreeblks++; + logflags |= XFS_AGF_BTREEBLKS; + } + + TRACE_MODAGF(NULL, agf, logflags); + xfs_alloc_log_agf(tp, agbp, logflags); *bnop = bno; /* @@ -2071,6 +2063,7 @@ xfs_alloc_log_agf( offsetof(xfs_agf_t, agf_flcount), offsetof(xfs_agf_t, agf_freeblks), offsetof(xfs_agf_t, agf_longest), + offsetof(xfs_agf_t, agf_btreeblks), sizeof(xfs_agf_t) }; @@ -2106,15 +2099,14 @@ xfs_alloc_put_freelist( xfs_trans_t *tp, /* transaction pointer */ xfs_buf_t *agbp, /* buffer for a.g. 
freelist header */ xfs_buf_t *agflbp,/* buffer for a.g. free block array */ - xfs_agblock_t bno) /* block being freed */ + xfs_agblock_t bno, /* block being freed */ + int btreeblk) /* block came from a AGF btree */ { xfs_agf_t *agf; /* a.g. freespace structure */ xfs_agfl_t *agfl; /* a.g. free block array */ __be32 *blockp;/* pointer to array entry */ int error; -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_alloc_put_freelist"; -#endif + int logflags; xfs_mount_t *mp; /* mount structure */ xfs_perag_t *pag; /* per allocation group data */ @@ -2132,11 +2124,22 @@ xfs_alloc_put_freelist( be32_add(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; + + logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; + if (btreeblk) { + be32_add(&agf->agf_btreeblks, -1); + pag->pagf_btreeblks--; + logflags |= XFS_AGF_BTREEBLKS; + } + + TRACE_MODAGF(NULL, agf, logflags); + xfs_alloc_log_agf(tp, agbp, logflags); + ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; *blockp = cpu_to_be32(bno); - TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); - xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); + TRACE_MODAGF(NULL, agf, logflags); + xfs_alloc_log_agf(tp, agbp, logflags); xfs_trans_log_buf(tp, agflbp, (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + @@ -2196,6 +2199,7 @@ xfs_alloc_read_agf( pag = &mp->m_perag[agno]; if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); + pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); pag->pagf_longest = be32_to_cpu(agf->agf_longest); pag->pagf_levels[XFS_BTNUM_BNOi] = @@ -2235,9 +2239,6 @@ xfs_alloc_vextent( xfs_agblock_t agsize; /* allocation group size */ int error; int flags; /* XFS_ALLOC_FLAG_... 
locking flags */ -#ifdef XFS_ALLOC_TRACE - static char fname[] = "xfs_alloc_vextent"; -#endif xfs_extlen_t minleft;/* minimum left value, temp copy */ xfs_mount_t *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 5a4256120ccc..5aec15d0651e 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -136,7 +136,8 @@ int /* error */ xfs_alloc_get_freelist( struct xfs_trans *tp, /* transaction pointer */ struct xfs_buf *agbp, /* buffer containing the agf structure */ - xfs_agblock_t *bnop); /* block address retrieved from freelist */ + xfs_agblock_t *bnop, /* block address retrieved from freelist */ + int btreeblk); /* destination is a AGF btree */ /* * Log the given fields from the agf structure. @@ -165,7 +166,8 @@ xfs_alloc_put_freelist( struct xfs_trans *tp, /* transaction pointer */ struct xfs_buf *agbp, /* buffer for a.g. freelist header */ struct xfs_buf *agflbp,/* buffer for a.g. free block array */ - xfs_agblock_t bno); /* block being freed */ + xfs_agblock_t bno, /* block being freed */ + int btreeblk); /* owner was a AGF btree */ /* * Read in the allocation group header (free/alloc section). diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 74cadf95d4e8..1603ce595853 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -226,8 +226,9 @@ xfs_alloc_delrec( /* * Put this buffer/block on the ag's freelist. */ - if ((error = xfs_alloc_put_freelist(cur->bc_tp, - cur->bc_private.a.agbp, NULL, bno))) + error = xfs_alloc_put_freelist(cur->bc_tp, + cur->bc_private.a.agbp, NULL, bno, 1); + if (error) return error; /* * Since blocks move to the free list without the @@ -549,8 +550,9 @@ xfs_alloc_delrec( /* * Free the deleting block by putting it on the freelist. 
*/ - if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp, - NULL, rbno))) + error = xfs_alloc_put_freelist(cur->bc_tp, + cur->bc_private.a.agbp, NULL, rbno, 1); + if (error) return error; /* * Since blocks move to the free list without the coordination @@ -1320,8 +1322,9 @@ xfs_alloc_newroot( /* * Get a buffer from the freelist blocks, for the new root. */ - if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, - &nbno))) + error = xfs_alloc_get_freelist(cur->bc_tp, + cur->bc_private.a.agbp, &nbno, 1); + if (error) return error; /* * None available, we fail. @@ -1604,8 +1607,9 @@ xfs_alloc_split( * Allocate the new block from the freelist. * If we can't do it, we're toast. Give up. */ - if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, - &rbno))) + error = xfs_alloc_get_freelist(cur->bc_tp, + cur->bc_private.a.agbp, &rbno, 1); + if (error) return error; if (rbno == NULLAGBLOCK) { *stat = 0; diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index 1afe07f67e3b..fab0b6d5a41b 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -66,44 +66,6 @@ static const char xfs_highbit[256] = { #endif /* - * Count of bits set in byte, 0..8. - */ -static const char xfs_countbit[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, /* 00 .. 07 */ - 1, 2, 2, 3, 2, 3, 3, 4, /* 08 .. 0f */ - 1, 2, 2, 3, 2, 3, 3, 4, /* 10 .. 17 */ - 2, 3, 3, 4, 3, 4, 4, 5, /* 18 .. 1f */ - 1, 2, 2, 3, 2, 3, 3, 4, /* 20 .. 27 */ - 2, 3, 3, 4, 3, 4, 4, 5, /* 28 .. 2f */ - 2, 3, 3, 4, 3, 4, 4, 5, /* 30 .. 37 */ - 3, 4, 4, 5, 4, 5, 5, 6, /* 38 .. 3f */ - 1, 2, 2, 3, 2, 3, 3, 4, /* 40 .. 47 */ - 2, 3, 3, 4, 3, 4, 4, 5, /* 48 .. 4f */ - 2, 3, 3, 4, 3, 4, 4, 5, /* 50 .. 57 */ - 3, 4, 4, 5, 4, 5, 5, 6, /* 58 .. 5f */ - 2, 3, 3, 4, 3, 4, 4, 5, /* 60 .. 67 */ - 3, 4, 4, 5, 4, 5, 5, 6, /* 68 .. 6f */ - 3, 4, 4, 5, 4, 5, 5, 6, /* 70 .. 77 */ - 4, 5, 5, 6, 5, 6, 6, 7, /* 78 .. 7f */ - 1, 2, 2, 3, 2, 3, 3, 4, /* 80 .. 87 */ - 2, 3, 3, 4, 3, 4, 4, 5, /* 88 .. 
8f */ - 2, 3, 3, 4, 3, 4, 4, 5, /* 90 .. 97 */ - 3, 4, 4, 5, 4, 5, 5, 6, /* 98 .. 9f */ - 2, 3, 3, 4, 3, 4, 4, 5, /* a0 .. a7 */ - 3, 4, 4, 5, 4, 5, 5, 6, /* a8 .. af */ - 3, 4, 4, 5, 4, 5, 5, 6, /* b0 .. b7 */ - 4, 5, 5, 6, 5, 6, 6, 7, /* b8 .. bf */ - 2, 3, 3, 4, 3, 4, 4, 5, /* c0 .. c7 */ - 3, 4, 4, 5, 4, 5, 5, 6, /* c8 .. cf */ - 3, 4, 4, 5, 4, 5, 5, 6, /* d0 .. d7 */ - 4, 5, 5, 6, 5, 6, 6, 7, /* d8 .. df */ - 3, 4, 4, 5, 4, 5, 5, 6, /* e0 .. e7 */ - 4, 5, 5, 6, 5, 6, 6, 7, /* e8 .. ef */ - 4, 5, 5, 6, 5, 6, 6, 7, /* f0 .. f7 */ - 5, 6, 6, 7, 6, 7, 7, 8, /* f8 .. ff */ -}; - -/* * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. */ inline int @@ -167,56 +129,21 @@ xfs_highbit64( /* - * Count the number of bits set in the bitmap starting with bit - * start_bit. Size is the size of the bitmap in words. - * - * Do the counting by mapping a byte value to the number of set - * bits for that value using the xfs_countbit array, i.e. - * xfs_countbit[0] == 0, xfs_countbit[1] == 1, xfs_countbit[2] == 1, - * xfs_countbit[3] == 2, etc. + * Return whether bitmap is empty. + * Size is number of words in the bitmap, which is padded to word boundary + * Returns 1 for empty, 0 for non-empty. */ int -xfs_count_bits(uint *map, uint size, uint start_bit) +xfs_bitmap_empty(uint *map, uint size) { - register int bits; - register unsigned char *bytep; - register unsigned char *end_map; - int byte_bit; - - bits = 0; - end_map = (char*)(map + size); - bytep = (char*)(map + (start_bit & ~0x7)); - byte_bit = start_bit & 0x7; - - /* - * If the caller fell off the end of the map, return 0. - */ - if (bytep >= end_map) { - return (0); - } - - /* - * If start_bit is not byte aligned, then process the - * first byte separately. - */ - if (byte_bit != 0) { - /* - * Shift off the bits we don't want to look at, - * before indexing into xfs_countbit. 
- */ - bits += xfs_countbit[(*bytep >> byte_bit)]; - bytep++; - } + uint i; + uint ret = 0; - /* - * Count the bits in each byte until the end of the bitmap. - */ - while (bytep < end_map) { - bits += xfs_countbit[*bytep]; - bytep++; + for (i = 0; i < size; i++) { + ret |= map[i]; } - return (bits); + return (ret == 0); } /* diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 0bbe56817542..082641a9782c 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -55,8 +55,8 @@ extern int xfs_lowbit64(__uint64_t v); /* Get high bit set out of 64-bit argument, -1 if none set */ extern int xfs_highbit64(__uint64_t); -/* Count set bits in map starting with start_bit */ -extern int xfs_count_bits(uint *map, uint size, uint start_bit); +/* Return whether bitmap is empty (1 == empty) */ +extern int xfs_bitmap_empty(uint *map, uint size); /* Count continuous one bits in map starting with start_bit */ extern int xfs_contig_bits(uint *map, uint size, uint start_bit); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index b1ea26e40aaf..94b5c5fe2681 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -52,6 +52,7 @@ #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_buf_item.h" +#include "xfs_filestream.h" #ifdef DEBUG @@ -277,7 +278,7 @@ xfs_bmap_isaeof( STATIC void xfs_bmap_trace_addentry( int opcode, /* operation */ - char *fname, /* function name */ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry(ies) */ @@ -291,7 +292,7 @@ xfs_bmap_trace_addentry( */ STATIC void xfs_bmap_trace_delete( - char *fname, /* function name */ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry(entries) deleted */ @@ -304,7 +305,7 @@ xfs_bmap_trace_delete( */ STATIC void xfs_bmap_trace_insert( - char *fname, /* function name */ + const char *fname, /* 
function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry(entries) inserted */ @@ -318,7 +319,7 @@ xfs_bmap_trace_insert( */ STATIC void xfs_bmap_trace_post_update( - char *fname, /* function name */ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry updated */ @@ -329,17 +330,25 @@ xfs_bmap_trace_post_update( */ STATIC void xfs_bmap_trace_pre_update( - char *fname, /* function name */ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry to be updated */ int whichfork); /* data or attr fork */ +#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \ + xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w) +#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \ + xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w) +#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \ + xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w) +#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \ + xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w) #else -#define xfs_bmap_trace_delete(f,d,ip,i,c,w) -#define xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w) -#define xfs_bmap_trace_post_update(f,d,ip,i,w) -#define xfs_bmap_trace_pre_update(f,d,ip,i,w) +#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) +#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) +#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) +#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) #endif /* XFS_BMAP_TRACE */ /* @@ -531,9 +540,6 @@ xfs_bmap_add_extent( xfs_filblks_t da_new; /* new count del alloc blocks used */ xfs_filblks_t da_old; /* old count del alloc blocks used */ int error; /* error return value */ -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_bmap_add_extent"; -#endif xfs_ifork_t *ifp; /* inode fork ptr */ int logflags; /* returned value */ xfs_extnum_t nextents; /* number of extents 
in file now */ @@ -551,8 +557,8 @@ xfs_bmap_add_extent( * already extents in the list. */ if (nextents == 0) { - xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new, - NULL, whichfork); + XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL, + whichfork); xfs_iext_insert(ifp, 0, 1, new); ASSERT(cur == NULL); ifp->if_lastex = 0; @@ -710,9 +716,6 @@ xfs_bmap_add_extent_delay_real( int diff; /* temp value */ xfs_bmbt_rec_t *ep; /* extent entry for idx */ int error; /* error return value */ -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_bmap_add_extent_delay_real"; -#endif int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_fileoff_t new_endoff; /* end offset of new entry */ @@ -808,15 +811,14 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left and right neighbors are both contiguous with new. */ - xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, + XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, - XFS_DATA_FORK); - xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2, + XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1, XFS_DATA_FORK); + XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); xfs_iext_remove(ifp, idx, 2); ip->i_df.if_lastex = idx - 1; ip->i_d.di_nextents--; @@ -855,15 +857,14 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left neighbor is contiguous, the right is not. 
*/ - xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, + XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount); - xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, + XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx - 1; - xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); xfs_iext_remove(ifp, idx, 1); if (cur == NULL) rval = XFS_ILOG_DEXT; @@ -892,16 +893,13 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The right neighbor is contiguous, the left is not. */ - xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx, - XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_startblock(ep, new->br_startblock); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); - xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx, - XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); ip->i_df.if_lastex = idx; - xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); xfs_iext_remove(ifp, idx + 1, 1); if (cur == NULL) rval = XFS_ILOG_DEXT; @@ -931,11 +929,9 @@ xfs_bmap_add_extent_delay_real( * Neither the left nor right neighbors are contiguous with * the new one. 
*/ - xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx, - XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_startblock(ep, new->br_startblock); - xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx, - XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); ip->i_df.if_lastex = idx; ip->i_d.di_nextents++; if (cur == NULL) @@ -963,17 +959,14 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is contiguous. */ - xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); temp = PREV.br_blockcount - new->br_blockcount; - xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx, - XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, temp); ip->i_df.if_lastex = idx - 1; if (cur == NULL) @@ -995,8 +988,7 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), STARTBLOCKVAL(PREV.br_startblock)); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); - xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, - XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); *dnew = temp; /* DELTA: The boundary between two in-core extents moved. */ temp = LEFT.br_startoff; @@ -1009,11 +1001,11 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is not contiguous. 
*/ - xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_startoff(ep, new_endoff); temp = PREV.br_blockcount - new->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); - xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, + XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL, XFS_DATA_FORK); xfs_iext_insert(ifp, idx, 1, new); ip->i_df.if_lastex = idx; @@ -1046,8 +1038,7 @@ xfs_bmap_add_extent_delay_real( (cur ? cur->bc_private.b.allocated : 0)); ep = xfs_iext_get_ext(ifp, idx + 1); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); - xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK); *dnew = temp; /* DELTA: One in-core extent is split in two. */ temp = PREV.br_startoff; @@ -1060,17 +1051,14 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is contiguous with the new allocation. */ temp = PREV.br_blockcount - new->br_blockcount; - xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx, - XFS_DATA_FORK); - xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, temp); xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, RIGHT.br_state); - xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx + 1; if (cur == NULL) rval = XFS_ILOG_DEXT; @@ -1091,8 +1079,7 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), STARTBLOCKVAL(PREV.br_startblock)); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); - xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, - XFS_DATA_FORK); + 
XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); *dnew = temp; /* DELTA: The boundary between two in-core extents moved. */ temp = PREV.br_startoff; @@ -1106,10 +1093,10 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is not contiguous. */ temp = PREV.br_blockcount - new->br_blockcount; - xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, temp); - xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1, - new, NULL, XFS_DATA_FORK); + XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL, + XFS_DATA_FORK); xfs_iext_insert(ifp, idx + 1, 1, new); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; @@ -1141,7 +1128,7 @@ xfs_bmap_add_extent_delay_real( (cur ? cur->bc_private.b.allocated : 0)); ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); - xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK); *dnew = temp; /* DELTA: One in-core extent is split in two. */ temp = PREV.br_startoff; @@ -1155,7 +1142,7 @@ xfs_bmap_add_extent_delay_real( * This case is avoided almost all the time. 
*/ temp = new->br_startoff - PREV.br_startoff; - xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, temp); r[0] = *new; r[1].br_state = PREV.br_state; @@ -1163,7 +1150,7 @@ xfs_bmap_add_extent_delay_real( r[1].br_startoff = new_endoff; temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; r[1].br_blockcount = temp2; - xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], + XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1], XFS_DATA_FORK); xfs_iext_insert(ifp, idx + 1, 2, &r[0]); ip->i_df.if_lastex = idx + 1; @@ -1222,13 +1209,11 @@ xfs_bmap_add_extent_delay_real( } ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); - xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK); - xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2, - XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), NULLSTARTBLOCK((int)temp2)); - xfs_bmap_trace_post_update(fname, "0", ip, idx + 2, - XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK); *dnew = temp + temp2; /* DELTA: One in-core extent is split in three. */ temp = PREV.br_startoff; @@ -1287,9 +1272,6 @@ xfs_bmap_add_extent_unwritten_real( xfs_btree_cur_t *cur; /* btree cursor */ xfs_bmbt_rec_t *ep; /* extent entry for idx */ int error; /* error return value */ -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_bmap_add_extent_unwritten_real"; -#endif int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_fileoff_t new_endoff; /* end offset of new entry */ @@ -1390,15 +1372,14 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left and right neighbors are both contiguous with new. 
*/ - xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, + XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, - XFS_DATA_FORK); - xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2, + XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1, XFS_DATA_FORK); + XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); xfs_iext_remove(ifp, idx, 2); ip->i_df.if_lastex = idx - 1; ip->i_d.di_nextents -= 2; @@ -1441,15 +1422,14 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ - xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, + XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + PREV.br_blockcount); - xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, + XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx - 1; - xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); xfs_iext_remove(ifp, idx, 1); ip->i_d.di_nextents--; if (cur == NULL) @@ -1484,16 +1464,15 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The right neighbor is contiguous, the left is not. 
*/ - xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx, + XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); xfs_bmbt_set_state(ep, newext); - xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx, + XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); ip->i_df.if_lastex = idx; - xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, - XFS_DATA_FORK); + XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); xfs_iext_remove(ifp, idx + 1, 1); ip->i_d.di_nextents--; if (cur == NULL) @@ -1529,10 +1508,10 @@ xfs_bmap_add_extent_unwritten_real( * Neither the left nor right neighbors are contiguous with * the new one. */ - xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx, + XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_state(ep, newext); - xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx, + XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); ip->i_df.if_lastex = idx; if (cur == NULL) @@ -1559,21 +1538,21 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is contiguous. 
*/ - xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, + XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1, + XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); - xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx, + XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, + XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); ip->i_df.if_lastex = idx - 1; if (cur == NULL) @@ -1610,15 +1589,15 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is not contiguous. */ - xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK); ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); xfs_bmbt_set_startoff(ep, new_endoff); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); - xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK); - xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, + XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL, XFS_DATA_FORK); xfs_iext_insert(ifp, idx, 1, new); ip->i_df.if_lastex = idx; @@ -1653,18 +1632,18 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is contiguous with the new allocation. 
*/ - xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx, + XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); - xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1, + XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, + XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, newext); - xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, + XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx + 1; if (cur == NULL) @@ -1700,12 +1679,12 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is not contiguous. */ - xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); - xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1, - new, NULL, XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL, + XFS_DATA_FORK); xfs_iext_insert(ifp, idx + 1, 1, new); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; @@ -1744,17 +1723,17 @@ xfs_bmap_add_extent_unwritten_real( * newext. Contiguity is impossible here. * One extent becomes three extents. 
*/ - xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, new->br_startoff - PREV.br_startoff); - xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK); r[0] = *new; r[1].br_startoff = new_endoff; r[1].br_blockcount = PREV.br_startoff + PREV.br_blockcount - new_endoff; r[1].br_startblock = new->br_startblock + new->br_blockcount; r[1].br_state = oldext; - xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], + XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1], XFS_DATA_FORK); xfs_iext_insert(ifp, idx + 1, 2, &r[0]); ip->i_df.if_lastex = idx + 1; @@ -1845,9 +1824,6 @@ xfs_bmap_add_extent_hole_delay( int rsvd) /* OK to allocate reserved blocks */ { xfs_bmbt_rec_t *ep; /* extent record for idx */ -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_bmap_add_extent_hole_delay"; -#endif xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_filblks_t newlen=0; /* new indirect size */ @@ -1919,7 +1895,7 @@ xfs_bmap_add_extent_hole_delay( */ temp = left.br_blockcount + new->br_blockcount + right.br_blockcount; - xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1, + XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); oldlen = STARTBLOCKVAL(left.br_startblock) + @@ -1928,10 +1904,9 @@ xfs_bmap_add_extent_hole_delay( newlen = xfs_bmap_worst_indlen(ip, temp); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), NULLSTARTBLOCK((int)newlen)); - xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1, - XFS_DATA_FORK); - xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1, + XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, XFS_DATA_FORK); + XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK); xfs_iext_remove(ifp, idx, 1); ip->i_df.if_lastex = idx - 1; /* DELTA: Two in-core 
extents were replaced by one. */ @@ -1946,7 +1921,7 @@ xfs_bmap_add_extent_hole_delay( * Merge the new allocation with the left neighbor. */ temp = left.br_blockcount + new->br_blockcount; - xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, + XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, XFS_DATA_FORK); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); oldlen = STARTBLOCKVAL(left.br_startblock) + @@ -1954,7 +1929,7 @@ xfs_bmap_add_extent_hole_delay( newlen = xfs_bmap_worst_indlen(ip, temp); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), NULLSTARTBLOCK((int)newlen)); - xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, + XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, XFS_DATA_FORK); ip->i_df.if_lastex = idx - 1; /* DELTA: One in-core extent grew into a hole. */ @@ -1968,14 +1943,14 @@ xfs_bmap_add_extent_hole_delay( * on the right. * Merge the new allocation with the right neighbor. */ - xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK); temp = new->br_blockcount + right.br_blockcount; oldlen = STARTBLOCKVAL(new->br_startblock) + STARTBLOCKVAL(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); xfs_bmbt_set_allf(ep, new->br_startoff, NULLSTARTBLOCK((int)newlen), temp, right.br_state); - xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK); + XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK); ip->i_df.if_lastex = idx; /* DELTA: One in-core extent grew into a hole. */ temp2 = temp; @@ -1989,7 +1964,7 @@ xfs_bmap_add_extent_hole_delay( * Insert a new entry. */ oldlen = newlen = 0; - xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL, + XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, XFS_DATA_FORK); xfs_iext_insert(ifp, idx, 1, new); ip->i_df.if_lastex = idx; @@ -2039,9 +2014,6 @@ xfs_bmap_add_extent_hole_real( { xfs_bmbt_rec_t *ep; /* pointer to extent entry ins. 
point */ int error; /* error return value */ -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_bmap_add_extent_hole_real"; -#endif int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ @@ -2118,15 +2090,14 @@ xfs_bmap_add_extent_hole_real( * left and on the right. * Merge all three into a single extent record. */ - xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1, + XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, whichfork); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), left.br_blockcount + new->br_blockcount + right.br_blockcount); - xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1, + XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, whichfork); - xfs_bmap_trace_delete(fname, "LC|RC", ip, - idx, 1, whichfork); + XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork); xfs_iext_remove(ifp, idx, 1); ifp->if_lastex = idx - 1; XFS_IFORK_NEXT_SET(ip, whichfork, @@ -2168,10 +2139,10 @@ xfs_bmap_add_extent_hole_real( * on the left. * Merge the new allocation with the left neighbor. */ - xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork); + XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork); xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), left.br_blockcount + new->br_blockcount); - xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork); + XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork); ifp->if_lastex = idx - 1; if (cur == NULL) { rval = XFS_ILOG_FEXT(whichfork); @@ -2202,11 +2173,11 @@ xfs_bmap_add_extent_hole_real( * on the right. * Merge the new allocation with the right neighbor. 
*/ - xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork); + XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork); xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, right.br_state); - xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork); + XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork); ifp->if_lastex = idx; if (cur == NULL) { rval = XFS_ILOG_FEXT(whichfork); @@ -2237,8 +2208,7 @@ xfs_bmap_add_extent_hole_real( * real allocation. * Insert a new entry. */ - xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL, - whichfork); + XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork); xfs_iext_insert(ifp, idx, 1, new); ifp->if_lastex = idx; XFS_IFORK_NEXT_SET(ip, whichfork, @@ -2605,12 +2575,10 @@ xfs_bmap_rtalloc( xfs_extlen_t prod = 0; /* product factor for allocators */ xfs_extlen_t ralen = 0; /* realtime allocation length */ xfs_extlen_t align; /* minimum allocation alignment */ - xfs_rtblock_t rtx; /* realtime extent number */ xfs_rtblock_t rtb; mp = ap->ip->i_mount; - align = ap->ip->i_d.di_extsize ? - ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize; + align = xfs_get_extsz_hint(ap->ip); prod = align / mp->m_sb.sb_rextsize; error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, align, 1, ap->eof, 0, @@ -2644,6 +2612,8 @@ xfs_bmap_rtalloc( * pick an extent that will space things out in the rt area. */ if (ap->eof && ap->off == 0) { + xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */ + error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); if (error) return error; @@ -2715,9 +2685,7 @@ xfs_bmap_btalloc( int error; mp = ap->ip->i_mount; - align = (ap->userdata && ap->ip->i_d.di_extsize && - (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ? - ap->ip->i_d.di_extsize : 0; + align = ap->userdata ? 
xfs_get_extsz_hint(ap->ip) : 0; if (unlikely(align)) { error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, align, 0, ap->eof, 0, ap->conv, @@ -2727,9 +2695,15 @@ xfs_bmap_btalloc( } nullfb = ap->firstblock == NULLFSBLOCK; fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); - if (nullfb) - ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); - else + if (nullfb) { + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { + ag = xfs_filestream_lookup_ag(ap->ip); + ag = (ag != NULLAGNUMBER) ? ag : 0; + ap->rval = XFS_AGB_TO_FSB(mp, ag, 0); + } else { + ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); + } + } else ap->rval = ap->firstblock; xfs_bmap_adjacent(ap); @@ -2753,13 +2727,22 @@ xfs_bmap_btalloc( args.firstblock = ap->firstblock; blen = 0; if (nullfb) { - args.type = XFS_ALLOCTYPE_START_BNO; + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) + args.type = XFS_ALLOCTYPE_NEAR_BNO; + else + args.type = XFS_ALLOCTYPE_START_BNO; args.total = ap->total; + /* - * Find the longest available space. - * We're going to try for the whole allocation at once. + * Search for an allocation group with a single extent + * large enough for the request. + * + * If one isn't found, then adjust the minimum allocation + * size to the largest space found. */ startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno); + if (startag == NULLAGNUMBER) + startag = ag = 0; notinit = 0; down_read(&mp->m_peraglock); while (blen < ap->alen) { @@ -2785,6 +2768,35 @@ xfs_bmap_btalloc( blen = longest; } else notinit = 1; + + if (xfs_inode_is_filestream(ap->ip)) { + if (blen >= ap->alen) + break; + + if (ap->userdata) { + /* + * If startag is an invalid AG, we've + * come here once before and + * xfs_filestream_new_ag picked the + * best currently available. + * + * Don't continue looping, since we + * could loop forever. 
+ */ + if (startag == NULLAGNUMBER) + break; + + error = xfs_filestream_new_ag(ap, &ag); + if (error) { + up_read(&mp->m_peraglock); + return error; + } + + /* loop again to set 'blen'*/ + startag = NULLAGNUMBER; + continue; + } + } if (++ag == mp->m_sb.sb_agcount) ag = 0; if (ag == startag) @@ -2809,17 +2821,27 @@ xfs_bmap_btalloc( */ else args.minlen = ap->alen; + + /* + * set the failure fallback case to look in the selected + * AG as the stream may have moved. + */ + if (xfs_inode_is_filestream(ap->ip)) + ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0); } else if (ap->low) { - args.type = XFS_ALLOCTYPE_START_BNO; + if (xfs_inode_is_filestream(ap->ip)) + args.type = XFS_ALLOCTYPE_FIRST_AG; + else + args.type = XFS_ALLOCTYPE_START_BNO; args.total = args.minlen = ap->minlen; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; args.total = ap->total; args.minlen = ap->minlen; } - if (unlikely(ap->userdata && ap->ip->i_d.di_extsize && - (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) { - args.prod = ap->ip->i_d.di_extsize; + /* apply extent size hints if obtained earlier */ + if (unlikely(align)) { + args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); } else if (mp->m_sb.sb_blocksize >= NBPP) { @@ -3051,9 +3073,6 @@ xfs_bmap_del_extent( xfs_bmbt_rec_t *ep; /* current extent entry pointer */ int error; /* error return value */ int flags; /* inode logging flags */ -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_bmap_del_extent"; -#endif xfs_bmbt_irec_t got; /* current extent entry */ xfs_fileoff_t got_endoff; /* first offset past got */ int i; /* temp state */ @@ -3147,7 +3166,7 @@ xfs_bmap_del_extent( /* * Matches the whole extent. Delete the entry. 
*/ - xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork); + XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork); xfs_iext_remove(ifp, idx, 1); ifp->if_lastex = idx; if (delay) @@ -3168,7 +3187,7 @@ xfs_bmap_del_extent( /* * Deleting the first part of the extent. */ - xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork); + XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork); xfs_bmbt_set_startoff(ep, del_endoff); temp = got.br_blockcount - del->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); @@ -3177,13 +3196,13 @@ xfs_bmap_del_extent( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); - xfs_bmap_trace_post_update(fname, "2", ip, idx, + XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork); da_new = temp; break; } xfs_bmbt_set_startblock(ep, del_endblock); - xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork); + XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork); if (!cur) { flags |= XFS_ILOG_FEXT(whichfork); break; @@ -3199,19 +3218,19 @@ xfs_bmap_del_extent( * Deleting the last part of the extent. */ temp = got.br_blockcount - del->br_blockcount; - xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork); + XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork); xfs_bmbt_set_blockcount(ep, temp); ifp->if_lastex = idx; if (delay) { temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); - xfs_bmap_trace_post_update(fname, "1", ip, idx, + XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork); da_new = temp; break; } - xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork); + XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork); if (!cur) { flags |= XFS_ILOG_FEXT(whichfork); break; @@ -3228,7 +3247,7 @@ xfs_bmap_del_extent( * Deleting the middle of the extent. 
*/ temp = del->br_startoff - got.br_startoff; - xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork); + XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork); xfs_bmbt_set_blockcount(ep, temp); new.br_startoff = del_endoff; temp2 = got_endoff - del_endoff; @@ -3315,8 +3334,8 @@ xfs_bmap_del_extent( } } } - xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork); - xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL, + XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork); + XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL, whichfork); xfs_iext_insert(ifp, idx + 1, 1, &new); ifp->if_lastex = idx + 1; @@ -3556,9 +3575,6 @@ xfs_bmap_local_to_extents( { int error; /* error return value */ int flags; /* logging flags returned */ -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_bmap_local_to_extents"; -#endif xfs_ifork_t *ifp; /* inode fork pointer */ /* @@ -3613,7 +3629,7 @@ xfs_bmap_local_to_extents( xfs_iext_add(ifp, 0, 1); ep = xfs_iext_get_ext(ifp, 0); xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); - xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork); + XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, 1); ip->i_d.di_nblocks = 1; XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip, @@ -3736,7 +3752,7 @@ ktrace_t *xfs_bmap_trace_buf; STATIC void xfs_bmap_trace_addentry( int opcode, /* operation */ - char *fname, /* function name */ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry(ies) */ @@ -3795,7 +3811,7 @@ xfs_bmap_trace_addentry( */ STATIC void xfs_bmap_trace_delete( - char *fname, /* function name */ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry(entries) deleted */ @@ -3817,7 +3833,7 @@ xfs_bmap_trace_delete( */ STATIC void xfs_bmap_trace_insert( - char *fname, /* function name 
*/ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry(entries) inserted */ @@ -3846,7 +3862,7 @@ xfs_bmap_trace_insert( */ STATIC void xfs_bmap_trace_post_update( - char *fname, /* function name */ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry updated */ @@ -3864,7 +3880,7 @@ xfs_bmap_trace_post_update( */ STATIC void xfs_bmap_trace_pre_update( - char *fname, /* function name */ + const char *fname, /* function name */ char *desc, /* operation description */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* index of entry to be updated */ @@ -4481,9 +4497,6 @@ xfs_bmap_read_extents( xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_bmap_read_extents"; -#endif xfs_extnum_t i, j; /* index into the extents list */ xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ @@ -4600,7 +4613,7 @@ xfs_bmap_read_extents( } ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); - xfs_bmap_trace_exlist(fname, ip, i, whichfork); + XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); return 0; error0: xfs_trans_brelse(tp, bp); @@ -4613,7 +4626,7 @@ error0: */ void xfs_bmap_trace_exlist( - char *fname, /* function name */ + const char *fname, /* function name */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t cnt, /* count of entries in the list */ int whichfork) /* data or attr fork */ @@ -4628,7 +4641,7 @@ xfs_bmap_trace_exlist( for (idx = 0; idx < cnt; idx++) { ep = xfs_iext_get_ext(ifp, idx); xfs_bmbt_get_all(ep, &s); - xfs_bmap_trace_insert(fname, "exlist", ip, idx, 1, &s, NULL, + XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, 
NULL, whichfork); } } @@ -4868,12 +4881,7 @@ xfs_bmapi( xfs_extlen_t extsz; /* Figure out the extent size, adjust alen */ - if (rt) { - if (!(extsz = ip->i_d.di_extsize)) - extsz = mp->m_sb.sb_rextsize; - } else { - extsz = ip->i_d.di_extsize; - } + extsz = xfs_get_extsz_hint(ip); if (extsz) { error = xfs_bmap_extsize_align(mp, &got, &prev, extsz, @@ -5219,10 +5227,10 @@ xfs_bmapi( * Else go on to the next record. */ ep = xfs_iext_get_ext(ifp, ++lastx); - if (lastx >= nextents) { + prev = got; + if (lastx >= nextents) eof = 1; - prev = got; - } else + else xfs_bmbt_get_all(ep, &got); } ifp->if_lastex = lastx; @@ -5813,8 +5821,7 @@ xfs_getbmap( ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) return XFS_ERROR(EINVAL); if (whichfork == XFS_DATA_FORK) { - if ((ip->i_d.di_extsize && (ip->i_d.di_flags & - (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) || + if (xfs_get_extsz_hint(ip) || ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ prealloced = 1; fixlen = XFS_MAXIOFFSET(mp); diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 4f24c7e39b31..524b1c9d5246 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -144,12 +144,14 @@ extern ktrace_t *xfs_bmap_trace_buf; */ void xfs_bmap_trace_exlist( - char *fname, /* function name */ + const char *fname, /* function name */ struct xfs_inode *ip, /* incore inode pointer */ xfs_extnum_t cnt, /* count of entries in list */ int whichfork); /* data or attr fork */ +#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ + xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w) #else -#define xfs_bmap_trace_exlist(f,ip,c,w) +#define XFS_BMAP_TRACE_EXLIST(ip,c,w) #endif /* diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 0bf192fea3eb..89b891f51cfb 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -76,7 +76,7 @@ static char EXIT[] = "exit"; */ STATIC void xfs_bmbt_trace_enter( - char *func, + const char *func, xfs_btree_cur_t *cur, char *s, int type, @@ -117,7 +117,7 @@ xfs_bmbt_trace_enter( */ STATIC void 
xfs_bmbt_trace_argbi( - char *func, + const char *func, xfs_btree_cur_t *cur, xfs_buf_t *b, int i, @@ -134,7 +134,7 @@ xfs_bmbt_trace_argbi( */ STATIC void xfs_bmbt_trace_argbii( - char *func, + const char *func, xfs_btree_cur_t *cur, xfs_buf_t *b, int i0, @@ -153,7 +153,7 @@ xfs_bmbt_trace_argbii( */ STATIC void xfs_bmbt_trace_argfffi( - char *func, + const char *func, xfs_btree_cur_t *cur, xfs_dfiloff_t o, xfs_dfsbno_t b, @@ -172,7 +172,7 @@ xfs_bmbt_trace_argfffi( */ STATIC void xfs_bmbt_trace_argi( - char *func, + const char *func, xfs_btree_cur_t *cur, int i, int line) @@ -188,7 +188,7 @@ xfs_bmbt_trace_argi( */ STATIC void xfs_bmbt_trace_argifk( - char *func, + const char *func, xfs_btree_cur_t *cur, int i, xfs_fsblock_t f, @@ -206,7 +206,7 @@ xfs_bmbt_trace_argifk( */ STATIC void xfs_bmbt_trace_argifr( - char *func, + const char *func, xfs_btree_cur_t *cur, int i, xfs_fsblock_t f, @@ -235,7 +235,7 @@ xfs_bmbt_trace_argifr( */ STATIC void xfs_bmbt_trace_argik( - char *func, + const char *func, xfs_btree_cur_t *cur, int i, xfs_bmbt_key_t *k, @@ -255,7 +255,7 @@ xfs_bmbt_trace_argik( */ STATIC void xfs_bmbt_trace_cursor( - char *func, + const char *func, xfs_btree_cur_t *cur, char *s, int line) @@ -274,21 +274,21 @@ xfs_bmbt_trace_cursor( } #define XFS_BMBT_TRACE_ARGBI(c,b,i) \ - xfs_bmbt_trace_argbi(fname, c, b, i, __LINE__) + xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__) #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ - xfs_bmbt_trace_argbii(fname, c, b, i, j, __LINE__) + xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__) #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ - xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__) + xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__) #define XFS_BMBT_TRACE_ARGI(c,i) \ - xfs_bmbt_trace_argi(fname, c, i, __LINE__) + xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__) #define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ - xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__) + xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, 
s, __LINE__) #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ - xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__) + xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__) #define XFS_BMBT_TRACE_ARGIK(c,i,k) \ - xfs_bmbt_trace_argik(fname, c, i, k, __LINE__) + xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__) #define XFS_BMBT_TRACE_CURSOR(c,s) \ - xfs_bmbt_trace_cursor(fname, c, s, __LINE__) + xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__) #else #define XFS_BMBT_TRACE_ARGBI(c,b,i) #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) @@ -318,9 +318,6 @@ xfs_bmbt_delrec( xfs_fsblock_t bno; /* fs-relative block number */ xfs_buf_t *bp; /* buffer for block */ int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_delrec"; -#endif int i; /* loop counter */ int j; /* temp state */ xfs_bmbt_key_t key; /* bmap btree key */ @@ -694,9 +691,6 @@ xfs_bmbt_insrec( xfs_bmbt_block_t *block; /* bmap btree block */ xfs_buf_t *bp; /* buffer for block */ int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_insrec"; -#endif int i; /* loop index */ xfs_bmbt_key_t key; /* bmap btree key */ xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */ @@ -881,9 +875,6 @@ xfs_bmbt_killroot( #ifdef DEBUG int error; #endif -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_killroot"; -#endif int i; xfs_bmbt_key_t *kp; xfs_inode_t *ip; @@ -973,9 +964,6 @@ xfs_bmbt_log_keys( int kfirst, int klast) { -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_log_keys"; -#endif xfs_trans_t *tp; XFS_BMBT_TRACE_CURSOR(cur, ENTRY); @@ -1012,9 +1000,6 @@ xfs_bmbt_log_ptrs( int pfirst, int plast) { -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_log_ptrs"; -#endif xfs_trans_t *tp; XFS_BMBT_TRACE_CURSOR(cur, ENTRY); @@ -1055,9 +1040,6 @@ xfs_bmbt_lookup( xfs_daddr_t d; xfs_sfiloff_t diff; int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_lookup"; -#endif xfs_fsblock_t fsbno=0; int 
high; int i; @@ -1195,9 +1177,6 @@ xfs_bmbt_lshift( int *stat) /* success/failure */ { int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_lshift"; -#endif #ifdef DEBUG int i; /* loop counter */ #endif @@ -1331,9 +1310,6 @@ xfs_bmbt_rshift( int *stat) /* success/failure */ { int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_rshift"; -#endif int i; /* loop counter */ xfs_bmbt_key_t key; /* bmap btree key */ xfs_buf_t *lbp; /* left buffer pointer */ @@ -1492,9 +1468,6 @@ xfs_bmbt_split( { xfs_alloc_arg_t args; /* block allocation args */ int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_split"; -#endif int i; /* loop counter */ xfs_fsblock_t lbno; /* left sibling block number */ xfs_buf_t *lbp; /* left buffer pointer */ @@ -1641,9 +1614,6 @@ xfs_bmbt_updkey( #ifdef DEBUG int error; #endif -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_updkey"; -#endif xfs_bmbt_key_t *kp; int ptr; @@ -1712,9 +1682,6 @@ xfs_bmbt_decrement( xfs_bmbt_block_t *block; xfs_buf_t *bp; int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_decrement"; -#endif xfs_fsblock_t fsbno; int lev; xfs_mount_t *mp; @@ -1785,9 +1752,6 @@ xfs_bmbt_delete( int *stat) /* success/failure */ { int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_delete"; -#endif int i; int level; @@ -2000,9 +1964,6 @@ xfs_bmbt_increment( xfs_bmbt_block_t *block; xfs_buf_t *bp; int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_increment"; -#endif xfs_fsblock_t fsbno; int lev; xfs_mount_t *mp; @@ -2080,9 +2041,6 @@ xfs_bmbt_insert( int *stat) /* success/failure */ { int error; /* error return value */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_insert"; -#endif int i; int level; xfs_fsblock_t nbno; @@ -2142,9 +2100,6 @@ xfs_bmbt_log_block( int fields) { int 
first; -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_log_block"; -#endif int last; xfs_trans_t *tp; static const short offsets[] = { @@ -2181,9 +2136,6 @@ xfs_bmbt_log_recs( { xfs_bmbt_block_t *block; int first; -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_log_recs"; -#endif int last; xfs_bmbt_rec_t *rp; xfs_trans_t *tp; @@ -2245,9 +2197,6 @@ xfs_bmbt_newroot( xfs_bmbt_key_t *ckp; /* child key pointer */ xfs_bmbt_ptr_t *cpp; /* child ptr pointer */ int error; /* error return code */ -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_newroot"; -#endif #ifdef DEBUG int i; /* loop counter */ #endif @@ -2630,9 +2579,6 @@ xfs_bmbt_update( xfs_bmbt_block_t *block; xfs_buf_t *bp; int error; -#ifdef XFS_BMBT_TRACE - static char fname[] = "xfs_bmbt_update"; -#endif xfs_bmbt_key_t key; int ptr; xfs_bmbt_rec_t *rp; diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 4e27d55a1e73..6e40a0a198ff 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -444,30 +444,14 @@ xfs_btree_setbuf( /* * Min and max functions for extlen, agblock, fileoff, and filblks types. */ -#define XFS_EXTLEN_MIN(a,b) \ - ((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \ - (xfs_extlen_t)(a) : (xfs_extlen_t)(b)) -#define XFS_EXTLEN_MAX(a,b) \ - ((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \ - (xfs_extlen_t)(a) : (xfs_extlen_t)(b)) -#define XFS_AGBLOCK_MIN(a,b) \ - ((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \ - (xfs_agblock_t)(a) : (xfs_agblock_t)(b)) -#define XFS_AGBLOCK_MAX(a,b) \ - ((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \ - (xfs_agblock_t)(a) : (xfs_agblock_t)(b)) -#define XFS_FILEOFF_MIN(a,b) \ - ((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \ - (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b)) -#define XFS_FILEOFF_MAX(a,b) \ - ((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \ - (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b)) -#define XFS_FILBLKS_MIN(a,b) \ - ((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? 
\ - (xfs_filblks_t)(a) : (xfs_filblks_t)(b)) -#define XFS_FILBLKS_MAX(a,b) \ - ((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \ - (xfs_filblks_t)(a) : (xfs_filblks_t)(b)) +#define XFS_EXTLEN_MIN(a,b) min_t(xfs_extlen_t, (a), (b)) +#define XFS_EXTLEN_MAX(a,b) max_t(xfs_extlen_t, (a), (b)) +#define XFS_AGBLOCK_MIN(a,b) min_t(xfs_agblock_t, (a), (b)) +#define XFS_AGBLOCK_MAX(a,b) max_t(xfs_agblock_t, (a), (b)) +#define XFS_FILEOFF_MIN(a,b) min_t(xfs_fileoff_t, (a), (b)) +#define XFS_FILEOFF_MAX(a,b) max_t(xfs_fileoff_t, (a), (b)) +#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) +#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) #define XFS_FSB_SANITY_CHECK(mp,fsb) \ (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 6c1bddc04e31..b0667cb27d66 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -580,8 +580,8 @@ xfs_buf_item_unlock( * If the buf item isn't tracking any data, free it. * Otherwise, if XFS_BLI_HOLD is set clear it. 
*/ - if (xfs_count_bits(bip->bli_format.blf_data_map, - bip->bli_format.blf_map_size, 0) == 0) { + if (xfs_bitmap_empty(bip->bli_format.blf_data_map, + bip->bli_format.blf_map_size)) { xfs_buf_item_relse(bp); } else if (hold) { bip->bli_flags &= ~XFS_BLI_HOLD; diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h index 5b7eb81453be..f89196cb08d2 100644 --- a/fs/xfs/xfs_clnt.h +++ b/fs/xfs/xfs_clnt.h @@ -99,5 +99,7 @@ struct xfs_mount_args { */ #define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred * I/O size in stat(2) */ +#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams + * allocator */ #endif /* __XFS_CLNT_H__ */ diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index b33826961c45..fefd0116bac9 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ +#define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) @@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) +#define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT) #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ - XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG) + XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | 
XFS_DIFLAG_FILESTREAM) #endif /* __XFS_DINODE_H__ */ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 8e8e5279334a..29e091914df4 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -55,9 +55,9 @@ xfs_dir_mount( XFS_MAX_BLOCKSIZE); mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog; - mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp)); - mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp)); - mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp)); + mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp)); + mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp)); + mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp)); mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) / (uint)sizeof(xfs_da_node_entry_t); @@ -554,7 +554,7 @@ xfs_dir2_grow_inode( */ if (mapp != &map) kmem_free(mapp, sizeof(*mapp) * count); - *dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno); + *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); /* * Update file's size if this is the data space and it grew. */ @@ -706,7 +706,7 @@ xfs_dir2_shrink_inode( dp = args->dp; mp = dp->i_mount; tp = args->trans; - da = XFS_DIR2_DB_TO_DA(mp, db); + da = xfs_dir2_db_to_da(mp, db); /* * Unmap the fsblock(s). */ @@ -742,7 +742,7 @@ xfs_dir2_shrink_inode( /* * If the block isn't the last one in the directory, we're done. 
*/ - if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0)) + if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0)) return 0; bno = da; if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 3accc1dcd6c9..e4df1aaae2a2 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -115,13 +115,13 @@ xfs_dir2_block_addname( xfs_da_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); } - len = XFS_DIR2_DATA_ENTSIZE(args->namelen); + len = xfs_dir2_data_entsize(args->namelen); /* * Set up pointers to parts of the block. */ bf = block->hdr.bestfree; - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, block); + blp = xfs_dir2_block_leaf_p(btp); /* * No stale entries? Need space for entry and new leaf. */ @@ -396,7 +396,7 @@ xfs_dir2_block_addname( * Fill in the leaf entry. */ blp[mid].hashval = cpu_to_be32(args->hashval); - blp[mid].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, + blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)block)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* @@ -411,7 +411,7 @@ xfs_dir2_block_addname( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); - tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + tagp = xfs_dir2_data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)block); /* * Clean up the bestfree array and log the header, tail, and entry. @@ -455,7 +455,7 @@ xfs_dir2_block_getdents( /* * If the block number in the offset is out of range, we're done. */ - if (XFS_DIR2_DATAPTR_TO_DB(mp, uio->uio_offset) > mp->m_dirdatablk) { + if (xfs_dir2_dataptr_to_db(mp, uio->uio_offset) > mp->m_dirdatablk) { *eofp = 1; return 0; } @@ -471,15 +471,15 @@ xfs_dir2_block_getdents( * Extract the byte offset we start at from the seek pointer. * We'll skip entries before this. 
*/ - wantoff = XFS_DIR2_DATAPTR_TO_OFF(mp, uio->uio_offset); + wantoff = xfs_dir2_dataptr_to_off(mp, uio->uio_offset); block = bp->data; xfs_dir2_data_check(dp, bp); /* * Set up values for the loop. */ - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + btp = xfs_dir2_block_tail_p(mp, block); ptr = (char *)block->u; - endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); + endptr = (char *)xfs_dir2_block_leaf_p(btp); p.dbp = dbp; p.put = put; p.uio = uio; @@ -502,7 +502,7 @@ xfs_dir2_block_getdents( /* * Bump pointer for the next iteration. */ - ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + ptr += xfs_dir2_data_entsize(dep->namelen); /* * The entry is before the desired starting point, skip it. */ @@ -513,7 +513,7 @@ xfs_dir2_block_getdents( */ p.namelen = dep->namelen; - p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, ptr - (char *)block); p.ino = be64_to_cpu(dep->inumber); #if XFS_BIG_INUMS @@ -531,7 +531,7 @@ xfs_dir2_block_getdents( */ if (!p.done) { uio->uio_offset = - XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, (char *)dep - (char *)block); xfs_da_brelse(tp, bp); return error; @@ -545,7 +545,7 @@ xfs_dir2_block_getdents( *eofp = 1; uio->uio_offset = - XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0); + xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0); xfs_da_brelse(tp, bp); @@ -569,8 +569,8 @@ xfs_dir2_block_log_leaf( mp = tp->t_mountp; block = bp->data; - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, block); + blp = xfs_dir2_block_leaf_p(btp); xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), (uint)((char *)&blp[last + 1] - (char *)block - 1)); } @@ -589,7 +589,7 @@ xfs_dir2_block_log_tail( mp = tp->t_mountp; block = bp->data; - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + btp = xfs_dir2_block_tail_p(mp, block); xfs_da_log_buf(tp, bp, (uint)((char *)btp - 
(char *)block), (uint)((char *)(btp + 1) - (char *)block - 1)); } @@ -623,13 +623,13 @@ xfs_dir2_block_lookup( mp = dp->i_mount; block = bp->data; xfs_dir2_data_check(dp, bp); - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, block); + blp = xfs_dir2_block_leaf_p(btp); /* * Get the offset from the leaf entry, to point to the data. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); + ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Fill in inode number, release the block. */ @@ -675,8 +675,8 @@ xfs_dir2_block_lookup_int( ASSERT(bp != NULL); block = bp->data; xfs_dir2_data_check(dp, bp); - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, block); + blp = xfs_dir2_block_leaf_p(btp); /* * Loop doing a binary search for our hash value. * Find our entry, ENOENT if it's not there. @@ -713,7 +713,7 @@ xfs_dir2_block_lookup_int( * Get pointer to the entry from the leaf. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr)); + ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); /* * Compare, if it's right give back buffer & entry number. */ @@ -768,20 +768,20 @@ xfs_dir2_block_removename( tp = args->trans; mp = dp->i_mount; block = bp->data; - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, block); + blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry using the leaf entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); + ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Mark the data entry's space free. 
*/ needlog = needscan = 0; xfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), - XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); + xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * Fix up the block tail. */ @@ -843,13 +843,13 @@ xfs_dir2_block_replace( dp = args->dp; mp = dp->i_mount; block = bp->data; - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, block); + blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry we need to change. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address))); + ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); ASSERT(be64_to_cpu(dep->inumber) != args->inumber); /* * Change the inode number to the new value. @@ -912,7 +912,7 @@ xfs_dir2_leaf_to_block( mp = dp->i_mount; leaf = lbp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * If there are data blocks other than the first one, take this * opportunity to remove trailing empty data blocks that may have @@ -920,7 +920,7 @@ xfs_dir2_leaf_to_block( * These will show up in the leaf bests table. */ while (dp->i_d.di_size > mp->m_dirblksize) { - bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); + bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == mp->m_dirblksize - (uint)sizeof(block->hdr)) { if ((error = @@ -974,14 +974,14 @@ xfs_dir2_leaf_to_block( /* * Initialize the block tail. */ - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + btp = xfs_dir2_block_tail_p(mp, block); btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); /* * Initialize the block leaf area. We compact out stale entries. 
*/ - lep = XFS_DIR2_BLOCK_LEAF_P(btp); + lep = xfs_dir2_block_leaf_p(btp); for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) continue; @@ -1067,7 +1067,7 @@ xfs_dir2_sf_to_block( ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Copy the directory into the stack buffer. * Then pitch the incore inode data so we can make extents. @@ -1119,10 +1119,10 @@ xfs_dir2_sf_to_block( /* * Fill in the tail. */ - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + btp = xfs_dir2_block_tail_p(mp, block); btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ btp->stale = 0; - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + blp = xfs_dir2_block_leaf_p(btp); endoffset = (uint)((char *)blp - (char *)block); /* * Remove the freespace, we'll manage it. @@ -1138,25 +1138,25 @@ xfs_dir2_sf_to_block( dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; - tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + tagp = xfs_dir2_data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); - blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, + blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)block)); /* * Create entry for .. 
*/ dep = (xfs_dir2_data_entry_t *) ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); - dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; - tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + tagp = xfs_dir2_data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); - blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, + blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)block)); offset = XFS_DIR2_DATA_FIRST_OFFSET; /* @@ -1165,7 +1165,7 @@ xfs_dir2_sf_to_block( if ((i = 0) == sfp->hdr.count) sfep = NULL; else - sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + sfep = xfs_dir2_sf_firstentry(sfp); /* * Need to preserve the existing offset values in the sf directory. * Insert holes (unused entries) where necessary. @@ -1177,7 +1177,7 @@ xfs_dir2_sf_to_block( if (sfep == NULL) newoffset = endoffset; else - newoffset = XFS_DIR2_SF_GET_OFFSET(sfep); + newoffset = xfs_dir2_sf_get_offset(sfep); /* * There should be a hole here, make one. */ @@ -1186,7 +1186,7 @@ xfs_dir2_sf_to_block( ((char *)block + offset); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = cpu_to_be16(newoffset - offset); - *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16( + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( ((char *)dup - (char *)block)); xfs_dir2_data_log_unused(tp, bp, dup); (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, @@ -1198,22 +1198,22 @@ xfs_dir2_sf_to_block( * Copy a real entry. 
*/ dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); - dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, - XFS_DIR2_SF_INUMBERP(sfep))); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep))); dep->namelen = sfep->namelen; memcpy(dep->name, sfep->name, dep->namelen); - tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + tagp = xfs_dir2_data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( (char *)sfep->name, sfep->namelen)); - blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp, + blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)block)); offset = (int)((char *)(tagp + 1) - (char *)block); if (++i == sfp->hdr.count) sfep = NULL; else - sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); + sfep = xfs_dir2_sf_nextentry(sfp, sfep); } /* Done with the temporary buffer */ kmem_free(buf, buf_len); diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h index 6722effd0b20..e7c2606161e9 100644 --- a/fs/xfs/xfs_dir2_block.h +++ b/fs/xfs/xfs_dir2_block.h @@ -60,7 +60,6 @@ typedef struct xfs_dir2_block { /* * Pointer to the leaf header embedded in a data block (1-block format) */ -#define XFS_DIR2_BLOCK_TAIL_P(mp,block) xfs_dir2_block_tail_p(mp,block) static inline xfs_dir2_block_tail_t * xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) { @@ -71,7 +70,6 @@ xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) /* * Pointer to the leaf entries embedded in a data block (1-block format) */ -#define XFS_DIR2_BLOCK_LEAF_P(btp) xfs_dir2_block_leaf_p(btp) static inline struct xfs_dir2_leaf_entry * xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp) { diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index c211c37ef67c..7ebe295bd6d3 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -72,8 +72,8 @@ xfs_dir2_data_check( bf = d->hdr.bestfree; p = 
(char *)d->u; if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); - lep = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; } else endp = (char *)d + mp->m_dirblksize; @@ -107,7 +107,7 @@ xfs_dir2_data_check( */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT(lastfree == 0); - ASSERT(be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) == + ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == (char *)dup - (char *)d); dfp = xfs_dir2_data_freefind(d, dup); if (dfp) { @@ -131,12 +131,12 @@ xfs_dir2_data_check( dep = (xfs_dir2_data_entry_t *)p; ASSERT(dep->namelen != 0); ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); - ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) == + ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == (char *)dep - (char *)d); count++; lastfree = 0; if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, (xfs_dir2_data_aoff_t) ((char *)dep - (char *)d)); hash = xfs_da_hashname((char *)dep->name, dep->namelen); @@ -147,7 +147,7 @@ xfs_dir2_data_check( } ASSERT(i < be32_to_cpu(btp->count)); } - p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + p += xfs_dir2_data_entsize(dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. 
@@ -346,8 +346,8 @@ xfs_dir2_data_freescan( */ p = (char *)d->u; if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); - endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + endp = (char *)xfs_dir2_block_leaf_p(btp); } else endp = (char *)d + mp->m_dirblksize; /* @@ -360,7 +360,7 @@ xfs_dir2_data_freescan( */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT((char *)dup - (char *)d == - be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup))); + be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); xfs_dir2_data_freeinsert(d, dup, loghead); p += be16_to_cpu(dup->length); } @@ -370,8 +370,8 @@ xfs_dir2_data_freescan( else { dep = (xfs_dir2_data_entry_t *)p; ASSERT((char *)dep - (char *)d == - be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep))); - p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep))); + p += xfs_dir2_data_entsize(dep->namelen); } } } @@ -402,7 +402,7 @@ xfs_dir2_data_init( /* * Get the buffer set up for the block. */ - error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp, + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp, XFS_DATA_FORK); if (error) { return error; @@ -427,7 +427,7 @@ xfs_dir2_data_init( t=mp->m_dirblksize - (uint)sizeof(d->hdr); d->hdr.bestfree[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); - *XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16((char *)dup - (char *)d); + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d); /* * Log it and return it. 
*/ @@ -452,7 +452,7 @@ xfs_dir2_data_log_entry( ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), - (uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) - + (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - (char *)d - 1)); } @@ -497,8 +497,8 @@ xfs_dir2_data_log_unused( * Log the end (tag) of the unused entry. */ xfs_da_log_buf(tp, bp, - (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d), - (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d + + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d), + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d + sizeof(xfs_dir2_data_off_t) - 1)); } @@ -535,8 +535,8 @@ xfs_dir2_data_make_free( xfs_dir2_block_tail_t *btp; /* block tail */ ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); - endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + endptr = (char *)xfs_dir2_block_leaf_p(btp); } /* * If this isn't the start of the block, then back up to @@ -587,7 +587,7 @@ xfs_dir2_data_make_free( * Fix up the new big freespace. 
*/ be16_add(&prevdup->length, len + be16_to_cpu(postdup->length)); - *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) = + *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); if (!needscan) { @@ -621,7 +621,7 @@ xfs_dir2_data_make_free( else if (prevdup) { dfp = xfs_dir2_data_freefind(d, prevdup); be16_add(&prevdup->length, len); - *XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) = + *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); /* @@ -649,7 +649,7 @@ xfs_dir2_data_make_free( newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); - *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* @@ -676,7 +676,7 @@ xfs_dir2_data_make_free( newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len); - *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); @@ -712,7 +712,7 @@ xfs_dir2_data_use_free( ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); ASSERT(offset >= (char *)dup - (char *)d); ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); - ASSERT((char *)dup - (char *)d == be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup))); + ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); /* * Look up the entry in the bestfree table. 
*/ @@ -745,7 +745,7 @@ xfs_dir2_data_use_free( newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(oldlen - len); - *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* @@ -772,7 +772,7 @@ xfs_dir2_data_use_free( else if (matchback) { newdup = dup; newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); - *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); /* @@ -799,13 +799,13 @@ xfs_dir2_data_use_free( else { newdup = dup; newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); - *XFS_DIR2_DATA_UNUSED_TAG_P(newdup) = + *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup); newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); - *XFS_DIR2_DATA_UNUSED_TAG_P(newdup2) = + *xfs_dir2_data_unused_tag_p(newdup2) = cpu_to_be16((char *)newdup2 - (char *)d); xfs_dir2_data_log_unused(tp, bp, newdup2); /* diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index c94c9099cfb1..b816e0252739 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -44,7 +44,7 @@ struct xfs_trans; #define XFS_DIR2_DATA_SPACE 0 #define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) #define XFS_DIR2_DATA_FIRSTDB(mp) \ - XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET) + xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) /* * Offsets of . and .. 
in data space (always block 0) @@ -52,9 +52,9 @@ struct xfs_trans; #define XFS_DIR2_DATA_DOT_OFFSET \ ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t)) #define XFS_DIR2_DATA_DOTDOT_OFFSET \ - (XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1)) + (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) #define XFS_DIR2_DATA_FIRST_OFFSET \ - (XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2)) + (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) /* * Structures. @@ -123,7 +123,6 @@ typedef struct xfs_dir2_data { /* * Size of a data entry. */ -#define XFS_DIR2_DATA_ENTSIZE(n) xfs_dir2_data_entsize(n) static inline int xfs_dir2_data_entsize(int n) { return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \ @@ -133,19 +132,16 @@ static inline int xfs_dir2_data_entsize(int n) /* * Pointer to an entry's tag word. */ -#define XFS_DIR2_DATA_ENTRY_TAG_P(dep) xfs_dir2_data_entry_tag_p(dep) static inline __be16 * xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) { return (__be16 *)((char *)dep + - XFS_DIR2_DATA_ENTSIZE(dep->namelen) - sizeof(__be16)); + xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); } /* * Pointer to a freespace's tag word. */ -#define XFS_DIR2_DATA_UNUSED_TAG_P(dup) \ - xfs_dir2_data_unused_tag_p(dup) static inline __be16 * xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) { diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index db14ea71459f..1b73c9ad646a 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -92,7 +92,7 @@ xfs_dir2_block_to_leaf( if ((error = xfs_da_grow_inode(args, &blkno))) { return error; } - ldb = XFS_DIR2_DA_TO_DB(mp, blkno); + ldb = xfs_dir2_da_to_db(mp, blkno); ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp)); /* * Initialize the leaf block, get a buffer for it. 
@@ -104,8 +104,8 @@ xfs_dir2_block_to_leaf( leaf = lbp->data; block = dbp->data; xfs_dir2_data_check(dp, dbp); - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, block); + blp = xfs_dir2_block_leaf_p(btp); /* * Set the counts in the leaf header. */ @@ -137,9 +137,9 @@ xfs_dir2_block_to_leaf( /* * Set up leaf tail and bests table. */ - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); ltp->bestcount = cpu_to_be32(1); - bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); + bestsp = xfs_dir2_leaf_bests_p(ltp); bestsp[0] = block->hdr.bestfree[0].length; /* * Log the data header and leaf bests table. @@ -209,9 +209,9 @@ xfs_dir2_leaf_addname( */ index = xfs_dir2_leaf_search_hash(args, lbp); leaf = lbp->data; - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); - bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); - length = XFS_DIR2_DATA_ENTSIZE(args->namelen); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); + bestsp = xfs_dir2_leaf_bests_p(ltp); + length = xfs_dir2_data_entsize(args->namelen); /* * See if there are any entries with the same hash value * and space in their block for the new entry. 
@@ -223,7 +223,7 @@ xfs_dir2_leaf_addname( index++, lep++) { if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) continue; - i = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); + i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); ASSERT(i < be32_to_cpu(ltp->bestcount)); ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); if (be16_to_cpu(bestsp[i]) >= length) { @@ -378,7 +378,7 @@ xfs_dir2_leaf_addname( */ else { if ((error = - xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block), + xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block), -1, &dbp, XFS_DATA_FORK))) { xfs_da_brelse(tp, lbp); return error; @@ -407,7 +407,7 @@ xfs_dir2_leaf_addname( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); - tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + tagp = xfs_dir2_data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)data); /* * Need to scan fix up the bestfree table. @@ -529,7 +529,7 @@ xfs_dir2_leaf_addname( * Fill in the new leaf entry. */ lep->hashval = cpu_to_be32(args->hashval); - lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, + lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block, be16_to_cpu(*tagp))); /* * Log the leaf fields and give up the buffers. @@ -567,13 +567,13 @@ xfs_dir2_leaf_check( * Should factor in the size of the bests table as well. * We can deduce a value for that from di_size. */ - ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * Leaves and bests don't overlap. */ ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <= - (char *)XFS_DIR2_LEAF_BESTS_P(ltp)); + (char *)xfs_dir2_leaf_bests_p(ltp)); /* * Check hash value order, count stale entries. 
*/ @@ -815,12 +815,12 @@ xfs_dir2_leaf_getdents( * Inside the loop we keep the main offset value as a byte offset * in the directory file. */ - curoff = XFS_DIR2_DATAPTR_TO_BYTE(mp, uio->uio_offset); + curoff = xfs_dir2_dataptr_to_byte(mp, uio->uio_offset); /* * Force this conversion through db so we truncate the offset * down to get the start of the data block. */ - map_off = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, curoff)); + map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff)); /* * Loop over directory entries until we reach the end offset. * Get more blocks and readahead as necessary. @@ -870,7 +870,7 @@ xfs_dir2_leaf_getdents( */ if (1 + ra_want > map_blocks && map_off < - XFS_DIR2_BYTE_TO_DA(mp, XFS_DIR2_LEAF_OFFSET)) { + xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) { /* * Get more bmaps, fill in after the ones * we already have in the table. @@ -878,7 +878,7 @@ xfs_dir2_leaf_getdents( nmap = map_size - map_valid; error = xfs_bmapi(tp, dp, map_off, - XFS_DIR2_BYTE_TO_DA(mp, + xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) - map_off, XFS_BMAPI_METADATA, NULL, 0, &map[map_valid], &nmap, NULL, NULL); @@ -903,7 +903,7 @@ xfs_dir2_leaf_getdents( map[map_valid + nmap - 1].br_blockcount; else map_off = - XFS_DIR2_BYTE_TO_DA(mp, + xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET); /* * Look for holes in the mapping, and @@ -931,14 +931,14 @@ xfs_dir2_leaf_getdents( * No valid mappings, so no more data blocks. */ if (!map_valid) { - curoff = XFS_DIR2_DA_TO_BYTE(mp, map_off); + curoff = xfs_dir2_da_to_byte(mp, map_off); break; } /* * Read the directory block starting at the first * mapping. */ - curdb = XFS_DIR2_DA_TO_DB(mp, map->br_startoff); + curdb = xfs_dir2_da_to_db(mp, map->br_startoff); error = xfs_da_read_buf(tp, dp, map->br_startoff, map->br_blockcount >= mp->m_dirblkfsbs ? XFS_FSB_TO_DADDR(mp, map->br_startblock) : @@ -1014,7 +1014,7 @@ xfs_dir2_leaf_getdents( /* * Having done a read, we need to set a new offset. 
*/ - newoff = XFS_DIR2_DB_OFF_TO_BYTE(mp, curdb, 0); + newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0); /* * Start of the current block. */ @@ -1024,7 +1024,7 @@ xfs_dir2_leaf_getdents( * Make sure we're in the right block. */ else if (curoff > newoff) - ASSERT(XFS_DIR2_BYTE_TO_DB(mp, curoff) == + ASSERT(xfs_dir2_byte_to_db(mp, curoff) == curdb); data = bp->data; xfs_dir2_data_check(dp, bp); @@ -1032,7 +1032,7 @@ xfs_dir2_leaf_getdents( * Find our position in the block. */ ptr = (char *)&data->u; - byteoff = XFS_DIR2_BYTE_TO_OFF(mp, curoff); + byteoff = xfs_dir2_byte_to_off(mp, curoff); /* * Skip past the header. */ @@ -1054,15 +1054,15 @@ xfs_dir2_leaf_getdents( } dep = (xfs_dir2_data_entry_t *)ptr; length = - XFS_DIR2_DATA_ENTSIZE(dep->namelen); + xfs_dir2_data_entsize(dep->namelen); ptr += length; } /* * Now set our real offset. */ curoff = - XFS_DIR2_DB_OFF_TO_BYTE(mp, - XFS_DIR2_BYTE_TO_DB(mp, curoff), + xfs_dir2_db_off_to_byte(mp, + xfs_dir2_byte_to_db(mp, curoff), (char *)ptr - (char *)data); if (ptr >= (char *)data + mp->m_dirblksize) { continue; @@ -1091,9 +1091,9 @@ xfs_dir2_leaf_getdents( p->namelen = dep->namelen; - length = XFS_DIR2_DATA_ENTSIZE(p->namelen); + length = xfs_dir2_data_entsize(p->namelen); - p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length); + p->cook = xfs_dir2_byte_to_dataptr(mp, curoff + length); p->ino = be64_to_cpu(dep->inumber); #if XFS_BIG_INUMS @@ -1121,10 +1121,10 @@ xfs_dir2_leaf_getdents( * All done. Set output offset value to current offset. */ *eofp = eof; - if (curoff > XFS_DIR2_DATAPTR_TO_BYTE(mp, XFS_DIR2_MAX_DATAPTR)) + if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) uio->uio_offset = XFS_DIR2_MAX_DATAPTR; else - uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff); + uio->uio_offset = xfs_dir2_byte_to_dataptr(mp, curoff); kmem_free(map, map_size * sizeof(*map)); kmem_free(p, sizeof(*p)); if (bp) @@ -1159,7 +1159,7 @@ xfs_dir2_leaf_init( /* * Get the buffer for the block. 
*/ - error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp, + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp, XFS_DATA_FORK); if (error) { return error; @@ -1181,7 +1181,7 @@ xfs_dir2_leaf_init( * the block. */ if (magic == XFS_DIR2_LEAF1_MAGIC) { - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); ltp->bestcount = 0; xfs_dir2_leaf_log_tail(tp, bp); } @@ -1206,9 +1206,9 @@ xfs_dir2_leaf_log_bests( leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); - ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf); - firstb = XFS_DIR2_LEAF_BESTS_P(ltp) + first; - lastb = XFS_DIR2_LEAF_BESTS_P(ltp) + last; + ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); + firstb = xfs_dir2_leaf_bests_p(ltp) + first; + lastb = xfs_dir2_leaf_bests_p(ltp) + last; xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); } @@ -1268,7 +1268,7 @@ xfs_dir2_leaf_log_tail( mp = tp->t_mountp; leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), (uint)(mp->m_dirblksize - 1)); } @@ -1312,7 +1312,7 @@ xfs_dir2_leaf_lookup( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->data + - XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); /* * Return the found inode number. */ @@ -1381,7 +1381,7 @@ xfs_dir2_leaf_lookup_int( /* * Get the new data block number. */ - newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); + newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); /* * If it's not the same as the old data block number, * need to pitch the old one and read the new one. 
@@ -1391,7 +1391,7 @@ xfs_dir2_leaf_lookup_int( xfs_da_brelse(tp, dbp); if ((error = xfs_da_read_buf(tp, dp, - XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp, + xfs_dir2_db_to_da(mp, newdb), -1, &dbp, XFS_DATA_FORK))) { xfs_da_brelse(tp, lbp); return error; @@ -1404,7 +1404,7 @@ xfs_dir2_leaf_lookup_int( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->data + - XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); /* * If it matches then return it. */ @@ -1469,20 +1469,20 @@ xfs_dir2_leaf_removename( * Point to the leaf entry, use that to point to the data entry. */ lep = &leaf->ents[index]; - db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); + db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); dep = (xfs_dir2_data_entry_t *) - ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); + ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); needscan = needlog = 0; oldbest = be16_to_cpu(data->hdr.bestfree[0].length); - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); - bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); + bestsp = xfs_dir2_leaf_bests_p(ltp); ASSERT(be16_to_cpu(bestsp[db]) == oldbest); /* * Mark the former data entry unused. */ xfs_dir2_data_make_free(tp, dbp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), - XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); + xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * We just mark the leaf entry stale by putting a null in it. */ @@ -1602,7 +1602,7 @@ xfs_dir2_leaf_replace( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->data + - XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); ASSERT(args->inumber != be64_to_cpu(dep->inumber)); /* * Put the new inode number in, log it. @@ -1698,7 +1698,7 @@ xfs_dir2_leaf_trim_data( /* * Read the offending data block. We need its buffer. 
*/ - if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp, + if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp, XFS_DATA_FORK))) { return error; } @@ -1712,7 +1712,7 @@ xfs_dir2_leaf_trim_data( */ leaf = lbp->data; - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == mp->m_dirblksize - (uint)sizeof(data->hdr)); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); @@ -1727,7 +1727,7 @@ xfs_dir2_leaf_trim_data( /* * Eliminate the last bests entry from the table. */ - bestsp = XFS_DIR2_LEAF_BESTS_P(ltp); + bestsp = xfs_dir2_leaf_bests_p(ltp); be32_add(&ltp->bestcount, -1); memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); xfs_dir2_leaf_log_tail(tp, lbp); @@ -1838,12 +1838,12 @@ xfs_dir2_node_to_leaf( /* * Set up the leaf tail from the freespace block. */ - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); ltp->bestcount = free->hdr.nvalid; /* * Set up the leaf bests table. */ - memcpy(XFS_DIR2_LEAF_BESTS_P(ltp), free->bests, + memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir2_leaf_log_tail(tp, lbp); diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h index f57ca1162412..70c97f3f815e 100644 --- a/fs/xfs/xfs_dir2_leaf.h +++ b/fs/xfs/xfs_dir2_leaf.h @@ -32,7 +32,7 @@ struct xfs_trans; #define XFS_DIR2_LEAF_SPACE 1 #define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) #define XFS_DIR2_LEAF_FIRSTDB(mp) \ - XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET) + xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) /* * Offset in data space of a data entry. @@ -82,7 +82,6 @@ typedef struct xfs_dir2_leaf { * DB blocks here are logical directory block numbers, not filesystem blocks. 
*/ -#define XFS_DIR2_MAX_LEAF_ENTS(mp) xfs_dir2_max_leaf_ents(mp) static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) { return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / @@ -92,7 +91,6 @@ static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) /* * Get address of the bestcount field in the single-leaf block. */ -#define XFS_DIR2_LEAF_TAIL_P(mp,lp) xfs_dir2_leaf_tail_p(mp, lp) static inline xfs_dir2_leaf_tail_t * xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) { @@ -104,7 +102,6 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) /* * Get address of the bests array in the single-leaf block. */ -#define XFS_DIR2_LEAF_BESTS_P(ltp) xfs_dir2_leaf_bests_p(ltp) static inline __be16 * xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) { @@ -114,7 +111,6 @@ xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) /* * Convert dataptr to byte in file space */ -#define XFS_DIR2_DATAPTR_TO_BYTE(mp,dp) xfs_dir2_dataptr_to_byte(mp, dp) static inline xfs_dir2_off_t xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) { @@ -124,7 +120,6 @@ xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) /* * Convert byte in file space to dataptr. It had better be aligned. 
*/ -#define XFS_DIR2_BYTE_TO_DATAPTR(mp,by) xfs_dir2_byte_to_dataptr(mp,by) static inline xfs_dir2_dataptr_t xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) { @@ -134,7 +129,6 @@ xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) /* * Convert byte in space to (DB) block */ -#define XFS_DIR2_BYTE_TO_DB(mp,by) xfs_dir2_byte_to_db(mp, by) static inline xfs_dir2_db_t xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) { @@ -145,17 +139,15 @@ xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) /* * Convert dataptr to a block number */ -#define XFS_DIR2_DATAPTR_TO_DB(mp,dp) xfs_dir2_dataptr_to_db(mp, dp) static inline xfs_dir2_db_t xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) { - return XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp)); + return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); } /* * Convert byte in space to offset in a block */ -#define XFS_DIR2_BYTE_TO_OFF(mp,by) xfs_dir2_byte_to_off(mp, by) static inline xfs_dir2_data_aoff_t xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) { @@ -166,18 +158,15 @@ xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) /* * Convert dataptr to a byte offset in a block */ -#define XFS_DIR2_DATAPTR_TO_OFF(mp,dp) xfs_dir2_dataptr_to_off(mp, dp) static inline xfs_dir2_data_aoff_t xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) { - return XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp)); + return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); } /* * Convert block and offset to byte in space */ -#define XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o) \ - xfs_dir2_db_off_to_byte(mp, db, o) static inline xfs_dir2_off_t xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, xfs_dir2_data_aoff_t o) @@ -189,7 +178,6 @@ xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, /* * Convert block (DB) to block (dablk) */ -#define XFS_DIR2_DB_TO_DA(mp,db) xfs_dir2_db_to_da(mp, db) 
static inline xfs_dablk_t xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) { @@ -199,29 +187,25 @@ xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) /* * Convert byte in space to (DA) block */ -#define XFS_DIR2_BYTE_TO_DA(mp,by) xfs_dir2_byte_to_da(mp, by) static inline xfs_dablk_t xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) { - return XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by)); + return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); } /* * Convert block and offset to dataptr */ -#define XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o) \ - xfs_dir2_db_off_to_dataptr(mp, db, o) static inline xfs_dir2_dataptr_t xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, xfs_dir2_data_aoff_t o) { - return XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o)); + return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); } /* * Convert block (dablk) to block (DB) */ -#define XFS_DIR2_DA_TO_DB(mp,da) xfs_dir2_da_to_db(mp, da) static inline xfs_dir2_db_t xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) { @@ -231,11 +215,10 @@ xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) /* * Convert block (dablk) to byte offset in space */ -#define XFS_DIR2_DA_TO_BYTE(mp,da) xfs_dir2_da_to_byte(mp, da) static inline xfs_dir2_off_t xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) { - return XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0); + return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); } /* diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index d083c3819934..91c61d9632c8 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -136,14 +136,14 @@ xfs_dir2_leaf_to_node( /* * Get the buffer for the new freespace block. 
*/ - if ((error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp, + if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp, XFS_DATA_FORK))) { return error; } ASSERT(fbp != NULL); free = fbp->data; leaf = lbp->data; - ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * Initialize the freespace block header. */ @@ -155,7 +155,7 @@ xfs_dir2_leaf_to_node( * Copy freespace entries from the leaf block to the new block. * Count active entries. */ - for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests; + for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { if ((off = be16_to_cpu(*from)) != NULLDATAOFF) n++; @@ -215,7 +215,7 @@ xfs_dir2_leafn_add( * a compact. */ - if (be16_to_cpu(leaf->hdr.count) == XFS_DIR2_MAX_LEAF_ENTS(mp)) { + if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) { if (!leaf->hdr.stale) return XFS_ERROR(ENOSPC); compact = be16_to_cpu(leaf->hdr.stale) > 1; @@ -327,7 +327,7 @@ xfs_dir2_leafn_add( * Insert the new entry, log everything. */ lep->hashval = cpu_to_be32(args->hashval); - lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, + lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, args->blkno, args->index)); xfs_dir2_leaf_log_header(tp, bp); xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh); @@ -352,7 +352,7 @@ xfs_dir2_leafn_check( leaf = bp->data; mp = dp->i_mount; ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); - ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); + ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { if (i + 1 < be16_to_cpu(leaf->hdr.count)) { ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= @@ -440,7 +440,7 @@ xfs_dir2_leafn_lookup_int( if (args->addname) { curfdb = curbp ? 
state->extrablk.blkno : -1; curdb = -1; - length = XFS_DIR2_DATA_ENTSIZE(args->namelen); + length = xfs_dir2_data_entsize(args->namelen); if ((free = (curbp ? curbp->data : NULL))) ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); } @@ -465,7 +465,7 @@ xfs_dir2_leafn_lookup_int( /* * Pull the data block number from the entry. */ - newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); + newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); /* * For addname, we're looking for a place to put the new entry. * We want to use a data block with an entry of equal @@ -482,7 +482,7 @@ xfs_dir2_leafn_lookup_int( * Convert the data block to the free block * holding its freespace information. */ - newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb); + newfdb = xfs_dir2_db_to_fdb(mp, newdb); /* * If it's not the one we have in hand, * read it in. @@ -497,7 +497,7 @@ xfs_dir2_leafn_lookup_int( * Read the free block. */ if ((error = xfs_da_read_buf(tp, dp, - XFS_DIR2_DB_TO_DA(mp, + xfs_dir2_db_to_da(mp, newfdb), -1, &curbp, XFS_DATA_FORK))) { @@ -517,7 +517,7 @@ xfs_dir2_leafn_lookup_int( /* * Get the index for our entry. */ - fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb); + fi = xfs_dir2_db_to_fdindex(mp, curdb); /* * If it has room, return it. */ @@ -561,7 +561,7 @@ xfs_dir2_leafn_lookup_int( */ if ((error = xfs_da_read_buf(tp, dp, - XFS_DIR2_DB_TO_DA(mp, newdb), -1, + xfs_dir2_db_to_da(mp, newdb), -1, &curbp, XFS_DATA_FORK))) { return error; } @@ -573,7 +573,7 @@ xfs_dir2_leafn_lookup_int( */ dep = (xfs_dir2_data_entry_t *) ((char *)curbp->data + - XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); /* * Compare the entry, return it if it matches. */ @@ -876,9 +876,9 @@ xfs_dir2_leafn_remove( /* * Extract the data block and offset from the entry. 
*/ - db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address)); + db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); ASSERT(dblk->blkno == db); - off = XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)); + off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)); ASSERT(dblk->index == off); /* * Kill the leaf entry by marking it stale. @@ -898,7 +898,7 @@ xfs_dir2_leafn_remove( longest = be16_to_cpu(data->hdr.bestfree[0].length); needlog = needscan = 0; xfs_dir2_data_make_free(tp, dbp, off, - XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); + xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * Rescan the data block freespaces for bestfree. * Log the data block header if needed. @@ -924,8 +924,8 @@ xfs_dir2_leafn_remove( * Convert the data block number to a free block, * read in the free block. */ - fdb = XFS_DIR2_DB_TO_FDB(mp, db); - if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), + fdb = xfs_dir2_db_to_fdb(mp, db); + if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp, XFS_DATA_FORK))) { return error; } @@ -937,7 +937,7 @@ xfs_dir2_leafn_remove( /* * Calculate which entry we need to fix. */ - findex = XFS_DIR2_DB_TO_FDINDEX(mp, db); + findex = xfs_dir2_db_to_fdindex(mp, db); longest = be16_to_cpu(data->hdr.bestfree[0].length); /* * If the data block is now empty we can get rid of it @@ -1073,7 +1073,7 @@ xfs_dir2_leafn_split( /* * Initialize the new leaf block. */ - error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno), + error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno), &newblk->bp, XFS_DIR2_LEAFN_MAGIC); if (error) { return error; @@ -1385,7 +1385,7 @@ xfs_dir2_node_addname_int( dp = args->dp; mp = dp->i_mount; tp = args->trans; - length = XFS_DIR2_DATA_ENTSIZE(args->namelen); + length = xfs_dir2_data_entsize(args->namelen); /* * If we came in with a freespace block that means that lookup * found an entry with our hash value. 
This is the freespace @@ -1438,7 +1438,7 @@ xfs_dir2_node_addname_int( if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) return error; - lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo); + lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo); fbno = ifbno; } /* @@ -1474,7 +1474,7 @@ xfs_dir2_node_addname_int( * to avoid it. */ if ((error = xfs_da_read_buf(tp, dp, - XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp, + xfs_dir2_db_to_da(mp, fbno), -2, &fbp, XFS_DATA_FORK))) { return error; } @@ -1550,9 +1550,9 @@ xfs_dir2_node_addname_int( * Get the freespace block corresponding to the data block * that was just allocated. */ - fbno = XFS_DIR2_DB_TO_FDB(mp, dbno); + fbno = xfs_dir2_db_to_fdb(mp, dbno); if (unlikely(error = xfs_da_read_buf(tp, dp, - XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp, + xfs_dir2_db_to_da(mp, fbno), -2, &fbp, XFS_DATA_FORK))) { xfs_da_buf_done(dbp); return error; @@ -1567,14 +1567,14 @@ xfs_dir2_node_addname_int( return error; } - if (unlikely(XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno)) { + if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { cmn_err(CE_ALERT, "xfs_dir2_node_addname_int: dir ino " "%llu needed freesp block %lld for\n" " data block %lld, got %lld\n" " ifbno %llu lastfbno %d\n", (unsigned long long)dp->i_ino, - (long long)XFS_DIR2_DB_TO_FDB(mp, dbno), + (long long)xfs_dir2_db_to_fdb(mp, dbno), (long long)dbno, (long long)fbno, (unsigned long long)ifbno, lastfbno); if (fblk) { @@ -1598,7 +1598,7 @@ xfs_dir2_node_addname_int( * Get a buffer for the new block. */ if ((error = xfs_da_get_buf(tp, dp, - XFS_DIR2_DB_TO_DA(mp, fbno), + xfs_dir2_db_to_da(mp, fbno), -1, &fbp, XFS_DATA_FORK))) { return error; } @@ -1623,7 +1623,7 @@ xfs_dir2_node_addname_int( /* * Set the freespace block index from the data block number. */ - findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno); + findex = xfs_dir2_db_to_fdindex(mp, dbno); /* * If it's after the end of the current entries in the * freespace block, extend that table. 
@@ -1669,7 +1669,7 @@ xfs_dir2_node_addname_int( * Read the data block in. */ if (unlikely( - error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno), + error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno), -1, &dbp, XFS_DATA_FORK))) { if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) xfs_da_buf_done(fbp); @@ -1698,7 +1698,7 @@ xfs_dir2_node_addname_int( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); - tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + tagp = xfs_dir2_data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)data); xfs_dir2_data_log_entry(tp, dbp, dep); /* @@ -1904,7 +1904,7 @@ xfs_dir2_node_replace( ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); dep = (xfs_dir2_data_entry_t *) ((char *)data + - XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); ASSERT(inum != be64_to_cpu(dep->inumber)); /* * Fill in the new inode number and log the entry. @@ -1980,7 +1980,7 @@ xfs_dir2_node_trim_free( * Blow the block away. */ if ((error = - xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo), + xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo), bp))) { /* * Can't fail with ENOSPC since that only happens with no diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h index c7c870ee7857..dde72db3d695 100644 --- a/fs/xfs/xfs_dir2_node.h +++ b/fs/xfs/xfs_dir2_node.h @@ -36,7 +36,7 @@ struct xfs_trans; #define XFS_DIR2_FREE_SPACE 2 #define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) #define XFS_DIR2_FREE_FIRSTDB(mp) \ - XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET) + xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) #define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ @@ -60,7 +60,6 @@ typedef struct xfs_dir2_free { /* * Convert data space db to the corresponding free db. 
*/ -#define XFS_DIR2_DB_TO_FDB(mp,db) xfs_dir2_db_to_fdb(mp, db) static inline xfs_dir2_db_t xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) { @@ -70,7 +69,6 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) /* * Convert data space db to the corresponding index in a free db. */ -#define XFS_DIR2_DB_TO_FDINDEX(mp,db) xfs_dir2_db_to_fdindex(mp, db) static inline int xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) { diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 0cd77b17bf92..38fc4f22b76d 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -89,8 +89,8 @@ xfs_dir2_block_sfsize( mp = dp->i_mount; count = i8count = namelen = 0; - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); - blp = XFS_DIR2_BLOCK_LEAF_P(btp); + btp = xfs_dir2_block_tail_p(mp, block); + blp = xfs_dir2_block_leaf_p(btp); /* * Iterate over the block's data entries by using the leaf pointers. @@ -102,7 +102,7 @@ xfs_dir2_block_sfsize( * Calculate the pointer to the entry at hand. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr)); + ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); /* * Detect . and .., so we can special-case them. * . is not included in sf directories. @@ -124,7 +124,7 @@ xfs_dir2_block_sfsize( /* * Calculate the new size, see if we should give up yet. */ - size = XFS_DIR2_SF_HDR_SIZE(i8count) + /* header */ + size = xfs_dir2_sf_hdr_size(i8count) + /* header */ count + /* namelen */ count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */ namelen + /* name */ @@ -139,7 +139,7 @@ xfs_dir2_block_sfsize( */ sfhp->count = count; sfhp->i8count = i8count; - XFS_DIR2_SF_PUT_INUMBER((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); + xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); return size; } @@ -199,15 +199,15 @@ xfs_dir2_block_to_sf( * Copy the header into the newly allocate local space. 
*/ sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - memcpy(sfp, sfhp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count)); + memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); dp->i_d.di_size = size; /* * Set up to loop over the block's entries. */ - btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + btp = xfs_dir2_block_tail_p(mp, block); ptr = (char *)block->u; - endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); - sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + endptr = (char *)xfs_dir2_block_leaf_p(btp); + sfep = xfs_dir2_sf_firstentry(sfp); /* * Loop over the active and unused entries. * Stop when we reach the leaf/tail portion of the block. @@ -233,22 +233,22 @@ xfs_dir2_block_to_sf( else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') ASSERT(be64_to_cpu(dep->inumber) == - XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent)); + xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); /* * Normal entry, copy it into shortform. */ else { sfep->namelen = dep->namelen; - XFS_DIR2_SF_PUT_OFFSET(sfep, + xfs_dir2_sf_put_offset(sfep, (xfs_dir2_data_aoff_t) ((char *)dep - (char *)block)); memcpy(sfep->name, dep->name, dep->namelen); temp = be64_to_cpu(dep->inumber); - XFS_DIR2_SF_PUT_INUMBER(sfp, &temp, - XFS_DIR2_SF_INUMBERP(sfep)); - sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); + xfs_dir2_sf_put_inumber(sfp, &temp, + xfs_dir2_sf_inumberp(sfep)); + sfep = xfs_dir2_sf_nextentry(sfp, sfep); } - ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + ptr += xfs_dir2_data_entsize(dep->namelen); } ASSERT((char *)sfep - (char *)sfp == size); xfs_dir2_sf_check(args); @@ -294,11 +294,11 @@ xfs_dir2_sf_addname( ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Compute entry (and change in) size. 
*/ - add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen); + add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); incr_isize = add_entsize; objchange = 0; #if XFS_BIG_INUMS @@ -392,7 +392,7 @@ xfs_dir2_sf_addname_easy( /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen), + xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. @@ -403,10 +403,10 @@ xfs_dir2_sf_addname_easy( * Fill in the new entry. */ sfep->namelen = args->namelen; - XFS_DIR2_SF_PUT_OFFSET(sfep, offset); + xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, - XFS_DIR2_SF_INUMBERP(sfep)); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); /* * Update the header and inode. */ @@ -463,14 +463,14 @@ xfs_dir2_sf_addname_hard( * If it's going to end up at the end then oldsfep will point there. */ for (offset = XFS_DIR2_DATA_FIRST_OFFSET, - oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp), - add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen), + oldsfep = xfs_dir2_sf_firstentry(oldsfp), + add_datasize = xfs_dir2_data_entsize(args->namelen), eof = (char *)oldsfep == &buf[old_isize]; !eof; - offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen), - oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep), + offset = new_offset + xfs_dir2_data_entsize(oldsfep->namelen), + oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep), eof = (char *)oldsfep == &buf[old_isize]) { - new_offset = XFS_DIR2_SF_GET_OFFSET(oldsfep); + new_offset = xfs_dir2_sf_get_offset(oldsfep); if (offset + add_datasize <= new_offset) break; } @@ -495,10 +495,10 @@ xfs_dir2_sf_addname_hard( * Fill in the new entry, and update the header counts. 
*/ sfep->namelen = args->namelen; - XFS_DIR2_SF_PUT_OFFSET(sfep, offset); + xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, - XFS_DIR2_SF_INUMBERP(sfep)); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); sfp->hdr.count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) @@ -508,7 +508,7 @@ xfs_dir2_sf_addname_hard( * If there's more left to copy, do that. */ if (!eof) { - sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); + sfep = xfs_dir2_sf_nextentry(sfp, sfep); memcpy(sfep, oldsfep, old_isize - nbytes); } kmem_free(buf, old_isize); @@ -544,9 +544,9 @@ xfs_dir2_sf_addname_pick( mp = dp->i_mount; sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - size = XFS_DIR2_DATA_ENTSIZE(args->namelen); + size = xfs_dir2_data_entsize(args->namelen); offset = XFS_DIR2_DATA_FIRST_OFFSET; - sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + sfep = xfs_dir2_sf_firstentry(sfp); holefit = 0; /* * Loop over sf entries. 
@@ -555,10 +555,10 @@ xfs_dir2_sf_addname_pick( */ for (i = 0; i < sfp->hdr.count; i++) { if (!holefit) - holefit = offset + size <= XFS_DIR2_SF_GET_OFFSET(sfep); - offset = XFS_DIR2_SF_GET_OFFSET(sfep) + - XFS_DIR2_DATA_ENTSIZE(sfep->namelen); - sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); + holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); + offset = xfs_dir2_sf_get_offset(sfep) + + xfs_dir2_data_entsize(sfep->namelen); + sfep = xfs_dir2_sf_nextentry(sfp, sfep); } /* * Calculate data bytes used excluding the new entry, if this @@ -617,18 +617,18 @@ xfs_dir2_sf_check( sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; offset = XFS_DIR2_DATA_FIRST_OFFSET; - ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; - for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; - i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { - ASSERT(XFS_DIR2_SF_GET_OFFSET(sfep) >= offset); - ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep)); + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { + ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); + ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; offset = - XFS_DIR2_SF_GET_OFFSET(sfep) + - XFS_DIR2_DATA_ENTSIZE(sfep->namelen); + xfs_dir2_sf_get_offset(sfep) + + xfs_dir2_data_entsize(sfep->namelen); } ASSERT(i8count == sfp->hdr.i8count); ASSERT(XFS_BIG_INUMS || i8count == 0); @@ -671,7 +671,7 @@ xfs_dir2_sf_create( ASSERT(dp->i_df.if_flags & XFS_IFINLINE); ASSERT(dp->i_df.if_bytes == 0); i8count = pino > XFS_DIR2_MAX_SHORT_INUM; - size = XFS_DIR2_SF_HDR_SIZE(i8count); + size = xfs_dir2_sf_hdr_size(i8count); /* * Make a buffer for the data. */ @@ -684,7 +684,7 @@ xfs_dir2_sf_create( /* * Now can put in the inode number, since i8count is set. 
*/ - XFS_DIR2_SF_PUT_INUMBER(sfp, &pino, &sfp->hdr.parent); + xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent); sfp->hdr.count = 0; dp->i_d.di_size = size; xfs_dir2_sf_check(args); @@ -727,12 +727,12 @@ xfs_dir2_sf_getdents( sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * If the block number in the offset is out of range, we're done. */ - if (XFS_DIR2_DATAPTR_TO_DB(mp, dir_offset) > mp->m_dirdatablk) { + if (xfs_dir2_dataptr_to_db(mp, dir_offset) > mp->m_dirdatablk) { *eofp = 1; return 0; } @@ -747,9 +747,9 @@ xfs_dir2_sf_getdents( * Put . entry unless we're starting past it. */ if (dir_offset <= - XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, XFS_DIR2_DATA_DOT_OFFSET)) { - p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, 0, + p.cook = xfs_dir2_db_off_to_dataptr(mp, 0, XFS_DIR2_DATA_DOTDOT_OFFSET); p.ino = dp->i_ino; #if XFS_BIG_INUMS @@ -762,7 +762,7 @@ xfs_dir2_sf_getdents( if (!p.done) { uio->uio_offset = - XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, XFS_DIR2_DATA_DOT_OFFSET); return error; } @@ -772,11 +772,11 @@ xfs_dir2_sf_getdents( * Put .. entry unless we're starting past it. 
*/ if (dir_offset <= - XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, XFS_DIR2_DATA_DOTDOT_OFFSET)) { - p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, XFS_DIR2_DATA_FIRST_OFFSET); - p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); + p.ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); #if XFS_BIG_INUMS p.ino += mp->m_inoadd; #endif @@ -787,7 +787,7 @@ xfs_dir2_sf_getdents( if (!p.done) { uio->uio_offset = - XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, XFS_DIR2_DATA_DOTDOT_OFFSET); return error; } @@ -796,23 +796,23 @@ xfs_dir2_sf_getdents( /* * Loop while there are more entries and put'ing works. */ - for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; - i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { - off = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, - XFS_DIR2_SF_GET_OFFSET(sfep)); + off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + xfs_dir2_sf_get_offset(sfep)); if (dir_offset > off) continue; p.namelen = sfep->namelen; - p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, - XFS_DIR2_SF_GET_OFFSET(sfep) + - XFS_DIR2_DATA_ENTSIZE(p.namelen)); + p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + xfs_dir2_sf_get_offset(sfep) + + xfs_dir2_data_entsize(p.namelen)); - p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep)); + p.ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); #if XFS_BIG_INUMS p.ino += mp->m_inoadd; #endif @@ -832,7 +832,7 @@ xfs_dir2_sf_getdents( *eofp = 1; uio->uio_offset = - XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0); + xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0); return 0; } @@ -865,7 +865,7 @@ xfs_dir2_sf_lookup( ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 
ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Special case for . */ @@ -878,21 +878,21 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - args->inumber = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); + args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); return XFS_ERROR(EEXIST); } /* * Loop over all the entries trying to match ours. */ - for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; - i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (sfep->namelen == args->namelen && sfep->name[0] == args->name[0] && memcmp(args->name, sfep->name, args->namelen) == 0) { args->inumber = - XFS_DIR2_SF_GET_INUMBER(sfp, - XFS_DIR2_SF_INUMBERP(sfep)); + xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)); return XFS_ERROR(EEXIST); } } @@ -934,19 +934,19 @@ xfs_dir2_sf_removename( ASSERT(dp->i_df.if_bytes == oldsize); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); /* * Loop over the old directory entries. * Find the one we're deleting. 
*/ - for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; - i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (sfep->namelen == args->namelen && sfep->name[0] == args->name[0] && memcmp(sfep->name, args->name, args->namelen) == 0) { - ASSERT(XFS_DIR2_SF_GET_INUMBER(sfp, - XFS_DIR2_SF_INUMBERP(sfep)) == + ASSERT(xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)) == args->inumber); break; } @@ -961,7 +961,7 @@ xfs_dir2_sf_removename( * Calculate sizes. */ byteoff = (int)((char *)sfep - (char *)sfp); - entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen); + entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); newsize = oldsize - entsize; /* * Copy the part if any after the removed entry, sliding it down. @@ -1027,7 +1027,7 @@ xfs_dir2_sf_replace( ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); #if XFS_BIG_INUMS /* * New inode number is large, and need to convert to 8-byte inodes. @@ -1067,28 +1067,28 @@ xfs_dir2_sf_replace( if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { #if XFS_BIG_INUMS || defined(DEBUG) - ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); ASSERT(args->inumber != ino); #endif - XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, &sfp->hdr.parent); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent); } /* * Normal entry, look for the name. 
*/ else { - for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; - i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (sfep->namelen == args->namelen && sfep->name[0] == args->name[0] && memcmp(args->name, sfep->name, args->namelen) == 0) { #if XFS_BIG_INUMS || defined(DEBUG) - ino = XFS_DIR2_SF_GET_INUMBER(sfp, - XFS_DIR2_SF_INUMBERP(sfep)); + ino = xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)); ASSERT(args->inumber != ino); #endif - XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, - XFS_DIR2_SF_INUMBERP(sfep)); + xfs_dir2_sf_put_inumber(sfp, &args->inumber, + xfs_dir2_sf_inumberp(sfep)); break; } } @@ -1189,22 +1189,22 @@ xfs_dir2_sf_toino4( */ sfp->hdr.count = oldsfp->hdr.count; sfp->hdr.i8count = 0; - ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent); - XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); + xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); /* * Copy the entries field by field. */ - for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), - oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), + oldsfep = xfs_dir2_sf_firstentry(oldsfp); i < sfp->hdr.count; - i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), - oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) { + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), + oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, - XFS_DIR2_SF_INUMBERP(oldsfep)); - XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep)); + ino = xfs_dir2_sf_get_inumber(oldsfp, + xfs_dir2_sf_inumberp(oldsfep)); + xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); } /* * Clean up the inode. 
@@ -1266,22 +1266,22 @@ xfs_dir2_sf_toino8( */ sfp->hdr.count = oldsfp->hdr.count; sfp->hdr.i8count = 1; - ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent); - XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); + xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); /* * Copy the entries field by field. */ - for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), - oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), + oldsfep = xfs_dir2_sf_firstentry(oldsfp); i < sfp->hdr.count; - i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), - oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) { + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), + oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, - XFS_DIR2_SF_INUMBERP(oldsfep)); - XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep)); + ino = xfs_dir2_sf_get_inumber(oldsfp, + xfs_dir2_sf_inumberp(oldsfep)); + xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); } /* * Clean up the inode. 
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h index 42f015b70018..11e503209afa 100644 --- a/fs/xfs/xfs_dir2_sf.h +++ b/fs/xfs/xfs_dir2_sf.h @@ -90,7 +90,6 @@ typedef struct xfs_dir2_sf { xfs_dir2_sf_entry_t list[1]; /* shortform entries */ } xfs_dir2_sf_t; -#define XFS_DIR2_SF_HDR_SIZE(i8count) xfs_dir2_sf_hdr_size(i8count) static inline int xfs_dir2_sf_hdr_size(int i8count) { return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \ @@ -98,14 +97,11 @@ static inline int xfs_dir2_sf_hdr_size(int i8count) ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); } -#define XFS_DIR2_SF_INUMBERP(sfep) xfs_dir2_sf_inumberp(sfep) static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep) { return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]; } -#define XFS_DIR2_SF_GET_INUMBER(sfp, from) \ - xfs_dir2_sf_get_inumber(sfp, from) static inline xfs_intino_t xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from) { @@ -114,8 +110,6 @@ xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from) (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8)); } -#define XFS_DIR2_SF_PUT_INUMBER(sfp,from,to) \ - xfs_dir2_sf_put_inumber(sfp,from,to) static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, xfs_dir2_inou_t *to) { @@ -125,24 +119,18 @@ static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, XFS_PUT_DIR_INO8(*(from), (to)->i8); } -#define XFS_DIR2_SF_GET_OFFSET(sfep) \ - xfs_dir2_sf_get_offset(sfep) static inline xfs_dir2_data_aoff_t xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) { return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i); } -#define XFS_DIR2_SF_PUT_OFFSET(sfep,off) \ - xfs_dir2_sf_put_offset(sfep,off) static inline void xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) { INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off); } -#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len) \ - xfs_dir2_sf_entsize_byname(sfp,len) static inline int 
xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len) { return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \ @@ -150,8 +138,6 @@ static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len) ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); } -#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep) \ - xfs_dir2_sf_entsize_byentry(sfp,sfep) static inline int xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) { @@ -160,19 +146,17 @@ xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); } -#define XFS_DIR2_SF_FIRSTENTRY(sfp) xfs_dir2_sf_firstentry(sfp) static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp) { return ((xfs_dir2_sf_entry_t *) \ - ((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count))); + ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count))); } -#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep) xfs_dir2_sf_nextentry(sfp,sfep) static inline xfs_dir2_sf_entry_t * xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) { return ((xfs_dir2_sf_entry_t *) \ - ((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep))); + ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep))); } /* diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c new file mode 100644 index 000000000000..ce2278611bb7 --- /dev/null +++ b/fs/xfs/xfs_filestream.c @@ -0,0 +1,771 @@ +/* + * Copyright (c) 2006-2007 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_bmap_btree.h" +#include "xfs_inum.h" +#include "xfs_dir2.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ag.h" +#include "xfs_dmapi.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_bmap.h" +#include "xfs_alloc.h" +#include "xfs_utils.h" +#include "xfs_mru_cache.h" +#include "xfs_filestream.h" + +#ifdef XFS_FILESTREAMS_TRACE + +ktrace_t *xfs_filestreams_trace_buf; + +STATIC void +xfs_filestreams_trace( + xfs_mount_t *mp, /* mount point */ + int type, /* type of trace */ + const char *func, /* source function */ + int line, /* source line number */ + __psunsigned_t arg0, + __psunsigned_t arg1, + __psunsigned_t arg2, + __psunsigned_t arg3, + __psunsigned_t arg4, + __psunsigned_t arg5) +{ + ktrace_enter(xfs_filestreams_trace_buf, + (void *)(__psint_t)(type | (line << 16)), + (void *)func, + (void *)(__psunsigned_t)current_pid(), + (void *)mp, + (void *)(__psunsigned_t)arg0, + (void *)(__psunsigned_t)arg1, + (void *)(__psunsigned_t)arg2, + (void *)(__psunsigned_t)arg3, + (void *)(__psunsigned_t)arg4, + (void *)(__psunsigned_t)arg5, + NULL, NULL, NULL, NULL, NULL, NULL); +} + +#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0) +#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0) +#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0) +#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0) +#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) +#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) +#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ + xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \ + (__psunsigned_t)a0, (__psunsigned_t)a1, \ + (__psunsigned_t)a2, (__psunsigned_t)a3, \ + 
(__psunsigned_t)a4, (__psunsigned_t)a5) + +#define TRACE_AG_SCAN(mp, ag, ag2) \ + TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2); +#define TRACE_AG_PICK1(mp, max_ag, maxfree) \ + TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree); +#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \ + TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \ + cnt, free, scan, flag) +#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \ + TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2) +#define TRACE_FREE(mp, ip, pip, ag, cnt) \ + TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt) +#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \ + TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt) +#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \ + TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt) +#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \ + TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt) +#define TRACE_ORPHAN(mp, ip, ag) \ + TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag); + + +#else +#define TRACE_AG_SCAN(mp, ag, ag2) +#define TRACE_AG_PICK1(mp, max_ag, maxfree) +#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) +#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) +#define TRACE_FREE(mp, ip, pip, ag, cnt) +#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) +#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) +#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) +#define TRACE_ORPHAN(mp, ip, ag) +#endif + +static kmem_zone_t *item_zone; + +/* + * Structure for associating a file or a directory with an allocation group. + * The parent directory pointer is only needed for files, but since there will + * generally be vastly more files than directories in the cache, using the same + * data structure simplifies the code with very little memory overhead. + */ +typedef struct fstrm_item +{ + xfs_agnumber_t ag; /* AG currently in use for the file/directory. */ + xfs_inode_t *ip; /* inode self-pointer. */ + xfs_inode_t *pip; /* Parent directory inode pointer. 
*/ +} fstrm_item_t; + + +/* + * Scan the AGs starting at startag looking for an AG that isn't in use and has + * at least minlen blocks free. + */ +static int +_xfs_filestream_pick_ag( + xfs_mount_t *mp, + xfs_agnumber_t startag, + xfs_agnumber_t *agp, + int flags, + xfs_extlen_t minlen) +{ + int err, trylock, nscan; + xfs_extlen_t delta, longest, need, free, minfree, maxfree = 0; + xfs_agnumber_t ag, max_ag = NULLAGNUMBER; + struct xfs_perag *pag; + + /* 2% of an AG's blocks must be free for it to be chosen. */ + minfree = mp->m_sb.sb_agblocks / 50; + + ag = startag; + *agp = NULLAGNUMBER; + + /* For the first pass, don't sleep trying to init the per-AG. */ + trylock = XFS_ALLOC_FLAG_TRYLOCK; + + for (nscan = 0; 1; nscan++) { + + TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag)); + + pag = mp->m_perag + ag; + + if (!pag->pagf_init) { + err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); + if (err && !trylock) + return err; + } + + /* Might fail sometimes during the 1st pass with trylock set. */ + if (!pag->pagf_init) + goto next_ag; + + /* Keep track of the AG with the most free blocks. */ + if (pag->pagf_freeblks > maxfree) { + maxfree = pag->pagf_freeblks; + max_ag = ag; + } + + /* + * The AG reference count does two things: it enforces mutual + * exclusion when examining the suitability of an AG in this + * loop, and it guards against two filestreams being established + * in the same AG as each other. + */ + if (xfs_filestream_get_ag(mp, ag) > 1) { + xfs_filestream_put_ag(mp, ag); + goto next_ag; + } + + need = XFS_MIN_FREELIST_PAG(pag, mp); + delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; + longest = (pag->pagf_longest > delta) ? 
+ (pag->pagf_longest - delta) : + (pag->pagf_flcount > 0 || pag->pagf_longest > 0); + + if (((minlen && longest >= minlen) || + (!minlen && pag->pagf_freeblks >= minfree)) && + (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || + (flags & XFS_PICK_LOWSPACE))) { + + /* Break out, retaining the reference on the AG. */ + free = pag->pagf_freeblks; + *agp = ag; + break; + } + + /* Drop the reference on this AG, it's not usable. */ + xfs_filestream_put_ag(mp, ag); +next_ag: + /* Move to the next AG, wrapping to AG 0 if necessary. */ + if (++ag >= mp->m_sb.sb_agcount) + ag = 0; + + /* If a full pass of the AGs hasn't been done yet, continue. */ + if (ag != startag) + continue; + + /* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */ + if (trylock != 0) { + trylock = 0; + continue; + } + + /* Finally, if lowspace wasn't set, set it for the 3rd pass. */ + if (!(flags & XFS_PICK_LOWSPACE)) { + flags |= XFS_PICK_LOWSPACE; + continue; + } + + /* + * Take the AG with the most free space, regardless of whether + * it's already in use by another filestream. + */ + if (max_ag != NULLAGNUMBER) { + xfs_filestream_get_ag(mp, max_ag); + TRACE_AG_PICK1(mp, max_ag, maxfree); + free = maxfree; + *agp = max_ag; + break; + } + + /* take AG 0 if none matched */ + TRACE_AG_PICK1(mp, max_ag, maxfree); + *agp = 0; + return 0; + } + + TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp), + free, nscan, flags); + + return 0; +} + +/* + * Set the allocation group number for a file or a directory, updating inode + * references and per-AG references as appropriate. Must be called with the + * m_peraglock held in read mode. + */ +static int +_xfs_filestream_update_ag( + xfs_inode_t *ip, + xfs_inode_t *pip, + xfs_agnumber_t ag) +{ + int err = 0; + xfs_mount_t *mp; + xfs_mru_cache_t *cache; + fstrm_item_t *item; + xfs_agnumber_t old_ag; + xfs_inode_t *old_pip; + + /* + * Either ip is a regular file and pip is a directory, or ip is a + * directory and pip is NULL. 
+ */ + ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip && + (pip->i_d.di_mode & S_IFDIR)) || + ((ip->i_d.di_mode & S_IFDIR) && !pip))); + + mp = ip->i_mount; + cache = mp->m_filestream; + + item = xfs_mru_cache_lookup(cache, ip->i_ino); + if (item) { + ASSERT(item->ip == ip); + old_ag = item->ag; + item->ag = ag; + old_pip = item->pip; + item->pip = pip; + xfs_mru_cache_done(cache); + + /* + * If the AG has changed, drop the old ref and take a new one, + * effectively transferring the reference from old to new AG. + */ + if (ag != old_ag) { + xfs_filestream_put_ag(mp, old_ag); + xfs_filestream_get_ag(mp, ag); + } + + /* + * If ip is a file and its pip has changed, drop the old ref and + * take a new one. + */ + if (pip && pip != old_pip) { + IRELE(old_pip); + IHOLD(pip); + } + + TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag), + ag, xfs_filestream_peek_ag(mp, ag)); + return 0; + } + + item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); + if (!item) + return ENOMEM; + + item->ag = ag; + item->ip = ip; + item->pip = pip; + + err = xfs_mru_cache_insert(cache, ip->i_ino, item); + if (err) { + kmem_zone_free(item_zone, item); + return err; + } + + /* Take a reference on the AG. */ + xfs_filestream_get_ag(mp, ag); + + /* + * Take a reference on the inode itself regardless of whether it's a + * regular file or a directory. + */ + IHOLD(ip); + + /* + * In the case of a regular file, take a reference on the parent inode + * as well to ensure it remains in-core. + */ + if (pip) + IHOLD(pip); + + TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag), + ag, xfs_filestream_peek_ag(mp, ag)); + + return 0; +} + +/* xfs_fstrm_free_func(): callback for freeing cached stream items. */ +void +xfs_fstrm_free_func( + xfs_ino_t ino, + fstrm_item_t *item) +{ + xfs_inode_t *ip = item->ip; + int ref; + + ASSERT(ip->i_ino == ino); + + xfs_iflags_clear(ip, XFS_IFILESTREAM); + + /* Drop the reference taken on the AG when the item was added. 
*/ + ref = xfs_filestream_put_ag(ip->i_mount, item->ag); + + ASSERT(ref >= 0); + TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, + xfs_filestream_peek_ag(ip->i_mount, item->ag)); + + /* + * _xfs_filestream_update_ag() always takes a reference on the inode + * itself, whether it's a file or a directory. Release it here. + * This can result in the inode being freed and so we must + * not hold any inode locks when freeing filestream objects + * otherwise we can deadlock here. + */ + IRELE(ip); + + /* + * In the case of a regular file, _xfs_filestream_update_ag() also + * takes a ref on the parent inode to keep it in-core. Release that + * too. + */ + if (item->pip) + IRELE(item->pip); + + /* Finally, free the memory allocated for the item. */ + kmem_zone_free(item_zone, item); +} + +/* + * xfs_filestream_init() is called at xfs initialisation time to set up the + * memory zone that will be used for filestream data structure allocation. + */ +int +xfs_filestream_init(void) +{ + item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); +#ifdef XFS_FILESTREAMS_TRACE + xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); +#endif + return item_zone ? 0 : -ENOMEM; +} + +/* + * xfs_filestream_uninit() is called at xfs termination time to destroy the + * memory zone that was used for filestream data structure allocation. + */ +void +xfs_filestream_uninit(void) +{ +#ifdef XFS_FILESTREAMS_TRACE + ktrace_free(xfs_filestreams_trace_buf); +#endif + kmem_zone_destroy(item_zone); +} + +/* + * xfs_filestream_mount() is called when a file system is mounted with the + * filestream option. It is responsible for allocating the data structures + * needed to track the new file system's file streams. + */ +int +xfs_filestream_mount( + xfs_mount_t *mp) +{ + int err; + unsigned int lifetime, grp_count; + + /* + * The filestream timer tunable is currently fixed within the range of + * one second to four minutes, with five seconds being the default.
The + * group count is somewhat arbitrary, but it'd be nice to adhere to the + * timer tunable to within about 10 percent. This requires at least 10 + * groups. + */ + lifetime = xfs_fstrm_centisecs * 10; + grp_count = 10; + + err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, + (xfs_mru_cache_free_func_t)xfs_fstrm_free_func); + + return err; +} + +/* + * xfs_filestream_unmount() is called when a file system that was mounted with + * the filestream option is unmounted. It drains the data structures created + * to track the file system's file streams and frees all the memory that was + * allocated. + */ +void +xfs_filestream_unmount( + xfs_mount_t *mp) +{ + xfs_mru_cache_destroy(mp->m_filestream); +} + +/* + * If the mount point's m_perag array is going to be reallocated, all + * outstanding cache entries must be flushed to avoid accessing reference count + * addresses that have been freed. The call to xfs_filestream_flush() must be + * made inside the block that holds the m_peraglock in write mode to do the + * reallocation. + */ +void +xfs_filestream_flush( + xfs_mount_t *mp) +{ + /* point in time flush, so keep the reaper running */ + xfs_mru_cache_flush(mp->m_filestream, 1); +} + +/* + * Return the AG of the filestream the file or directory belongs to, or + * NULLAGNUMBER otherwise. 
+ */ +xfs_agnumber_t +xfs_filestream_lookup_ag( + xfs_inode_t *ip) +{ + xfs_mru_cache_t *cache; + fstrm_item_t *item; + xfs_agnumber_t ag; + int ref; + + if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) { + ASSERT(0); + return NULLAGNUMBER; + } + + cache = ip->i_mount->m_filestream; + item = xfs_mru_cache_lookup(cache, ip->i_ino); + if (!item) { + TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0); + return NULLAGNUMBER; + } + + ASSERT(ip == item->ip); + ag = item->ag; + ref = xfs_filestream_peek_ag(ip->i_mount, ag); + xfs_mru_cache_done(cache); + + TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); + return ag; +} + +/* + * xfs_filestream_associate() should only be called to associate a regular file + * with its parent directory. Calling it with a child directory isn't + * appropriate because filestreams don't apply to entire directory hierarchies. + * Creating a file in a child directory of an existing filestream directory + * starts a new filestream with its own allocation group association. + * + * Returns < 0 on error, 0 if successful association occurred, > 0 if + * we failed to get an association because of locking issues. + */ +int +xfs_filestream_associate( + xfs_inode_t *pip, + xfs_inode_t *ip) +{ + xfs_mount_t *mp; + xfs_mru_cache_t *cache; + fstrm_item_t *item; + xfs_agnumber_t ag, rotorstep, startag; + int err = 0; + + ASSERT(pip->i_d.di_mode & S_IFDIR); + ASSERT(ip->i_d.di_mode & S_IFREG); + if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG)) + return -EINVAL; + + mp = pip->i_mount; + cache = mp->m_filestream; + down_read(&mp->m_peraglock); + + /* + * We have a problem, Houston. + * + * Taking the iolock here violates inode locking order - we already + * hold the ilock. Hence if we block getting this lock we may never + * wake. 
Unfortunately, that means if we can't get the lock, we're + * screwed in terms of getting a stream association - we can't spin + * waiting for the lock because someone else is waiting on the lock we + * hold and we cannot drop that as we are in a transaction here. + * + * Lucky for us, this inversion is rarely a problem because it's a + * directory inode that we are trying to lock here and that means the + * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is + * used. i.e. freeze, remount-ro, quotasync or unmount. + * + * So, if we can't get the iolock without sleeping then just give up + */ + if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) { + up_read(&mp->m_peraglock); + return 1; + } + + /* If the parent directory is already in the cache, use its AG. */ + item = xfs_mru_cache_lookup(cache, pip->i_ino); + if (item) { + ASSERT(item->ip == pip); + ag = item->ag; + xfs_mru_cache_done(cache); + + TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag)); + err = _xfs_filestream_update_ag(ip, pip, ag); + + goto exit; + } + + /* + * Set the starting AG using the rotor for inode32, otherwise + * use the directory inode's AG. + */ + if (mp->m_flags & XFS_MOUNT_32BITINODES) { + rotorstep = xfs_rotorstep; + startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; + mp->m_agfrotor = (mp->m_agfrotor + 1) % + (mp->m_sb.sb_agcount * rotorstep); + } else + startag = XFS_INO_TO_AGNO(mp, pip->i_ino); + + /* Pick a new AG for the parent inode starting at startag. */ + err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); + if (err || ag == NULLAGNUMBER) + goto exit_did_pick; + + /* Associate the parent inode with the AG. */ + err = _xfs_filestream_update_ag(pip, NULL, ag); + if (err) + goto exit_did_pick; + + /* Associate the file inode with the AG. 
*/ + err = _xfs_filestream_update_ag(ip, pip, ag); + if (err) + goto exit_did_pick; + + TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag)); + +exit_did_pick: + /* + * If _xfs_filestream_pick_ag() returned a valid AG, remove the + * reference it took on it, since the file and directory will have taken + * their own now if they were successfully cached. + */ + if (ag != NULLAGNUMBER) + xfs_filestream_put_ag(mp, ag); + +exit: + xfs_iunlock(pip, XFS_IOLOCK_EXCL); + up_read(&mp->m_peraglock); + return -err; +} + +/* + * Pick a new allocation group for the current file and its file stream. This + * function is called by xfs_bmap_filestreams() with the mount point's per-ag + * lock held. + */ +int +xfs_filestream_new_ag( + xfs_bmalloca_t *ap, + xfs_agnumber_t *agp) +{ + int flags, err; + xfs_inode_t *ip, *pip = NULL; + xfs_mount_t *mp; + xfs_mru_cache_t *cache; + xfs_extlen_t minlen; + fstrm_item_t *dir, *file; + xfs_agnumber_t ag = NULLAGNUMBER; + + ip = ap->ip; + mp = ip->i_mount; + cache = mp->m_filestream; + minlen = ap->alen; + *agp = NULLAGNUMBER; + + /* + * Look for the file in the cache, removing it if it's found. Doing + * this allows it to be held across the dir lookup that follows. + */ + file = xfs_mru_cache_remove(cache, ip->i_ino); + if (file) { + ASSERT(ip == file->ip); + + /* Save the file's parent inode and old AG number for later. */ + pip = file->pip; + ag = file->ag; + + /* Look for the file's directory in the cache. */ + dir = xfs_mru_cache_lookup(cache, pip->i_ino); + if (dir) { + ASSERT(pip == dir->ip); + + /* + * If the directory has already moved on to a new AG, + * use that AG as the new AG for the file. Don't + * forget to twiddle the AG refcounts to match the + * movement. + */ + if (dir->ag != file->ag) { + xfs_filestream_put_ag(mp, file->ag); + xfs_filestream_get_ag(mp, dir->ag); + *agp = file->ag = dir->ag; + } + + xfs_mru_cache_done(cache); + } + + /* + * Put the file back in the cache. 
If this fails, the free + * function needs to be called to tidy up in the same way as if + * the item had simply expired from the cache. + */ + err = xfs_mru_cache_insert(cache, ip->i_ino, file); + if (err) { + xfs_fstrm_free_func(ip->i_ino, file); + return err; + } + + /* + * If the file's AG was moved to the directory's new AG, there's + * nothing more to be done. + */ + if (*agp != NULLAGNUMBER) { + TRACE_MOVEAG(mp, ip, pip, + ag, xfs_filestream_peek_ag(mp, ag), + *agp, xfs_filestream_peek_ag(mp, *agp)); + return 0; + } + } + + /* + * If the file's parent directory is known, take its iolock in exclusive + * mode to prevent two sibling files from racing each other to migrate + * themselves and their parent to different AGs. + */ + if (pip) + xfs_ilock(pip, XFS_IOLOCK_EXCL); + + /* + * A new AG needs to be found for the file. If the file's parent + * directory is also known, it will be moved to the new AG as well to + * ensure that files created inside it in future use the new AG. + */ + ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; + flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | + (ap->low ? XFS_PICK_LOWSPACE : 0); + + err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); + if (err || *agp == NULLAGNUMBER) + goto exit; + + /* + * If the file wasn't found in the file cache, then its parent directory + * inode isn't known. For this to have happened, the file must either + * be pre-existing, or it was created long enough ago that its cache + * entry has expired. This isn't the sort of usage that the filestreams + * allocator is trying to optimise, so there's no point trying to track + * its new AG somehow in the filestream data structures. + */ + if (!pip) { + TRACE_ORPHAN(mp, ip, *agp); + goto exit; + } + + /* Associate the parent inode with the AG. */ + err = _xfs_filestream_update_ag(pip, NULL, *agp); + if (err) + goto exit; + + /* Associate the file inode with the AG. 
*/ + err = _xfs_filestream_update_ag(ip, pip, *agp); + if (err) + goto exit; + + TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0, + *agp, xfs_filestream_peek_ag(mp, *agp)); + +exit: + /* + * If _xfs_filestream_pick_ag() returned a valid AG, remove the + * reference it took on it, since the file and directory will have taken + * their own now if they were successfully cached. + */ + if (*agp != NULLAGNUMBER) + xfs_filestream_put_ag(mp, *agp); + else + *agp = 0; + + if (pip) + xfs_iunlock(pip, XFS_IOLOCK_EXCL); + + return err; +} + +/* + * Remove an association between an inode and a filestream object. + * Typically this is done on last close of an unlinked file. + */ +void +xfs_filestream_deassociate( + xfs_inode_t *ip) +{ + xfs_mru_cache_t *cache = ip->i_mount->m_filestream; + + xfs_mru_cache_delete(cache, ip->i_ino); +} diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h new file mode 100644 index 000000000000..f655f7dc334c --- /dev/null +++ b/fs/xfs/xfs_filestream.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2006-2007 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_FILESTREAM_H__ +#define __XFS_FILESTREAM_H__ + +#ifdef __KERNEL__ + +struct xfs_mount; +struct xfs_inode; +struct xfs_perag; +struct xfs_bmalloca; + +#ifdef XFS_FILESTREAMS_TRACE +#define XFS_FSTRM_KTRACE_INFO 1 +#define XFS_FSTRM_KTRACE_AGSCAN 2 +#define XFS_FSTRM_KTRACE_AGPICK1 3 +#define XFS_FSTRM_KTRACE_AGPICK2 4 +#define XFS_FSTRM_KTRACE_UPDATE 5 +#define XFS_FSTRM_KTRACE_FREE 6 +#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7 +#define XFS_FSTRM_KTRACE_ASSOCIATE 8 +#define XFS_FSTRM_KTRACE_MOVEAG 9 +#define XFS_FSTRM_KTRACE_ORPHAN 10 + +#define XFS_FSTRM_KTRACE_SIZE 16384 +extern ktrace_t *xfs_filestreams_trace_buf; + +#endif + +/* + * Allocation group filestream associations are tracked with per-ag atomic + * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a + * particular AG already has active filestreams associated with it. The mount + * point's m_peraglock is used to protect these counters from per-ag array + * re-allocation during a growfs operation. When xfs_growfs_data_private() is + * about to reallocate the array, it calls xfs_filestream_flush() with the + * m_peraglock held in write mode. + * + * Since xfs_mru_cache_flush() guarantees that all the free functions for all + * the cache elements have finished executing before it returns, it's safe for + * the free functions to use the atomic counters without m_peraglock protection. + * This allows the implementation of xfs_fstrm_free_func() to be agnostic about + * whether it was called with the m_peraglock held in read mode, write mode or + * not held at all. 
The race condition this addresses is the following: + * + * - The work queue scheduler fires and pulls a filestream directory cache + * element off the LRU end of the cache for deletion, then gets pre-empted. + * - A growfs operation grabs the m_peraglock in write mode, flushes all the + * remaining items from the cache and reallocates the mount point's per-ag + * array, resetting all the counters to zero. + * - The work queue thread resumes and calls the free function for the element + * it started cleaning up earlier. In the process it decrements the + * filestreams counter for an AG that now has no references. + * + * With a shrinkfs feature, the above scenario could panic the system. + * + * All other uses of the following macros should be protected by either the + * m_peraglock held in read mode, or the cache's internal locking exposed by the + * interval between a call to xfs_mru_cache_lookup() and a call to + * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode + * when new elements are added to the cache. + * + * Combined, these locking rules ensure that no associations will ever exist in + * the cache that reference per-ag array elements that have since been + * reallocated. 
+ */ +STATIC_INLINE int +xfs_filestream_peek_ag( + xfs_mount_t *mp, + xfs_agnumber_t agno) +{ + return atomic_read(&mp->m_perag[agno].pagf_fstrms); +} + +STATIC_INLINE int +xfs_filestream_get_ag( + xfs_mount_t *mp, + xfs_agnumber_t agno) +{ + return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms); +} + +STATIC_INLINE int +xfs_filestream_put_ag( + xfs_mount_t *mp, + xfs_agnumber_t agno) +{ + return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms); +} + +/* allocation selection flags */ +typedef enum xfs_fstrm_alloc { + XFS_PICK_USERDATA = 1, + XFS_PICK_LOWSPACE = 2, +} xfs_fstrm_alloc_t; + +/* prototypes for filestream.c */ +int xfs_filestream_init(void); +void xfs_filestream_uninit(void); +int xfs_filestream_mount(struct xfs_mount *mp); +void xfs_filestream_unmount(struct xfs_mount *mp); +void xfs_filestream_flush(struct xfs_mount *mp); +xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); +int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); +void xfs_filestream_deassociate(struct xfs_inode *ip); +int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); + + +/* filestreams for the inode? 
*/ +STATIC_INLINE int +xfs_inode_is_filestream( + struct xfs_inode *ip) +{ + return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || + xfs_iflags_test(ip, XFS_IFILESTREAM) || + (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM); +} + +#endif /* __KERNEL__ */ + +#endif /* __XFS_FILESTREAM_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 1335449841cd..ec3c9c27e0de 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -66,6 +66,7 @@ struct fsxattr { #define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ #define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ +#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* @@ -238,6 +239,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ #define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ #define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ +#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ /* diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b599e6be9ec1..432e82347ed6 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -44,6 +44,7 @@ #include "xfs_trans_space.h" #include "xfs_rtalloc.h" #include "xfs_rw.h" +#include "xfs_filestream.h" /* * File system operations @@ -94,6 +95,8 @@ xfs_fs_geometry( XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | (XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | + (xfs_sb_version_haslazysbcount(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | (XFS_SB_VERSION_HASATTR2(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_ATTR2 : 0); geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ? 
@@ -140,6 +143,8 @@ xfs_growfs_data_private( pct = in->imaxpct; if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100) return XFS_ERROR(EINVAL); + if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) + return error; dpct = pct - mp->m_sb.sb_imax_pct; error = xfs_read_buf(mp, mp->m_ddev_targp, XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), @@ -161,6 +166,7 @@ xfs_growfs_data_private( new = nb - mp->m_sb.sb_dblocks; oagcount = mp->m_sb.sb_agcount; if (nagcount > oagcount) { + xfs_filestream_flush(mp); down_write(&mp->m_peraglock); mp->m_perag = kmem_realloc(mp->m_perag, sizeof(xfs_perag_t) * nagcount, @@ -173,6 +179,7 @@ xfs_growfs_data_private( up_write(&mp->m_peraglock); } tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); + tp->t_flags |= XFS_TRANS_RESERVE; if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) { xfs_trans_cancel(tp, 0); @@ -328,6 +335,7 @@ xfs_growfs_data_private( be32_add(&agf->agf_length, new); ASSERT(be32_to_cpu(agf->agf_length) == be32_to_cpu(agi->agi_length)); + xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); /* * Free the new space. 
*/ @@ -494,8 +502,9 @@ xfs_reserve_blocks( unsigned long s; /* If inval is null, report current values and return */ - if (inval == (__uint64_t *)NULL) { + if (!outval) + return EINVAL; outval->resblks = mp->m_resblks; outval->resblks_avail = mp->m_resblks_avail; return 0; @@ -558,8 +567,10 @@ retry: } } out: - outval->resblks = mp->m_resblks; - outval->resblks_avail = mp->m_resblks_avail; + if (outval) { + outval->resblks = mp->m_resblks; + outval->resblks_avail = mp->m_resblks_avail; + } XFS_SB_UNLOCK(mp, s); if (fdblks_delta) { diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index b5feb3e77116..f943368c9b93 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -123,6 +123,7 @@ xfs_ialloc_ag_alloc( int blks_per_cluster; /* fs blocks per inode cluster */ xfs_btree_cur_t *cur; /* inode btree cursor */ xfs_daddr_t d; /* disk addr of buffer */ + xfs_agnumber_t agno; int error; xfs_buf_t *fbuf; /* new free inodes' buffer */ xfs_dinode_t *free; /* new free inode structure */ @@ -302,15 +303,15 @@ xfs_ialloc_ag_alloc( } be32_add(&agi->agi_count, newlen); be32_add(&agi->agi_freecount, newlen); + agno = be32_to_cpu(agi->agi_seqno); down_read(&args.mp->m_peraglock); - args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen; + args.mp->m_perag[agno].pagi_freecount += newlen; up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); /* * Insert records describing the new inode chunk into the btree. 
*/ - cur = xfs_btree_init_cursor(args.mp, tp, agbp, - be32_to_cpu(agi->agi_seqno), + cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno, XFS_BTNUM_INO, (xfs_inode_t *)0, 0); for (thisino = newino; thisino < newino + newlen; @@ -1387,6 +1388,7 @@ xfs_ialloc_read_agi( pag = &mp->m_perag[agno]; if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); + pag->pagi_count = be32_to_cpu(agi->agi_count); pag->pagi_init = 1; } else { /* @@ -1410,3 +1412,23 @@ xfs_ialloc_read_agi( *bpp = bp; return 0; } + +/* + * Read in the agi to initialise the per-ag data in the mount structure + */ +int +xfs_ialloc_pagi_init( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno) /* allocation group number */ +{ + xfs_buf_t *bp = NULL; + int error; + + error = xfs_ialloc_read_agi(mp, tp, agno, &bp); + if (error) + return error; + if (bp) + xfs_trans_brelse(tp, bp); + return 0; +} diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 7f5debe1acb6..97f4040931ca 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -149,6 +149,16 @@ xfs_ialloc_read_agi( xfs_agnumber_t agno, /* allocation group number */ struct xfs_buf **bpp); /* allocation group hdr buf */ +/* + * Read in the allocation group header to initialise the per-ag data + * in the mount structure + */ +int +xfs_ialloc_pagi_init( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno); /* allocation group number */ + #endif /* __KERNEL__ */ #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3ca5d43b8345..cdc4c28926d0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -48,7 +48,9 @@ #include "xfs_dir2_trace.h" #include "xfs_quota.h" #include "xfs_acl.h" +#include "xfs_filestream.h" +#include <linux/log2.h> kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; @@ -643,8 +645,7 @@ xfs_iformat_extents( ep->l1 = 
INT_GET(get_unaligned((__uint64_t*)&dp->l1), ARCH_CONVERT); } - xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex, - whichfork); + XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); if (whichfork != XFS_DATA_FORK || XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) if (unlikely(xfs_check_nostate_extents( @@ -817,6 +818,8 @@ _xfs_dic2xflags( flags |= XFS_XFLAG_EXTSZINHERIT; if (di_flags & XFS_DIFLAG_NODEFRAG) flags |= XFS_XFLAG_NODEFRAG; + if (di_flags & XFS_DIFLAG_FILESTREAM) + flags |= XFS_XFLAG_FILESTREAM; } return flags; @@ -1074,6 +1077,11 @@ xfs_iread_extents( * also returns the [locked] bp pointing to the head of the freelist * as ialloc_context. The caller should hold this buffer across * the commit and pass it back into this routine on the second call. + * + * If we are allocating quota inodes, we do not have a parent inode + * to attach to or associate with (i.e. pip == NULL) because they + * are not linked into the directory structure - they are attached + * directly to the superblock - and so have no parent. */ int xfs_ialloc( @@ -1099,7 +1107,7 @@ xfs_ialloc( * Call the space management code to pick * the on-disk inode to be allocated. */ - error = xfs_dialloc(tp, pip->i_ino, mode, okalloc, + error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, ialloc_context, call_again, &ino); if (error != 0) { return error; @@ -1150,10 +1158,10 @@ xfs_ialloc( /* * Project ids won't be stored on disk if we are using a version 1 inode. 
*/ - if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) + if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) xfs_bump_ino_vers2(tp, ip); - if (XFS_INHERIT_GID(pip, vp->v_vfsp)) { + if (pip && XFS_INHERIT_GID(pip, vp->v_vfsp)) { ip->i_d.di_gid = pip->i_d.di_gid; if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { ip->i_d.di_mode |= S_ISGID; @@ -1195,8 +1203,16 @@ xfs_ialloc( flags |= XFS_ILOG_DEV; break; case S_IFREG: + if (pip && xfs_inode_is_filestream(pip)) { + error = xfs_filestream_associate(pip, ip); + if (error < 0) + return -error; + if (!error) + xfs_iflags_set(ip, XFS_IFILESTREAM); + } + /* fall through */ case S_IFDIR: - if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { + if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { uint di_flags = 0; if ((mode & S_IFMT) == S_IFDIR) { @@ -1233,6 +1249,8 @@ xfs_ialloc( if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && xfs_inherit_nodefrag) di_flags |= XFS_DIFLAG_NODEFRAG; + if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; ip->i_d.di_flags |= di_flags; } /* FALLTHROUGH */ @@ -2875,9 +2893,6 @@ xfs_iextents_copy( int copied; xfs_bmbt_rec_t *dest_ep; xfs_bmbt_rec_t *ep; -#ifdef XFS_BMAP_TRACE - static char fname[] = "xfs_iextents_copy"; -#endif int i; xfs_ifork_t *ifp; int nrecs; @@ -2888,7 +2903,7 @@ xfs_iextents_copy( ASSERT(ifp->if_bytes > 0); nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork); + XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); ASSERT(nrecs > 0); /* @@ -4184,7 +4199,7 @@ xfs_iext_realloc_direct( ifp->if_bytes = new_size; return; } - if ((new_size & (new_size - 1)) != 0) { + if (!is_power_of_2(new_size)){ rnew_size = xfs_iroundup(new_size); } if (rnew_size != ifp->if_real_bytes) { @@ -4207,7 +4222,7 @@ xfs_iext_realloc_direct( */ else { new_size += ifp->if_bytes; - if ((new_size & (new_size - 1)) != 0) { + if (!is_power_of_2(new_size)) { rnew_size = xfs_iroundup(new_size); 
} xfs_iext_inline_to_direct(ifp, rnew_size); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f75afecef8e7..012dfd4a958c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -379,6 +379,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) #define XFS_ISTALE 0x0010 /* inode has been staled */ #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ #define XFS_INEW 0x0040 +#define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */ /* * Flags for inode locking. @@ -414,19 +415,22 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) * gets a lockdep subclass of 1 and the second lock will have a lockdep * subclass of 0. * - * XFS_I[O]LOCK_INUMORDER - for locking several inodes at the some time + * XFS_LOCK_INUMORDER - for locking several inodes at the same time * with xfs_lock_inodes(). This flag is used as the starting subclass * and each subsequent lock acquired will increment the subclass by one. * So the first lock acquired will have a lockdep subclass of 2, the - * second lock will have a lockdep subclass of 3, and so on. + * second lock will have a lockdep subclass of 3, and so on. It is + * the responsibility of the class builder to shift this to the correct + * portion of the lock_mode lockdep mask.
*/ +#define XFS_LOCK_PARENT 1 +#define XFS_LOCK_INUMORDER 2 + #define XFS_IOLOCK_SHIFT 16 -#define XFS_IOLOCK_PARENT (1 << XFS_IOLOCK_SHIFT) -#define XFS_IOLOCK_INUMORDER (2 << XFS_IOLOCK_SHIFT) +#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) #define XFS_ILOCK_SHIFT 24 -#define XFS_ILOCK_PARENT (1 << XFS_ILOCK_SHIFT) -#define XFS_ILOCK_INUMORDER (2 << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) #define XFS_IOLOCK_DEP_MASK 0x00ff0000 #define XFS_ILOCK_DEP_MASK 0xff000000 diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 3f2b9f2a7b94..bf57b75acb90 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -451,19 +451,14 @@ xfs_iomap_write_direct( return XFS_ERROR(error); rt = XFS_IS_REALTIME_INODE(ip); - if (unlikely(rt)) { - if (!(extsz = ip->i_d.di_extsize)) - extsz = mp->m_sb.sb_rextsize; - } else { - extsz = ip->i_d.di_extsize; - } + extsz = xfs_get_extsz_hint(ip); isize = ip->i_size; if (io->io_new_size > isize) isize = io->io_new_size; - offset_fsb = XFS_B_TO_FSBT(mp, offset); - last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); + offset_fsb = XFS_B_TO_FSBT(mp, offset); + last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > isize) { error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz, &last_fsb); @@ -489,13 +484,13 @@ xfs_iomap_write_direct( if (unlikely(rt)) { resrtextents = qblocks = resaligned; resrtextents /= mp->m_sb.sb_rextsize; - resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); - quota_flag = XFS_QMOPT_RES_RTBLKS; - } else { - resrtextents = 0; + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); + quota_flag = XFS_QMOPT_RES_RTBLKS; + } else { + resrtextents = 0; resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); - quota_flag = XFS_QMOPT_RES_REGBLKS; - } + quota_flag = XFS_QMOPT_RES_REGBLKS; + } /* * Allocate and setup the transaction @@ -666,13 +661,7 @@ xfs_iomap_write_delay( if (error) return XFS_ERROR(error); - if (XFS_IS_REALTIME_INODE(ip)) { - 
if (!(extsz = ip->i_d.di_extsize)) - extsz = mp->m_sb.sb_rextsize; - } else { - extsz = ip->i_d.di_extsize; - } - + extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); retry: @@ -788,18 +777,12 @@ xfs_iomap_write_allocate( nimaps = 0; while (nimaps == 0) { tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + tp->t_flags |= XFS_TRANS_RESERVE; nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_reserve(tp, nres, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); - if (error == ENOSPC) { - error = xfs_trans_reserve(tp, 0, - XFS_WRITE_LOG_RES(mp), - 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); - } if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); @@ -917,8 +900,8 @@ xfs_iomap_write_unwritten( * from unwritten to real. Do allocations in a loop until * we have covered the range passed in. */ - tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index e725ddd3de5f..4c2454bcc714 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -202,6 +202,16 @@ xfs_bulkstat_one_dinode( return 0; } +STATIC int +xfs_bulkstat_one_fmt( + void __user *ubuffer, + const xfs_bstat_t *buffer) +{ + if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) + return -EFAULT; + return sizeof(*buffer); +} + /* * Return stat information for one inode. * Return 0 if ok, else errno. @@ -221,6 +231,7 @@ xfs_bulkstat_one( xfs_bstat_t *buf; /* return buffer */ int error = 0; /* error value */ xfs_dinode_t *dip; /* dinode inode pointer */ + bulkstat_one_fmt_pf formatter = private_data ? 
: xfs_bulkstat_one_fmt; dip = (xfs_dinode_t *)dibuff; *stat = BULKSTAT_RV_NOTHING; @@ -243,14 +254,15 @@ xfs_bulkstat_one( xfs_bulkstat_one_dinode(mp, ino, dip, buf); } - if (copy_to_user(buffer, buf, sizeof(*buf))) { + error = formatter(buffer, buf); + if (error < 0) { error = EFAULT; goto out_free; } *stat = BULKSTAT_RV_DIDONE; if (ubused) - *ubused = sizeof(*buf); + *ubused = error; out_free: kmem_free(buf, sizeof(*buf)); @@ -748,6 +760,19 @@ xfs_bulkstat_single( return 0; } +int +xfs_inumbers_fmt( + void __user *ubuffer, /* buffer to write to */ + const xfs_inogrp_t *buffer, /* buffer to read from */ + long count, /* # of elements to read */ + long *written) /* # of bytes written */ +{ + if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) + return -EFAULT; + *written = count * sizeof(*buffer); + return 0; +} + /* * Return inode number table for the filesystem. */ @@ -756,7 +781,8 @@ xfs_inumbers( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t *lastino, /* last inode returned */ int *count, /* size of buffer/count returned */ - xfs_inogrp_t __user *ubuffer)/* buffer with inode descriptions */ + void __user *ubuffer,/* buffer with inode descriptions */ + inumbers_fmt_pf formatter) { xfs_buf_t *agbp; xfs_agino_t agino; @@ -835,12 +861,12 @@ xfs_inumbers( bufidx++; left--; if (bufidx == bcount) { - if (copy_to_user(ubuffer, buffer, - bufidx * sizeof(*buffer))) { + long written; + if (formatter(ubuffer, buffer, bufidx, &written)) { error = XFS_ERROR(EFAULT); break; } - ubuffer += bufidx; + ubuffer += written; *count += bufidx; bufidx = 0; } @@ -862,8 +888,8 @@ xfs_inumbers( } if (!error) { if (bufidx) { - if (copy_to_user(ubuffer, buffer, - bufidx * sizeof(*buffer))) + long written; + if (formatter(ubuffer, buffer, bufidx, &written)) error = XFS_ERROR(EFAULT); else *count += bufidx; diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index f25a28862a17..a1f18fce9b70 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -69,6 +69,10 @@ 
xfs_bulkstat_single( char __user *buffer, int *done); +typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ + void __user *ubuffer, /* buffer to write to */ + const xfs_bstat_t *buffer); /* buffer to read from */ + int xfs_bulkstat_one( xfs_mount_t *mp, @@ -86,11 +90,25 @@ xfs_internal_inum( xfs_mount_t *mp, xfs_ino_t ino); +typedef int (*inumbers_fmt_pf)( + void __user *ubuffer, /* buffer to write to */ + const xfs_inogrp_t *buffer, /* buffer to read from */ + long count, /* # of elements to read */ + long *written); /* # of bytes written */ + +int +xfs_inumbers_fmt( + void __user *ubuffer, /* buffer to write to */ + const xfs_inogrp_t *buffer, /* buffer to read from */ + long count, /* # of elements to read */ + long *written); /* # of bytes written */ + int /* error status */ xfs_inumbers( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t *last, /* last inode returned */ int *count, /* size of buffer/count returned */ - xfs_inogrp_t __user *buffer);/* buffer with inode info */ + void __user *buffer, /* buffer with inode info */ + inumbers_fmt_pf formatter); #endif /* __XFS_ITABLE_H__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c48bf61f17bd..9d4c4fbeb3ee 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -817,10 +817,8 @@ xfs_log_need_covered(xfs_mount_t *mp) SPLDECL(s); int needed = 0, gen; xlog_t *log = mp->m_log; - bhv_vfs_t *vfsp = XFS_MTOVFS(mp); - if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) || - (vfsp->vfs_flag & VFS_RDONLY)) + if (!xfs_fs_writable(mp)) return 0; s = LOG_LOCK(log); @@ -967,14 +965,16 @@ xlog_iodone(xfs_buf_t *bp) } else if (iclog->ic_state & XLOG_STATE_IOERROR) { aborted = XFS_LI_ABORTED; } + + /* log I/O is always issued ASYNC */ + ASSERT(XFS_BUF_ISASYNC(bp)); xlog_state_done_syncing(iclog, aborted); - if (!(XFS_BUF_ISASYNC(bp))) { - /* - * Corresponding psema() will be done in bwrite(). If we don't - * vsema() here, panic. 
- */ - XFS_BUF_V_IODONESEMA(bp); - } + /* + * do not reference the buffer (bp) here as we could race + * with it being freed after writing the unmount record to the + * log. + */ + } /* xlog_iodone */ /* @@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t *mp, *iclogp = (xlog_in_core_t *) kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); iclog = *iclogp; - iclog->hic_data = (xlog_in_core_2_t *) - kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE); - iclog->ic_prev = prev_iclog; prev_iclog = iclog; + + bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); + if (!XFS_BUF_CPSEMA(bp)) + ASSERT(0); + XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); + XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); + iclog->ic_bp = bp; + iclog->hic_data = bp->b_addr; + log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); head = &iclog->ic_header; @@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t *mp, INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); - bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); - XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); - iclog->ic_bp = bp; iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; iclog->ic_state = XLOG_STATE_ACTIVE; @@ -1432,7 +1434,7 @@ xlog_sync(xlog_t *log, } else { iclog->ic_bwritecnt = 1; } - XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count); + XFS_BUF_SET_COUNT(bp, count); XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); @@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log) } #endif next_iclog = iclog->ic_next; - kmem_free(iclog->hic_data, log->l_iclog_size); kmem_free(iclog, sizeof(xlog_in_core_t)); iclog = next_iclog; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 080fabf61c92..fddbb091a86f 100644 --- a/fs/xfs/xfs_log_recover.c +++ 
b/fs/xfs/xfs_log_recover.c @@ -927,6 +927,14 @@ xlog_find_tail( ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, after_umount_blk); *tail_blk = after_umount_blk; + + /* + * Note that the unmount was clean. If the unmount + * was not clean, we need to know this to rebuild the + * superblock counters from the perag headers if we + * have a filesystem using non-persistent counters. + */ + log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; } } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a96bde6df96d..a66b39805176 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -202,6 +202,27 @@ xfs_mount_free( kmem_free(mp, sizeof(xfs_mount_t)); } +/* + * Check size of device based on the (data/realtime) block count. + * Note: this check is used by the growfs code as well as mount. + */ +int +xfs_sb_validate_fsb_count( + xfs_sb_t *sbp, + __uint64_t nblocks) +{ + ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); + ASSERT(sbp->sb_blocklog >= BBSHIFT); + +#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ + if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) + return E2BIG; +#else /* Limited by UINT_MAX of sectors */ + if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) + return E2BIG; +#endif + return 0; +} /* * Check the validity of the SB found. 
@@ -284,18 +305,8 @@ xfs_mount_validate_sb( return XFS_ERROR(EFSCORRUPTED); } - ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); - ASSERT(sbp->sb_blocklog >= BBSHIFT); - -#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ - if (unlikely( - (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX || - (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) { -#else /* Limited by UINT_MAX of sectors */ - if (unlikely( - (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX || - (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) { -#endif + if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || + xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { xfs_fs_mount_cmn_err(flags, "file system too large to be mounted on this system."); return XFS_ERROR(E2BIG); @@ -632,6 +643,64 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) sbp->sb_inopblock); mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; } + +/* + * xfs_initialize_perag_data + * + * Read in each per-ag structure so we can count up the number of + * allocated inodes, free inodes and used filesystem blocks as this + * information is no longer persistent in the superblock. Once we have + * this information, write it into the in-core superblock structure. + */ +STATIC int +xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) +{ + xfs_agnumber_t index; + xfs_perag_t *pag; + xfs_sb_t *sbp = &mp->m_sb; + uint64_t ifree = 0; + uint64_t ialloc = 0; + uint64_t bfree = 0; + uint64_t bfreelst = 0; + uint64_t btree = 0; + int error; + int s; + + for (index = 0; index < agcount; index++) { + /* + * read the agf, then the agi. This gets us + * all the information we need and populates the + * per-ag structures for us. 
+ */ + error = xfs_alloc_pagf_init(mp, NULL, index, 0); + if (error) + return error; + + error = xfs_ialloc_pagi_init(mp, NULL, index); + if (error) + return error; + pag = &mp->m_perag[index]; + ifree += pag->pagi_freecount; + ialloc += pag->pagi_count; + bfree += pag->pagf_freeblks; + bfreelst += pag->pagf_flcount; + btree += pag->pagf_btreeblks; + } + /* + * Overwrite incore superblock counters with just-read data + */ + s = XFS_SB_LOCK(mp); + sbp->sb_ifree = ifree; + sbp->sb_icount = ialloc; + sbp->sb_fdblocks = bfree + bfreelst + btree; + XFS_SB_UNLOCK(mp, s); + + /* Fixup the per-cpu counters as well. */ + xfs_icsb_reinit_counters(mp); + + return 0; +} + /* * xfs_mountfs * @@ -656,7 +725,7 @@ xfs_mountfs( bhv_vnode_t *rvp = NULL; int readio_log, writeio_log; xfs_daddr_t d; - __uint64_t ret64; + __uint64_t resblks; __int64_t update_flags; uint quotamount, quotaflags; int agno; @@ -773,6 +842,7 @@ xfs_mountfs( */ if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { + __uint64_t ret64; if (xfs_uuid_mount(mp)) { error = XFS_ERROR(EINVAL); goto error1; @@ -976,6 +1046,34 @@ xfs_mountfs( } /* + * Now the log is mounted, we know if it was an unclean shutdown or + * not. If it was, with the first phase of recovery has completed, we + * have consistent AG blocks on disk. We have not recovered EFIs yet, + * but they are recovered transactionally in the second recovery phase + * later. + * + * Hence we can safely re-initialise incore superblock counters from + * the per-ag data. These may not be correct if the filesystem was not + * cleanly unmounted, so we need to wait for recovery to finish before + * doing this. + * + * If the filesystem was cleanly unmounted, then we can trust the + * values in the superblock to be correct and we don't need to do + * anything here. + * + * If we are currently making the filesystem, the initialisation will + * fail as the perag data is in an undefined state. 
+ */ + + if (xfs_sb_version_haslazysbcount(&mp->m_sb) && + !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) && + !mp->m_sb.sb_inprogress) { + error = xfs_initialize_perag_data(mp, sbp->sb_agcount); + if (error) { + goto error2; + } + } + /* * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. */ @@ -1044,6 +1142,23 @@ xfs_mountfs( if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) goto error4; + /* + * Now we are mounted, reserve a small amount of unused space for + * privileged transactions. This is needed so that transaction + * space required for critical operations can dip into this pool + * when at ENOSPC. This is needed for operations like create with + * attr, unwritten extent conversion at ENOSPC, etc. Data allocations + * are not allowed to use this reserved space. + * + * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. + * This may drive us straight to ENOSPC on mount, but that implies + * we were already there on the last unmount. + */ + resblks = mp->m_sb.sb_dblocks; + do_div(resblks, 20); + resblks = min_t(__uint64_t, resblks, 1024); + xfs_reserve_blocks(mp, &resblks, NULL); + return 0; error4: @@ -1083,7 +1198,19 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) #if defined(DEBUG) || defined(INDUCE_IO_ERROR) int64_t fsid; #endif + __uint64_t resblks; + /* + * We can potentially deadlock here if we have an inode cluster + * that has been freed but has its buffer still pinned in memory because + * the transaction is still sitting in an iclog. The stale inodes + * on that buffer will have their flush locks held until the + * transaction hits the disk and the callbacks run. The inode + * flush takes the flush lock unconditionally and with nothing to + * push out the iclog we will never get that unlocked. Hence we + * need to force the log first. 
+ */ + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); xfs_iflush_all(mp); XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); @@ -1100,10 +1227,26 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) xfs_binval(mp->m_rtdev_targp); } - xfs_unmountfs_writesb(mp); + /* + * Unreserve any blocks we have so that when we unmount we don't account + * the reserved free space as used. This is really only necessary for + * lazy superblock counting because it trusts the incore superblock + * counters to be absolutely correct on clean unmount. + * + * We don't bother correcting this elsewhere for lazy superblock + * counting because on mount of an unclean filesystem we reconstruct the + * correct counter value and this is irrelevant. + * + * For non-lazy counter filesystems, this doesn't matter at all because + * we only ever apply deltas to the superblock and hence the incore + * value does not matter.... + */ + resblks = 0; + xfs_reserve_blocks(mp, &resblks, NULL); + xfs_log_sbcount(mp, 1); + xfs_unmountfs_writesb(mp); xfs_unmountfs_wait(mp); /* wait for async bufs */ - xfs_log_unmount(mp); /* Done! No more fs ops. */ xfs_freesb(mp); @@ -1150,6 +1293,62 @@ xfs_unmountfs_wait(xfs_mount_t *mp) } int +xfs_fs_writable(xfs_mount_t *mp) +{ + bhv_vfs_t *vfsp = XFS_MTOVFS(mp); + + return !(vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) || + (vfsp->vfs_flag & VFS_RDONLY)); +} + +/* + * xfs_log_sbcount + * + * Called either periodically to keep the on disk superblock values + * roughly up to date or from unmount to make sure the values are + * correct on a clean unmount. + * + * Note this code can be called during the process of freezing, so + * we may need to use the transaction allocator which does not + * block when the transaction subsystem is in its frozen state. 
+ */ +int +xfs_log_sbcount( + xfs_mount_t *mp, + uint sync) +{ + xfs_trans_t *tp; + int error; + + if (!xfs_fs_writable(mp)) + return 0; + + xfs_icsb_sync_counters(mp); + + /* + * we don't need to do this if we are updating the superblock + * counters on every modification. + */ + if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) + return 0; + + tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT); + error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); + if (sync) + xfs_trans_set_sync(tp); + xfs_trans_commit(tp, 0); + + return 0; +} + +int xfs_unmountfs_writesb(xfs_mount_t *mp) { xfs_buf_t *sbp; @@ -1160,16 +1359,15 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) * skip superblock write if fs is read-only, or * if we are doing a forced umount. */ - sbp = xfs_getsb(mp, 0); if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY || XFS_FORCED_SHUTDOWN(mp))) { - xfs_icsb_sync_counters(mp); + sbp = xfs_getsb(mp, 0); + sb = XFS_BUF_TO_SBP(sbp); /* * mark shared-readonly if desired */ - sb = XFS_BUF_TO_SBP(sbp); if (mp->m_mk_sharedro) { if (!(sb->sb_flags & XFS_SBF_READONLY)) sb->sb_flags |= XFS_SBF_READONLY; @@ -1178,6 +1376,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) xfs_fs_cmn_err(CE_NOTE, mp, "Unmounting, marking shared read-only"); } + XFS_BUF_UNDONE(sbp); XFS_BUF_UNREAD(sbp); XFS_BUF_UNDELAYWRITE(sbp); @@ -1192,8 +1391,8 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) mp, sbp, XFS_BUF_ADDR(sbp)); if (error && mp->m_mk_sharedro) xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. 
Filesystem may not be marked shared readonly"); + xfs_buf_relse(sbp); } - xfs_buf_relse(sbp); return error; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 82304b94646d..76ad74758696 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -66,6 +66,7 @@ struct xfs_bmbt_irec; struct xfs_bmap_free; struct xfs_extdelta; struct xfs_swapext; +struct xfs_mru_cache; extern struct bhv_vfsops xfs_vfsops; extern struct bhv_vnodeops xfs_vnodeops; @@ -424,17 +425,18 @@ typedef struct xfs_mount { struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ struct mutex m_icsb_mutex; /* balancer sync lock */ #endif + struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ } xfs_mount_t; /* * Flags for m_flags. */ -#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops +#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops must be synchronous except for space allocations */ -#define XFS_MOUNT_INO64 (1ULL << 1) +#define XFS_MOUNT_INO64 (1ULL << 1) /* (1ULL << 2) -- currently unused */ - /* (1ULL << 3) -- currently unused */ +#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, typically for disk errors in metadata */ @@ -463,6 +465,8 @@ typedef struct xfs_mount { * I/O size in stat() */ #define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock counters */ +#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams + allocator */ /* @@ -511,6 +515,8 @@ xfs_preferred_iosize(xfs_mount_t *mp) #define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) +#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \ + ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN) #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) #define xfs_force_shutdown(m,f) \ bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__) @@ -602,6 +608,7 @@ typedef struct xfs_mod_sb { extern xfs_mount_t *xfs_mount_init(void); extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 
+extern int xfs_log_sbcount(xfs_mount_t *, uint); extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); extern int xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int); extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); @@ -618,12 +625,14 @@ extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); +extern int xfs_fs_writable(xfs_mount_t *); extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); extern int xfs_syncsub(xfs_mount_t *, int, int *); extern int xfs_sync_inodes(xfs_mount_t *, int, int *); extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *, xfs_agnumber_t); extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t); +extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern struct xfs_dmops xfs_dmcore_stub; extern struct xfs_qmops xfs_qmcore_stub; diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c new file mode 100644 index 000000000000..7deb9e3cbbd3 --- /dev/null +++ b/fs/xfs/xfs_mru_cache.c @@ -0,0 +1,608 @@ +/* + * Copyright (c) 2006-2007 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_mru_cache.h" + +/* + * The MRU Cache data structure consists of a data store, an array of lists and + * a lock to protect its internal state. At initialisation time, the client + * supplies an element lifetime in milliseconds and a group count, as well as a + * function pointer to call when deleting elements. A data structure for + * queueing up work in the form of timed callbacks is also included. + * + * The group count controls how many lists are created, and thereby how finely + * the elements are grouped in time. When reaping occurs, all the elements in + * all the lists whose time has expired are deleted. + * + * To give an example of how this works in practice, consider a client that + * initialises an MRU Cache with a lifetime of ten seconds and a group count of + * five. Five internal lists will be created, each representing a two second + * period in time. When the first element is added, time zero for the data + * structure is initialised to the current time. + * + * All the elements added in the first two seconds are appended to the first + * list. Elements added in the third second go into the second list, and so on. + * If an element is accessed at any point, it is removed from its list and + * inserted at the head of the current most-recently-used list. + * + * The reaper function will have nothing to do until at least twelve seconds + * have elapsed since the first element was added. The reason for this is that + * if it were called at t=11s, there could be elements in the first list that + * have only been inactive for nine seconds, so it still does nothing. If it is + * called anywhere between t=12 and t=14 seconds, it will delete all the + * elements that remain in the first list. 
It's therefore possible for elements + * to remain in the data store even after they've been inactive for up to + * (t + t/g) seconds, where t is the inactive element lifetime and g is the + * number of groups. + * + * The above example assumes that the reaper function gets called at least once + * every (t/g) seconds. If it is called less frequently, unused elements will + * accumulate in the reap list until the reaper function is eventually called. + * The current implementation uses work queue callbacks to carefully time the + * reaper function calls, so this should happen rarely, if at all. + * + * From a design perspective, the primary reason for the choice of a list array + * representing discrete time intervals is that it's only practical to reap + * expired elements in groups of some appreciable size. This automatically + * introduces a granularity to element lifetimes, so there's no point storing an + * individual timeout with each element that specifies a more precise reap time. + * The bonus is a saving of sizeof(long) bytes of memory per element stored. + * + * The elements could have been stored in just one list, but an array of + * counters or pointers would need to be maintained to allow them to be divided + * up into discrete time groups. More critically, the process of touching or + * removing an element would involve walking large portions of the entire list, + * which would have a detrimental effect on performance. The additional memory + * requirement for the array of list heads is minimal. + * + * When an element is touched or deleted, it needs to be removed from its + * current list. Doubly linked lists are used to make the list maintenance + * portion of these operations O(1). Since reaper timing can be imprecise, + * inserts and lookups can occur when there are no free lists available. When + * this happens, all the elements on the LRU list need to be migrated to the end + * of the reap list. 
To keep the list maintenance portion of these operations + * O(1) also, list tails need to be accessible without walking the entire list. + * This is the reason why doubly linked list heads are used. + */ + +/* + * An MRU Cache is a dynamic data structure that stores its elements in a way + * that allows efficient lookups, but also groups them into discrete time + * intervals based on insertion time. This allows elements to be efficiently + * and automatically reaped after a fixed period of inactivity. + * + * When a client data pointer is stored in the MRU Cache it needs to be added to + * both the data store and to one of the lists. It must also be possible to + * access each of these entries via the other, i.e. to: + * + * a) Walk a list, removing the corresponding data store entry for each item. + * b) Look up a data store entry, then access its list entry directly. + * + * To achieve both of these goals, each entry must contain both a list entry and + * a key, in addition to the user's data pointer. Note that it's not a good + * idea to have the client embed one of these structures at the top of their own + * data structure, because inserting the same item more than once would most + * likely result in a loop in one of the lists. That's a sure-fire recipe for + * an infinite loop in the code. + */ +typedef struct xfs_mru_cache_elem +{ + struct list_head list_node; + unsigned long key; + void *value; +} xfs_mru_cache_elem_t; + +static kmem_zone_t *xfs_mru_elem_zone; +static struct workqueue_struct *xfs_mru_reap_wq; + +/* + * When inserting, destroying or reaping, it's first necessary to update the + * lists relative to a particular time. In the case of destroying, that time + * will be well in the future to ensure that all items are moved to the reap + * list. In all other cases though, the time will be the current time. 
+ * + * This function enters a loop, moving the contents of the LRU list to the reap + * list again and again until either a) the lists are all empty, or b) time zero + * has been advanced sufficiently to be within the immediate element lifetime. + * + * Case a) above is detected by counting how many groups are migrated and + * stopping when they've all been moved. Case b) is detected by monitoring the + * time_zero field, which is updated as each group is migrated. + * + * The return value is the earliest time that more migration could be needed, or + * zero if there's no need to schedule more work because the lists are empty. + */ +STATIC unsigned long +_xfs_mru_cache_migrate( + xfs_mru_cache_t *mru, + unsigned long now) +{ + unsigned int grp; + unsigned int migrated = 0; + struct list_head *lru_list; + + /* Nothing to do if the data store is empty. */ + if (!mru->time_zero) + return 0; + + /* While time zero is older than the time spanned by all the lists. */ + while (mru->time_zero <= now - mru->grp_count * mru->grp_time) { + + /* + * If the LRU list isn't empty, migrate its elements to the tail + * of the reap list. + */ + lru_list = mru->lists + mru->lru_grp; + if (!list_empty(lru_list)) + list_splice_init(lru_list, mru->reap_list.prev); + + /* + * Advance the LRU group number, freeing the old LRU list to + * become the new MRU list; advance time zero accordingly. + */ + mru->lru_grp = (mru->lru_grp + 1) % mru->grp_count; + mru->time_zero += mru->grp_time; + + /* + * If reaping is so far behind that all the elements on all the + * lists have been migrated to the reap list, it's now empty. + */ + if (++migrated == mru->grp_count) { + mru->lru_grp = 0; + mru->time_zero = 0; + return 0; + } + } + + /* Find the first non-empty list from the LRU end. */ + for (grp = 0; grp < mru->grp_count; grp++) { + + /* Check the grp'th list from the LRU end. 
*/ + lru_list = mru->lists + ((mru->lru_grp + grp) % mru->grp_count); + if (!list_empty(lru_list)) + return mru->time_zero + + (mru->grp_count + grp) * mru->grp_time; + } + + /* All the lists must be empty. */ + mru->lru_grp = 0; + mru->time_zero = 0; + return 0; +} + +/* + * When inserting or doing a lookup, an element needs to be inserted into the + * MRU list. The lists must be migrated first to ensure that they're + * up-to-date, otherwise the new element could be given a shorter lifetime in + * the cache than it should. + */ +STATIC void +_xfs_mru_cache_list_insert( + xfs_mru_cache_t *mru, + xfs_mru_cache_elem_t *elem) +{ + unsigned int grp = 0; + unsigned long now = jiffies; + + /* + * If the data store is empty, initialise time zero, leave grp set to + * zero and start the work queue timer if necessary. Otherwise, set grp + * to the number of group times that have elapsed since time zero. + */ + if (!_xfs_mru_cache_migrate(mru, now)) { + mru->time_zero = now; + if (!mru->next_reap) + mru->next_reap = mru->grp_count * mru->grp_time; + } else { + grp = (now - mru->time_zero) / mru->grp_time; + grp = (mru->lru_grp + grp) % mru->grp_count; + } + + /* Insert the element at the tail of the corresponding list. */ + list_add_tail(&elem->list_node, mru->lists + grp); +} + +/* + * When destroying or reaping, all the elements that were migrated to the reap + * list need to be deleted. For each element this involves removing it from the + * data store, removing it from the reap list, calling the client's free + * function and deleting the element from the element zone. + */ +STATIC void +_xfs_mru_cache_clear_reap_list( + xfs_mru_cache_t *mru) +{ + xfs_mru_cache_elem_t *elem, *next; + struct list_head tmp; + + INIT_LIST_HEAD(&tmp); + list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) { + + /* Remove the element from the data store. 
*/ + radix_tree_delete(&mru->store, elem->key); + + /* + * remove to temp list so it can be freed without + * needing to hold the lock + */ + list_move(&elem->list_node, &tmp); + } + mutex_spinunlock(&mru->lock, 0); + + list_for_each_entry_safe(elem, next, &tmp, list_node) { + + /* Remove the element from the reap list. */ + list_del_init(&elem->list_node); + + /* Call the client's free function with the key and value pointer. */ + mru->free_func(elem->key, elem->value); + + /* Free the element structure. */ + kmem_zone_free(xfs_mru_elem_zone, elem); + } + + mutex_spinlock(&mru->lock); +} + +/* + * We fire the reap timer every group expiry interval so + * we always have a reaper ready to run. This makes shutdown + * and flushing of the reaper easy to do. Hence we need to + * keep track of when the next reap must occur so we can determine + * at each interval whether there is anything we need to do. + */ +STATIC void +_xfs_mru_cache_reap( + struct work_struct *work) +{ + xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); + unsigned long now; + + ASSERT(mru && mru->lists); + if (!mru || !mru->lists) + return; + + mutex_spinlock(&mru->lock); + now = jiffies; + if (mru->reap_all || + (mru->next_reap && time_after(now, mru->next_reap))) { + if (mru->reap_all) + now += mru->grp_count * mru->grp_time * 2; + mru->next_reap = _xfs_mru_cache_migrate(mru, now); + _xfs_mru_cache_clear_reap_list(mru); + } + + /* + * the process that triggered the reap_all is responsible + * for restarting the periodic reap if it is required. 
+ */ + if (!mru->reap_all) + queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); + mru->reap_all = 0; + mutex_spinunlock(&mru->lock, 0); +} + +int +xfs_mru_cache_init(void) +{ + xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), + "xfs_mru_cache_elem"); + if (!xfs_mru_elem_zone) + return ENOMEM; + + xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); + if (!xfs_mru_reap_wq) { + kmem_zone_destroy(xfs_mru_elem_zone); + return ENOMEM; + } + + return 0; +} + +void +xfs_mru_cache_uninit(void) +{ + destroy_workqueue(xfs_mru_reap_wq); + kmem_zone_destroy(xfs_mru_elem_zone); +} + +/* + * To initialise a struct xfs_mru_cache pointer, call xfs_mru_cache_create() + * with the address of the pointer, a lifetime value in milliseconds, a group + * count and a free function to use when deleting elements. This function + * returns 0 if the initialisation was successful. + */ +int +xfs_mru_cache_create( + xfs_mru_cache_t **mrup, + unsigned int lifetime_ms, + unsigned int grp_count, + xfs_mru_cache_free_func_t free_func) +{ + xfs_mru_cache_t *mru = NULL; + int err = 0, grp; + unsigned int grp_time; + + if (mrup) + *mrup = NULL; + + if (!mrup || !grp_count || !lifetime_ms || !free_func) + return EINVAL; + + if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count)) + return EINVAL; + + if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP))) + return ENOMEM; + + /* An extra list is needed to avoid reaping up to a grp_time early. */ + mru->grp_count = grp_count + 1; + mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); + + if (!mru->lists) { + err = ENOMEM; + goto exit; + } + + for (grp = 0; grp < mru->grp_count; grp++) + INIT_LIST_HEAD(mru->lists + grp); + + /* + * We use GFP_KERNEL radix tree preload and do inserts under a + * spinlock so GFP_ATOMIC is appropriate for the radix tree itself. 
+ */ + INIT_RADIX_TREE(&mru->store, GFP_ATOMIC); + INIT_LIST_HEAD(&mru->reap_list); + spinlock_init(&mru->lock, "xfs_mru_cache"); + INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap); + + mru->grp_time = grp_time; + mru->free_func = free_func; + + /* start up the reaper event */ + mru->next_reap = 0; + mru->reap_all = 0; + queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); + + *mrup = mru; + +exit: + if (err && mru && mru->lists) + kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); + if (err && mru) + kmem_free(mru, sizeof(*mru)); + + return err; +} + +/* + * Call xfs_mru_cache_flush() to flush out all cached entries, calling their + * free functions as they're deleted. When this function returns, the caller is + * guaranteed that all the free functions for all the elements have finished + * executing. + * + * While we are flushing, we stop the periodic reaper event from triggering. + * Normally, we want to restart this periodic event, but if we are shutting + * down the cache we do not want it restarted. hence the restart parameter + * where 0 = do not restart reaper and 1 = restart reaper. 
+ */ +void +xfs_mru_cache_flush( + xfs_mru_cache_t *mru, + int restart) +{ + if (!mru || !mru->lists) + return; + + cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); + + mutex_spinlock(&mru->lock); + mru->reap_all = 1; + mutex_spinunlock(&mru->lock, 0); + + queue_work(xfs_mru_reap_wq, &mru->work.work); + flush_workqueue(xfs_mru_reap_wq); + + mutex_spinlock(&mru->lock); + WARN_ON_ONCE(mru->reap_all != 0); + mru->reap_all = 0; + if (restart) + queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); + mutex_spinunlock(&mru->lock, 0); +} + +void +xfs_mru_cache_destroy( + xfs_mru_cache_t *mru) +{ + if (!mru || !mru->lists) + return; + + /* we don't want the reaper to restart here */ + xfs_mru_cache_flush(mru, 0); + + kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); + kmem_free(mru, sizeof(*mru)); +} + +/* + * To insert an element, call xfs_mru_cache_insert() with the data store, the + * element's key and the client data pointer. This function returns 0 on + * success or ENOMEM if memory for the data element couldn't be allocated. + */ +int +xfs_mru_cache_insert( + xfs_mru_cache_t *mru, + unsigned long key, + void *value) +{ + xfs_mru_cache_elem_t *elem; + + ASSERT(mru && mru->lists); + if (!mru || !mru->lists) + return EINVAL; + + elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP); + if (!elem) + return ENOMEM; + + if (radix_tree_preload(GFP_KERNEL)) { + kmem_zone_free(xfs_mru_elem_zone, elem); + return ENOMEM; + } + + INIT_LIST_HEAD(&elem->list_node); + elem->key = key; + elem->value = value; + + mutex_spinlock(&mru->lock); + + radix_tree_insert(&mru->store, key, elem); + radix_tree_preload_end(); + _xfs_mru_cache_list_insert(mru, elem); + + mutex_spinunlock(&mru->lock, 0); + + return 0; +} + +/* + * To remove an element without calling the free function, call + * xfs_mru_cache_remove() with the data store and the element's key. 
On success + * the client data pointer for the removed element is returned, otherwise this + * function will return a NULL pointer. + */ +void * +xfs_mru_cache_remove( + xfs_mru_cache_t *mru, + unsigned long key) +{ + xfs_mru_cache_elem_t *elem; + void *value = NULL; + + ASSERT(mru && mru->lists); + if (!mru || !mru->lists) + return NULL; + + mutex_spinlock(&mru->lock); + elem = radix_tree_delete(&mru->store, key); + if (elem) { + value = elem->value; + list_del(&elem->list_node); + } + + mutex_spinunlock(&mru->lock, 0); + + if (elem) + kmem_zone_free(xfs_mru_elem_zone, elem); + + return value; +} + +/* + * To remove an element and call the free function, call xfs_mru_cache_delete() + * with the data store and the element's key. + */ +void +xfs_mru_cache_delete( + xfs_mru_cache_t *mru, + unsigned long key) +{ + void *value = xfs_mru_cache_remove(mru, key); + + if (value) + mru->free_func(key, value); +} + +/* + * To look up an element using its key, call xfs_mru_cache_lookup() with the + * data store and the element's key. If found, the element will be moved to the + * head of the MRU list to indicate that it's been touched. + * + * The internal data structures are protected by a spinlock that is STILL HELD + * when this function returns. Call xfs_mru_cache_done() to release it. Note + * that it is not safe to call any function that might sleep in the interim. + * + * The implementation could have used reference counting to avoid this + * restriction, but since most clients simply want to get, set or test a member + * of the returned data structure, the extra per-element memory isn't warranted. + * + * If the element isn't found, this function returns NULL and the spinlock is + * released. xfs_mru_cache_done() should NOT be called when this occurs. 
+ */ +void * +xfs_mru_cache_lookup( + xfs_mru_cache_t *mru, + unsigned long key) +{ + xfs_mru_cache_elem_t *elem; + + ASSERT(mru && mru->lists); + if (!mru || !mru->lists) + return NULL; + + mutex_spinlock(&mru->lock); + elem = radix_tree_lookup(&mru->store, key); + if (elem) { + list_del(&elem->list_node); + _xfs_mru_cache_list_insert(mru, elem); + } + else + mutex_spinunlock(&mru->lock, 0); + + return elem ? elem->value : NULL; +} + +/* + * To look up an element using its key, but leave its location in the internal + * lists alone, call xfs_mru_cache_peek(). If the element isn't found, this + * function returns NULL. + * + * See the comments above the declaration of the xfs_mru_cache_lookup() function + * for important locking information pertaining to this call. + */ +void * +xfs_mru_cache_peek( + xfs_mru_cache_t *mru, + unsigned long key) +{ + xfs_mru_cache_elem_t *elem; + + ASSERT(mru && mru->lists); + if (!mru || !mru->lists) + return NULL; + + mutex_spinlock(&mru->lock); + elem = radix_tree_lookup(&mru->store, key); + if (!elem) + mutex_spinunlock(&mru->lock, 0); + + return elem ? elem->value : NULL; +} + +/* + * To release the internal data structure spinlock after having performed an + * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() + * with the data store pointer. + */ +void +xfs_mru_cache_done( + xfs_mru_cache_t *mru) +{ + mutex_spinunlock(&mru->lock, 0); +} diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h new file mode 100644 index 000000000000..624fd10ee8e5 --- /dev/null +++ b/fs/xfs/xfs_mru_cache.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2006-2007 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_MRU_CACHE_H__ +#define __XFS_MRU_CACHE_H__ + + +/* Function pointer type for callback to free a client's data pointer. */ +typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*); + +typedef struct xfs_mru_cache +{ + struct radix_tree_root store; /* Core storage data structure. */ + struct list_head *lists; /* Array of lists, one per grp. */ + struct list_head reap_list; /* Elements overdue for reaping. */ + spinlock_t lock; /* Lock to protect this struct. */ + unsigned int grp_count; /* Number of discrete groups. */ + unsigned int grp_time; /* Time period spanned by grps. */ + unsigned int lru_grp; /* Group containing time zero. */ + unsigned long time_zero; /* Time first element was added. */ + unsigned long next_reap; /* Time that the reaper should + next do something. */ + unsigned int reap_all; /* if set, reap all lists */ + xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ + struct delayed_work work; /* Workqueue data for reaping. 
*/ +} xfs_mru_cache_t; + +int xfs_mru_cache_init(void); +void xfs_mru_cache_uninit(void); +int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, + unsigned int grp_count, + xfs_mru_cache_free_func_t free_func); +void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart); +void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); +int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, + void *value); +void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); +void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); +void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); +void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key); +void xfs_mru_cache_done(struct xfs_mru_cache *mru); + +#endif /* __XFS_MRU_CACHE_H__ */ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index b3a5f07bd073..47082c01872d 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1882,11 +1882,13 @@ xfs_growfs_rt( (nrblocks = in->newblocks) <= sbp->sb_rblocks || (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) return XFS_ERROR(EINVAL); + if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks))) + return error; /* * Read in the last block of the device, make sure it exists. 
*/ error = xfs_read_buf(mp, mp->m_rtdev_targp, - XFS_FSB_TO_BB(mp, in->newblocks - 1), + XFS_FSB_TO_BB(mp, nrblocks - 1), XFS_FSB_TO_BB(mp, 1), 0, &bp); if (error) return error; diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index 188b296ff50c..fcf28dbded7c 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -72,6 +72,34 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb) } /* + * Flags for xfs_free_eofblocks + */ +#define XFS_FREE_EOF_LOCK (1<<0) +#define XFS_FREE_EOF_NOLOCK (1<<1) + + +/* + * helper function to extract extent size hint from inode + */ +STATIC_INLINE xfs_extlen_t +xfs_get_extsz_hint( + xfs_inode_t *ip) +{ + xfs_extlen_t extsz; + + if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) + ? ip->i_d.di_extsize + : ip->i_mount->m_sb.sb_rextsize; + ASSERT(extsz); + } else { + extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) + ? ip->i_d.di_extsize : 0; + } + return extsz; +} + +/* * Prototypes for functions in xfs_rw.c. 
*/ extern int xfs_write_clear_setuid(struct xfs_inode *ip); @@ -91,10 +119,12 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock); extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock); extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags, - cred_t *credp); + cred_t *credp); extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf, - xfs_off_t offset, cred_t *credp, int flags); + xfs_off_t offset, cred_t *credp, int flags); extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state, - cred_t *credp); + cred_t *credp); +extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, + int flags); #endif /* __XFS_RW_H__ */ diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 467854b45c8f..ef42537a607a 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -74,12 +74,13 @@ struct xfs_mount; */ #define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */ #define XFS_SB_VERSION2_RESERVED1BIT 0x00000001 -#define XFS_SB_VERSION2_RESERVED2BIT 0x00000002 +#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ #define XFS_SB_VERSION2_OKREALFBITS \ - (XFS_SB_VERSION2_ATTR2BIT) + (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ + XFS_SB_VERSION2_ATTR2BIT) #define XFS_SB_VERSION2_OKSASHFBITS \ (0) #define XFS_SB_VERSION2_OKREALBITS \ @@ -181,6 +182,9 @@ typedef enum { #define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) #define XFS_SB_UNIT XFS_SB_MVAL(UNIT) #define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) +#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) +#define XFS_SB_IFREE XFS_SB_MVAL(IFREE) +#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) #define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) #define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) #define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) @@ -188,7 +192,7 @@ typedef enum { 
(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ - XFS_SB_FEATURES2) + XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2) /* @@ -414,6 +418,12 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT) */ +static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) +{ + return (XFS_SB_VERSION_HASMOREBITS(sbp) && \ + ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); +} + #define XFS_SB_VERSION_HASATTR2(sbp) xfs_sb_version_hasattr2(sbp) static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) { diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index cc2d60951e21..356d6627f581 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -427,6 +427,14 @@ undo_blocks: * * Mark the transaction structure to indicate that the superblock * needs to be updated before committing. + * + * Because we may not be keeping track of allocated/free inodes and + * used filesystem blocks in the superblock, we do not mark the + * superblock dirty in this transaction if we modify these fields. + * We still need to update the transaction deltas so that they get + * applied to the incore superblock, but we don't want them to + * cause the superblock to get locked and logged if these are the + * only fields in the superblock that the transaction modifies. 
*/ void xfs_trans_mod_sb( @@ -434,13 +442,19 @@ xfs_trans_mod_sb( uint field, int64_t delta) { + uint32_t flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY); + xfs_mount_t *mp = tp->t_mountp; switch (field) { case XFS_TRANS_SB_ICOUNT: tp->t_icount_delta += delta; + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_IFREE: tp->t_ifree_delta += delta; + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_FDBLOCKS: /* @@ -453,6 +467,8 @@ xfs_trans_mod_sb( ASSERT(tp->t_blk_res_used <= tp->t_blk_res); } tp->t_fdblocks_delta += delta; + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_RES_FDBLOCKS: /* @@ -462,6 +478,8 @@ xfs_trans_mod_sb( */ ASSERT(delta < 0); tp->t_res_fdblocks_delta += delta; + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_FREXTENTS: /* @@ -515,7 +533,7 @@ xfs_trans_mod_sb( return; } - tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY); + tp->t_flags |= flags; } /* @@ -544,18 +562,23 @@ xfs_trans_apply_sb_deltas( (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta + tp->t_ag_btree_delta)); - if (tp->t_icount_delta != 0) { - INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta); - } - if (tp->t_ifree_delta != 0) { - INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta); - } + /* + * Only update the superblock counters if we are logging them + */ + if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) { + if (tp->t_icount_delta != 0) { + INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta); + } + if (tp->t_ifree_delta != 0) { + INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta); + } - if (tp->t_fdblocks_delta != 0) { - INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta); - } - if (tp->t_res_fdblocks_delta != 0) { - INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta); + if (tp->t_fdblocks_delta != 0) { + 
INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta); + } + if (tp->t_res_fdblocks_delta != 0) { + INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta); + } } if (tp->t_frextents_delta != 0) { @@ -615,11 +638,23 @@ xfs_trans_apply_sb_deltas( } /* - * xfs_trans_unreserve_and_mod_sb() is called to release unused - * reservations and apply superblock counter changes to the in-core - * superblock. + * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations + * and apply superblock counter changes to the in-core superblock. The + * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT + * applied to the in-core superblock. The idea is that that has already been + * done. * * This is done efficiently with a single call to xfs_mod_incore_sb_batch(). + * However, we have to ensure that we only modify each superblock field only + * once because the application of the delta values may not be atomic. That can + * lead to ENOSPC races occurring if we have two separate modifcations of the + * free space counter to put back the entire reservation and then take away + * what we used. + * + * If we are not logging superblock counters, then the inode allocated/free and + * used block counts are not updated in the on disk superblock. In this case, + * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we + * still need to update the incore superblock with the changes. */ STATIC void xfs_trans_unreserve_and_mod_sb( @@ -627,40 +662,49 @@ xfs_trans_unreserve_and_mod_sb( { xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ xfs_mod_sb_t *msbp; + xfs_mount_t *mp = tp->t_mountp; /* REFERENCED */ int error; int rsvd; + int64_t blkdelta = 0; + int64_t rtxdelta = 0; msbp = msb; rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; - /* - * Release any reserved blocks. Any that were allocated - * will be taken back again by fdblocks_delta below. 
- */ - if (tp->t_blk_res > 0) { + /* calculate free blocks delta */ + if (tp->t_blk_res > 0) + blkdelta = tp->t_blk_res; + + if ((tp->t_fdblocks_delta != 0) && + (xfs_sb_version_haslazysbcount(&mp->m_sb) || + (tp->t_flags & XFS_TRANS_SB_DIRTY))) + blkdelta += tp->t_fdblocks_delta; + + if (blkdelta != 0) { msbp->msb_field = XFS_SBS_FDBLOCKS; - msbp->msb_delta = tp->t_blk_res; + msbp->msb_delta = blkdelta; msbp++; } - /* - * Release any reserved real time extents . Any that were - * allocated will be taken back again by frextents_delta below. - */ - if (tp->t_rtx_res > 0) { + /* calculate free realtime extents delta */ + if (tp->t_rtx_res > 0) + rtxdelta = tp->t_rtx_res; + + if ((tp->t_frextents_delta != 0) && + (tp->t_flags & XFS_TRANS_SB_DIRTY)) + rtxdelta += tp->t_frextents_delta; + + if (rtxdelta != 0) { msbp->msb_field = XFS_SBS_FREXTENTS; - msbp->msb_delta = tp->t_rtx_res; + msbp->msb_delta = rtxdelta; msbp++; } - /* - * Apply any superblock modifications to the in-core version. - * The t_res_fdblocks_delta and t_res_frextents_delta fields are - * explicitly NOT applied to the in-core superblock. - * The idea is that that has already been done. 
- */ - if (tp->t_flags & XFS_TRANS_SB_DIRTY) { + /* apply remaining deltas */ + + if (xfs_sb_version_haslazysbcount(&mp->m_sb) || + (tp->t_flags & XFS_TRANS_SB_DIRTY)) { if (tp->t_icount_delta != 0) { msbp->msb_field = XFS_SBS_ICOUNT; msbp->msb_delta = tp->t_icount_delta; @@ -671,16 +715,9 @@ xfs_trans_unreserve_and_mod_sb( msbp->msb_delta = tp->t_ifree_delta; msbp++; } - if (tp->t_fdblocks_delta != 0) { - msbp->msb_field = XFS_SBS_FDBLOCKS; - msbp->msb_delta = tp->t_fdblocks_delta; - msbp++; - } - if (tp->t_frextents_delta != 0) { - msbp->msb_field = XFS_SBS_FREXTENTS; - msbp->msb_delta = tp->t_frextents_delta; - msbp++; - } + } + + if (tp->t_flags & XFS_TRANS_SB_DIRTY) { if (tp->t_dblocks_delta != 0) { msbp->msb_field = XFS_SBS_DBLOCKS; msbp->msb_delta = tp->t_dblocks_delta; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7dfcc450366f..0e26e729023e 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -94,7 +94,8 @@ typedef struct xfs_trans_header { #define XFS_TRANS_GROWFSRT_ZERO 38 #define XFS_TRANS_GROWFSRT_FREE 39 #define XFS_TRANS_SWAPEXT 40 -#define XFS_TRANS_TYPE_MAX 40 +#define XFS_TRANS_SB_COUNT 41 +#define XFS_TRANS_TYPE_MAX 41 /* new transaction types need to be reflected in xfs_logprint(8) */ diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 65c561201cb8..11f5ea29a038 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -51,6 +51,8 @@ #include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_clnt.h" +#include "xfs_mru_cache.h" +#include "xfs_filestream.h" #include "xfs_fsops.h" STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); @@ -81,6 +83,8 @@ xfs_init(void) xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); + xfs_mru_cache_init(); + xfs_filestream_init(); /* * The size of the zone allocated buf log item is the maximum @@ -164,6 +168,8 @@ xfs_cleanup(void) xfs_cleanup_procfs(); 
xfs_sysctl_unregister(); xfs_refcache_destroy(); + xfs_filestream_uninit(); + xfs_mru_cache_uninit(); xfs_acl_zone_destroy(xfs_acl_zone); #ifdef XFS_DIR2_TRACE @@ -320,6 +326,9 @@ xfs_start_flags( else mp->m_flags &= ~XFS_MOUNT_BARRIER; + if (ap->flags2 & XFSMNT2_FILESTREAMS) + mp->m_flags |= XFS_MOUNT_FILESTREAMS; + return 0; } @@ -518,6 +527,9 @@ xfs_mount( if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_mountfs_check_barriers(mp); + if ((error = xfs_filestream_mount(mp))) + goto error2; + error = XFS_IOINIT(vfsp, args, flags); if (error) goto error2; @@ -575,6 +587,13 @@ xfs_unmount( */ xfs_refcache_purge_mp(mp); + /* + * Blow away any referenced inode in the filestreams cache. + * This can and will cause log traffic as inodes go inactive + * here. + */ + xfs_filestream_unmount(mp); + XFS_bflush(mp->m_ddev_targp); error = xfs_unmount_flush(mp, 0); if (error) @@ -640,7 +659,7 @@ xfs_quiesce_fs( * we can write the unmount record. */ do { - xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL); + xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); @@ -651,6 +670,30 @@ xfs_quiesce_fs( return 0; } +/* + * Second stage of a quiesce. The data is already synced, now we have to take + * care of the metadata. New transactions are already blocked, so we need to + * wait for any remaining transactions to drain out before proceding. 
+ */ +STATIC void +xfs_attr_quiesce( + xfs_mount_t *mp) +{ + /* wait for all modifications to complete */ + while (atomic_read(&mp->m_active_trans) > 0) + delay(100); + + /* flush inodes and push all remaining buffers out to disk */ + xfs_quiesce_fs(mp); + + ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); + + /* Push the superblock and write an unmount record */ + xfs_log_sbcount(mp, 1); + xfs_log_unmount_write(mp); + xfs_unmountfs_writesb(mp); +} + STATIC int xfs_mntupdate( bhv_desc_t *bdp, @@ -670,10 +713,9 @@ xfs_mntupdate( mp->m_flags &= ~XFS_MOUNT_BARRIER; } } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ - bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL); - xfs_quiesce_fs(mp); - xfs_log_unmount_write(mp); - xfs_unmountfs_writesb(mp); + xfs_filestream_flush(mp); + bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL); + xfs_attr_quiesce(mp); vfsp->vfs_flag |= VFS_RDONLY; } return 0; @@ -887,6 +929,9 @@ xfs_sync( { xfs_mount_t *mp = XFS_BHVTOM(bdp); + if (flags & SYNC_IOWAIT) + xfs_filestream_flush(mp); + return xfs_syncsub(mp, flags, NULL); } @@ -1128,58 +1173,41 @@ xfs_sync_inodes( * in the inode list. */ - if ((flags & SYNC_CLOSE) && (vp != NULL)) { - /* - * This is the shutdown case. We just need to - * flush and invalidate all the pages associated - * with the inode. Drop the inode lock since - * we can't hold it across calls to the buffer - * cache. - * - * We don't set the VREMAPPING bit in the vnode - * here, because we don't hold the vnode lock - * exclusively. It doesn't really matter, though, - * because we only come here when we're shutting - * down anyway. - */ - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (XFS_FORCED_SHUTDOWN(mp)) { - bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF); - } else { - error = bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF); + /* + * If we have to flush data or wait for I/O completion + * we need to drop the ilock that we currently hold. 
+ * If we need to drop the lock, insert a marker if we + * have not already done so. + */ + if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || + ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { + if (mount_locked) { + IPOINTER_INSERT(ip, mp); } + xfs_iunlock(ip, XFS_ILOCK_SHARED); - xfs_ilock(ip, XFS_ILOCK_SHARED); - - } else if ((flags & SYNC_DELWRI) && (vp != NULL)) { - if (VN_DIRTY(vp)) { - /* We need to have dropped the lock here, - * so insert a marker if we have not already - * done so. - */ - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - - /* - * Drop the inode lock since we can't hold it - * across calls to the buffer cache. - */ - xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (flags & SYNC_CLOSE) { + /* Shutdown case. Flush and invalidate. */ + if (XFS_FORCED_SHUTDOWN(mp)) + bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF); + else + error = bhv_vop_flushinval_pages(vp, 0, + -1, FI_REMAPF); + } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { error = bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, fflag, FI_NONE); - xfs_ilock(ip, XFS_ILOCK_SHARED); } + /* + * When freezing, we need to wait ensure all I/O (including direct + * I/O) is complete to ensure no further data modification can take + * place after this point + */ + if (flags & SYNC_IOWAIT) + vn_iowait(vp); + + xfs_ilock(ip, XFS_ILOCK_SHARED); } - /* - * When freezing, we need to wait ensure all I/O (including direct - * I/O) is complete to ensure no further data modification can take - * place after this point - */ - if (flags & SYNC_IOWAIT) - vn_iowait(vp); if (flags & SYNC_BDFLUSH) { if ((flags & SYNC_ATTR) && @@ -1514,6 +1542,15 @@ xfs_syncsub( } /* + * If asked, update the disk superblock with incore counter values if we + * are using non-persistent counters so that they don't get too far out + * of sync if we crash or get a forced shutdown. We don't want to force + * this to disk, just get a transaction into the iclogs.... 
+ */ + if (flags & SYNC_SUPER) + xfs_log_sbcount(mp, 0); + + /* * Now check to see if the log needs a "dummy" transaction. */ @@ -1645,6 +1682,7 @@ xfs_vget( * in stat(). */ #define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ #define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ +#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ STATIC unsigned long suffix_strtoul(char *s, char **endp, unsigned int base) @@ -1831,6 +1869,8 @@ xfs_parseargs( args->flags |= XFSMNT_ATTR2; } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { args->flags &= ~XFSMNT_ATTR2; + } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { + args->flags2 |= XFSMNT2_FILESTREAMS; } else if (!strcmp(this_char, "osyncisdsync")) { /* no-op, this is now the default */ cmn_err(CE_WARN, @@ -1959,9 +1999,9 @@ xfs_showargs( } /* - * Second stage of a freeze. The data is already frozen, now we have to take - * care of the metadata. New transactions are already blocked, so we need to - * wait for any remaining transactions to drain out before proceding. + * Second stage of a freeze. The data is already frozen so we only + * need to take care of the metadata. Once that's done write a dummy + * record to dirty the log in case of a crash while frozen. 
*/ STATIC void xfs_freeze( @@ -1969,18 +2009,7 @@ xfs_freeze( { xfs_mount_t *mp = XFS_BHVTOM(bdp); - /* wait for all modifications to complete */ - while (atomic_read(&mp->m_active_trans) > 0) - delay(100); - - /* flush inodes and push all remaining buffers out to disk */ - xfs_quiesce_fs(mp); - - ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); - - /* Push the superblock and write an unmount record */ - xfs_log_unmount_write(mp); - xfs_unmountfs_writesb(mp); + xfs_attr_quiesce(mp); xfs_fs_log_dummy(mp); } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index de17aed578f0..79b522779aa4 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -51,6 +51,7 @@ #include "xfs_refcache.h" #include "xfs_trans_space.h" #include "xfs_log_priv.h" +#include "xfs_filestream.h" STATIC int xfs_open( @@ -77,36 +78,6 @@ xfs_open( return 0; } -STATIC int -xfs_close( - bhv_desc_t *bdp, - int flags, - lastclose_t lastclose, - cred_t *credp) -{ - bhv_vnode_t *vp = BHV_TO_VNODE(bdp); - xfs_inode_t *ip = XFS_BHVTOI(bdp); - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return XFS_ERROR(EIO); - - if (lastclose != L_TRUE || !VN_ISREG(vp)) - return 0; - - /* - * If we previously truncated this file and removed old data in - * the process, we want to initiate "early" writeout on the last - * close. This is an attempt to combat the notorious NULL files - * problem which is particularly noticable from a truncate down, - * buffered (re-)write (delalloc), followed by a crash. What we - * are effectively doing here is significantly reducing the time - * window where we'd otherwise be exposed to that problem. - */ - if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) - return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); - return 0; -} - /* * xfs_getattr */ @@ -183,9 +154,8 @@ xfs_getattr( * realtime extent size or the realtime volume's * extent size. */ - vap->va_blocksize = ip->i_d.di_extsize ? 
- (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : - (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); + vap->va_blocksize = + xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; } break; } @@ -814,6 +784,8 @@ xfs_setattr( di_flags |= XFS_DIFLAG_PROJINHERIT; if (vap->va_xflags & XFS_XFLAG_NODEFRAG) di_flags |= XFS_DIFLAG_NODEFRAG; + if (vap->va_xflags & XFS_XFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { if (vap->va_xflags & XFS_XFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; @@ -1201,13 +1173,15 @@ xfs_fsync( } /* - * This is called by xfs_inactive to free any blocks beyond eof, - * when the link count isn't zero. + * This is called by xfs_inactive to free any blocks beyond eof + * when the link count isn't zero and by xfs_dm_punch_hole() when + * punching a hole to EOF. */ -STATIC int -xfs_inactive_free_eofblocks( +int +xfs_free_eofblocks( xfs_mount_t *mp, - xfs_inode_t *ip) + xfs_inode_t *ip, + int flags) { xfs_trans_t *tp; int error; @@ -1216,6 +1190,7 @@ xfs_inactive_free_eofblocks( xfs_filblks_t map_len; int nimaps; xfs_bmbt_irec_t imap; + int use_iolock = (flags & XFS_FREE_EOF_LOCK); /* * Figure out if there are any blocks beyond the end @@ -1256,11 +1231,14 @@ xfs_inactive_free_eofblocks( * cache and we can't * do that within a transaction. */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); + if (use_iolock) + xfs_ilock(ip, XFS_IOLOCK_EXCL); error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, ip->i_size); if (error) { - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + xfs_trans_cancel(tp, 0); + if (use_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -1297,7 +1275,8 @@ xfs_inactive_free_eofblocks( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } - xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); + xfs_iunlock(ip, (use_iolock ? 
(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) + : XFS_ILOCK_EXCL)); } return error; } @@ -1560,6 +1539,31 @@ xfs_release( if (vp->v_vfsp->vfs_flag & VFS_RDONLY) return 0; + if (!XFS_FORCED_SHUTDOWN(mp)) { + /* + * If we are using filestreams, and we have an unlinked + * file that we are processing the last close on, then nothing + * will be able to reopen and write to this file. Purge this + * inode from the filestreams cache so that it doesn't delay + * teardown of the inode. + */ + if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) + xfs_filestream_deassociate(ip); + + /* + * If we previously truncated this file and removed old data + * in the process, we want to initiate "early" writeout on + * the last close. This is an attempt to combat the notorious + * NULL files problem which is particularly noticeable from a + * truncate down, buffered (re-)write (delalloc), followed by + * a crash. What we are effectively doing here is + * significantly reducing the time window where we'd otherwise + * be exposed to that problem. 
+ */ + if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); + } + #ifdef HAVE_REFCACHE /* If we are in the NFS reference cache then don't do this now */ if (ip->i_refcache) @@ -1573,7 +1577,8 @@ xfs_release( (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - if ((error = xfs_inactive_free_eofblocks(mp, ip))) + error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + if (error) return error; /* Update linux inode block count after free above */ vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, @@ -1654,7 +1659,8 @@ xfs_inactive( (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || (ip->i_delayed_blks != 0)))) { - if ((error = xfs_inactive_free_eofblocks(mp, ip))) + error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + if (error) return VN_INACTIVE_CACHE; /* Update linux inode block count after free above */ vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, @@ -1680,6 +1686,7 @@ xfs_inactive( error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); if (error) { + xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_IOLOCK_EXCL); return VN_INACTIVE_CACHE; } @@ -2217,9 +2224,9 @@ static inline int xfs_lock_inumorder(int lock_mode, int subclass) { if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) - lock_mode |= (subclass + XFS_IOLOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) - lock_mode |= (subclass + XFS_ILOCK_INUMORDER) << XFS_ILOCK_SHIFT; + lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; return lock_mode; } @@ -2546,6 +2553,15 @@ xfs_remove( */ xfs_refcache_purge_ip(ip); + /* + * If we are using filestreams, kill the stream association. + * If the file is still open it may get a new one but that + * will get killed on last close in xfs_close() so we don't + * have to worry about that. 
+ */ + if (link_zero && xfs_inode_is_filestream(ip)) + xfs_filestream_deassociate(ip); + vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); /* @@ -4047,22 +4063,16 @@ xfs_alloc_file_space( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - rt = XFS_IS_REALTIME_INODE(ip); - if (unlikely(rt)) { - if (!(extsz = ip->i_d.di_extsize)) - extsz = mp->m_sb.sb_rextsize; - } else { - extsz = ip->i_d.di_extsize; - } - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; if (len <= 0) return XFS_ERROR(EINVAL); + rt = XFS_IS_REALTIME_INODE(ip); + extsz = xfs_get_extsz_hint(ip); + count = len; - error = 0; imapp = &imaps[0]; nimaps = 1; bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); @@ -4678,11 +4688,7 @@ xfs_change_file_space( bhv_vnodeops_t xfs_vnodeops = { BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), .vop_open = xfs_open, - .vop_close = xfs_close, .vop_read = xfs_read, -#ifdef HAVE_SENDFILE - .vop_sendfile = xfs_sendfile, -#endif #ifdef HAVE_SPLICE .vop_splice_read = xfs_splice_read, .vop_splice_write = xfs_splice_write, |