diff options
85 files changed, 1787 insertions, 605 deletions
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index af0aaeb..3e94811 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -124,8 +124,8 @@ static long cp_oldabi_stat64(struct kstat *stat, tmp.__st_ino = stat->ino; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_rdev = huge_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_blocks = stat->blocks; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 0dc8543..c71eb6c 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -159,8 +159,8 @@ static int cp_hpux_stat(struct kstat *stat, struct hpux_stat64 __user *statbuf) tmp.st_ino = stat->ino; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_rdev = new_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index ab64bdb..6542652 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -173,11 +173,14 @@ asmlinkage long sys32_setfsgid16(u16 gid) static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { - group = (u16)GROUP_AT(group_info, i); + kgid = GROUP_AT(group_info, i); + group = (u16)from_kgid_munged(user_ns, kgid); if (put_user(group, grouplist+i)) return -EFAULT; } @@ -187,13 +190,20 @@ static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { if (get_user(group, grouplist+i)) return -EFAULT; - GROUP_AT(group_info, i) = (gid_t)group; + + kgid = make_kgid(user_ns, (gid_t)group); + if (!gid_valid(kgid)) + return -EINVAL; + + GROUP_AT(group_info, i) = kgid; } return 0; @@ -537,8 +547,8 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) tmp.__st_ino = (u32)stat->ino; tmp.st_mode = stat->mode; tmp.st_nlink = (unsigned int)stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_rdev = huge_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_blksize = (u32)stat->blksize; diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 29c478f..f739233 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -139,8 +139,8 @@ static int cp_compat_stat64(struct kstat *stat, err |= put_user(stat->ino, &statbuf->st_ino); err |= put_user(stat->mode, &statbuf->st_mode); err |= put_user(stat->nlink, &statbuf->st_nlink); - err |= put_user(stat->uid, &statbuf->st_uid); - err |= put_user(stat->gid, &statbuf->st_gid); + err |= put_user(from_kuid_munged(current_user_ns(), stat->uid), &statbuf->st_uid); + err |= put_user(from_kgid_munged(current_user_ns(), stat->gid), &statbuf->st_gid); err |= put_user(huge_encode_dev(stat->rdev), &statbuf->st_rdev); err |= put_user(0, (unsigned long __user *) &statbuf->__pad3[0]); err |= put_user(stat->size, &statbuf->st_size); diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index edca9c0..4540bec 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -71,8 +71,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) { typeof(ubuf->st_uid) uid = 0; typeof(ubuf->st_gid) gid = 0; - SET_UID(uid, stat->uid); - SET_GID(gid, stat->gid); + SET_UID(uid, from_kuid_munged(current_user_ns(), stat->uid)); + SET_GID(gid, from_kgid_munged(current_user_ns(), stat->gid)); if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) || __put_user(stat->ino, &ubuf->__st_ino) || diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3ecfd1aa..76dcd9d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -582,7 +582,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, pte_t *pte = lookup_address(address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) - printk(nx_warning, current_uid()); + printk(nx_warning, from_kuid(&init_user_ns, current_uid())); } printk(KERN_ALERT "BUG: unable to handle kernel "); @@ -47,14 +47,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) /* Make sure a caller can chown. */ if ((ia_valid & ATTR_UID) && - (current_fsuid() != inode->i_uid || - attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) + (!uid_eq(current_fsuid(), inode->i_uid) || + !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN)) return -EPERM; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && - (current_fsuid() != inode->i_uid || - (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && + (!uid_eq(current_fsuid(), inode->i_uid) || + (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && !capable(CAP_CHOWN)) return -EPERM; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 16f7354..e658dd1 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -226,10 +226,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, cred->uid); - NEW_AUX_ENT(AT_EUID, cred->euid); - NEW_AUX_ENT(AT_GID, cred->gid); - NEW_AUX_ENT(AT_EGID, cred->egid); + NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid)); + NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid)); + NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid)); + NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid)); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes); NEW_AUX_ENT(AT_EXECFN, bprm->exec); @@ -1356,8 +1356,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_flag = p->flags; rcu_read_lock(); cred = __task_cred(p); - SET_UID(psinfo->pr_uid, cred->uid); - SET_GID(psinfo->pr_gid, cred->gid); + SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid)); + SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid)); rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index d390a0f..3d77cf8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -627,10 +627,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) cred->uid); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) cred->euid); - NEW_AUX_ENT(AT_GID, (elf_addr_t) cred->gid); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) cred->egid); + NEW_AUX_ENT(AT_UID, (elf_addr_t) from_kuid_munged(cred->user_ns, cred->uid)); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) from_kuid_munged(cred->user_ns, cred->euid)); + NEW_AUX_ENT(AT_GID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->gid)); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->egid)); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); @@ -1421,8 +1421,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_flag = p->flags; rcu_read_lock(); cred = __task_cred(p); - SET_UID(psinfo->pr_uid, cred->uid); - SET_GID(psinfo->pr_gid, cred->gid); + SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid)); + SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid)); rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); diff --git a/fs/compat.c b/fs/compat.c index f2944ac..0781e61 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -144,8 +144,8 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; - SET_UID(tmp.st_uid, stat->uid); - SET_GID(tmp.st_gid, stat->gid); + SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); + SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = old_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 10f5e0b..979c1e3 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -98,8 +98,8 @@ static struct vfsmount *devpts_mnt; struct pts_mount_opts { int setuid; int setgid; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; umode_t mode; umode_t ptmxmode; int newinstance; @@ -158,11 +158,13 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode) static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) { char *p; + kuid_t uid; + kgid_t gid; opts->setuid = 0; opts->setgid = 0; - opts->uid = 0; - opts->gid = 0; + opts->uid = GLOBAL_ROOT_UID; + opts->gid = GLOBAL_ROOT_GID; opts->mode = DEVPTS_DEFAULT_MODE; opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; opts->max = NR_UNIX98_PTY_MAX; @@ -184,13 +186,19 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) case Opt_uid: if (match_int(&args[0], &option)) return -EINVAL; - opts->uid = option; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + opts->uid = uid; opts->setuid = 1; break; case Opt_gid: if (match_int(&args[0], &option)) return -EINVAL; - opts->gid = option; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + opts->gid = gid; opts->setgid = 1; break; case Opt_mode: @@ -315,9 +323,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root) struct pts_mount_opts *opts = &fsi->mount_opts; if (opts->setuid) - seq_printf(seq, ",uid=%u", opts->uid); + seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, opts->uid)); if (opts->setgid) - seq_printf(seq, ",gid=%u", opts->gid); + seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); seq_printf(seq, ",mode=%03o", opts->mode); #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index ab22480..a750f95 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -303,7 +303,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, mutex_unlock(&ecryptfs_daemon_hash_mux); goto wake_up; } - tsk_user_ns = __task_cred(msg_ctx->task)->user->user_ns; + tsk_user_ns = __task_cred(msg_ctx->task)->user_ns; ctx_euid = task_euid(msg_ctx->task); rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns); rcu_read_unlock(); @@ -1139,7 +1139,7 @@ void setup_new_exec(struct linux_binprm * bprm) /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; - if (current_euid() == current_uid() && current_egid() == current_gid()) + if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) set_dumpable(current->mm, 1); else set_dumpable(current->mm, suid_dumpable); @@ -1153,8 +1153,8 @@ void setup_new_exec(struct linux_binprm * bprm) current->mm->task_size = TASK_SIZE; /* install the new credentials */ - if (bprm->cred->uid != current_euid() || - bprm->cred->gid != current_egid()) { + if (!uid_eq(bprm->cred->uid, current_euid()) || + !gid_eq(bprm->cred->gid, current_egid())) { current->pdeath_signal = 0; } else { would_dump(bprm, bprm->file); @@ -1299,8 +1299,11 @@ int prepare_binprm(struct linux_binprm *bprm) !current->no_new_privs) { /* Set-uid? */ if (mode & S_ISUID) { + if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid)) + return -EPERM; bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->euid = inode->i_uid; + } /* Set-gid? */ @@ -1310,6 +1313,8 @@ int prepare_binprm(struct linux_binprm *bprm) * executable. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { + if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) + return -EPERM; bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->egid = inode->i_gid; } @@ -2142,7 +2147,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (__get_dumpable(cprm.mm_flags) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ - cred->fsuid = 0; /* Dump root private */ + cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); @@ -2243,7 +2248,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) * Dont allow local users get cute and trick others to coredump * into their pre-created files. */ - if (inode->i_uid != current_fsuid()) + if (!uid_eq(inode->i_uid, current_fsuid())) goto close_fail; if (!cprm.file->f_op || !cprm.file->f_op->write) goto close_fail; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index a8cbe1b..030c6d2 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1193,8 +1193,9 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi) free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && - sbi->s_resuid != current_fsuid() && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { + !uid_eq(sbi->s_resuid, current_fsuid()) && + (gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) || + !in_group_p (sbi->s_resgid))) { return 0; } return 1; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 0b2b4db..d9a17d0 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -82,8 +82,8 @@ struct ext2_sb_info { struct buffer_head ** s_group_desc; unsigned long s_mount_opt; unsigned long s_sb_block; - uid_t s_resuid; - gid_t s_resgid; + kuid_t s_resuid; + kgid_t s_resgid; unsigned short s_mount_state; unsigned short s_pad; int s_addr_per_block_bits; @@ -637,8 +637,8 @@ static inline void verify_offsets(void) */ struct ext2_mount_options { unsigned long s_mount_opt; - uid_t s_resuid; - gid_t s_resgid; + kuid_t s_resuid; + kgid_t s_resgid; }; /* diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 740cad8..f9fa95f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1293,6 +1293,8 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) struct inode *inode; long ret = -EIO; int n; + uid_t i_uid; + gid_t i_gid; inode = iget_locked(sb, ino); if (!inode) @@ -1310,12 +1312,14 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) } inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + i_uid_write(inode, i_uid); + i_gid_write(inode, i_gid); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); @@ -1413,8 +1417,8 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = inode->i_uid; - gid_t gid = inode->i_gid; + uid_t uid = i_uid_read(inode); + gid_t gid = i_gid_read(inode); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; @@ -1529,8 +1533,8 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) if (is_quota_modification(inode, iattr)) dquot_initialize(inode); - if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || + (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { error = dquot_transfer(inode, iattr); if (error) return error; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e1025c7..38f8160 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -228,13 +228,15 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",grpid"); if (!test_opt(sb, GRPID) && (def_mount_opts & EXT2_DEFM_BSDGROUPS)) seq_puts(seq, ",nogrpid"); - if (sbi->s_resuid != EXT2_DEF_RESUID || + if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT2_DEF_RESUID)) || le16_to_cpu(es->s_def_resuid) != EXT2_DEF_RESUID) { - seq_printf(seq, ",resuid=%u", sbi->s_resuid); + seq_printf(seq, ",resuid=%u", + from_kuid_munged(&init_user_ns, sbi->s_resuid)); } - if (sbi->s_resgid != EXT2_DEF_RESGID || + if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT2_DEF_RESGID)) || le16_to_cpu(es->s_def_resgid) != EXT2_DEF_RESGID) { - seq_printf(seq, ",resgid=%u", sbi->s_resgid); + seq_printf(seq, ",resgid=%u", + from_kgid_munged(&init_user_ns, sbi->s_resgid)); } if (test_opt(sb, ERRORS_RO)) { int def_errors = le16_to_cpu(es->s_errors); @@ -436,6 +438,8 @@ static int parse_options(char *options, struct super_block *sb) struct ext2_sb_info *sbi = EXT2_SB(sb); substring_t args[MAX_OPT_ARGS]; int option; + kuid_t uid; + kgid_t gid; if (!options) return 1; @@ -462,12 +466,23 @@ static int parse_options(char *options, struct super_block *sb) case Opt_resuid: if (match_int(&args[0], &option)) return 0; - sbi->s_resuid = option; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) { + ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option); + return -1; + + } + sbi->s_resuid = uid; break; case Opt_resgid: if (match_int(&args[0], &option)) return 0; - sbi->s_resgid = option; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) { + ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option); + return -1; + } + sbi->s_resgid = gid; break; case Opt_sb: /* handled by get_sb_block() instead of here */ @@ -841,8 +856,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) else set_opt(sbi->s_mount_opt, ERRORS_RO); - sbi->s_resuid = le16_to_cpu(es->s_def_resuid); - sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); + sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); set_opt(sbi->s_mount_opt, RESERVATION); diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index baac1b1..25cd608 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1439,8 +1439,9 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation) free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && - !use_reservation && sbi->s_resuid != current_fsuid() && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { + !use_reservation && !uid_eq(sbi->s_resuid, current_fsuid()) && + (gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) || + !in_group_p (sbi->s_resgid))) { return 0; } return 1; diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h index b6515fd..7977973 100644 --- a/fs/ext3/ext3.h +++ b/fs/ext3/ext3.h @@ -243,8 +243,8 @@ struct ext3_new_group_data { */ struct ext3_mount_options { unsigned long s_mount_opt; - uid_t s_resuid; - gid_t s_resgid; + kuid_t s_resuid; + kgid_t s_resgid; unsigned long s_commit_interval; #ifdef CONFIG_QUOTA int s_jquota_fmt; @@ -637,8 +637,8 @@ struct ext3_sb_info { struct buffer_head ** s_group_desc; unsigned long s_mount_opt; ext3_fsblk_t s_sb_block; - uid_t s_resuid; - gid_t s_resgid; + kuid_t s_resuid; + kgid_t s_resgid; unsigned short s_mount_state; unsigned short s_pad; int s_addr_per_block_bits; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 10d7812..a09790a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2891,6 +2891,8 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) transaction_t *transaction; long ret; int block; + uid_t i_uid; + gid_t i_gid; inode = iget_locked(sb, ino); if (!inode) @@ -2907,12 +2909,14 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + i_uid_write(inode, i_uid); + i_gid_write(inode, i_gid); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); @@ -3068,6 +3072,8 @@ static int ext3_do_update_inode(handle_t *handle, struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + uid_t i_uid; + gid_t i_gid; again: /* we can't allow multiple procs in here at once, its a bit racey */ @@ -3080,27 +3086,29 @@ again: ext3_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); + i_uid = i_uid_read(inode); + i_gid = i_gid_read(inode); if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); /* * Fix up interoperability with old kernels. Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if(!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(i_uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(i_gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); + cpu_to_le16(fs_high2lowuid(i_uid)); raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + cpu_to_le16(fs_high2lowgid(i_gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } @@ -3262,8 +3270,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) if (is_quota_modification(inode, attr)) dquot_initialize(inode); - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || + (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, diff --git a/fs/ext3/super.c b/fs/ext3/super.c index cf0b592..94ef7e6 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -617,13 +617,15 @@ static int ext3_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",grpid"); if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS)) seq_puts(seq, ",nogrpid"); - if (sbi->s_resuid != EXT3_DEF_RESUID || + if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT3_DEF_RESUID)) || le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) { - seq_printf(seq, ",resuid=%u", sbi->s_resuid); + seq_printf(seq, ",resuid=%u", + from_kuid_munged(&init_user_ns, sbi->s_resuid)); } - if (sbi->s_resgid != EXT3_DEF_RESGID || + if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT3_DEF_RESGID)) || le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) { - seq_printf(seq, ",resgid=%u", sbi->s_resgid); + seq_printf(seq, ",resgid=%u", + from_kgid_munged(&init_user_ns, sbi->s_resgid)); } if (test_opt(sb, ERRORS_RO)) { int def_errors = le16_to_cpu(es->s_errors); @@ -967,6 +969,8 @@ static int parse_options (char *options, struct super_block *sb, substring_t args[MAX_OPT_ARGS]; int data_opt = 0; int option; + kuid_t uid; + kgid_t gid; #ifdef CONFIG_QUOTA int qfmt; #endif @@ -1000,12 +1004,23 @@ static int parse_options (char *options, struct super_block *sb, case Opt_resuid: if (match_int(&args[0], &option)) return 0; - sbi->s_resuid = option; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) { + ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option); + return -1; + + } + sbi->s_resuid = uid; break; case Opt_resgid: if (match_int(&args[0], &option)) return 0; - sbi->s_resgid = option; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) { + ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option); + return -1; + } + sbi->s_resgid = gid; break; case Opt_sb: /* handled by get_sb_block() instead of here */ @@ -1651,8 +1666,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) } sb->s_fs_info = sbi; sbi->s_mount_opt = 0; - sbi->s_resuid = EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; + sbi->s_resuid = make_kuid(&init_user_ns, EXT3_DEF_RESUID); + sbi->s_resgid = make_kgid(&init_user_ns, EXT3_DEF_RESGID); sbi->s_sb_block = sb_block; blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); @@ -1716,8 +1731,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) else set_opt(sbi->s_mount_opt, ERRORS_RO); - sbi->s_resuid = le16_to_cpu(es->s_def_resuid); - sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); + sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); /* enable barriers by default */ set_opt(sbi->s_mount_opt, BARRIER); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 4bbd07a..c45c411 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -461,8 +461,8 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi, return 1; /* Hm, nope. Are (enough) root reserved clusters available? */ - if (sbi->s_resuid == current_fsuid() || - ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || + if (uid_eq(sbi->s_resuid, current_fsuid()) || + (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) || capable(CAP_SYS_RESOURCE) || (flags & EXT4_MB_USE_ROOT_BLOCKS)) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0e01e90..c21b1de5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1153,8 +1153,8 @@ struct ext4_sb_info { unsigned int s_mount_flags; unsigned int s_def_mount_opt; ext4_fsblk_t s_sb_block; - uid_t s_resuid; - gid_t s_resgid; + kuid_t s_resuid; + kgid_t s_resgid; unsigned short s_mount_state; unsigned short s_pad; int s_addr_per_block_bits; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 409c2ee..9f9acac 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -808,8 +808,8 @@ got: } if (owner) { inode->i_mode = mode; - inode->i_uid = owner[0]; - inode->i_gid = owner[1]; + i_uid_write(inode, owner[0]); + i_gid_write(inode, owner[1]); } else if (test_opt(sb, GRPID)) { inode->i_mode = mode; inode->i_uid = current_fsuid(); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c77b0bd..07eaf56 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3630,6 +3630,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) journal_t *journal = EXT4_SB(sb)->s_journal; long ret; int block; + uid_t i_uid; + gid_t i_gid; inode = iget_locked(sb, ino); if (!inode) @@ -3645,12 +3647,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) goto bad_inode; raw_inode = ext4_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt(inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + i_uid_write(inode, i_uid); + i_gid_write(inode, i_gid); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ @@ -3870,6 +3874,8 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + uid_t i_uid; + gid_t i_gid; /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ @@ -3878,27 +3884,27 @@ static int ext4_do_update_inode(handle_t *handle, ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); + i_uid = i_uid_read(inode); + i_gid = i_gid_read(inode); if (!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); /* * Fix up interoperability with old kernels. Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if (!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(i_uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(i_gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { - raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); - raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); + raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } @@ -4084,8 +4090,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (is_quota_modification(inode, attr)) dquot_initialize(inode); - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || + (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index f39f80f..f1bb32e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -466,8 +466,8 @@ int ext4_ext_migrate(struct inode *inode) } goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; - owner[0] = inode->i_uid; - owner[1] = inode->i_gid; + owner[0] = i_uid_read(inode); + owner[1] = i_gid_read(inode); tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, S_IFREG, NULL, goal, owner); if (IS_ERR(tmp_inode)) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e1fb1d5..436b422 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1448,6 +1448,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, { struct ext4_sb_info *sbi = EXT4_SB(sb); const struct mount_opts *m; + kuid_t uid; + kgid_t gid; int arg = 0; #ifdef CONFIG_QUOTA @@ -1474,10 +1476,20 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, "Ignoring removed %s option", opt); return 1; case Opt_resuid: - sbi->s_resuid = arg; + uid = make_kuid(current_user_ns(), arg); + if (!uid_valid(uid)) { + ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); + return -1; + } + sbi->s_resuid = uid; return 1; case Opt_resgid: - sbi->s_resgid = arg; + gid = make_kgid(current_user_ns(), arg); + if (!gid_valid(gid)) { + ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); + return -1; + } + sbi->s_resgid = gid; return 1; case Opt_abort: sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; @@ -1732,12 +1744,14 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("%s", token2str(m->token)); } - if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID || + if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) || le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) - SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid); - if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID || + SEQ_OPTS_PRINT("resuid=%u", + from_kuid_munged(&init_user_ns, sbi->s_resuid)); + if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) || le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) - SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid); + SEQ_OPTS_PRINT("resgid=%u", + from_kgid_munged(&init_user_ns, sbi->s_resgid)); def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) SEQ_OPTS_PUTS("errors=remount-ro"); @@ -2980,8 +2994,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } sb->s_fs_info = sbi; sbi->s_mount_opt = 0; - sbi->s_resuid = EXT4_DEF_RESUID; - sbi->s_resgid = EXT4_DEF_RESGID; + sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID); + sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID); sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sb_block = sb_block; if (sb->s_bdev->bd_part) @@ -3060,8 +3074,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (def_mount_opts & EXT4_DEFM_DISCARD) set_opt(sb, DISCARD); - sbi->s_resuid = le16_to_cpu(es->s_def_resuid); - sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); + sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; @@ -4213,8 +4227,8 @@ static int ext4_unfreeze(struct super_block *sb) struct ext4_mount_options { unsigned long s_mount_opt; unsigned long s_mount_opt2; - uid_t s_resuid; - gid_t s_resgid; + kuid_t s_resuid; + kgid_t s_resgid; unsigned long s_commit_interval; u32 s_min_batch_time, s_max_batch_time; #ifdef CONFIG_QUOTA @@ -532,9 +532,9 @@ static inline int sigio_perm(struct task_struct *p, rcu_read_lock(); cred = __task_cred(p); - ret = ((fown->euid == 0 || - fown->euid == cred->suid || fown->euid == cred->uid || - fown->uid == cred->suid || fown->uid == cred->uid) && + ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) || + uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) || + uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) && !security_file_send_sigiotask(p, fown, sig)); rcu_read_unlock(); return ret; @@ -135,8 +135,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fop = &empty_fops; inode->__i_nlink = 1; inode->i_opflags = 0; - inode->i_uid = 0; - inode->i_gid = 0; + i_uid_write(inode, 0); + i_gid_write(inode, 0); atomic_set(&inode->i_writecount, 0); inode->i_size = 0; inode->i_blocks = 0; @@ -1732,11 +1732,9 @@ EXPORT_SYMBOL(inode_init_owner); */ bool inode_owner_or_capable(const struct inode *inode) { - struct user_namespace *ns = inode_userns(inode); - - if (current_user_ns() == ns && current_fsuid() == inode->i_uid) + if (uid_eq(current_fsuid(), inode->i_uid)) return true; - if (ns_capable(ns, CAP_FOWNER)) + if (inode_capable(inode, CAP_FOWNER)) return true; return false; } diff --git a/fs/ioprio.c b/fs/ioprio.c index 0f1b951..5e6dbe89 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -37,8 +37,8 @@ int set_task_ioprio(struct task_struct *task, int ioprio) rcu_read_lock(); tcred = __task_cred(task); - if (tcred->uid != cred->euid && - tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) { + if (!uid_eq(tcred->uid, cred->euid) && + !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) { rcu_read_unlock(); return -EPERM; } @@ -65,6 +65,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) struct task_struct *p, *g; struct user_struct *user; struct pid *pgrp; + kuid_t uid; int ret; switch (class) { @@ -110,16 +111,19 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case IOPRIO_WHO_USER: + uid = make_kuid(current_user_ns(), who); + if (!uid_valid(uid)) + break; if (!who) user = current_user(); else - user = find_user(who); + user = find_user(uid); if (!user) break; do_each_thread(g, p) { - if (__task_cred(p)->uid != who) + if (!uid_eq(task_uid(p), uid)) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -174,6 +178,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) struct task_struct *g, *p; struct user_struct *user; struct pid *pgrp; + kuid_t uid; int ret = -ESRCH; int tmpio; @@ -203,16 +208,17 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case IOPRIO_WHO_USER: + uid = make_kuid(current_user_ns(), who); if (!who) user = current_user(); else - user = find_user(who); + user = find_user(uid); if (!user) break; do_each_thread(g, p) { - if (__task_cred(p)->uid != user->uid) + if (!uid_eq(task_uid(p), user->uid)) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -1446,7 +1446,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) struct inode *inode = dentry->d_inode; int error; - if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) + if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) return -EACCES; if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -218,10 +218,7 @@ static int acl_permission_check(struct inode *inode, int mask) { unsigned int mode = inode->i_mode; - if (current_user_ns() != inode_userns(inode)) - goto other_perms; - - if (likely(current_fsuid() == inode->i_uid)) + if (likely(uid_eq(current_fsuid(), inode->i_uid))) mode >>= 6; else { if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { @@ -234,7 +231,6 @@ static int acl_permission_check(struct inode *inode, int mask) mode >>= 3; } -other_perms: /* * If the DACs are ok we don't need any capability check. */ @@ -270,10 +266,10 @@ int generic_permission(struct inode *inode, int mask) if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ - if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) + if (inode_capable(inode, CAP_DAC_OVERRIDE)) return 0; if (!(mask & MAY_WRITE)) - if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) + if (inode_capable(inode, CAP_DAC_READ_SEARCH)) return 0; return -EACCES; } @@ -283,7 +279,7 @@ int generic_permission(struct inode *inode, int mask) * at least one exec bit set. */ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) - if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) + if (inode_capable(inode, CAP_DAC_OVERRIDE)) return 0; /* @@ -291,7 +287,7 @@ int generic_permission(struct inode *inode, int mask) */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ) - if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) + if (inode_capable(inode, CAP_DAC_READ_SEARCH)) return 0; return -EACCES; @@ -1934,19 +1930,15 @@ static int user_path_parent(int dfd, const char __user *path, */ static inline int check_sticky(struct inode *dir, struct inode *inode) { - uid_t fsuid = current_fsuid(); + kuid_t fsuid = current_fsuid(); if (!(dir->i_mode & S_ISVTX)) return 0; - if (current_user_ns() != inode_userns(inode)) - goto other_userns; - if (inode->i_uid == fsuid) + if (uid_eq(inode->i_uid, fsuid)) return 0; - if (dir->i_uid == fsuid) + if (uid_eq(dir->i_uid, fsuid)) return 0; - -other_userns: - return !ns_capable(inode_userns(inode), CAP_FOWNER); + return !inode_capable(inode, CAP_FOWNER); } /* @@ -2534,8 +2526,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && - !ns_capable(inode_userns(dir), CAP_MKNOD)) + if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 79717a4..204438c 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -1,6 +1,7 @@ /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ #include <linux/sched.h> +#include <linux/user_namespace.h> #include "nfsd.h" #include "auth.h" @@ -56,8 +57,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) goto oom; for (i = 0; i < rqgi->ngroups; i++) { - if (!GROUP_AT(rqgi, i)) - GROUP_AT(gi, i) = exp->ex_anon_gid; + if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i))) + GROUP_AT(gi, i) = make_kgid(&init_user_ns, exp->ex_anon_gid); else GROUP_AT(gi, i) = GROUP_AT(rqgi, i); } @@ -316,7 +316,8 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ - if (override_cred->uid) + kuid_t root_uid = make_kuid(override_cred->user_ns, 0); + if (!uid_eq(override_cred->uid, root_uid)) cap_clear(override_cred->cap_effective); else override_cred->cap_effective = @@ -505,15 +506,24 @@ static int chown_common(struct path *path, uid_t user, gid_t group) struct inode *inode = path->dentry->d_inode; int error; struct iattr newattrs; + kuid_t uid; + kgid_t gid; + + uid = make_kuid(current_user_ns(), user); + gid = make_kgid(current_user_ns(), group); newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { + if (!uid_valid(uid)) + return -EINVAL; newattrs.ia_valid |= ATTR_UID; - newattrs.ia_uid = user; + newattrs.ia_uid = uid; } if (group != (gid_t) -1) { + if (!gid_valid(gid)) + return -EINVAL; newattrs.ia_valid |= ATTR_GID; - newattrs.ia_gid = group; + newattrs.ia_gid = gid; } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= diff --git a/fs/proc/array.c b/fs/proc/array.c index f9bd395..dc4c5a7 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -81,6 +81,7 @@ #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/tracehook.h> +#include <linux/user_namespace.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -161,6 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk) static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { + struct user_namespace *user_ns = current_user_ns(); struct group_info *group_info; int g; struct fdtable *fdt = NULL; @@ -189,8 +191,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - cred->uid, cred->euid, cred->suid, cred->fsuid, - cred->gid, cred->egid, cred->sgid, cred->fsgid); + from_kuid_munged(user_ns, cred->uid), + from_kuid_munged(user_ns, cred->euid), + from_kuid_munged(user_ns, cred->suid), + from_kuid_munged(user_ns, cred->fsuid), + from_kgid_munged(user_ns, cred->gid), + from_kgid_munged(user_ns, cred->egid), + from_kgid_munged(user_ns, cred->sgid), + from_kgid_munged(user_ns, cred->fsgid)); task_lock(p); if (p->files) @@ -205,7 +213,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_unlock(p); for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) - seq_printf(m, "%d ", GROUP_AT(group_info, g)); + seq_printf(m, "%d ", + from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); seq_putc(m, '\n'); diff --git a/fs/proc/base.c b/fs/proc/base.c index 57b8159..d2d3108 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -81,6 +81,7 @@ #include <linux/oom.h> #include <linux/elf.h> #include <linux/pid_namespace.h> +#include <linux/user_namespace.h> #include <linux/fs_struct.h> #include <linux/slab.h> #include <linux/flex_array.h> @@ -1561,8 +1562,8 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) generic_fillattr(inode, stat); rcu_read_lock(); - stat->uid = 0; - stat->gid = 0; + stat->uid = GLOBAL_ROOT_UID; + stat->gid = GLOBAL_ROOT_GID; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { if (!has_pid_permissions(pid, task, 2)) { @@ -1622,8 +1623,8 @@ int pid_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = cred->egid; rcu_read_unlock(); } else { - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; } inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); @@ -1815,8 +1816,8 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = cred->egid; rcu_read_unlock(); } else { - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; } i_mode = S_IFLNK; @@ -2045,8 +2046,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = cred->egid; rcu_read_unlock(); } else { - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; } security_task_to_inode(task, inode); status = 1; @@ -2924,6 +2925,74 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) } #endif /* CONFIG_TASK_IO_ACCOUNTING */ +#ifdef CONFIG_USER_NS +static int proc_id_map_open(struct inode *inode, struct file *file, + struct seq_operations *seq_ops) +{ + struct user_namespace *ns = NULL; + struct task_struct *task; + struct seq_file *seq; + int ret = -EINVAL; + + task = get_proc_task(inode); + if (task) { + rcu_read_lock(); + ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + put_task_struct(task); + } + if (!ns) + goto err; + + ret = seq_open(file, seq_ops); + if (ret) + goto err_put_ns; + + seq = file->private_data; + seq->private = ns; + + return 0; +err_put_ns: + put_user_ns(ns); +err: + return ret; +} + +static int proc_id_map_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + put_user_ns(ns); + return seq_release(inode, file); +} + +static int proc_uid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_uid_seq_operations); +} + +static int proc_gid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_gid_seq_operations); +} + +static const struct file_operations proc_uid_map_operations = { + .open = proc_uid_map_open, + .write = proc_uid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; + +static const struct file_operations proc_gid_map_operations = { + .open = proc_gid_map_open, + .write = proc_gid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; +#endif /* CONFIG_USER_NS */ + static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3026,6 +3095,10 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), #endif +#ifdef CONFIG_USER_NS + REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), + REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), +#endif }; static int proc_tgid_base_readdir(struct file * filp, @@ -3381,6 +3454,10 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), #endif +#ifdef CONFIG_USER_NS + REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), + REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), +#endif }; static int proc_tid_base_readdir(struct file * filp, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 205c922..554ecc5 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -108,8 +108,8 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) struct super_block *sb = root->d_sb; struct pid_namespace *pid = sb->s_fs_info; - if (pid->pid_gid) - seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid); + if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) + seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); if (pid->hide_pid != 0) seq_printf(seq, ",hidepid=%u", pid->hide_pid); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 21d836f..3476bca 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -371,9 +371,9 @@ void register_sysctl_root(struct ctl_table_root *root) static int test_perm(int mode, int op) { - if (!current_euid()) + if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) mode >>= 6; - else if (in_egroup_p(0)) + else if (in_egroup_p(GLOBAL_ROOT_GID)) mode >>= 3; if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) return 0; diff --git a/fs/proc/root.c b/fs/proc/root.c index eed44bf..7c30fce 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -67,7 +67,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) case Opt_gid: if (match_int(&args[0], &option)) return 0; - pid->pid_gid = option; + pid->pid_gid = make_kgid(current_user_ns(), option); break; case Opt_hidepid: if (match_int(&args[0], &option)) @@ -138,8 +138,8 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; - SET_UID(tmp.st_uid, stat->uid); - SET_GID(tmp.st_gid, stat->gid); + SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); + SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = old_encode_dev(stat->rdev); #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) @@ -224,8 +224,8 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; - SET_UID(tmp.st_uid, stat->uid); - SET_GID(tmp.st_gid, stat->gid); + SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); + SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; @@ -355,8 +355,8 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) #endif tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index feb2d69..907c2b3 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -62,8 +62,8 @@ static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) /* assign default attributes */ iattrs->ia_mode = sd->s_mode; - iattrs->ia_uid = 0; - iattrs->ia_gid = 0; + iattrs->ia_uid = GLOBAL_ROOT_UID; + iattrs->ia_gid = GLOBAL_ROOT_GID; iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; return attrs; diff --git a/include/linux/capability.h b/include/linux/capability.h index c398cff..68d56ef 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -377,6 +377,7 @@ struct cpu_vfs_cap_data { #ifdef __KERNEL__ +struct inode; struct dentry; struct user_namespace; @@ -551,6 +552,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t, extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool nsown_capable(int cap); +extern bool inode_capable(const struct inode *inode, int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cred.h b/include/linux/cred.h index adadf71..917dc5a 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -17,6 +17,7 @@ #include <linux/key.h> #include <linux/selinux.h> #include <linux/atomic.h> +#include <linux/uidgid.h> struct user_struct; struct cred; @@ -26,14 +27,14 @@ struct inode; * COW Supplementary groups list */ #define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) +#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(kgid_t))) struct group_info { atomic_t usage; int ngroups; int nblocks; - gid_t small_block[NGROUPS_SMALL]; - gid_t *blocks[0]; + kgid_t small_block[NGROUPS_SMALL]; + kgid_t *blocks[0]; }; /** @@ -66,14 +67,14 @@ extern struct group_info init_groups; extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); -extern int groups_search(const struct group_info *, gid_t); +extern int groups_search(const struct group_info *, kgid_t); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) -extern int in_group_p(gid_t); -extern int in_egroup_p(gid_t); +extern int in_group_p(kgid_t); +extern int in_egroup_p(kgid_t); /* * The common credentials for a thread group @@ -122,14 +123,14 @@ struct cred { #define CRED_MAGIC 0x43736564 #define CRED_MAGIC_DEAD 0x44656144 #endif - uid_t uid; /* real UID of the task */ - gid_t gid; /* real GID of the task */ - uid_t suid; /* saved UID of the task */ - gid_t sgid; /* saved GID of the task */ - uid_t euid; /* effective UID of the task */ - gid_t egid; /* effective GID of the task */ - uid_t fsuid; /* UID for VFS ops */ - gid_t fsgid; /* GID for VFS ops */ + kuid_t uid; /* real UID of the task */ + kgid_t gid; /* real GID of the task */ + kuid_t suid; /* saved UID of the task */ + kgid_t sgid; /* saved GID of the task */ + kuid_t euid; /* effective UID of the task */ + kgid_t egid; /* effective GID of the task */ + kuid_t fsuid; /* UID for VFS ops */ + kgid_t fsgid; /* GID for VFS ops */ unsigned securebits; /* SUID-less security management */ kernel_cap_t cap_inheritable; /* caps our children can inherit */ kernel_cap_t cap_permitted; /* caps we're permitted */ @@ -146,7 +147,7 @@ struct cred { void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ - struct user_namespace *user_ns; /* cached user->user_ns */ + struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ }; @@ -357,11 +358,11 @@ static inline void put_cred(const struct cred *_cred) #define current_user() (current_cred_xxx(user)) #define current_security() (current_cred_xxx(security)) +extern struct user_namespace init_user_ns; #ifdef CONFIG_USER_NS #define current_user_ns() (current_cred_xxx(user_ns)) #define task_user_ns(task) (task_cred_xxx((task), user_ns)) #else -extern struct user_namespace init_user_ns; #define current_user_ns() (&init_user_ns) #define task_user_ns(task) (&init_user_ns) #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 25c40b9..c0e5337 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -402,6 +402,7 @@ struct inodes_stat_t { #include <linux/atomic.h> #include <linux/shrinker.h> #include <linux/migrate_mode.h> +#include <linux/uidgid.h> #include <asm/byteorder.h> @@ -469,8 +470,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, struct iattr { unsigned int ia_valid; umode_t ia_mode; - uid_t ia_uid; - gid_t ia_gid; + kuid_t ia_uid; + kgid_t ia_gid; loff_t ia_size; struct timespec ia_atime; struct timespec ia_mtime; @@ -761,8 +762,8 @@ struct posix_acl; struct inode { umode_t i_mode; unsigned short i_opflags; - uid_t i_uid; - gid_t i_gid; + kuid_t i_uid; + kgid_t i_gid; unsigned int i_flags; #ifdef CONFIG_FS_POSIX_ACL @@ -927,6 +928,31 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) #endif } +/* Helper functions so that in most cases filesystems will + * not need to deal directly with kuid_t and kgid_t and can + * instead deal with the raw numeric values that are stored + * in the filesystem. + */ +static inline uid_t i_uid_read(const struct inode *inode) +{ + return from_kuid(&init_user_ns, inode->i_uid); +} + +static inline gid_t i_gid_read(const struct inode *inode) +{ + return from_kgid(&init_user_ns, inode->i_gid); +} + +static inline void i_uid_write(struct inode *inode, uid_t uid) +{ + inode->i_uid = make_kuid(&init_user_ns, uid); +} + +static inline void i_gid_write(struct inode *inode, gid_t gid) +{ + inode->i_gid = make_kgid(&init_user_ns, gid); +} + static inline unsigned iminor(const struct inode *inode) { return MINOR(inode->i_rdev); @@ -943,7 +969,7 @@ struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ - uid_t uid, euid; /* uid/euid of process setting the owner */ + kuid_t uid, euid; /* uid/euid of process setting the owner */ int signum; /* posix.1b rt signal to be delivered on IO */ }; @@ -1527,12 +1553,6 @@ enum { #define vfs_check_frozen(sb, level) \ wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) -/* - * until VFS tracks user namespaces for inodes, just make all files - * belong to init_user_ns - */ -extern struct user_namespace init_user_ns; -#define inode_userns(inode) (&init_user_ns) extern bool inode_owner_or_capable(const struct inode *inode); /* not quite ready to be deprecated, but... */ diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index b067bd8..00474b0 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -31,7 +31,7 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif - gid_t pid_gid; + kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ }; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 85c5073..3fd2e87 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -52,8 +52,8 @@ struct proc_dir_entry { unsigned int low_ino; umode_t mode; nlink_t nlink; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; loff_t size; const struct inode_operations *proc_iops; /* diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d93f95e..17b9773 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -22,8 +22,8 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) { return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) || - (ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || - (ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid); + (ia->ia_valid & ATTR_UID && !uid_eq(ia->ia_uid, inode->i_uid)) || + (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid)); } #if defined(CONFIG_QUOTA) diff --git a/include/linux/sched.h b/include/linux/sched.h index 28fa9d0..5ea8bae 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -90,6 +90,7 @@ struct sched_param { #include <linux/latencytop.h> #include <linux/cred.h> #include <linux/llist.h> +#include <linux/uidgid.h> #include <asm/processor.h> @@ -728,8 +729,7 @@ struct user_struct { /* Hash table maintenance information */ struct hlist_node uidhash_node; - uid_t uid; - struct user_namespace *user_ns; + kuid_t uid; #ifdef CONFIG_PERF_EVENTS atomic_long_t locked_vm; @@ -738,7 +738,7 @@ struct user_struct { extern int uids_sysfs_init(void); -extern struct user_struct *find_user(uid_t); +extern struct user_struct *find_user(kuid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -2142,14 +2142,13 @@ extern struct task_struct *find_task_by_pid_ns(pid_t nr, extern void __set_special_pids(struct pid *pid); /* per-UID process charging. */ -extern struct user_struct * alloc_uid(struct user_namespace *, uid_t); +extern struct user_struct * alloc_uid(kuid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); return u; } extern void free_uid(struct user_struct *); -extern void release_uids(struct user_namespace *ns); #include <asm/current.h> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 79ab255..bef2cf0 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -28,8 +28,8 @@ struct shmem_sb_info { unsigned long max_inodes; /* How many inodes are allowed */ unsigned long free_inodes; /* How many are left for allocation */ spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ - uid_t uid; /* Mount uid for root directory */ - gid_t gid; /* Mount gid for root directory */ + kuid_t uid; /* Mount uid for root directory */ + kgid_t gid; /* Mount gid for root directory */ umode_t mode; /* Mount mode for root directory */ struct mempolicy *mpol; /* default memory policy for mappings */ }; diff --git a/include/linux/stat.h b/include/linux/stat.h index 611c398..4613240 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -58,14 +58,15 @@ #include <linux/types.h> #include <linux/time.h> +#include <linux/uidgid.h> struct kstat { u64 ino; dev_t dev; umode_t mode; unsigned int nlink; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; dev_t rdev; loff_t size; struct timespec atime; diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h new file mode 100644 index 0000000..8e522cbc --- /dev/null +++ b/include/linux/uidgid.h @@ -0,0 +1,200 @@ +#ifndef _LINUX_UIDGID_H +#define _LINUX_UIDGID_H + +/* + * A set of types for the internal kernel types representing uids and gids. + * + * The types defined in this header allow distinguishing which uids and gids in + * the kernel are values used by userspace and which uid and gid values are + * the internal kernel values. With the addition of user namespaces the values + * can be different. Using the type system makes it possible for the compiler + * to detect when we overlook these differences. + * + */ +#include <linux/types.h> +#include <linux/highuid.h> + +struct user_namespace; +extern struct user_namespace init_user_ns; + +#ifdef CONFIG_UIDGID_STRICT_TYPE_CHECKS + +typedef struct { + uid_t val; +} kuid_t; + + +typedef struct { + gid_t val; +} kgid_t; + +#define KUIDT_INIT(value) (kuid_t){ value } +#define KGIDT_INIT(value) (kgid_t){ value } + +static inline uid_t __kuid_val(kuid_t uid) +{ + return uid.val; +} + +static inline gid_t __kgid_val(kgid_t gid) +{ + return gid.val; +} + +#else + +typedef uid_t kuid_t; +typedef gid_t kgid_t; + +static inline uid_t __kuid_val(kuid_t uid) +{ + return uid; +} + +static inline gid_t __kgid_val(kgid_t gid) +{ + return gid; +} + +#define KUIDT_INIT(value) ((kuid_t) value ) +#define KGIDT_INIT(value) ((kgid_t) value ) + +#endif + +#define GLOBAL_ROOT_UID KUIDT_INIT(0) +#define GLOBAL_ROOT_GID KGIDT_INIT(0) + +#define INVALID_UID KUIDT_INIT(-1) +#define INVALID_GID KGIDT_INIT(-1) + +static inline bool uid_eq(kuid_t left, kuid_t right) +{ + return __kuid_val(left) == __kuid_val(right); +} + +static inline bool gid_eq(kgid_t left, kgid_t right) +{ + return __kgid_val(left) == __kgid_val(right); +} + +static inline bool uid_gt(kuid_t left, kuid_t right) +{ + return __kuid_val(left) > __kuid_val(right); +} + +static inline bool gid_gt(kgid_t left, kgid_t right) +{ + return __kgid_val(left) > __kgid_val(right); +} + +static inline bool uid_gte(kuid_t left, kuid_t right) +{ + return __kuid_val(left) >= __kuid_val(right); +} + +static inline bool gid_gte(kgid_t left, kgid_t right) +{ + return __kgid_val(left) >= __kgid_val(right); +} + +static inline bool uid_lt(kuid_t left, kuid_t right) +{ + return __kuid_val(left) < __kuid_val(right); +} + +static inline bool gid_lt(kgid_t left, kgid_t right) +{ + return __kgid_val(left) < __kgid_val(right); +} + +static inline bool uid_lte(kuid_t left, kuid_t right) +{ + return __kuid_val(left) <= __kuid_val(right); +} + +static inline bool gid_lte(kgid_t left, kgid_t right) +{ + return __kgid_val(left) <= __kgid_val(right); +} + +static inline bool uid_valid(kuid_t uid) +{ + return !uid_eq(uid, INVALID_UID); +} + +static inline bool gid_valid(kgid_t gid) +{ + return !gid_eq(gid, INVALID_GID); +} + +#ifdef CONFIG_USER_NS + +extern kuid_t make_kuid(struct user_namespace *from, uid_t uid); +extern kgid_t make_kgid(struct user_namespace *from, gid_t gid); + +extern uid_t from_kuid(struct user_namespace *to, kuid_t uid); +extern gid_t from_kgid(struct user_namespace *to, kgid_t gid); +extern uid_t from_kuid_munged(struct user_namespace *to, kuid_t uid); +extern gid_t from_kgid_munged(struct user_namespace *to, kgid_t gid); + +static inline bool kuid_has_mapping(struct user_namespace *ns, kuid_t uid) +{ + return from_kuid(ns, uid) != (uid_t) -1; +} + +static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) +{ + return from_kgid(ns, gid) != (gid_t) -1; +} + +#else + +static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid) +{ + return KUIDT_INIT(uid); +} + +static inline kgid_t make_kgid(struct user_namespace *from, gid_t gid) +{ + return KGIDT_INIT(gid); +} + +static inline uid_t from_kuid(struct user_namespace *to, kuid_t kuid) +{ + return __kuid_val(kuid); +} + +static inline gid_t from_kgid(struct user_namespace *to, kgid_t kgid) +{ + return __kgid_val(kgid); +} + +static inline uid_t from_kuid_munged(struct user_namespace *to, kuid_t kuid) +{ + uid_t uid = from_kuid(to, kuid); + if (uid == (uid_t)-1) + uid = overflowuid; + return uid; +} + +static inline gid_t from_kgid_munged(struct user_namespace *to, kgid_t kgid) +{ + gid_t gid = from_kgid(to, kgid); + if (gid == (gid_t)-1) + gid = overflowgid; + return gid; +} + +static inline bool kuid_has_mapping(struct user_namespace *ns, kuid_t uid) +{ + return true; +} + +static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) +{ + return true; +} + +#endif /* CONFIG_USER_NS */ + +#endif /* _LINUX_UIDGID_H */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index faf4679..4e72922 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -6,14 +6,24 @@ #include <linux/sched.h> #include <linux/err.h> -#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 7) -#define UIDHASH_SZ (1 << UIDHASH_BITS) +#define UID_GID_MAP_MAX_EXTENTS 5 + +struct uid_gid_map { /* 64 bytes -- 1 cache line */ + u32 nr_extents; + struct uid_gid_extent { + u32 first; + u32 lower_first; + u32 count; + } extent[UID_GID_MAP_MAX_EXTENTS]; +}; struct user_namespace { + struct uid_gid_map uid_map; + struct uid_gid_map gid_map; struct kref kref; - struct hlist_head uidhash_table[UIDHASH_SZ]; - struct user_struct *creator; - struct work_struct destroyer; + struct user_namespace *parent; + kuid_t owner; + kgid_t group; }; extern struct user_namespace init_user_ns; @@ -36,9 +46,11 @@ static inline void put_user_ns(struct user_namespace *ns) kref_put(&ns->kref, free_user_ns); } -uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid); -gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid); - +struct seq_operations; +extern struct seq_operations proc_uid_seq_operations; +extern struct seq_operations proc_gid_seq_operations; +extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -55,17 +67,6 @@ static inline void put_user_ns(struct user_namespace *ns) { } -static inline uid_t user_ns_map_uid(struct user_namespace *to, - const struct cred *cred, uid_t uid) -{ - return uid; -} -static inline gid_t user_ns_map_gid(struct user_namespace *to, - const struct cred *cred, gid_t gid) -{ - return gid; -} - #endif #endif /* _LINUX_USER_H */ diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h index 7b53c05..15d11a3 100644 --- a/include/trace/events/ext3.h +++ b/include/trace/events/ext3.h @@ -24,8 +24,8 @@ TRACE_EVENT(ext3_free_inode, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; - __entry->uid = inode->i_uid; - __entry->gid = inode->i_gid; + __entry->uid = i_uid_read(inode); + __entry->gid = i_gid_read(inode); __entry->blocks = inode->i_blocks; ), diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 319538b..69d8a69 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -36,8 +36,8 @@ TRACE_EVENT(ext4_free_inode, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; - __entry->uid = inode->i_uid; - __entry->gid = inode->i_gid; + __entry->uid = i_uid_read(inode); + __entry->gid = i_gid_read(inode); __entry->blocks = inode->i_blocks; ), diff --git a/init/Kconfig b/init/Kconfig index a30fe08..ccb5248 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -873,7 +873,10 @@ config IPC_NS config USER_NS bool "User namespace (EXPERIMENTAL)" depends on EXPERIMENTAL - default y + depends on UIDGID_CONVERTED + select UIDGID_STRICT_TYPE_CHECKS + + default n help This allows containers, i.e. vservers, to use user namespaces to provide different user info for different servers. @@ -897,6 +900,131 @@ config NET_NS endif # NAMESPACES +config UIDGID_CONVERTED + # True if all of the selected software conmponents are known + # to have uid_t and gid_t converted to kuid_t and kgid_t + # where appropriate and are otherwise safe to use with + # the user namespace. + bool + default y + + # List of kernel pieces that need user namespace work + # Features + depends on SYSVIPC = n + depends on IMA = n + depends on EVM = n + depends on KEYS = n + depends on AUDIT = n + depends on AUDITSYSCALL = n + depends on TASKSTATS = n + depends on TRACING = n + depends on FS_POSIX_ACL = n + depends on QUOTA = n + depends on QUOTACTL = n + depends on DEBUG_CREDENTIALS = n + depends on BSD_PROCESS_ACCT = n + depends on DRM = n + depends on PROC_EVENTS = n + + # Networking + depends on NET = n + depends on NET_9P = n + depends on IPX = n + depends on PHONET = n + depends on NET_CLS_FLOW = n + depends on NETFILTER_XT_MATCH_OWNER = n + depends on NETFILTER_XT_MATCH_RECENT = n + depends on NETFILTER_XT_TARGET_LOG = n + depends on NETFILTER_NETLINK_LOG = n + depends on INET = n + depends on IPV6 = n + depends on IP_SCTP = n + depends on AF_RXRPC = n + depends on LLC2 = n + depends on NET_KEY = n + depends on INET_DIAG = n + depends on DNS_RESOLVER = n + depends on AX25 = n + depends on ATALK = n + + # Filesystems + depends on USB_DEVICEFS = n + depends on USB_GADGETFS = n + depends on USB_FUNCTIONFS = n + depends on DEVTMPFS = n + depends on XENFS = n + + depends on 9P_FS = n + depends on ADFS_FS = n + depends on AFFS_FS = n + depends on AFS_FS = n + depends on AUTOFS4_FS = n + depends on BEFS_FS = n + depends on BFS_FS = n + depends on BTRFS_FS = n + depends on CEPH_FS = n + depends on CIFS = n + depends on CODA_FS = n + depends on CONFIGFS_FS = n + depends on CRAMFS = n + depends on DEBUG_FS = n + depends on ECRYPT_FS = n + depends on EFS_FS = n + depends on EXOFS_FS = n + depends on FAT_FS = n + depends on FUSE_FS = n + depends on GFS2_FS = n + depends on HFS_FS = n + depends on HFSPLUS_FS = n + depends on HPFS_FS = n + depends on HUGETLBFS = n + depends on ISO9660_FS = n + depends on JFFS2_FS = n + depends on JFS_FS = n + depends on LOGFS = n + depends on MINIX_FS = n + depends on NCP_FS = n + depends on NFSD = n + depends on NFS_FS = n + depends on NILFS2_FS = n + depends on NTFS_FS = n + depends on OCFS2_FS = n + depends on OMFS_FS = n + depends on QNX4FS_FS = n + depends on QNX6FS_FS = n + depends on REISERFS_FS = n + depends on SQUASHFS = n + depends on SYSV_FS = n + depends on UBIFS_FS = n + depends on UDF_FS = n + depends on UFS_FS = n + depends on VXFS_FS = n + depends on XFS_FS = n + + depends on !UML || HOSTFS = n + + # The rare drivers that won't build + depends on AIRO = n + depends on AIRO_CS = n + depends on TUN = n + depends on INFINIBAND_QIB = n + depends on BLK_DEV_LOOP = n + depends on ANDROID_BINDER_IPC = n + + # Security modules + depends on SECURITY_TOMOYO = n + depends on SECURITY_APPARMOR = n + +config UIDGID_STRICT_TYPE_CHECKS + bool "Require conversions between uid/gids and their internal representation" + depends on UIDGID_CONVERTED + default n + help + While the nececessary conversions are being added to all subsystems this option allows + the code to continue to build for unconverted subsystems. + + Say Y here if you want the strict type checking enabled + config SCHED_AUTOGROUP bool "Automatic process group scheduling" select EVENTFD diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 28bd64d..b6a0d46 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -66,6 +66,7 @@ struct mqueue_inode_info { struct sigevent notify; struct pid* notify_owner; + struct user_namespace *notify_user_ns; struct user_struct *user; /* user who created, for accounting */ struct sock *notify_sock; struct sk_buff *notify_cookie; @@ -139,6 +140,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, INIT_LIST_HEAD(&info->e_wait_q[0].list); INIT_LIST_HEAD(&info->e_wait_q[1].list); info->notify_owner = NULL; + info->notify_user_ns = NULL; info->qsize = 0; info->user = NULL; /* set when all is ok */ memset(&info->attr, 0, sizeof(info->attr)); @@ -536,8 +538,7 @@ static void __do_notify(struct mqueue_inode_info *info) rcu_read_lock(); sig_i.si_pid = task_tgid_nr_ns(current, ns_of_pid(info->notify_owner)); - sig_i.si_uid = user_ns_map_uid(info->user->user_ns, - current_cred(), current_uid()); + sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid()); rcu_read_unlock(); kill_pid_info(info->notify.sigev_signo, @@ -550,7 +551,9 @@ static void __do_notify(struct mqueue_inode_info *info) } /* after notification unregisters process */ put_pid(info->notify_owner); + put_user_ns(info->notify_user_ns); info->notify_owner = NULL; + info->notify_user_ns = NULL; } wake_up(&info->wait_q); } @@ -575,7 +578,9 @@ static void remove_notification(struct mqueue_inode_info *info) netlink_sendskb(info->notify_sock, info->notify_cookie); } put_pid(info->notify_owner); + put_user_ns(info->notify_user_ns); info->notify_owner = NULL; + info->notify_user_ns = NULL; } static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) @@ -1140,6 +1145,7 @@ retry: } info->notify_owner = get_pid(task_tgid(current)); + info->notify_user_ns = get_user_ns(current_user_ns()); inode->i_atime = inode->i_ctime = CURRENT_TIME; } spin_unlock(&info->lock); diff --git a/ipc/namespace.c b/ipc/namespace.c index ce0a647..f362298c 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -46,7 +46,7 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, ipcns_notify(IPCNS_CREATED); register_ipcns_notifier(ns); - ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); + ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); return ns; } diff --git a/kernel/capability.c b/kernel/capability.c index 3f1adb6..493d972 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -419,3 +419,24 @@ bool nsown_capable(int cap) { return ns_capable(current_user_ns(), cap); } + +/** + * inode_capable - Check superior capability over inode + * @inode: The inode in question + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at it's own user namespace and that the given inode is owned + * by the current user namespace or a child namespace. + * + * Currently we check to see if an inode is owned by the current + * user namespace by seeing if the inode's owner maps into the + * current user namespace. + * + */ +bool inode_capable(const struct inode *inode, int cap) +{ + struct user_namespace *ns = current_user_ns(); + + return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid); +} diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ad8eae5..a0c6af3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2214,9 +2214,9 @@ retry_find_task: * only need to check permissions on one of them. */ tcred = __task_cred(tsk); - if (cred->euid && - cred->euid != tcred->uid && - cred->euid != tcred->suid) { + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) { rcu_read_unlock(); ret = -EACCES; goto out_unlock_cgroup; diff --git a/kernel/cred.c b/kernel/cred.c index e70683d..430557e 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -49,6 +49,14 @@ struct cred init_cred = { .subscribers = ATOMIC_INIT(2), .magic = CRED_MAGIC, #endif + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, + .suid = GLOBAL_ROOT_UID, + .sgid = GLOBAL_ROOT_GID, + .euid = GLOBAL_ROOT_UID, + .egid = GLOBAL_ROOT_GID, + .fsuid = GLOBAL_ROOT_UID, + .fsgid = GLOBAL_ROOT_GID, .securebits = SECUREBITS_DEFAULT, .cap_inheritable = CAP_EMPTY_SET, .cap_permitted = CAP_FULL_SET, @@ -148,6 +156,7 @@ static void put_cred_rcu(struct rcu_head *rcu) if (cred->group_info) put_group_info(cred->group_info); free_uid(cred->user); + put_user_ns(cred->user_ns); kmem_cache_free(cred_jar, cred); } @@ -303,6 +312,7 @@ struct cred *prepare_creds(void) set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); + get_user_ns(new->user_ns); #ifdef CONFIG_KEYS key_get(new->thread_keyring); @@ -414,11 +424,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } - /* cache user_ns in cred. Doesn't need a refcount because it will - * stay pinned by cred->user - */ - new->user_ns = new->user->user_ns; - #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -493,10 +498,10 @@ int commit_creds(struct cred *new) get_cred(new); /* we will require a ref for the subj creds too */ /* dumpability changes */ - if (old->euid != new->euid || - old->egid != new->egid || - old->fsuid != new->fsuid || - old->fsgid != new->fsgid || + if (!uid_eq(old->euid, new->euid) || + !gid_eq(old->egid, new->egid) || + !uid_eq(old->fsuid, new->fsuid) || + !gid_eq(old->fsgid, new->fsgid) || !cap_issubset(new->cap_permitted, old->cap_permitted)) { if (task->mm) set_dumpable(task->mm, suid_dumpable); @@ -505,9 +510,9 @@ int commit_creds(struct cred *new) } /* alter the thread keyring */ - if (new->fsuid != old->fsuid) + if (!uid_eq(new->fsuid, old->fsuid)) key_fsuid_changed(task); - if (new->fsgid != old->fsgid) + if (!gid_eq(new->fsgid, old->fsgid)) key_fsgid_changed(task); /* do it @@ -524,16 +529,16 @@ int commit_creds(struct cred *new) alter_cred_subscribers(old, -2); /* send notifications */ - if (new->uid != old->uid || - new->euid != old->euid || - new->suid != old->suid || - new->fsuid != old->fsuid) + if (!uid_eq(new->uid, old->uid) || + !uid_eq(new->euid, old->euid) || + !uid_eq(new->suid, old->suid) || + !uid_eq(new->fsuid, old->fsuid)) proc_id_connector(task, PROC_EVENT_UID); - if (new->gid != old->gid || - new->egid != old->egid || - new->sgid != old->sgid || - new->fsgid != old->fsgid) + if (!gid_eq(new->gid, old->gid) || + !gid_eq(new->egid, old->egid) || + !gid_eq(new->sgid, old->sgid) || + !gid_eq(new->fsgid, old->fsgid)) proc_id_connector(task, PROC_EVENT_GID); /* release the old obj and subj refs both */ @@ -678,6 +683,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) atomic_set(&new->usage, 1); set_cred_subscribers(new, 0); get_uid(new->user); + get_user_ns(new->user_ns); get_group_info(new->group_info); #ifdef CONFIG_KEYS diff --git a/kernel/exit.c b/kernel/exit.c index d8bd3b42..910a071 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1214,7 +1214,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) unsigned long state; int retval, status, traced; pid_t pid = task_pid_vnr(p); - uid_t uid = __task_cred(p)->uid; + uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid); struct siginfo __user *infop; if (!likely(wo->wo_flags & WEXITED)) @@ -1427,7 +1427,7 @@ static int wait_task_stopped(struct wait_opts *wo, if (!unlikely(wo->wo_flags & WNOWAIT)) *p_code = 0; - uid = task_uid(p); + uid = from_kuid_munged(current_user_ns(), task_uid(p)); unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1500,7 +1500,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) } if (!unlikely(wo->wo_flags & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; - uid = task_uid(p); + uid = from_kuid_munged(current_user_ns(), task_uid(p)); spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); diff --git a/kernel/groups.c b/kernel/groups.c index 99b53d1..6b2588d 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -31,7 +31,7 @@ struct group_info *groups_alloc(int gidsetsize) group_info->blocks[0] = group_info->small_block; else { for (i = 0; i < nblocks; i++) { - gid_t *b; + kgid_t *b; b = (void *)__get_free_page(GFP_USER); if (!b) goto out_undo_partial_alloc; @@ -66,18 +66,15 @@ EXPORT_SYMBOL(groups_free); static int groups_to_user(gid_t __user *grouplist, const struct group_info *group_info) { + struct user_namespace *user_ns = current_user_ns(); int i; unsigned int count = group_info->ngroups; - for (i = 0; i < group_info->nblocks; i++) { - unsigned int cp_count = min(NGROUPS_PER_BLOCK, count); - unsigned int len = cp_count * sizeof(*grouplist); - - if (copy_to_user(grouplist, group_info->blocks[i], len)) + for (i = 0; i < count; i++) { + gid_t gid; + gid = from_kgid_munged(user_ns, GROUP_AT(group_info, i)); + if (put_user(gid, grouplist+i)) return -EFAULT; - - grouplist += NGROUPS_PER_BLOCK; - count -= cp_count; } return 0; } @@ -86,18 +83,21 @@ static int groups_to_user(gid_t __user *grouplist, static int groups_from_user(struct group_info *group_info, gid_t __user *grouplist) { + struct user_namespace *user_ns = current_user_ns(); int i; unsigned int count = group_info->ngroups; - for (i = 0; i < group_info->nblocks; i++) { - unsigned int cp_count = min(NGROUPS_PER_BLOCK, count); - unsigned int len = cp_count * sizeof(*grouplist); - - if (copy_from_user(group_info->blocks[i], grouplist, len)) + for (i = 0; i < count; i++) { + gid_t gid; + kgid_t kgid; + if (get_user(gid, grouplist+i)) return -EFAULT; - grouplist += NGROUPS_PER_BLOCK; - count -= cp_count; + kgid = make_kgid(user_ns, gid); + if (!gid_valid(kgid)) + return -EINVAL; + + GROUP_AT(group_info, i) = kgid; } return 0; } @@ -117,9 +117,9 @@ static void groups_sort(struct group_info *group_info) for (base = 0; base < max; base++) { int left = base; int right = left + stride; - gid_t tmp = GROUP_AT(group_info, right); + kgid_t tmp = GROUP_AT(group_info, right); - while (left >= 0 && GROUP_AT(group_info, left) > tmp) { + while (left >= 0 && gid_gt(GROUP_AT(group_info, left), tmp)) { GROUP_AT(group_info, right) = GROUP_AT(group_info, left); right = left; @@ -132,7 +132,7 @@ static void groups_sort(struct group_info *group_info) } /* a simple bsearch */ -int groups_search(const struct group_info *group_info, gid_t grp) +int groups_search(const struct group_info *group_info, kgid_t grp) { unsigned int left, right; @@ -143,9 +143,9 @@ int groups_search(const struct group_info *group_info, gid_t grp) right = group_info->ngroups; while (left < right) { unsigned int mid = (left+right)/2; - if (grp > GROUP_AT(group_info, mid)) + if (gid_gt(grp, GROUP_AT(group_info, mid))) left = mid + 1; - else if (grp < GROUP_AT(group_info, mid)) + else if (gid_lt(grp, GROUP_AT(group_info, mid))) right = mid; else return 1; @@ -256,24 +256,24 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) /* * Check whether we're fsgid/egid or in the supplemental group.. */ -int in_group_p(gid_t grp) +int in_group_p(kgid_t grp) { const struct cred *cred = current_cred(); int retval = 1; - if (grp != cred->fsgid) + if (!gid_eq(grp, cred->fsgid)) retval = groups_search(cred->group_info, grp); return retval; } EXPORT_SYMBOL(in_group_p); -int in_egroup_p(gid_t grp) +int in_egroup_p(kgid_t grp) { const struct cred *cred = current_cred(); int retval = 1; - if (grp != cred->egid) + if (!gid_eq(grp, cred->egid)) retval = groups_search(cred->group_info, grp); return retval; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ee8d49b..a232bb5 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -198,15 +198,14 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) return 0; rcu_read_lock(); tcred = __task_cred(task); - if (cred->user->user_ns == tcred->user->user_ns && - (cred->uid == tcred->euid && - cred->uid == tcred->suid && - cred->uid == tcred->uid && - cred->gid == tcred->egid && - cred->gid == tcred->sgid && - cred->gid == tcred->gid)) + if (uid_eq(cred->uid, tcred->euid) && + uid_eq(cred->uid, tcred->suid) && + uid_eq(cred->uid, tcred->uid) && + gid_eq(cred->gid, tcred->egid) && + gid_eq(cred->gid, tcred->sgid) && + gid_eq(cred->gid, tcred->gid)) goto ok; - if (ptrace_has_cap(tcred->user->user_ns, mode)) + if (ptrace_has_cap(tcred->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a5a9d39..39eb601 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4070,11 +4070,8 @@ static bool check_same_owner(struct task_struct *p) rcu_read_lock(); pcred = __task_cred(p); - if (cred->user->user_ns == pcred->user->user_ns) - match = (cred->euid == pcred->euid || - cred->euid == pcred->uid); - else - match = false; + match = (uid_eq(cred->euid, pcred->euid) || + uid_eq(cred->euid, pcred->uid)); rcu_read_unlock(); return match; } diff --git a/kernel/signal.c b/kernel/signal.c index 1a006b5..21ebe75 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -767,14 +767,13 @@ static int kill_ok_by_cred(struct task_struct *t) const struct cred *cred = current_cred(); const struct cred *tcred = __task_cred(t); - if (cred->user->user_ns == tcred->user->user_ns && - (cred->euid == tcred->suid || - cred->euid == tcred->uid || - cred->uid == tcred->suid || - cred->uid == tcred->uid)) + if (uid_eq(cred->euid, tcred->suid) || + uid_eq(cred->euid, tcred->uid) || + uid_eq(cred->uid, tcred->suid) || + uid_eq(cred->uid, tcred->uid)) return 1; - if (ns_capable(tcred->user->user_ns, CAP_KILL)) + if (ns_capable(tcred->user_ns, CAP_KILL)) return 1; return 0; @@ -1020,15 +1019,6 @@ static inline int legacy_queue(struct sigpending *signals, int sig) return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } -/* - * map the uid in struct cred into user namespace *ns - */ -static inline uid_t map_cred_ns(const struct cred *cred, - struct user_namespace *ns) -{ - return user_ns_map_uid(ns, cred, cred->uid); -} - #ifdef CONFIG_USER_NS static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) { @@ -1038,8 +1028,10 @@ static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_str if (SI_FROMKERNEL(info)) return; - info->si_uid = user_ns_map_uid(task_cred_xxx(t, user_ns), - current_cred(), info->si_uid); + rcu_read_lock(); + info->si_uid = from_kuid_munged(task_cred_xxx(t, user_ns), + make_kuid(current_user_ns(), info->si_uid)); + rcu_read_unlock(); } #else static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) @@ -1106,7 +1098,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, q->info.si_code = SI_USER; q->info.si_pid = task_tgid_nr_ns(current, task_active_pid_ns(t)); - q->info.si_uid = current_uid(); + q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); break; case (unsigned long) SEND_SIG_PRIV: q->info.si_signo = sig; @@ -1387,10 +1379,8 @@ static int kill_as_cred_perm(const struct cred *cred, struct task_struct *target) { const struct cred *pcred = __task_cred(target); - if (cred->user_ns != pcred->user_ns) - return 0; - if (cred->euid != pcred->suid && cred->euid != pcred->uid && - cred->uid != pcred->suid && cred->uid != pcred->uid) + if (!uid_eq(cred->euid, pcred->suid) && !uid_eq(cred->euid, pcred->uid) && + !uid_eq(cred->uid, pcred->suid) && !uid_eq(cred->uid, pcred->uid)) return 0; return 1; } @@ -1678,8 +1668,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig) */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); - info.si_uid = map_cred_ns(__task_cred(tsk), - task_cred_xxx(tsk->parent, user_ns)); + info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns), + task_uid(tsk)); rcu_read_unlock(); info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime); @@ -1762,8 +1752,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); - info.si_uid = map_cred_ns(__task_cred(tsk), - task_cred_xxx(parent, user_ns)); + info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); rcu_read_unlock(); info.si_utime = cputime_to_clock_t(tsk->utime); @@ -1973,7 +1962,7 @@ static void ptrace_do_notify(int signr, int exit_code, int why) info.si_signo = signr; info.si_code = exit_code; info.si_pid = task_pid_vnr(current); - info.si_uid = current_uid(); + info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); /* Let the debugger run. */ ptrace_stop(exit_code, why, 1, &info); @@ -2181,8 +2170,8 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_code = SI_USER; rcu_read_lock(); info->si_pid = task_pid_vnr(current->parent); - info->si_uid = map_cred_ns(__task_cred(current->parent), - current_user_ns()); + info->si_uid = from_kuid_munged(current_user_ns(), + task_uid(current->parent)); rcu_read_unlock(); } @@ -2835,7 +2824,7 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) info.si_errno = 0; info.si_code = SI_USER; info.si_pid = task_tgid_vnr(current); - info.si_uid = current_uid(); + info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); return kill_something_info(sig, &info, pid); } @@ -2878,7 +2867,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) info.si_errno = 0; info.si_code = SI_TKILL; info.si_pid = task_tgid_vnr(current); - info.si_uid = current_uid(); + info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); return do_send_specific(tgid, pid, sig, &info); } diff --git a/kernel/sys.c b/kernel/sys.c index ba0ae8e..6df4262 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -93,10 +93,8 @@ int overflowuid = DEFAULT_OVERFLOWUID; int overflowgid = DEFAULT_OVERFLOWGID; -#ifdef CONFIG_UID16 EXPORT_SYMBOL(overflowuid); EXPORT_SYMBOL(overflowgid); -#endif /* * the same as above, but for filesystems which can only store a 16-bit @@ -133,11 +131,10 @@ static bool set_one_prio_perm(struct task_struct *p) { const struct cred *cred = current_cred(), *pcred = __task_cred(p); - if (pcred->user->user_ns == cred->user->user_ns && - (pcred->uid == cred->euid || - pcred->euid == cred->euid)) + if (uid_eq(pcred->uid, cred->euid) || + uid_eq(pcred->euid, cred->euid)) return true; - if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE)) + if (ns_capable(pcred->user_ns, CAP_SYS_NICE)) return true; return false; } @@ -177,6 +174,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) const struct cred *cred = current_cred(); int error = -EINVAL; struct pid *pgrp; + kuid_t uid; if (which > PRIO_USER || which < PRIO_PROCESS) goto out; @@ -209,18 +207,19 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = (struct user_struct *) cred->user; + uid = make_kuid(cred->user_ns, who); + user = cred->user; if (!who) - who = cred->uid; - else if ((who != cred->uid) && - !(user = find_user(who))) + uid = cred->uid; + else if (!uid_eq(uid, cred->uid) && + !(user = find_user(uid))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) { - if (__task_cred(p)->uid == who) + if (uid_eq(task_uid(p), uid)) error = set_one_prio(p, niceval, error); } while_each_thread(g, p); - if (who != cred->uid) + if (!uid_eq(uid, cred->uid)) free_uid(user); /* For find_user() */ break; } @@ -244,6 +243,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) const struct cred *cred = current_cred(); long niceval, retval = -ESRCH; struct pid *pgrp; + kuid_t uid; if (which > PRIO_USER || which < PRIO_PROCESS) return -EINVAL; @@ -274,21 +274,22 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = (struct user_struct *) cred->user; + uid = make_kuid(cred->user_ns, who); + user = cred->user; if (!who) - who = cred->uid; - else if ((who != cred->uid) && - !(user = find_user(who))) + uid = cred->uid; + else if (!uid_eq(uid, cred->uid) && + !(user = find_user(uid))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) { - if (__task_cred(p)->uid == who) { + if (uid_eq(task_uid(p), uid)) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; } } while_each_thread(g, p); - if (who != cred->uid) + if (!uid_eq(uid, cred->uid)) free_uid(user); /* for find_user() */ break; } @@ -553,9 +554,19 @@ void ctrl_alt_del(void) */ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) { + struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; + kgid_t krgid, kegid; + + krgid = make_kgid(ns, rgid); + kegid = make_kgid(ns, egid); + + if ((rgid != (gid_t) -1) && !gid_valid(krgid)) + return -EINVAL; + if ((egid != (gid_t) -1) && !gid_valid(kegid)) + return -EINVAL; new = prepare_creds(); if (!new) @@ -564,25 +575,25 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) retval = -EPERM; if (rgid != (gid_t) -1) { - if (old->gid == rgid || - old->egid == rgid || + if (gid_eq(old->gid, krgid) || + gid_eq(old->egid, krgid) || nsown_capable(CAP_SETGID)) - new->gid = rgid; + new->gid = krgid; else goto error; } if (egid != (gid_t) -1) { - if (old->gid == egid || - old->egid == egid || - old->sgid == egid || + if (gid_eq(old->gid, kegid) || + gid_eq(old->egid, kegid) || + gid_eq(old->sgid, kegid) || nsown_capable(CAP_SETGID)) - new->egid = egid; + new->egid = kegid; else goto error; } if (rgid != (gid_t) -1 || - (egid != (gid_t) -1 && egid != old->gid)) + (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) new->sgid = new->egid; new->fsgid = new->egid; @@ -600,9 +611,15 @@ error: */ SYSCALL_DEFINE1(setgid, gid_t, gid) { + struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; + kgid_t kgid; + + kgid = make_kgid(ns, gid); + if (!gid_valid(kgid)) + return -EINVAL; new = prepare_creds(); if (!new) @@ -611,9 +628,9 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) retval = -EPERM; if (nsown_capable(CAP_SETGID)) - new->gid = new->egid = new->sgid = new->fsgid = gid; - else if (gid == old->gid || gid == old->sgid) - new->egid = new->fsgid = gid; + new->gid = new->egid = new->sgid = new->fsgid = kgid; + else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) + new->egid = new->fsgid = kgid; else goto error; @@ -631,7 +648,7 @@ static int set_user(struct cred *new) { struct user_struct *new_user; - new_user = alloc_uid(current_user_ns(), new->uid); + new_user = alloc_uid(new->uid); if (!new_user) return -EAGAIN; @@ -670,9 +687,19 @@ static int set_user(struct cred *new) */ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) { + struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; + kuid_t kruid, keuid; + + kruid = make_kuid(ns, ruid); + keuid = make_kuid(ns, euid); + + if ((ruid != (uid_t) -1) && !uid_valid(kruid)) + return -EINVAL; + if ((euid != (uid_t) -1) && !uid_valid(keuid)) + return -EINVAL; new = prepare_creds(); if (!new) @@ -681,29 +708,29 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) retval = -EPERM; if (ruid != (uid_t) -1) { - new->uid = ruid; - if (old->uid != ruid && - old->euid != ruid && + new->uid = kruid; + if (!uid_eq(old->uid, kruid) && + !uid_eq(old->euid, kruid) && !nsown_capable(CAP_SETUID)) goto error; } if (euid != (uid_t) -1) { - new->euid = euid; - if (old->uid != euid && - old->euid != euid && - old->suid != euid && + new->euid = keuid; + if (!uid_eq(old->uid, keuid) && + !uid_eq(old->euid, keuid) && + !uid_eq(old->suid, keuid) && !nsown_capable(CAP_SETUID)) goto error; } - if (new->uid != old->uid) { + if (!uid_eq(new->uid, old->uid)) { retval = set_user(new); if (retval < 0) goto error; } if (ruid != (uid_t) -1 || - (euid != (uid_t) -1 && euid != old->uid)) + (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) new->suid = new->euid; new->fsuid = new->euid; @@ -731,9 +758,15 @@ error: */ SYSCALL_DEFINE1(setuid, uid_t, uid) { + struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; + kuid_t kuid; + + kuid = make_kuid(ns, uid); + if (!uid_valid(kuid)) + return -EINVAL; new = prepare_creds(); if (!new) @@ -742,17 +775,17 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) retval = -EPERM; if (nsown_capable(CAP_SETUID)) { - new->suid = new->uid = uid; - if (uid != old->uid) { + new->suid = new->uid = kuid; + if (!uid_eq(kuid, old->uid)) { retval = set_user(new); if (retval < 0) goto error; } - } else if (uid != old->uid && uid != new->suid) { + } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) { goto error; } - new->fsuid = new->euid = uid; + new->fsuid = new->euid = kuid; retval = security_task_fix_setuid(new, old, LSM_SETID_ID); if (retval < 0) @@ -772,9 +805,24 @@ error: */ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) { + struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; + kuid_t kruid, keuid, ksuid; + + kruid = make_kuid(ns, ruid); + keuid = make_kuid(ns, euid); + ksuid = make_kuid(ns, suid); + + if ((ruid != (uid_t) -1) && !uid_valid(kruid)) + return -EINVAL; + + if ((euid != (uid_t) -1) && !uid_valid(keuid)) + return -EINVAL; + + if ((suid != (uid_t) -1) && !uid_valid(ksuid)) + return -EINVAL; new = prepare_creds(); if (!new) @@ -784,29 +832,29 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) retval = -EPERM; if (!nsown_capable(CAP_SETUID)) { - if (ruid != (uid_t) -1 && ruid != old->uid && - ruid != old->euid && ruid != old->suid) + if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && + !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) goto error; - if (euid != (uid_t) -1 && euid != old->uid && - euid != old->euid && euid != old->suid) + if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && + !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) goto error; - if (suid != (uid_t) -1 && suid != old->uid && - suid != old->euid && suid != old->suid) + if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && + !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) goto error; } if (ruid != (uid_t) -1) { - new->uid = ruid; - if (ruid != old->uid) { + new->uid = kruid; + if (!uid_eq(kruid, old->uid)) { retval = set_user(new); if (retval < 0) goto error; } } if (euid != (uid_t) -1) - new->euid = euid; + new->euid = keuid; if (suid != (uid_t) -1) - new->suid = suid; + new->suid = ksuid; new->fsuid = new->euid; retval = security_task_fix_setuid(new, old, LSM_SETID_RES); @@ -820,14 +868,19 @@ error: return retval; } -SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid) +SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) { const struct cred *cred = current_cred(); int retval; + uid_t ruid, euid, suid; + + ruid = from_kuid_munged(cred->user_ns, cred->uid); + euid = from_kuid_munged(cred->user_ns, cred->euid); + suid = from_kuid_munged(cred->user_ns, cred->suid); - if (!(retval = put_user(cred->uid, ruid)) && - !(retval = put_user(cred->euid, euid))) - retval = put_user(cred->suid, suid); + if (!(retval = put_user(ruid, ruidp)) && + !(retval = put_user(euid, euidp))) + retval = put_user(suid, suidp); return retval; } @@ -837,9 +890,22 @@ SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __u */ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) { + struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; + kgid_t krgid, kegid, ksgid; + + krgid = make_kgid(ns, rgid); + kegid = make_kgid(ns, egid); + ksgid = make_kgid(ns, sgid); + + if ((rgid != (gid_t) -1) && !gid_valid(krgid)) + return -EINVAL; + if ((egid != (gid_t) -1) && !gid_valid(kegid)) + return -EINVAL; + if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) + return -EINVAL; new = prepare_creds(); if (!new) @@ -848,23 +914,23 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) retval = -EPERM; if (!nsown_capable(CAP_SETGID)) { - if (rgid != (gid_t) -1 && rgid != old->gid && - rgid != old->egid && rgid != old->sgid) + if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && + !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) goto error; - if (egid != (gid_t) -1 && egid != old->gid && - egid != old->egid && egid != old->sgid) + if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && + !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) goto error; - if (sgid != (gid_t) -1 && sgid != old->gid && - sgid != old->egid && sgid != old->sgid) + if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && + !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) goto error; } if (rgid != (gid_t) -1) - new->gid = rgid; + new->gid = krgid; if (egid != (gid_t) -1) - new->egid = egid; + new->egid = kegid; if (sgid != (gid_t) -1) - new->sgid = sgid; + new->sgid = ksgid; new->fsgid = new->egid; return commit_creds(new); @@ -874,14 +940,19 @@ error: return retval; } -SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid) +SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) { const struct cred *cred = current_cred(); int retval; + gid_t rgid, egid, sgid; + + rgid = from_kgid_munged(cred->user_ns, cred->gid); + egid = from_kgid_munged(cred->user_ns, cred->egid); + sgid = from_kgid_munged(cred->user_ns, cred->sgid); - if (!(retval = put_user(cred->gid, rgid)) && - !(retval = put_user(cred->egid, egid))) - retval = put_user(cred->sgid, sgid); + if (!(retval = put_user(rgid, rgidp)) && + !(retval = put_user(egid, egidp))) + retval = put_user(sgid, sgidp); return retval; } @@ -898,18 +969,24 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) const struct cred *old; struct cred *new; uid_t old_fsuid; + kuid_t kuid; + + old = current_cred(); + old_fsuid = from_kuid_munged(old->user_ns, old->fsuid); + + kuid = make_kuid(old->user_ns, uid); + if (!uid_valid(kuid)) + return old_fsuid; new = prepare_creds(); if (!new) - return current_fsuid(); - old = current_cred(); - old_fsuid = old->fsuid; + return old_fsuid; - if (uid == old->uid || uid == old->euid || - uid == old->suid || uid == old->fsuid || + if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || + uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || nsown_capable(CAP_SETUID)) { - if (uid != old_fsuid) { - new->fsuid = uid; + if (!uid_eq(kuid, old->fsuid)) { + new->fsuid = kuid; if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) goto change_okay; } @@ -931,18 +1008,24 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) const struct cred *old; struct cred *new; gid_t old_fsgid; + kgid_t kgid; + + old = current_cred(); + old_fsgid = from_kgid_munged(old->user_ns, old->fsgid); + + kgid = make_kgid(old->user_ns, gid); + if (!gid_valid(kgid)) + return old_fsgid; new = prepare_creds(); if (!new) - return current_fsgid(); - old = current_cred(); - old_fsgid = old->fsgid; + return old_fsgid; - if (gid == old->gid || gid == old->egid || - gid == old->sgid || gid == old->fsgid || + if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || + gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || nsown_capable(CAP_SETGID)) { - if (gid != old_fsgid) { - new->fsgid = gid; + if (!gid_eq(kgid, old->fsgid)) { + new->fsgid = kgid; goto change_okay; } } @@ -1498,15 +1581,14 @@ static int check_prlimit_permission(struct task_struct *task) return 0; tcred = __task_cred(task); - if (cred->user->user_ns == tcred->user->user_ns && - (cred->uid == tcred->euid && - cred->uid == tcred->suid && - cred->uid == tcred->uid && - cred->gid == tcred->egid && - cred->gid == tcred->sgid && - cred->gid == tcred->gid)) + if (uid_eq(cred->uid, tcred->euid) && + uid_eq(cred->uid, tcred->suid) && + uid_eq(cred->uid, tcred->uid) && + gid_eq(cred->gid, tcred->egid) && + gid_eq(cred->gid, tcred->sgid) && + gid_eq(cred->gid, tcred->gid)) return 0; - if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE)) + if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) return 0; return -EPERM; diff --git a/kernel/timer.c b/kernel/timer.c index 09de9a9..6ec7e7e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1435,25 +1435,25 @@ SYSCALL_DEFINE0(getppid) SYSCALL_DEFINE0(getuid) { /* Only we change this so SMP safe */ - return current_uid(); + return from_kuid_munged(current_user_ns(), current_uid()); } SYSCALL_DEFINE0(geteuid) { /* Only we change this so SMP safe */ - return current_euid(); + return from_kuid_munged(current_user_ns(), current_euid()); } SYSCALL_DEFINE0(getgid) { /* Only we change this so SMP safe */ - return current_gid(); + return from_kgid_munged(current_user_ns(), current_gid()); } SYSCALL_DEFINE0(getegid) { /* Only we change this so SMP safe */ - return current_egid(); + return from_kgid_munged(current_user_ns(), current_egid()); } #endif diff --git a/kernel/uid16.c b/kernel/uid16.c index 51c6e89..d7948eb 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -81,14 +81,19 @@ SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid) return ret; } -SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruid, old_uid_t __user *, euid, old_uid_t __user *, suid) +SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruidp, old_uid_t __user *, euidp, old_uid_t __user *, suidp) { const struct cred *cred = current_cred(); int retval; + old_uid_t ruid, euid, suid; - if (!(retval = put_user(high2lowuid(cred->uid), ruid)) && - !(retval = put_user(high2lowuid(cred->euid), euid))) - retval = put_user(high2lowuid(cred->suid), suid); + ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid)); + euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid)); + suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid)); + + if (!(retval = put_user(ruid, ruidp)) && + !(retval = put_user(euid, euidp))) + retval = put_user(suid, suidp); return retval; } @@ -103,14 +108,19 @@ SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid) } -SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgid, old_gid_t __user *, egid, old_gid_t __user *, sgid) +SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgidp, old_gid_t __user *, egidp, old_gid_t __user *, sgidp) { const struct cred *cred = current_cred(); int retval; + old_gid_t rgid, egid, sgid; + + rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid)); + egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid)); + sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid)); - if (!(retval = put_user(high2lowgid(cred->gid), rgid)) && - !(retval = put_user(high2lowgid(cred->egid), egid))) - retval = put_user(high2lowgid(cred->sgid), sgid); + if (!(retval = put_user(rgid, rgidp)) && + !(retval = put_user(egid, egidp))) + retval = put_user(sgid, sgidp); return retval; } @@ -134,11 +144,14 @@ SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid) static int groups16_to_user(old_gid_t __user *grouplist, struct group_info *group_info) { + struct user_namespace *user_ns = current_user_ns(); int i; old_gid_t group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { - group = high2lowgid(GROUP_AT(group_info, i)); + kgid = GROUP_AT(group_info, i); + group = high2lowgid(from_kgid_munged(user_ns, kgid)); if (put_user(group, grouplist+i)) return -EFAULT; } @@ -149,13 +162,20 @@ static int groups16_to_user(old_gid_t __user *grouplist, static int groups16_from_user(struct group_info *group_info, old_gid_t __user *grouplist) { + struct user_namespace *user_ns = current_user_ns(); int i; old_gid_t group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { if (get_user(group, grouplist+i)) return -EFAULT; - GROUP_AT(group_info, i) = low2highgid(group); + + kgid = make_kgid(user_ns, low2highgid(group)); + if (!gid_valid(kgid)) + return -EINVAL; + + GROUP_AT(group_info, i) = kgid; } return 0; @@ -211,20 +231,20 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) SYSCALL_DEFINE0(getuid16) { - return high2lowuid(current_uid()); + return high2lowuid(from_kuid_munged(current_user_ns(), current_uid())); } SYSCALL_DEFINE0(geteuid16) { - return high2lowuid(current_euid()); + return high2lowuid(from_kuid_munged(current_user_ns(), current_euid())); } SYSCALL_DEFINE0(getgid16) { - return high2lowgid(current_gid()); + return high2lowgid(from_kgid_munged(current_user_ns(), current_gid())); } SYSCALL_DEFINE0(getegid16) { - return high2lowgid(current_egid()); + return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); } diff --git a/kernel/user.c b/kernel/user.c index 71dd236..b815fef 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -22,10 +22,27 @@ * and 1 for... ? */ struct user_namespace init_user_ns = { + .uid_map = { + .nr_extents = 1, + .extent[0] = { + .first = 0, + .lower_first = 0, + .count = 4294967295U, + }, + }, + .gid_map = { + .nr_extents = 1, + .extent[0] = { + .first = 0, + .lower_first = 0, + .count = 4294967295U, + }, + }, .kref = { .refcount = ATOMIC_INIT(3), }, - .creator = &root_user, + .owner = GLOBAL_ROOT_UID, + .group = GLOBAL_ROOT_GID, }; EXPORT_SYMBOL_GPL(init_user_ns); @@ -34,11 +51,14 @@ EXPORT_SYMBOL_GPL(init_user_ns); * when changing user ID's (ie setuid() and friends). */ +#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 7) +#define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -#define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid))) +#define uidhashentry(uid) (uidhash_table + __uidhashfn((__kuid_val(uid)))) static struct kmem_cache *uid_cachep; +struct hlist_head uidhash_table[UIDHASH_SZ]; /* * The uidhash_lock is mostly taken from process context, but it is @@ -51,14 +71,14 @@ static struct kmem_cache *uid_cachep; */ static DEFINE_SPINLOCK(uidhash_lock); -/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->user_ns */ +/* root_user.__count is 1, for init task cred */ struct user_struct root_user = { - .__count = ATOMIC_INIT(2), + .__count = ATOMIC_INIT(1), .processes = ATOMIC_INIT(1), .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, - .user_ns = &init_user_ns, + .uid = GLOBAL_ROOT_UID, }; /* @@ -72,16 +92,15 @@ static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent) static void uid_hash_remove(struct user_struct *up) { hlist_del_init(&up->uidhash_node); - put_user_ns(up->user_ns); } -static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) +static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent) { struct user_struct *user; struct hlist_node *h; hlist_for_each_entry(user, h, hashent, uidhash_node) { - if (user->uid == uid) { + if (uid_eq(user->uid, uid)) { atomic_inc(&user->__count); return user; } @@ -110,14 +129,13 @@ static void free_user(struct user_struct *up, unsigned long flags) * * If the user_struct could not be found, return NULL. */ -struct user_struct *find_user(uid_t uid) +struct user_struct *find_user(kuid_t uid) { struct user_struct *ret; unsigned long flags; - struct user_namespace *ns = current_user_ns(); spin_lock_irqsave(&uidhash_lock, flags); - ret = uid_hash_find(uid, uidhashentry(ns, uid)); + ret = uid_hash_find(uid, uidhashentry(uid)); spin_unlock_irqrestore(&uidhash_lock, flags); return ret; } @@ -136,9 +154,9 @@ void free_uid(struct user_struct *up) local_irq_restore(flags); } -struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) +struct user_struct *alloc_uid(kuid_t uid) { - struct hlist_head *hashent = uidhashentry(ns, uid); + struct hlist_head *hashent = uidhashentry(uid); struct user_struct *up, *new; spin_lock_irq(&uidhash_lock); @@ -153,8 +171,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) new->uid = uid; atomic_set(&new->__count, 1); - new->user_ns = get_user_ns(ns); - /* * Before adding this, check whether we raced * on adding the same user already.. @@ -162,7 +178,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { - put_user_ns(ns); key_put(new->uid_keyring); key_put(new->session_keyring); kmem_cache_free(uid_cachep, new); @@ -187,11 +202,11 @@ static int __init uid_cache_init(void) 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); for(n = 0; n < UIDHASH_SZ; ++n) - INIT_HLIST_HEAD(init_user_ns.uidhash_table + n); + INIT_HLIST_HEAD(uidhash_table + n); /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0)); + uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID)); spin_unlock_irq(&uidhash_lock); return 0; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 3b906e9..8660231 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -11,9 +11,20 @@ #include <linux/user_namespace.h> #include <linux/highuid.h> #include <linux/cred.h> +#include <linux/securebits.h> +#include <linux/keyctl.h> +#include <linux/key-type.h> +#include <keys/user-type.h> +#include <linux/seq_file.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/ctype.h> static struct kmem_cache *user_ns_cachep __read_mostly; +static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, + struct uid_gid_map *map); + /* * Create a new user namespace, deriving the creator from the user in the * passed credentials, and replacing that user with the new root user for the @@ -24,109 +35,565 @@ static struct kmem_cache *user_ns_cachep __read_mostly; */ int create_user_ns(struct cred *new) { - struct user_namespace *ns; - struct user_struct *root_user; - int n; + struct user_namespace *ns, *parent_ns = new->user_ns; + kuid_t owner = new->euid; + kgid_t group = new->egid; + + /* The creator needs a mapping in the parent user namespace + * or else we won't be able to reasonably tell userspace who + * created a user_namespace. + */ + if (!kuid_has_mapping(parent_ns, owner) || + !kgid_has_mapping(parent_ns, group)) + return -EPERM; - ns = kmem_cache_alloc(user_ns_cachep, GFP_KERNEL); + ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); if (!ns) return -ENOMEM; kref_init(&ns->kref); + ns->parent = parent_ns; + ns->owner = owner; + ns->group = group; - for (n = 0; n < UIDHASH_SZ; ++n) - INIT_HLIST_HEAD(ns->uidhash_table + n); - - /* Alloc new root user. */ - root_user = alloc_uid(ns, 0); - if (!root_user) { - kmem_cache_free(user_ns_cachep, ns); - return -ENOMEM; - } - - /* set the new root user in the credentials under preparation */ - ns->creator = new->user; - new->user = root_user; - new->uid = new->euid = new->suid = new->fsuid = 0; - new->gid = new->egid = new->sgid = new->fsgid = 0; - put_group_info(new->group_info); - new->group_info = get_group_info(&init_groups); + /* Start with the same capabilities as init but useless for doing + * anything as the capabilities are bound to the new user namespace. + */ + new->securebits = SECUREBITS_DEFAULT; + new->cap_inheritable = CAP_EMPTY_SET; + new->cap_permitted = CAP_FULL_SET; + new->cap_effective = CAP_FULL_SET; + new->cap_bset = CAP_FULL_SET; #ifdef CONFIG_KEYS key_put(new->request_key_auth); new->request_key_auth = NULL; #endif /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ - /* root_user holds a reference to ns, our reference can be dropped */ - put_user_ns(ns); + /* Leave the new->user_ns reference with the new user namespace. */ + /* Leave the reference to our user_ns with the new cred. */ + new->user_ns = ns; return 0; } -/* - * Deferred destructor for a user namespace. This is required because - * free_user_ns() may be called with uidhash_lock held, but we need to call - * back to free_uid() which will want to take the lock again. - */ -static void free_user_ns_work(struct work_struct *work) +void free_user_ns(struct kref *kref) { - struct user_namespace *ns = - container_of(work, struct user_namespace, destroyer); - free_uid(ns->creator); + struct user_namespace *parent, *ns = + container_of(kref, struct user_namespace, kref); + + parent = ns->parent; kmem_cache_free(user_ns_cachep, ns); + put_user_ns(parent); } +EXPORT_SYMBOL(free_user_ns); -void free_user_ns(struct kref *kref) +static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) { - struct user_namespace *ns = - container_of(kref, struct user_namespace, kref); + unsigned idx, extents; + u32 first, last, id2; + + id2 = id + count - 1; + + /* Find the matching extent */ + extents = map->nr_extents; + smp_read_barrier_depends(); + for (idx = 0; idx < extents; idx++) { + first = map->extent[idx].first; + last = first + map->extent[idx].count - 1; + if (id >= first && id <= last && + (id2 >= first && id2 <= last)) + break; + } + /* Map the id or note failure */ + if (idx < extents) + id = (id - first) + map->extent[idx].lower_first; + else + id = (u32) -1; - INIT_WORK(&ns->destroyer, free_user_ns_work); - schedule_work(&ns->destroyer); + return id; } -EXPORT_SYMBOL(free_user_ns); -uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid) +static u32 map_id_down(struct uid_gid_map *map, u32 id) { - struct user_namespace *tmp; + unsigned idx, extents; + u32 first, last; - if (likely(to == cred->user->user_ns)) - return uid; + /* Find the matching extent */ + extents = map->nr_extents; + smp_read_barrier_depends(); + for (idx = 0; idx < extents; idx++) { + first = map->extent[idx].first; + last = first + map->extent[idx].count - 1; + if (id >= first && id <= last) + break; + } + /* Map the id or note failure */ + if (idx < extents) + id = (id - first) + map->extent[idx].lower_first; + else + id = (u32) -1; + return id; +} - /* Is cred->user the creator of the target user_ns - * or the creator of one of it's parents? - */ - for ( tmp = to; tmp != &init_user_ns; - tmp = tmp->creator->user_ns ) { - if (cred->user == tmp->creator) { - return (uid_t)0; - } +static u32 map_id_up(struct uid_gid_map *map, u32 id) +{ + unsigned idx, extents; + u32 first, last; + + /* Find the matching extent */ + extents = map->nr_extents; + smp_read_barrier_depends(); + for (idx = 0; idx < extents; idx++) { + first = map->extent[idx].lower_first; + last = first + map->extent[idx].count - 1; + if (id >= first && id <= last) + break; } + /* Map the id or note failure */ + if (idx < extents) + id = (id - first) + map->extent[idx].first; + else + id = (u32) -1; + + return id; +} + +/** + * make_kuid - Map a user-namespace uid pair into a kuid. + * @ns: User namespace that the uid is in + * @uid: User identifier + * + * Maps a user-namespace uid pair into a kernel internal kuid, + * and returns that kuid. + * + * When there is no mapping defined for the user-namespace uid + * pair INVALID_UID is returned. Callers are expected to test + * for and handle handle INVALID_UID being returned. INVALID_UID + * may be tested for using uid_valid(). + */ +kuid_t make_kuid(struct user_namespace *ns, uid_t uid) +{ + /* Map the uid to a global kernel uid */ + return KUIDT_INIT(map_id_down(&ns->uid_map, uid)); +} +EXPORT_SYMBOL(make_kuid); + +/** + * from_kuid - Create a uid from a kuid user-namespace pair. + * @targ: The user namespace we want a uid in. + * @kuid: The kernel internal uid to start with. + * + * Map @kuid into the user-namespace specified by @targ and + * return the resulting uid. + * + * There is always a mapping into the initial user_namespace. + * + * If @kuid has no mapping in @targ (uid_t)-1 is returned. + */ +uid_t from_kuid(struct user_namespace *targ, kuid_t kuid) +{ + /* Map the uid from a global kernel uid */ + return map_id_up(&targ->uid_map, __kuid_val(kuid)); +} +EXPORT_SYMBOL(from_kuid); - /* No useful relationship so no mapping */ - return overflowuid; +/** + * from_kuid_munged - Create a uid from a kuid user-namespace pair. + * @targ: The user namespace we want a uid in. + * @kuid: The kernel internal uid to start with. + * + * Map @kuid into the user-namespace specified by @targ and + * return the resulting uid. + * + * There is always a mapping into the initial user_namespace. + * + * Unlike from_kuid from_kuid_munged never fails and always + * returns a valid uid. This makes from_kuid_munged appropriate + * for use in syscalls like stat and getuid where failing the + * system call and failing to provide a valid uid are not an + * options. + * + * If @kuid has no mapping in @targ overflowuid is returned. + */ +uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid) +{ + uid_t uid; + uid = from_kuid(targ, kuid); + + if (uid == (uid_t) -1) + uid = overflowuid; + return uid; } +EXPORT_SYMBOL(from_kuid_munged); -gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid) +/** + * make_kgid - Map a user-namespace gid pair into a kgid. + * @ns: User namespace that the gid is in + * @uid: group identifier + * + * Maps a user-namespace gid pair into a kernel internal kgid, + * and returns that kgid. + * + * When there is no mapping defined for the user-namespace gid + * pair INVALID_GID is returned. Callers are expected to test + * for and handle INVALID_GID being returned. INVALID_GID may be + * tested for using gid_valid(). + */ +kgid_t make_kgid(struct user_namespace *ns, gid_t gid) { - struct user_namespace *tmp; + /* Map the gid to a global kernel gid */ + return KGIDT_INIT(map_id_down(&ns->gid_map, gid)); +} +EXPORT_SYMBOL(make_kgid); - if (likely(to == cred->user->user_ns)) - return gid; +/** + * from_kgid - Create a gid from a kgid user-namespace pair. + * @targ: The user namespace we want a gid in. + * @kgid: The kernel internal gid to start with. + * + * Map @kgid into the user-namespace specified by @targ and + * return the resulting gid. + * + * There is always a mapping into the initial user_namespace. + * + * If @kgid has no mapping in @targ (gid_t)-1 is returned. + */ +gid_t from_kgid(struct user_namespace *targ, kgid_t kgid) +{ + /* Map the gid from a global kernel gid */ + return map_id_up(&targ->gid_map, __kgid_val(kgid)); +} +EXPORT_SYMBOL(from_kgid); + +/** + * from_kgid_munged - Create a gid from a kgid user-namespace pair. + * @targ: The user namespace we want a gid in. + * @kgid: The kernel internal gid to start with. + * + * Map @kgid into the user-namespace specified by @targ and + * return the resulting gid. + * + * There is always a mapping into the initial user_namespace. + * + * Unlike from_kgid from_kgid_munged never fails and always + * returns a valid gid. This makes from_kgid_munged appropriate + * for use in syscalls like stat and getgid where failing the + * system call and failing to provide a valid gid are not options. + * + * If @kgid has no mapping in @targ overflowgid is returned. + */ +gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) +{ + gid_t gid; + gid = from_kgid(targ, kgid); - /* Is cred->user the creator of the target user_ns - * or the creator of one of it's parents? + if (gid == (gid_t) -1) + gid = overflowgid; + return gid; +} +EXPORT_SYMBOL(from_kgid_munged); + +static int uid_m_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + struct uid_gid_extent *extent = v; + struct user_namespace *lower_ns; + uid_t lower; + + lower_ns = current_user_ns(); + if ((lower_ns == ns) && lower_ns->parent) + lower_ns = lower_ns->parent; + + lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first)); + + seq_printf(seq, "%10u %10u %10u\n", + extent->first, + lower, + extent->count); + + return 0; +} + +static int gid_m_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + struct uid_gid_extent *extent = v; + struct user_namespace *lower_ns; + gid_t lower; + + lower_ns = current_user_ns(); + if ((lower_ns == ns) && lower_ns->parent) + lower_ns = lower_ns->parent; + + lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first)); + + seq_printf(seq, "%10u %10u %10u\n", + extent->first, + lower, + extent->count); + + return 0; +} + +static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) +{ + struct uid_gid_extent *extent = NULL; + loff_t pos = *ppos; + + if (pos < map->nr_extents) + extent = &map->extent[pos]; + + return extent; +} + +static void *uid_m_start(struct seq_file *seq, loff_t *ppos) +{ + struct user_namespace *ns = seq->private; + + return m_start(seq, ppos, &ns->uid_map); +} + +static void *gid_m_start(struct seq_file *seq, loff_t *ppos) +{ + struct user_namespace *ns = seq->private; + + return m_start(seq, ppos, &ns->gid_map); +} + +static void *m_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + return seq->op->start(seq, pos); +} + +static void m_stop(struct seq_file *seq, void *v) +{ + return; +} + +struct seq_operations proc_uid_seq_operations = { + .start = uid_m_start, + .stop = m_stop, + .next = m_next, + .show = uid_m_show, +}; + +struct seq_operations proc_gid_seq_operations = { + .start = gid_m_start, + .stop = m_stop, + .next = m_next, + .show = gid_m_show, +}; + +static DEFINE_MUTEX(id_map_mutex); + +static ssize_t map_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, + int cap_setid, + struct uid_gid_map *map, + struct uid_gid_map *parent_map) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + struct uid_gid_map new_map; + unsigned idx; + struct uid_gid_extent *extent, *last = NULL; + unsigned long page = 0; + char *kbuf, *pos, *next_line; + ssize_t ret = -EINVAL; + + /* + * The id_map_mutex serializes all writes to any given map. + * + * Any map is only ever written once. + * + * An id map fits within 1 cache line on most architectures. + * + * On read nothing needs to be done unless you are on an + * architecture with a crazy cache coherency model like alpha. + * + * There is a one time data dependency between reading the + * count of the extents and the values of the extents. The + * desired behavior is to see the values of the extents that + * were written before the count of the extents. + * + * To achieve this smp_wmb() is used on guarantee the write + * order and smp_read_barrier_depends() is guaranteed that we + * don't have crazy architectures returning stale data. + * */ - for ( tmp = to; tmp != &init_user_ns; - tmp = tmp->creator->user_ns ) { - if (cred->user == tmp->creator) { - return (gid_t)0; + mutex_lock(&id_map_mutex); + + ret = -EPERM; + /* Only allow one successful write to the map */ + if (map->nr_extents != 0) + goto out; + + /* Require the appropriate privilege CAP_SETUID or CAP_SETGID + * over the user namespace in order to set the id mapping. + */ + if (!ns_capable(ns, cap_setid)) + goto out; + + /* Get a buffer */ + ret = -ENOMEM; + page = __get_free_page(GFP_TEMPORARY); + kbuf = (char *) page; + if (!page) + goto out; + + /* Only allow <= page size writes at the beginning of the file */ + ret = -EINVAL; + if ((*ppos != 0) || (count >= PAGE_SIZE)) + goto out; + + /* Slurp in the user data */ + ret = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + kbuf[count] = '\0'; + + /* Parse the user data */ + ret = -EINVAL; + pos = kbuf; + new_map.nr_extents = 0; + for (;pos; pos = next_line) { + extent = &new_map.extent[new_map.nr_extents]; + + /* Find the end of line and ensure I don't look past it */ + next_line = strchr(pos, '\n'); + if (next_line) { + *next_line = '\0'; + next_line++; + if (*next_line == '\0') + next_line = NULL; } + + pos = skip_spaces(pos); + extent->first = simple_strtoul(pos, &pos, 10); + if (!isspace(*pos)) + goto out; + + pos = skip_spaces(pos); + extent->lower_first = simple_strtoul(pos, &pos, 10); + if (!isspace(*pos)) + goto out; + + pos = skip_spaces(pos); + extent->count = simple_strtoul(pos, &pos, 10); + if (*pos && !isspace(*pos)) + goto out; + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') + goto out; + + /* Verify we have been given valid starting values */ + if ((extent->first == (u32) -1) || + (extent->lower_first == (u32) -1 )) + goto out; + + /* Verify count is not zero and does not cause the extent to wrap */ + if ((extent->first + extent->count) <= extent->first) + goto out; + if ((extent->lower_first + extent->count) <= extent->lower_first) + goto out; + + /* For now only accept extents that are strictly in order */ + if (last && + (((last->first + last->count) > extent->first) || + ((last->lower_first + last->count) > extent->lower_first))) + goto out; + + new_map.nr_extents++; + last = extent; + + /* Fail if the file contains too many extents */ + if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) && + (next_line != NULL)) + goto out; + } + /* Be very certaint the new map actually exists */ + if (new_map.nr_extents == 0) + goto out; + + ret = -EPERM; + /* Validate the user is allowed to use user id's mapped to. */ + if (!new_idmap_permitted(ns, cap_setid, &new_map)) + goto out; + + /* Map the lower ids from the parent user namespace to the + * kernel global id space. + */ + for (idx = 0; idx < new_map.nr_extents; idx++) { + u32 lower_first; + extent = &new_map.extent[idx]; + + lower_first = map_id_range_down(parent_map, + extent->lower_first, + extent->count); + + /* Fail if we can not map the specified extent to + * the kernel global id space. + */ + if (lower_first == (u32) -1) + goto out; + + extent->lower_first = lower_first; } - /* No useful relationship so no mapping */ - return overflowgid; + /* Install the map */ + memcpy(map->extent, new_map.extent, + new_map.nr_extents*sizeof(new_map.extent[0])); + smp_wmb(); + map->nr_extents = new_map.nr_extents; + + *ppos = count; + ret = count; +out: + mutex_unlock(&id_map_mutex); + if (page) + free_page(page); + return ret; +} + +ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + + if (!ns->parent) + return -EPERM; + + return map_write(file, buf, size, ppos, CAP_SETUID, + &ns->uid_map, &ns->parent->uid_map); +} + +ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + + if (!ns->parent) + return -EPERM; + + return map_write(file, buf, size, ppos, CAP_SETGID, + &ns->gid_map, &ns->parent->gid_map); +} + +static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, + struct uid_gid_map *new_map) +{ + /* Allow the specified ids if we have the appropriate capability + * (CAP_SETUID or CAP_SETGID) over the parent user namespace. + */ + if (ns_capable(ns->parent, cap_setid)) + return true; + + return false; } static __init int user_namespaces_init(void) diff --git a/kernel/utsname.c b/kernel/utsname.c index 405caf9..679d97a 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -43,7 +43,7 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); - ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); + ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); up_read(&uts_sem); return ns; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b195691..39fd416 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1334,8 +1334,8 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, * userid as the target process. */ tcred = __task_cred(task); - if (cred->euid != tcred->suid && cred->euid != tcred->uid && - cred->uid != tcred->suid && cred->uid != tcred->uid && + if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) && !capable(CAP_SYS_NICE)) { rcu_read_unlock(); err = -EPERM; diff --git a/mm/migrate.c b/mm/migrate.c index 1107238..ab81d48 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1371,8 +1371,8 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, * userid as the target process. */ tcred = __task_cred(task); - if (cred->euid != tcred->suid && cred->euid != tcred->uid && - cred->uid != tcred->suid && cred->uid != tcred->uid && + if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) && !capable(CAP_SYS_NICE)) { rcu_read_unlock(); err = -EPERM; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 46bf2ed5..9f09a1f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -410,8 +410,8 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas } pr_info("[%5d] %5d %5d %8lu %8lu %3u %3d %5d %s\n", - task->pid, task_uid(task), task->tgid, - task->mm->total_vm, get_mm_rss(task->mm), + task->pid, from_kuid(&init_user_ns, task_uid(task)), + task->tgid, task->mm->total_vm, get_mm_rss(task->mm), task_cpu(task), task->signal->oom_adj, task->signal->oom_score_adj, task->comm); task_unlock(task); @@ -2075,6 +2075,8 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, bool remount) { char *this_char, *value, *rest; + uid_t uid; + gid_t gid; while (options != NULL) { this_char = options; @@ -2134,15 +2136,21 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, } else if (!strcmp(this_char,"uid")) { if (remount) continue; - sbinfo->uid = simple_strtoul(value, &rest, 0); + uid = simple_strtoul(value, &rest, 0); if (*rest) goto bad_val; + sbinfo->uid = make_kuid(current_user_ns(), uid); + if (!uid_valid(sbinfo->uid)) + goto bad_val; } else if (!strcmp(this_char,"gid")) { if (remount) continue; - sbinfo->gid = simple_strtoul(value, &rest, 0); + gid = simple_strtoul(value, &rest, 0); if (*rest) goto bad_val; + sbinfo->gid = make_kgid(current_user_ns(), gid); + if (!gid_valid(sbinfo->gid)) + goto bad_val; } else if (!strcmp(this_char,"mpol")) { if (mpol_parse_str(value, &sbinfo->mpol, 1)) goto bad_val; @@ -2210,10 +2218,12 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); - if (sbinfo->uid != 0) - seq_printf(seq, ",uid=%u", sbinfo->uid); - if (sbinfo->gid != 0) - seq_printf(seq, ",gid=%u", sbinfo->gid); + if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) + seq_printf(seq, ",uid=%u", + from_kuid_munged(&init_user_ns, sbinfo->uid)); + if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) + seq_printf(seq, ",gid=%u", + from_kgid_munged(&init_user_ns, sbinfo->gid)); shmem_show_mpol(seq, sbinfo->mpol); return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index f372d9b..653f8c0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -813,8 +813,8 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred, if (cred) { struct user_namespace *current_ns = current_user_ns(); - ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid); - ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid); + ucred->uid = from_kuid(current_ns, cred->euid); + ucred->gid = from_kgid(current_ns, cred->egid); } } EXPORT_SYMBOL_GPL(cred_to_ucred); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 6e930c7..2c00e8b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -207,17 +207,22 @@ static int ping_init_sock(struct sock *sk) gid_t range[2]; struct group_info *group_info = get_current_groups(); int i, j, count = group_info->ngroups; + kgid_t low, high; inet_get_ping_group_range_net(net, range, range+1); + low = make_kgid(&init_user_ns, range[0]); + high = make_kgid(&init_user_ns, range[1]); + if (!gid_valid(low) || !gid_valid(high) || gid_lt(high, low)) + return -EACCES; + if (range[0] <= group && group <= range[1]) return 0; for (i = 0; i < group_info->nblocks; i++) { int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); - for (j = 0; j < cp_count; j++) { - group = group_info->blocks[i][j]; - if (range[0] <= group && group <= range[1]) + kgid_t gid = group_info->blocks[i][j]; + if (gid_lte(low, gid) && gid_lte(gid, high)) return 0; } diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 75762f3..6ed6f20 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -160,8 +160,8 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) if (gcred->acred.group_info->ngroups != acred->group_info->ngroups) goto out_nomatch; for (i = 0; i < gcred->acred.group_info->ngroups; i++) { - if (GROUP_AT(gcred->acred.group_info, i) != - GROUP_AT(acred->group_info, i)) + if (!gid_eq(GROUP_AT(gcred->acred.group_info, i), + GROUP_AT(acred->group_info, i))) goto out_nomatch; } out_match: diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1600cfb..28b62db 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -41,6 +41,7 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/pagemap.h> +#include <linux/user_namespace.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_err.h> @@ -470,9 +471,13 @@ static int rsc_parse(struct cache_detail *cd, status = -EINVAL; for (i=0; i<N; i++) { gid_t gid; + kgid_t kgid; if (get_int(&mesg, &gid)) goto out; - GROUP_AT(rsci.cred.cr_group_info, i) = gid; + kgid = make_kgid(&init_user_ns, gid); + if (!gid_valid(kgid)) + goto out; + GROUP_AT(rsci.cred.cr_group_info, i) = kgid; } /* mech name */ diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index e50502d..52c5abd 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/auth.h> +#include <linux/user_namespace.h> #define NFS_NGROUPS 16 @@ -78,8 +79,11 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) groups = NFS_NGROUPS; cred->uc_gid = acred->gid; - for (i = 0; i < groups; i++) - cred->uc_gids[i] = GROUP_AT(acred->group_info, i); + for (i = 0; i < groups; i++) { + gid_t gid; + gid = from_kgid(&init_user_ns, GROUP_AT(acred->group_info, i)); + cred->uc_gids[i] = gid; + } if (i < NFS_NGROUPS) cred->uc_gids[i] = NOGROUP; @@ -126,9 +130,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) groups = acred->group_info->ngroups; if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; - for (i = 0; i < groups ; i++) - if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) + for (i = 0; i < groups ; i++) { + gid_t gid; + gid = from_kgid(&init_user_ns, GROUP_AT(acred->group_info, i)); + if (cred->uc_gids[i] != gid) return 0; + } if (groups < NFS_NGROUPS && cred->uc_gids[groups] != NOGROUP) return 0; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 521d8f7..71ec853 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -14,6 +14,7 @@ #include <net/sock.h> #include <net/ipv6.h> #include <linux/kernel.h> +#include <linux/user_namespace.h> #define RPCDBG_FACILITY RPCDBG_AUTH #include <linux/sunrpc/clnt.h> @@ -530,11 +531,15 @@ static int unix_gid_parse(struct cache_detail *cd, for (i = 0 ; i < gids ; i++) { int gid; + kgid_t kgid; rv = get_int(&mesg, &gid); err = -EINVAL; if (rv) goto out; - GROUP_AT(ug.gi, i) = gid; + kgid = make_kgid(&init_user_ns, gid); + if (!gid_valid(kgid)) + goto out; + GROUP_AT(ug.gi, i) = kgid; } ugp = unix_gid_lookup(cd, uid); @@ -563,6 +568,7 @@ static int unix_gid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) { + struct user_namespace *user_ns = current_user_ns(); struct unix_gid *ug; int i; int glen; @@ -580,7 +586,7 @@ static int unix_gid_show(struct seq_file *m, seq_printf(m, "%u %d:", ug->uid, glen); for (i = 0; i < glen; i++) - seq_printf(m, " %d", GROUP_AT(ug->gi, i)); + seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i))); seq_printf(m, "\n"); return 0; } @@ -831,8 +837,12 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) cred->cr_group_info = groups_alloc(slen); if (cred->cr_group_info == NULL) return SVC_CLOSE; - for (i = 0; i < slen; i++) - GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); + for (i = 0; i < slen; i++) { + kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); + if (!gid_valid(kgid)) + goto badcred; + GROUP_AT(cred->cr_group_info, i) = kgid; + } if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; diff --git a/security/commoncap.c b/security/commoncap.c index f80d116..e771cb1 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -77,12 +77,12 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, int cap, int audit) { for (;;) { - /* The creator of the user namespace has all caps. */ - if (targ_ns != &init_user_ns && targ_ns->creator == cred->user) + /* The owner of the user namespace has all caps. */ + if (targ_ns != &init_user_ns && uid_eq(targ_ns->owner, cred->euid)) return 0; /* Do we have the necessary capabilities? */ - if (targ_ns == cred->user->user_ns) + if (targ_ns == cred->user_ns) return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; /* Have we tried all of the parent namespaces? */ @@ -93,7 +93,7 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, *If you have a capability in a parent user ns, then you have * it over all children user namespaces as well. */ - targ_ns = targ_ns->creator->user_ns; + targ_ns = targ_ns->parent; } /* We never get here */ @@ -137,10 +137,10 @@ int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) rcu_read_lock(); cred = current_cred(); child_cred = __task_cred(child); - if (cred->user->user_ns == child_cred->user->user_ns && + if (cred->user_ns == child_cred->user_ns && cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) goto out; - if (ns_capable(child_cred->user->user_ns, CAP_SYS_PTRACE)) + if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE)) goto out; ret = -EPERM; out: @@ -169,10 +169,10 @@ int cap_ptrace_traceme(struct task_struct *parent) rcu_read_lock(); cred = __task_cred(parent); child_cred = current_cred(); - if (cred->user->user_ns == child_cred->user->user_ns && + if (cred->user_ns == child_cred->user_ns && cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) goto out; - if (has_ns_capability(parent, child_cred->user->user_ns, CAP_SYS_PTRACE)) + if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE)) goto out; ret = -EPERM; out: @@ -215,7 +215,7 @@ static inline int cap_inh_is_capped(void) /* they are so limited unless the current task has the CAP_SETPCAP * capability */ - if (cap_capable(current_cred(), current_cred()->user->user_ns, + if (cap_capable(current_cred(), current_cred()->user_ns, CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0) return 0; return 1; @@ -473,19 +473,22 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) struct cred *new = bprm->cred; bool effective, has_cap = false; int ret; + kuid_t root_uid; effective = false; ret = get_file_caps(bprm, &effective, &has_cap); if (ret < 0) return ret; + root_uid = make_kuid(new->user_ns, 0); + if (!issecure(SECURE_NOROOT)) { /* * If the legacy file capability is set, then don't set privs * for a setuid root binary run by a non-root user. Do set it * for a root user just to cause least surprise to an admin. */ - if (has_cap && new->uid != 0 && new->euid == 0) { + if (has_cap && !uid_eq(new->uid, root_uid) && uid_eq(new->euid, root_uid)) { warn_setuid_and_fcaps_mixed(bprm->filename); goto skip; } @@ -496,12 +499,12 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) * * If only the real uid is 0, we do not set the effective bit. */ - if (new->euid == 0 || new->uid == 0) { + if (uid_eq(new->euid, root_uid) || uid_eq(new->uid, root_uid)) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ new->cap_permitted = cap_combine(old->cap_bset, old->cap_inheritable); } - if (new->euid == 0) + if (uid_eq(new->euid, root_uid)) effective = true; } skip: @@ -516,8 +519,8 @@ skip: * * In addition, if NO_NEW_PRIVS, then ensure we get no new privs. */ - if ((new->euid != old->uid || - new->egid != old->gid || + if ((!uid_eq(new->euid, old->uid) || + !gid_eq(new->egid, old->gid) || !cap_issubset(new->cap_permitted, old->cap_permitted)) && bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) { /* downgrade; they get no more than they had, and maybe less */ @@ -553,7 +556,7 @@ skip: */ if (!cap_isclear(new->cap_effective)) { if (!cap_issubset(CAP_FULL_SET, new->cap_effective) || - new->euid != 0 || new->uid != 0 || + !uid_eq(new->euid, root_uid) || !uid_eq(new->uid, root_uid) || issecure(SECURE_NOROOT)) { ret = audit_log_bprm_fcaps(bprm, new, old); if (ret < 0) @@ -578,16 +581,17 @@ skip: int cap_bprm_secureexec(struct linux_binprm *bprm) { const struct cred *cred = current_cred(); + kuid_t root_uid = make_kuid(cred->user_ns, 0); - if (cred->uid != 0) { + if (!uid_eq(cred->uid, root_uid)) { if (bprm->cap_effective) return 1; if (!cap_isclear(cred->cap_permitted)) return 1; } - return (cred->euid != cred->uid || - cred->egid != cred->gid); + return (!uid_eq(cred->euid, cred->uid) || + !gid_eq(cred->egid, cred->gid)); } /** @@ -677,15 +681,21 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) */ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) { - if ((old->uid == 0 || old->euid == 0 || old->suid == 0) && - (new->uid != 0 && new->euid != 0 && new->suid != 0) && + kuid_t root_uid = make_kuid(old->user_ns, 0); + + if ((uid_eq(old->uid, root_uid) || + uid_eq(old->euid, root_uid) || + uid_eq(old->suid, root_uid)) && + (!uid_eq(new->uid, root_uid) && + !uid_eq(new->euid, root_uid) && + !uid_eq(new->suid, root_uid)) && !issecure(SECURE_KEEP_CAPS)) { cap_clear(new->cap_permitted); cap_clear(new->cap_effective); } - if (old->euid == 0 && new->euid != 0) + if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid)) cap_clear(new->cap_effective); - if (old->euid != 0 && new->euid == 0) + if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid)) new->cap_effective = new->cap_permitted; } @@ -718,11 +728,12 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) * if not, we might be a bit too harsh here. */ if (!issecure(SECURE_NO_SETUID_FIXUP)) { - if (old->fsuid == 0 && new->fsuid != 0) + kuid_t root_uid = make_kuid(old->user_ns, 0); + if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid)) new->cap_effective = cap_drop_fs_set(new->cap_effective); - if (old->fsuid != 0 && new->fsuid == 0) + if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid)) new->cap_effective = cap_raise_fs_set(new->cap_effective, new->cap_permitted); @@ -875,7 +886,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, || ((new->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ || (cap_capable(current_cred(), - current_cred()->user->user_ns, CAP_SETPCAP, + current_cred()->user_ns, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/ /* * [1] no changing of bits that are locked diff --git a/security/keys/key.c b/security/keys/key.c index c9bf66a..50d96d4 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -253,7 +253,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, quotalen = desclen + type->def_datalen; /* get hold of the key tracking for this user */ - user = key_user_lookup(uid, cred->user->user_ns); + user = key_user_lookup(uid, cred->user_ns); if (!user) goto no_memory_1; diff --git a/security/keys/permission.c b/security/keys/permission.c index 57d9636..0b4d019 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -36,7 +36,7 @@ int key_task_permission(const key_ref_t key_ref, const struct cred *cred, key = key_ref_to_ptr(key_ref); - if (key->user->user_ns != cred->user->user_ns) + if (key->user->user_ns != cred->user_ns) goto use_other_perms; /* use the second 8-bits of permissions for keys the caller owns */ @@ -53,7 +53,8 @@ int key_task_permission(const key_ref_t key_ref, const struct cred *cred, goto use_these_perms; } - ret = groups_search(cred->group_info, key->gid); + ret = groups_search(cred->group_info, + make_kgid(current_user_ns(), key->gid)); if (ret) { kperm = key->perm >> 8; goto use_these_perms; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index e137fcd..d71056d 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -860,7 +860,7 @@ void key_replace_session_keyring(void) new-> sgid = old-> sgid; new->fsgid = old->fsgid; new->user = get_uid(old->user); - new->user_ns = new->user->user_ns; + new->user_ns = get_user_ns(new->user_ns); new->group_info = get_group_info(old->group_info); new->securebits = old->securebits; |