/*
 * x86 single-step support code, common to 32-bit and 64-bit.
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/ptrace.h>

#ifdef CONFIG_X86_32
#include <linux/uaccess.h>

#include <asm/desc.h>

/*
 * Return EIP plus the CS segment base.  The segment limit is also
 * adjusted, clamped to the kernel/user address space (whichever is
 * appropriate), and returned in *eip_limit.
 *
 * The segment is checked, because it might have been changed by another
 * task between the original faulting instruction and here.
 *
 * If CS is no longer a valid code segment, or if EIP is beyond the
 * limit, or if it is a kernel address when CS is not a kernel segment,
 * then the returned value will be greater than *eip_limit.
 *
 * This is slow, but is very rarely executed.
 */
unsigned long get_segment_eip(struct pt_regs *regs, unsigned long *eip_limit)
{
	unsigned long ip = regs->ip;
	unsigned seg = regs->cs & 0xffff;
	u32 seg_ar, seg_limit, base, *desc;

	/* Unlikely, but must come before segment checks. */
	if (unlikely(regs->flags & VM_MASK)) {
		base = seg << 4;
		*eip_limit = base + 0xffff;
		return base + (ip & 0xffff);
	}

	/* The standard kernel/user address space limit. */
	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;

	/* By far the most common cases. */
	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
		return ip;

	/*
	 * Check that the segment exists, is within the current LDT/GDT
	 * size, that kernel/user (ring 0..3) has the appropriate
	 * privilege, that it's a code segment, and get the limit.
	 * (The 0x9800 mask covers the P, S and code/executable type bits
	 * of the access rights word returned by lar.)
	 */
	__asm__("larl %3,%0; lsll %3,%1"
		: "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
	if ((~seg_ar & 0x9800) || ip > seg_limit) {
		*eip_limit = 0;
		return 1;	/* So that returned ip > *eip_limit. */
	}

	/*
	 * Get the GDT/LDT descriptor base.  When you look for races in
	 * this code remember that LDT and other horrors are only used in
	 * user space.
	 */
	if (seg & (1<<2)) {
		/* Must lock the LDT while reading it. */
		mutex_lock(&current->mm->context.lock);
		desc = current->mm->context.ldt;
		desc = (void *)desc + (seg & ~7);
	} else {
		/* Must disable preemption while reading the GDT. */
		desc = (u32 *)get_cpu_gdt_table(get_cpu());
		desc = (void *)desc + (seg & ~7);
	}

	/* Decode the code segment base from the descriptor. */
	base = get_desc_base((struct desc_struct *)desc);

	if (seg & (1<<2))
		mutex_unlock(&current->mm->context.lock);
	else
		put_cpu();

	/*
	 * Adjust EIP and segment limit, and clamp at the kernel limit.
	 * It's legitimate for segments to wrap at 0xffffffff.
	 */
	seg_limit += base;
	if (seg_limit < *eip_limit && seg_limit >= base)
		*eip_limit = seg_limit;
	return ip + base;
}
#endif
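/*
 * Worked example (illustrative) of the LDT descriptor decoding done in
 * convert_rip_to_linear() below: the 32-bit segment base is scattered
 * across the two descriptor words, so a base of 0xab123456 is stored as
 * desc[0] bits 31..16 = 0x3456, desc[1] bits 7..0 = 0x12 and desc[1]
 * bits 31..24 = 0xab.  The expression
 *
 *	(desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000)
 *
 * reassembles it as 0x3456 | 0x120000 | 0xab000000 = 0xab123456.
 */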
#ifdef CONFIG_X86_32
static
#endif
unsigned long convert_rip_to_linear(struct task_struct *child,
				    struct pt_regs *regs)
{
	unsigned long addr, seg;

	addr = regs->ip;
	seg = regs->cs & 0xffff;
	if (v8086_mode(regs)) {
		addr = (addr & 0xffff) + (seg << 4);
		return addr;
	}

	/*
	 * We'll assume that the code segments in the GDT
	 * are all zero-based.  That is largely true: the
	 * TLS segments are used for data, and the PNPBIOS
	 * and APM bios ones we just ignore here.
	 */
	if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
		u32 *desc;
		unsigned long base;

		seg &= ~7UL;

		mutex_lock(&child->mm->context.lock);
		if (unlikely((seg >> 3) >= child->mm->context.size))
			addr = -1L;	/* bogus selector, access would fault */
		else {
			desc = child->mm->context.ldt + seg;
			base = ((desc[0] >> 16) |
				((desc[1] & 0xff) << 16) |
				(desc[1] & 0xff000000));

			/* 16-bit code segment? */
			if (!((desc[1] >> 22) & 1))
				addr &= 0xffff;
			addr += base;
		}
		mutex_unlock(&child->mm->context.lock);
	}

	return addr;
}

/*
 * Look at the instruction the tracee is about to execute and decide
 * whether it itself modifies EFLAGS.TF (popf or iret), skipping over
 * any prefix bytes on the way.
 */
static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
{
	int i, copied;
	unsigned char opcode[15];
	unsigned long addr = convert_rip_to_linear(child, regs);

	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
	for (i = 0; i < copied; i++) {
		switch (opcode[i]) {
		/* popf and iret */
		case 0x9d: case 0xcf:
			return 1;

			/* CHECKME: 64 65 */

		/* operand and address size prefixes */
		case 0x66: case 0x67:
			continue;
		/* irrelevant prefixes (segment overrides and repeats) */
		case 0x26: case 0x2e:
		case 0x36: case 0x3e:
		case 0x64: case 0x65:
		case 0xf0: case 0xf2: case 0xf3:
			continue;

#ifdef CONFIG_X86_64
		case 0x40 ... 0x4f:
			if (regs->cs != __USER_CS)
				/* 32-bit mode: register increment */
				return 0;
			/* 64-bit mode: REX prefix */
			continue;
#endif

			/* CHECKME: f2, f3 */

		/*
		 * pushf: NOTE! We should probably not let
		 * the user see the TF bit being set.  But
		 * it's more pain than it's worth to avoid
		 * it, and a debugger could emulate this
		 * all in user space if it _really_ cares.
		 */
		case 0x9c:
		default:
			return 0;
		}
	}
	return 0;
}

/*
 * Enable single-stepping.  Return nonzero if user mode is not using TF itself.
 */
static int enable_single_step(struct task_struct *child)
{
	struct pt_regs *regs = task_pt_regs(child);

	/*
	 * Always set TIF_SINGLESTEP - this guarantees that
	 * we single-step system calls etc.  This will also
	 * cause us to set TF when returning to user mode.
	 */
	set_tsk_thread_flag(child, TIF_SINGLESTEP);

	/*
	 * If TF was already set, don't do anything else
	 */
	if (regs->flags & X86_EFLAGS_TF)
		return 0;

	/* Set TF on the kernel stack.. */
	regs->flags |= X86_EFLAGS_TF;

	/*
	 * ..but if TF is changed by the instruction we will trace,
	 * don't mark it as being "us" that set it, so that we
	 * won't clear it by hand later.
	 */
	if (is_setting_trap_flag(child, regs))
		return 0;

	set_tsk_thread_flag(child, TIF_FORCED_TF);

	return 1;
}

/*
 * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running.
 */
static void write_debugctlmsr(struct task_struct *child, unsigned long val)
{
	child->thread.debugctlmsr = val;

	if (child != current)
		return;

#ifdef CONFIG_X86_64
	wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
#else
	wrmsr(MSR_IA32_DEBUGCTLMSR, val, 0);
#endif
}

/*
 * Enable single or block step.
 */
static void enable_step(struct task_struct *child, bool block)
{
	/*
	 * Make sure block stepping (BTF) is not enabled unless it should be.
	 * Note that we don't try to worry about any is_setting_trap_flag()
	 * instructions after the first when using block stepping.
	 * So no one should try to use debugger block stepping in a program
	 * that uses user-mode single stepping itself.
	 */
	if (enable_single_step(child) && block) {
		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
		write_debugctlmsr(child,
				  child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
	} else {
		/* Clear only the BTF bit in the MSR value. */
		write_debugctlmsr(child,
				  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);

		if (!child->thread.debugctlmsr)
			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
	}
}

void user_enable_single_step(struct task_struct *child)
{
	enable_step(child, 0);
}

void user_enable_block_step(struct task_struct *child)
{
	enable_step(child, 1);
}
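/*
 * Informal summary of the stepping flags used in this file:
 *
 *	TIF_SINGLESTEP	- the task is being single- or block-stepped;
 *			  set unconditionally by enable_single_step().
 *	TIF_FORCED_TF	- X86_EFLAGS_TF was set by the kernel rather than
 *			  by user code, so user_disable_single_step()
 *			  below may safely clear it again.
 *	TIF_DEBUGCTLMSR	- thread.debugctlmsr is nonzero and must be
 *			  loaded into MSR_IA32_DEBUGCTLMSR whenever the
 *			  task runs.
 */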
void user_disable_single_step(struct task_struct *child)
{
	/*
	 * Make sure block stepping (BTF) is disabled.
	 */
	write_debugctlmsr(child,
			  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);

	if (!child->thread.debugctlmsr)
		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);

	/* Always clear TIF_SINGLESTEP... */
	clear_tsk_thread_flag(child, TIF_SINGLESTEP);

	/* But touch TF only if it was set by us.. */
	if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
		task_pt_regs(child)->flags &= ~X86_EFLAGS_TF;
}
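/*
 * Usage sketch (user space, illustrative): these entry points are not
 * called directly; a debugger reaches user_enable_single_step() through
 * the generic ptrace resume path, roughly:
 *
 *	ptrace(PTRACE_ATTACH, pid, NULL, NULL);
 *	waitpid(pid, &status, 0);
 *	ptrace(PTRACE_SINGLESTEP, pid, NULL, 0);	step one instruction
 *	waitpid(pid, &status, 0);			tracee stops with SIGTRAP
 *	ptrace(PTRACE_DETACH, pid, NULL, 0);
 *
 * PTRACE_SINGLEBLOCK (available where arch_has_block_step() is true, as
 * on x86) takes the user_enable_block_step() path instead.
 */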