Add user mode execution with syscall support

- Add PIC driver to remap IRQs 0-15 to vectors 0x20-0x2F, preventing
  conflicts with CPU exception vectors (IRQ0/timer was hitting vector
  0x08/Double Fault)
- Extend GDT with user code/data segments (ring 3) and TSS for
  privilege level transitions
- Add INT 0x80 syscall handler supporting write() and exit() syscalls
- Configure IST1 for double fault handler to ensure reliable exception
  handling during privilege transitions
- Propagate USER flag through all intermediate page table entries
- Add jump_to_user() for transitioning to ring 3 via IRETQ

The kernel now successfully executes user-mode programs that can print
to serial output and exit cleanly via syscalls.
This commit is contained in:
wilkie
2025-12-28 01:59:56 -05:00
parent d87c7cc4bd
commit 4e76831fb0
8 changed files with 1133 additions and 64 deletions

View File

@@ -1,12 +1,29 @@
//! Global Descriptor Table (GDT) for x86-64 //! Global Descriptor Table (GDT) for x86-64
//! //!
//! This module provides a kernel-space GDT that can be used after //! This module provides the GDT with kernel and user segments, plus TSS.
//! identity mapping is removed. //!
//! GDT Layout:
//! - 0x00: Null descriptor
//! - 0x08: Kernel code segment (ring 0)
//! - 0x10: Kernel data segment (ring 0)
//! - 0x18: User data segment (ring 3)
//! - 0x20: User code segment (ring 3)
//! - 0x28: TSS descriptor (16 bytes, spans 0x28-0x37)
use core::arch::asm; use core::arch::asm;
use core::mem::size_of; use core::mem::size_of;
/// GDT entry (segment descriptor) /// Segment selectors
/// Segment selector values matching the GDT layout documented above.
///
/// User-mode selectors carry RPL = 3 in their low two bits so they can be
/// loaded while the CPU executes at ring 3.
pub mod selectors {
    /// Requested Privilege Level encoded into user-mode selectors.
    const RPL_USER: u16 = 3;

    pub const NULL: u16 = 0x00;
    pub const KERNEL_CODE: u16 = 0x08;
    pub const KERNEL_DATA: u16 = 0x10;
    pub const USER_DATA: u16 = 0x18 | RPL_USER;
    pub const USER_CODE: u16 = 0x20 | RPL_USER;
    pub const TSS: u16 = 0x28;
}
/// GDT entry (segment descriptor) - 8 bytes
#[repr(C, packed)] #[repr(C, packed)]
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
pub struct GdtEntry { pub struct GdtEntry {
@@ -31,29 +48,133 @@ impl GdtEntry {
} }
} }
/// Create a 64-bit code segment descriptor /// Create a 64-bit kernel code segment
pub const fn code64() -> Self { pub const fn kernel_code() -> Self {
Self { Self {
limit_low: 0xFFFF, limit_low: 0xFFFF,
base_low: 0, base_low: 0,
base_mid: 0, base_mid: 0,
access: 0x9A, // Present, ring 0, code, exec/read access: 0x9A, // Present, DPL 0, code, exec/read
flags_limit_high: 0xAF, // 64-bit, limit high nibble flags_limit_high: 0xAF, // 64-bit, limit high
base_high: 0, base_high: 0,
} }
} }
/// Create a data segment descriptor /// Create a kernel data segment
pub const fn data() -> Self { pub const fn kernel_data() -> Self {
Self { Self {
limit_low: 0xFFFF, limit_low: 0xFFFF,
base_low: 0, base_low: 0,
base_mid: 0, base_mid: 0,
access: 0x92, // Present, ring 0, data, read/write access: 0x92, // Present, DPL 0, data, read/write
flags_limit_high: 0xCF, // 32-bit, 4KB granularity flags_limit_high: 0xCF, // 32-bit, 4KB granularity
base_high: 0, base_high: 0,
} }
} }
/// Build the ring-3 (user) 64-bit code segment descriptor.
///
/// Identical to the kernel code segment except the access byte carries
/// DPL = 3, permitting execution at CPL 3.
pub const fn user_code() -> Self {
    Self {
        base_low: 0,
        base_mid: 0,
        base_high: 0,
        limit_low: 0xFFFF,
        access: 0xFA,           // present | DPL 3 | code | exec/read
        flags_limit_high: 0xAF, // L bit (64-bit) | limit bits 16-19
    }
}
/// Build the ring-3 (user) data segment descriptor.
///
/// Identical to the kernel data segment except the access byte carries
/// DPL = 3, so it can be loaded into DS/ES/SS while at CPL 3.
pub const fn user_data() -> Self {
    Self {
        base_low: 0,
        base_mid: 0,
        base_high: 0,
        limit_low: 0xFFFF,
        access: 0xF2,           // present | DPL 3 | data | read/write
        flags_limit_high: 0xCF, // 32-bit default | 4 KiB granularity
    }
}
}
/// TSS descriptor (16 bytes in 64-bit mode)
///
/// Unlike code/data descriptors, a system-segment descriptor in long mode
/// spans two GDT slots: the upper 8 bytes hold bits 32-63 of the base
/// address plus a reserved dword.
#[repr(C, packed)]
#[derive(Clone, Copy)]
pub struct TssDescriptor {
    limit_low: u16,
    base_low: u16,
    base_mid: u8,
    access: u8,
    flags_limit_high: u8,
    base_high: u8,
    base_upper: u32,
    reserved: u32,
}

impl TssDescriptor {
    /// An all-zero (not-present) placeholder descriptor.
    pub const fn null() -> Self {
        Self {
            limit_low: 0,
            base_low: 0,
            base_mid: 0,
            access: 0,
            flags_limit_high: 0,
            base_high: 0,
            base_upper: 0,
            reserved: 0,
        }
    }

    /// Create a TSS descriptor for the given TSS address and size.
    ///
    /// `base` is the linear address of the `Tss`; `limit` is its size in
    /// bytes minus one. Declared `const` for consistency with the other
    /// descriptor constructors, so it can also be used in static
    /// initializers (backward compatible: every existing call site works).
    pub const fn new(base: u64, limit: u32) -> Self {
        Self {
            limit_low: limit as u16,
            base_low: base as u16,
            base_mid: (base >> 16) as u8,
            access: 0x89, // Present, 64-bit TSS (available)
            flags_limit_high: ((limit >> 16) as u8) & 0x0F,
            base_high: (base >> 24) as u8,
            base_upper: (base >> 32) as u32,
            reserved: 0,
        }
    }
}
/// Task State Segment (TSS) for x86-64
///
/// The TSS is used primarily for:
/// - RSP0: Stack to use when transitioning from ring 3 to ring 0
/// - IST: Interrupt Stack Table for specific interrupts
///
/// Field order and `repr(C, packed)` are mandatory: the CPU reads this
/// structure directly via the TSS descriptor, so the layout is fixed by
/// hardware and must not be reordered.
#[repr(C, packed)]
pub struct Tss {
reserved0: u32,
/// Stack pointers for privilege levels 0-2
pub rsp0: u64,
pub rsp1: u64,
pub rsp2: u64,
reserved1: u64,
/// Interrupt Stack Table (IST) entries 1-7
pub ist: [u64; 7],
reserved2: u64,
reserved3: u16,
/// I/O map base address
pub iopb: u16,
}
impl Tss {
/// Build a zeroed TSS. RSP0/IST entries are filled in later via
/// `set_kernel_stack()` / `set_ist()` before user mode is entered.
pub const fn new() -> Self {
Self {
reserved0: 0,
rsp0: 0,
rsp1: 0,
rsp2: 0,
reserved1: 0,
ist: [0; 7],
reserved2: 0,
reserved3: 0,
// IOPB offset pointing at/past the end of the TSS conventionally
// means "no I/O permission bitmap" — verify against Intel SDM.
iopb: size_of::<Tss>() as u16,
}
}
} }
/// GDT pointer for LGDT instruction /// GDT pointer for LGDT instruction
@@ -63,53 +184,83 @@ pub struct GdtPointer {
base: u64, base: u64,
} }
/// Number of GDT entries /// Combined GDT structure with all entries
const GDT_ENTRIES: usize = 3;
/// Kernel GDT with null, code, and data segments
#[repr(C, align(16))] #[repr(C, align(16))]
pub struct Gdt { pub struct Gdt {
entries: [GdtEntry; GDT_ENTRIES], null: GdtEntry,
kernel_code: GdtEntry,
kernel_data: GdtEntry,
user_data: GdtEntry,
user_code: GdtEntry,
tss: TssDescriptor,
} }
impl Gdt { impl Gdt {
pub const fn new() -> Self { pub const fn new() -> Self {
Self { Self {
entries: [ null: GdtEntry::null(),
GdtEntry::null(), // 0x00: Null descriptor kernel_code: GdtEntry::kernel_code(),
GdtEntry::code64(), // 0x08: Kernel code segment kernel_data: GdtEntry::kernel_data(),
GdtEntry::data(), // 0x10: Kernel data segment user_data: GdtEntry::user_data(),
], user_code: GdtEntry::user_code(),
tss: TssDescriptor::null(),
} }
} }
/// Install/overwrite the TSS descriptor slot of this GDT.
///
/// `base` is the linear address of the `Tss`; `limit` is its size in
/// bytes minus one. Must be done before `ltr` loads the TSS selector.
pub fn set_tss(&mut self, base: u64, limit: u32) {
self.tss = TssDescriptor::new(base, limit);
}
} }
/// Static kernel GDT (in higher-half memory) use core::cell::UnsafeCell;
static KERNEL_GDT: Gdt = Gdt::new();
/// Reload the GDT with the kernel-space GDT struct SyncGdt(UnsafeCell<Gdt>);
// SAFETY(review): Sync is asserted on the assumption that the GDT is only
// mutated during single-threaded early boot — revisit before enabling SMP.
unsafe impl Sync for SyncGdt {}
// Interior-mutability wrapper so the static TSS can be patched at runtime
// (RSP0/IST updates) despite living in a `static`.
struct SyncTss(UnsafeCell<Tss>);
// SAFETY(review): same single-threaded-boot assumption as SyncGdt above.
unsafe impl Sync for SyncTss {}
/// Static kernel GDT
static KERNEL_GDT: SyncGdt = SyncGdt(UnsafeCell::new(Gdt::new()));
/// Static kernel TSS
static KERNEL_TSS: SyncTss = SyncTss(UnsafeCell::new(Tss::new()));
/// Initialize the GDT with TSS
/// ///
/// This should be called before removing identity mapping to ensure /// This sets up the full GDT including user segments and TSS,
/// the GDT is accessible after the low memory is unmapped. /// then loads it into the CPU.
pub fn reload() { pub fn init(kernel_stack: u64) {
let gdt = unsafe { &mut *KERNEL_GDT.0.get() };
let tss = unsafe { &mut *KERNEL_TSS.0.get() };
// Set up TSS with kernel stack for ring 0
tss.rsp0 = kernel_stack;
// Update GDT with TSS descriptor
let tss_addr = tss as *const Tss as u64;
let tss_limit = (size_of::<Tss>() - 1) as u32;
gdt.set_tss(tss_addr, tss_limit);
// Load GDT
let gdt_size = size_of::<Gdt>();
let pointer = GdtPointer { let pointer = GdtPointer {
limit: (size_of::<[GdtEntry; GDT_ENTRIES]>() - 1) as u16, limit: (gdt_size - 1) as u16,
base: KERNEL_GDT.entries.as_ptr() as u64, base: gdt as *const Gdt as u64,
}; };
unsafe { unsafe {
// Load new GDT
asm!("lgdt [{}]", in(reg) &pointer, options(nostack, preserves_flags)); asm!("lgdt [{}]", in(reg) &pointer, options(nostack, preserves_flags));
// Reload code segment by doing a far return // Reload code segment
// Push SS, RSP, RFLAGS, CS, RIP and do IRETQ
asm!( asm!(
"push 0x10", // SS "push 0x10", // SS (kernel data)
"push rsp", // RSP "push rsp", // RSP
"add qword ptr [rsp], 8", // Adjust for the push "add qword ptr [rsp], 8",
"pushfq", // RFLAGS "pushfq", // RFLAGS
"push 0x08", // CS "push 0x08", // CS (kernel code)
"lea rax, [rip + 2f]", // RIP (address of label 2) "lea rax, [rip + 2f]",
"push rax", "push rax",
"iretq", "iretq",
"2:", "2:",
@@ -119,19 +270,90 @@ pub fn reload() {
"mov es, ax", "mov es, ax",
"mov fs, ax", "mov fs, ax",
"mov gs, ax", "mov gs, ax",
// SS is already set by IRETQ out("rax") _,
options(preserves_flags)
);
// Load TSS
asm!(
"ltr {0:x}",
in(reg) selectors::TSS,
options(nostack, preserves_flags)
);
}
}
/// Reload the GDT (called before removing identity mapping)
///
/// This is the simpler reload that doesn't reinitialize TSS.
/// CS cannot be written with `mov`, so the code segment is reloaded by
/// building an interrupt-return frame by hand and executing IRETQ.
pub fn reload() {
let gdt = unsafe { &*KERNEL_GDT.0.get() };
let gdt_size = size_of::<Gdt>();
let pointer = GdtPointer {
limit: (gdt_size - 1) as u16,
base: gdt as *const Gdt as u64,
};
unsafe {
asm!("lgdt [{}]", in(reg) &pointer, options(nostack, preserves_flags));
// Reload segments
asm!(
"push 0x10", // SS: kernel data selector
"push rsp", // RSP (will be fixed up on the next line)
"add qword ptr [rsp], 8", // undo the effect of the `push rsp` itself
"pushfq", // RFLAGS
"push 0x08", // CS: kernel code selector
"lea rax, [rip + 2f]", // RIP: continue at local label 2
"push rax",
"iretq", // pops RIP/CS/RFLAGS/RSP/SS, loading the new CS
"2:",
"mov ax, 0x10", // reload remaining data segment registers
"mov ds, ax",
"mov es, ax",
"mov fs, ax",
"mov gs, ax",
out("rax") _, out("rax") _,
options(preserves_flags) options(preserves_flags)
); );
} }
} }
/// Point the TSS RSP0 field at a new kernel stack top.
///
/// RSP0 is the stack the CPU switches to when an interrupt or syscall
/// transitions from ring 3 to ring 0.
pub fn set_kernel_stack(stack: u64) {
    // SAFETY(review): assumes single-threaded early-boot access to the
    // static TSS — confirm before SMP.
    unsafe {
        (*KERNEL_TSS.0.get()).rsp0 = stack;
    }
}
/// Set an IST (Interrupt Stack Table) entry.
///
/// IST slots are numbered 1-7 (stored at array index 0-6); each provides a
/// dedicated, known-good stack for specific interrupt handlers. Indices
/// outside 1..=7 are silently ignored.
pub fn set_ist(ist_index: u8, stack: u64) {
    if !(1..=7).contains(&ist_index) {
        return;
    }
    // SAFETY(review): assumes single-threaded early-boot access to the
    // static TSS — confirm before SMP.
    unsafe {
        (*KERNEL_TSS.0.get()).ist[usize::from(ist_index) - 1] = stack;
    }
}
/// Get the kernel code segment selector /// Get the kernel code segment selector
pub const fn kernel_cs() -> u16 { pub const fn kernel_cs() -> u16 {
0x08 selectors::KERNEL_CODE
} }
/// Get the kernel data segment selector /// Get the kernel data segment selector
pub const fn kernel_ds() -> u16 { pub const fn kernel_ds() -> u16 {
0x10 selectors::KERNEL_DATA
}
/// Get the user code segment selector (includes RPL 3 in the low bits).
pub const fn user_cs() -> u16 {
selectors::USER_CODE
}
/// Get the user data segment selector (includes RPL 3 in the low bits).
pub const fn user_ds() -> u16 {
selectors::USER_DATA
} }

View File

@@ -123,6 +123,17 @@ impl Idt {
0, // No IST 0, // No IST
); );
} }
/// Set an interrupt handler with custom DPL
pub fn set_handler_dpl(&mut self, vector: u8, handler: u64, gate_type: GateType, dpl: u8) {
self.entries[vector as usize] = IdtEntry::new(
handler,
0x08, // Kernel code segment
gate_type,
dpl,
0, // No IST
);
}
} }
/// Interrupt stack frame pushed by CPU on interrupt/exception /// Interrupt stack frame pushed by CPU on interrupt/exception
@@ -271,6 +282,49 @@ unsafe fn load_idt(idt: *const Idt) {
} }
} }
/// Install an interrupt handler in the static IDT (public interface).
///
/// # Arguments
/// * `vector` - Interrupt vector number (0-255)
/// * `handler` - Handler function address
/// * `gate_type` - Type of gate (Interrupt or Trap)
/// * `dpl` - Descriptor Privilege Level (0 = kernel only, 3 = user callable)
pub fn set_handler(vector: u8, handler: u64, gate_type: GateType, dpl: u8) {
    // SAFETY(review): assumes single-threaded early-boot mutation of the
    // static IDT — confirm before SMP.
    unsafe { (*STATIC_IDT.0.get()).set_handler_dpl(vector, handler, gate_type, dpl) };
}
/// Set an interrupt handler with IST (Interrupt Stack Table) support
///
/// # Arguments
/// * `vector` - Interrupt vector number (0-255)
/// * `handler` - Handler function address
/// * `gate_type` - Type of gate (Interrupt or Trap)
/// * `ist` - IST index (1-7), or 0 for no IST
pub fn set_handler_with_ist(vector: u8, handler: u64, gate_type: GateType, ist: u8) {
let idt = unsafe { &mut *STATIC_IDT.0.get() };
idt.entries[vector as usize] = IdtEntry::new(
handler,
0x08, // Kernel code segment
gate_type,
0, // DPL 0 (kernel)
ist,
);
}
/// Route the double fault handler onto a dedicated IST stack.
///
/// Must be called after `init()` and after the corresponding IST stack has
/// been installed in the TSS via `gdt::set_ist()`.
pub fn set_double_fault_ist(ist: u8) {
    let handler = double_fault_handler as *const () as u64;
    set_handler_with_ist(vectors::DOUBLE_FAULT, handler, GateType::Trap, ist);
}
// ============================================================================ // ============================================================================
// Exception Handlers // Exception Handlers
// ============================================================================ // ============================================================================
@@ -404,7 +458,6 @@ extern "C" fn rust_exception_handler(state: &ExceptionState) {
exception_name(state.vector as u8), state.vector).ok(); exception_name(state.vector as u8), state.vector).ok();
writeln!(serial, " Error code: {:#x}", state.error_code).ok(); writeln!(serial, " Error code: {:#x}", state.error_code).ok();
writeln!(serial, " RIP: {:#x} CS: {:#x}", state.rip, state.cs).ok(); writeln!(serial, " RIP: {:#x} CS: {:#x}", state.rip, state.cs).ok();
writeln!(serial, " RSP: {:#x} SS: {:#x}", state.rsp, state.ss).ok();
writeln!(serial, " RFLAGS: {:#x}", state.rflags).ok(); writeln!(serial, " RFLAGS: {:#x}", state.rflags).ok();
writeln!(serial, " RAX: {:#018x} RBX: {:#018x}", state.rax, state.rbx).ok(); writeln!(serial, " RAX: {:#018x} RBX: {:#018x}", state.rax, state.rbx).ok();
writeln!(serial, " RCX: {:#018x} RDX: {:#018x}", state.rcx, state.rdx).ok(); writeln!(serial, " RCX: {:#018x} RDX: {:#018x}", state.rcx, state.rdx).ok();

View File

@@ -1,7 +1,9 @@
//! x86_64 architecture support //! x86_64 architecture support
pub mod interrupts;
pub mod gdt; pub mod gdt;
pub mod interrupts;
pub mod pic;
pub mod syscall;
/// Halt the CPU until the next interrupt /// Halt the CPU until the next interrupt
#[inline] #[inline]

244
src/arch/x86_64/pic.rs Normal file
View File

@@ -0,0 +1,244 @@
//! 8259 Programmable Interrupt Controller (PIC) driver
//!
//! The legacy PIC must be properly configured even if we plan to use the APIC,
//! because its default IRQ mappings (0x08-0x0F, 0x70-0x77) conflict with CPU
//! exception vectors:
//!
//! - IRQ0 (Timer) -> Vector 0x08 (Double Fault!)
//! - IRQ1 (Keyboard) -> Vector 0x09
//! - IRQ7 (Spurious) -> Vector 0x0F
//!
//! This module remaps the PICs to vectors 0x20-0x2F and provides control
//! over interrupt masking.
use core::arch::asm;
/// PIC1 (master) command port
const PIC1_CMD: u16 = 0x20;
/// PIC1 (master) data port
const PIC1_DATA: u16 = 0x21;
/// PIC2 (slave) command port
const PIC2_CMD: u16 = 0xA0;
/// PIC2 (slave) data port
const PIC2_DATA: u16 = 0xA1;
/// ICW1: Initialization Command Word 1
const ICW1_INIT: u8 = 0x10;
const ICW1_ICW4: u8 = 0x01; // ICW4 needed
/// ICW4: Initialization Command Word 4
const ICW4_8086: u8 = 0x01; // 8086/88 mode
/// Vector offset for PIC1 IRQs (IRQ0-7 -> vectors 0x20-0x27)
pub const PIC1_OFFSET: u8 = 0x20;
/// Vector offset for PIC2 IRQs (IRQ8-15 -> vectors 0x28-0x2F)
pub const PIC2_OFFSET: u8 = 0x28;
/// IRQ numbers
///
/// IRQ 0-7 arrive on the master PIC, IRQ 8-15 on the slave (routed through
/// the cascade line, IRQ2).
pub mod irq {
pub const TIMER: u8 = 0;
pub const KEYBOARD: u8 = 1;
pub const CASCADE: u8 = 2; // Used internally for PIC1-PIC2 cascade
pub const COM2: u8 = 3;
pub const COM1: u8 = 4;
pub const LPT2: u8 = 5;
pub const FLOPPY: u8 = 6;
pub const LPT1: u8 = 7; // Also spurious IRQ
pub const RTC: u8 = 8;
pub const FREE1: u8 = 9;
pub const FREE2: u8 = 10;
pub const FREE3: u8 = 11;
pub const MOUSE: u8 = 12;
pub const FPU: u8 = 13;
pub const ATA_PRIMARY: u8 = 14;
pub const ATA_SECONDARY: u8 = 15;
}
/// Write a byte to an I/O port
///
/// # Safety
/// Port I/O is a privileged operation with device-specific side effects;
/// the caller must ensure `port` is safe to write in the current context.
#[inline]
unsafe fn outb(port: u16, value: u8) {
// SAFETY: `out dx, al` only touches the named registers and the port.
unsafe {
asm!(
"out dx, al",
in("dx") port,
in("al") value,
options(nostack, nomem, preserves_flags)
);
}
}
/// Read a byte from an I/O port
///
/// # Safety
/// Port I/O is privileged and reads may have device side effects; the
/// caller must ensure `port` is safe to read in the current context.
#[inline]
unsafe fn inb(port: u16) -> u8 {
let value: u8;
// SAFETY: `in al, dx` only touches the named registers and the port.
unsafe {
asm!(
"in al, dx",
in("dx") port,
out("al") value,
options(nostack, nomem, preserves_flags)
);
}
value
}
/// Small I/O delay for PIC timing requirements
///
/// The 8259 needs a short pause between initialization writes; a dummy
/// port write is the traditional way to provide one.
#[inline]
unsafe fn io_wait() {
// Write to an unused port to create a small delay
// Port 0x80 is used for POST codes and is safe to write to
unsafe { outb(0x80, 0); }
}
/// Initialize and remap both PICs
///
/// This remaps:
/// - PIC1 (IRQ 0-7) to vectors 0x20-0x27
/// - PIC2 (IRQ 8-15) to vectors 0x28-0x2F
///
/// After initialization, all IRQs are masked (disabled).
///
/// The ICW1..ICW4 write sequence below is order-sensitive: once ICW1 is
/// written to the command port, the PIC interprets the next data-port
/// writes as ICW2, ICW3 and ICW4 in turn.
pub fn init() {
unsafe {
// Save current masks
let mask1 = inb(PIC1_DATA);
let mask2 = inb(PIC2_DATA);
// ICW1: Start initialization sequence (cascade mode, ICW4 needed)
outb(PIC1_CMD, ICW1_INIT | ICW1_ICW4);
io_wait();
outb(PIC2_CMD, ICW1_INIT | ICW1_ICW4);
io_wait();
// ICW2: Set vector offsets
outb(PIC1_DATA, PIC1_OFFSET);
io_wait();
outb(PIC2_DATA, PIC2_OFFSET);
io_wait();
// ICW3: Configure cascade
// Tell PIC1 that PIC2 is at IRQ2 (bit 2 = 0x04)
outb(PIC1_DATA, 0x04);
io_wait();
// Tell PIC2 its cascade identity (IRQ2 = 2)
outb(PIC2_DATA, 0x02);
io_wait();
// ICW4: Set 8086 mode
outb(PIC1_DATA, ICW4_8086);
io_wait();
outb(PIC2_DATA, ICW4_8086);
io_wait();
// Mask all interrupts (we'll unmask specific ones as needed)
outb(PIC1_DATA, 0xFF);
outb(PIC2_DATA, 0xFF);
// Note: We intentionally mask all interrupts rather than restoring
// the old masks, since we want to start with a clean slate
let _ = (mask1, mask2); // Suppress unused warning
}
}
/// Disable the PIC entirely by masking every IRQ line.
///
/// Writing an all-ones Interrupt Mask Register to each PIC's data port
/// inhibits delivery of all 16 IRQs. Useful when switching to APIC mode.
pub fn disable() {
    const MASK_ALL: u8 = 0xFF;
    unsafe {
        outb(PIC1_DATA, MASK_ALL);
        outb(PIC2_DATA, MASK_ALL);
    }
}
/// Mask (disable) a specific IRQ line (0-15).
///
/// Out-of-range IRQ numbers are ignored: without the guard,
/// `1 << (irq - 8)` overflows the u8 shift for irq >= 16 (panic in debug
/// builds) and would compute a garbage mask in release builds.
pub fn mask_irq(irq: u8) {
    if irq > 15 {
        return; // Invalid IRQ number
    }
    let port = if irq < 8 { PIC1_DATA } else { PIC2_DATA };
    let irq_bit = if irq < 8 { irq } else { irq - 8 };
    unsafe {
        // Read-modify-write the Interrupt Mask Register: set bit = masked.
        let mask = inb(port) | (1 << irq_bit);
        outb(port, mask);
    }
}
/// Unmask (enable) a specific IRQ line (0-15).
///
/// For IRQ 8-15 the cascade line (IRQ2) on PIC1 is unmasked as well, since
/// slave-PIC interrupts are delivered through it.
///
/// Out-of-range IRQ numbers are ignored: without the guard,
/// `1 << (irq - 8)` overflows the u8 shift for irq >= 16 (panic in debug
/// builds) and would compute a garbage mask in release builds.
pub fn unmask_irq(irq: u8) {
    if irq > 15 {
        return; // Invalid IRQ number
    }
    let port = if irq < 8 { PIC1_DATA } else { PIC2_DATA };
    let irq_bit = if irq < 8 { irq } else { irq - 8 };
    unsafe {
        // Read-modify-write the Interrupt Mask Register: clear bit = enabled.
        let mask = inb(port) & !(1 << irq_bit);
        outb(port, mask);
    }
    // If unmasking an IRQ on PIC2, also unmask the cascade IRQ on PIC1
    if irq >= 8 {
        unsafe {
            let mask = inb(PIC1_DATA) & !(1 << irq::CASCADE);
            outb(PIC1_DATA, mask);
        }
    }
}
/// Send End-Of-Interrupt (EOI) signal
///
/// This must be called at the end of an IRQ handler to acknowledge
/// the interrupt and allow further interrupts.
///
/// Slave-PIC IRQs (8-15) require an EOI to both controllers because the
/// interrupt travelled through PIC1's cascade line.
pub fn send_eoi(irq: u8) {
const EOI: u8 = 0x20;
unsafe {
// If IRQ came from PIC2, send EOI to both PICs
if irq >= 8 {
outb(PIC2_CMD, EOI);
}
outb(PIC1_CMD, EOI);
}
}
/// Check if an IRQ is a spurious IRQ
///
/// Spurious IRQs (IRQ7 or IRQ15) can occur due to electrical noise
/// or race conditions. They should be checked before handling.
pub fn is_spurious(irq: u8) -> bool {
// OCW3 command 0x0B selects the In-Service Register for the next read
// from the command port.
const ISR_READ: u8 = 0x0B;
if irq == 7 {
// Check PIC1's In-Service Register
unsafe {
outb(PIC1_CMD, ISR_READ);
let isr = inb(PIC1_CMD);
// If bit 7 is not set, it's spurious
// (bit 7 of PIC1's ISR corresponds to IRQ7)
return (isr & 0x80) == 0;
}
} else if irq == 15 {
// Check PIC2's In-Service Register
unsafe {
outb(PIC2_CMD, ISR_READ);
let isr = inb(PIC2_CMD);
// If bit 7 is not set, it's spurious
// (bit 7 of PIC2's ISR corresponds to IRQ15)
if (isr & 0x80) == 0 {
// Still need to send EOI to PIC1 (for cascade)
outb(PIC1_CMD, 0x20);
return true;
}
}
}
false
}
/// Read the combined 16-bit IRQ mask.
///
/// The low byte is PIC1's Interrupt Mask Register, the high byte PIC2's;
/// a set bit means the corresponding IRQ is masked (disabled).
pub fn get_mask() -> u16 {
    unsafe {
        let lo = u16::from(inb(PIC1_DATA));
        let hi = u16::from(inb(PIC2_DATA));
        (hi << 8) | lo
    }
}
/// Write the combined 16-bit IRQ mask.
///
/// Low byte goes to PIC1's data port, high byte to PIC2's; a set bit
/// masks (disables) the corresponding IRQ.
pub fn set_mask(mask: u16) {
    let (lo, hi) = (mask as u8, (mask >> 8) as u8);
    unsafe {
        outb(PIC1_DATA, lo);
        outb(PIC2_DATA, hi);
    }
}

232
src/arch/x86_64/syscall.rs Normal file
View File

@@ -0,0 +1,232 @@
//! System Call Interface for x86-64
//!
//! This module implements system calls using INT 0x80.
//! This is simpler than SYSCALL/SYSRET and uses the existing IDT infrastructure.
//!
//! Syscall Convention:
//! - RAX = syscall number
//! - RDI, RSI, RDX, R10, R8, R9 = arguments
//! - RAX = return value
use core::arch::asm;
use crate::arch::x86_64::gdt;
/// System call interrupt vector
pub const SYSCALL_VECTOR: u8 = 0x80;
/// System call numbers
pub mod numbers {
/// exit(code) — terminate the calling program (currently halts).
pub const EXIT: u64 = 0;
/// write(fd, buf, len) — fd 1/2 go to serial output.
pub const WRITE: u64 = 1;
/// yield() — currently a no-op.
pub const YIELD: u64 = 2;
/// getpid() — return the current process ID.
pub const GETPID: u64 = 3;
}
/// Install the INT 0x80 syscall gate in the IDT.
///
/// The gate is a trap gate with DPL 3, so ring-3 code may invoke it via
/// the `int 0x80` instruction.
pub fn init() {
    use crate::arch::x86_64::interrupts::{set_handler, GateType};
    let entry = syscall_entry as *const () as u64;
    set_handler(SYSCALL_VECTOR, entry, GateType::Trap, 3);
}
/// Syscall entry point
///
/// This is registered as the INT 0x80 handler.
/// Stack on entry (pushed by CPU):
/// - SS, RSP, RFLAGS, CS, RIP (if from ring 3)
/// - Error code (none for INT)
///
/// NOTE: the push sequence below must stay the exact mirror of the field
/// order in `SyscallFrame` — R15 is pushed last, so it sits at the lowest
/// address (RSP) when the frame pointer is handed to the Rust handler.
#[unsafe(naked)]
extern "C" fn syscall_entry() {
core::arch::naked_asm!(
// No error code for software interrupts
// Save all registers
"push rax",
"push rbx",
"push rcx",
"push rdx",
"push rsi",
"push rdi",
"push rbp",
"push r8",
"push r9",
"push r10",
"push r11",
"push r12",
"push r13",
"push r14",
"push r15",
// Call Rust syscall handler
// First arg (RDI) = pointer to saved state
"mov rdi, rsp",
"call {handler}",
// Return value is in RAX, save it to the stack frame
// (15 registers were pushed; the saved RAX is the 15th slot, offset 14*8)
"mov [rsp + 14*8], rax", // Overwrite saved RAX
// Restore all registers
"pop r15",
"pop r14",
"pop r13",
"pop r12",
"pop r11",
"pop r10",
"pop r9",
"pop r8",
"pop rbp",
"pop rdi",
"pop rsi",
"pop rdx",
"pop rcx",
"pop rbx",
"pop rax",
// Return from interrupt
"iretq",
handler = sym syscall_handler_rust,
);
}
/// Saved register state for syscall
///
/// Layout mirrors the push sequence in `syscall_entry`: fields are listed
/// lowest-address-first, i.e. in the reverse of the push order (R15 was
/// pushed last, so it is the first field). `repr(C)` keeps the declared
/// order; do not reorder fields without changing the asm.
#[repr(C)]
pub struct SyscallFrame {
pub r15: u64,
pub r14: u64,
pub r13: u64,
pub r12: u64,
pub r11: u64,
pub r10: u64,
pub r9: u64,
pub r8: u64,
pub rbp: u64,
pub rdi: u64,
pub rsi: u64,
pub rdx: u64,
pub rcx: u64,
pub rbx: u64,
pub rax: u64,
// CPU-pushed
pub rip: u64,
pub cs: u64,
pub rflags: u64,
pub rsp: u64,
pub ss: u64,
}
/// Rust syscall handler — dispatches on the syscall number.
///
/// Convention: RAX = syscall number; RDI, RSI, RDX, R10, R8 carry the
/// arguments. The value returned here is written back into the frame's
/// saved RAX slot by `syscall_entry`, so user mode observes it as the
/// syscall return value.
extern "C" fn syscall_handler_rust(frame: &SyscallFrame) -> u64 {
    let num = frame.rax;
    let arg1 = frame.rdi;
    let arg2 = frame.rsi;
    let arg3 = frame.rdx;
    // Args 4 and 5 (R10, R8) are part of the convention but unused by the
    // current syscalls; underscore-bind them to document the ABI without
    // triggering unused-variable warnings.
    let _arg4 = frame.r10;
    let _arg5 = frame.r8;
    match num {
        numbers::EXIT => {
            use crate::serial::SerialPort;
            use core::fmt::Write;
            let mut serial = unsafe { SerialPort::new(0x3F8) };
            writeln!(serial, "\n[SYSCALL] exit({})", arg1).ok();
            // For now, just halt. In a real OS, we'd terminate the process
            // and schedule another one.
            loop {
                unsafe { asm!("cli; hlt", options(nostack, nomem)); }
            }
        }
        numbers::WRITE => {
            // write(fd, buf, len) -> bytes_written
            if arg1 == 1 || arg1 == 2 {
                // stdout or stderr -> serial
                use crate::serial::SerialPort;
                let mut serial = unsafe { SerialPort::new(0x3F8) };
                let buf = arg2 as *const u8;
                let len = arg3 as usize;
                // Safety: we trust the user buffer for now
                // In a real OS, we'd validate it's in user memory
                for i in 0..len {
                    let c = unsafe { *buf.add(i) };
                    serial.write_byte(c);
                }
                len as u64
            } else {
                u64::MAX // -1 = error
            }
        }
        numbers::YIELD => {
            // No-op for single process kernel
            0
        }
        numbers::GETPID => {
            // Return current process ID
            crate::process::current().pid as u64
        }
        _ => {
            // Unknown syscall
            use crate::serial::SerialPort;
            use core::fmt::Write;
            let mut serial = unsafe { SerialPort::new(0x3F8) };
            writeln!(serial, "[SYSCALL] Unknown syscall: {}", num).ok();
            u64::MAX
        }
    }
}
/// Make a syscall with no arguments from user mode (for testing).
///
/// # Safety
/// The INT 0x80 gate must be installed (see `init`); the kernel handler
/// restores every register except RAX, which carries the return value.
#[inline]
pub unsafe fn syscall0(num: u64) -> u64 {
    let mut ret = num;
    unsafe {
        asm!(
            "int 0x80",
            // RAX carries the syscall number in and the result out.
            inout("rax") ret,
            options(nostack)
        );
    }
    ret
}
/// Make a one-argument syscall from user mode (for testing).
///
/// # Safety
/// The INT 0x80 gate must be installed (see `init`); the kernel handler
/// restores every register except RAX, which carries the return value.
#[inline]
pub unsafe fn syscall1(num: u64, arg1: u64) -> u64 {
    let mut ret = num;
    unsafe {
        asm!(
            "int 0x80",
            // RAX carries the syscall number in and the result out.
            inout("rax") ret,
            in("rdi") arg1,
            options(nostack)
        );
    }
    ret
}
/// Make a three-argument syscall from user mode (for testing).
///
/// # Safety
/// The INT 0x80 gate must be installed (see `init`); the kernel handler
/// restores every register except RAX, which carries the return value.
#[inline]
pub unsafe fn syscall3(num: u64, arg1: u64, arg2: u64, arg3: u64) -> u64 {
    let mut ret = num;
    unsafe {
        asm!(
            "int 0x80",
            // RAX carries the syscall number in and the result out.
            inout("rax") ret,
            in("rdi") arg1,
            in("rsi") arg2,
            in("rdx") arg3,
            options(nostack)
        );
    }
    ret
}

View File

@@ -109,8 +109,15 @@ pub fn kernel_init(info: &BootInfo) -> ! {
// Get serial port for output // Get serial port for output
let mut serial = unsafe { SerialPort::new(0x3F8) }; let mut serial = unsafe { SerialPort::new(0x3F8) };
// CRITICAL: Initialize and remap the PIC first!
// The legacy PIC's default IRQ0 (timer) maps to vector 0x08, which conflicts
// with the Double Fault exception. This causes spurious "double faults" when
// the timer fires. We remap the PIC to vectors 0x20-0x2F and mask all IRQs.
arch::x86_64::pic::init();
writeln!(serial, "").ok(); writeln!(serial, "").ok();
writeln!(serial, ">>> Entering kernel_init()").ok(); writeln!(serial, ">>> Entering kernel_init()").ok();
writeln!(serial, " PIC remapped and masked").ok();
writeln!(serial, " Boot method: {:?}", info.boot_method).ok(); writeln!(serial, " Boot method: {:?}", info.boot_method).ok();
// Report memory information from boot // Report memory information from boot
@@ -295,11 +302,52 @@ pub fn kernel_init(info: &BootInfo) -> ! {
} }
} }
// Reload GDT to higher-half address before removing identity mapping // Initialize GDT with TSS for user mode support
// We need a kernel stack for ring 0 transitions from ring 3
writeln!(serial, "").ok(); writeln!(serial, "").ok();
writeln!(serial, ">>> Reloading GDT to higher-half...").ok(); writeln!(serial, ">>> Initializing GDT with TSS...").ok();
arch::x86_64::gdt::reload();
writeln!(serial, " GDT reloaded").ok(); // Allocate a kernel stack for syscall/interrupt handling from user mode
// We'll use 4 pages (16KB) for the kernel stack
let kernel_stack_base = VirtAddr::new(0xFFFFFE8000010000); // In temp region
// Allocate and map 4 pages for the kernel stack
for i in 0..4 {
let frame = memory::frame::allocate_frame().expect("Failed to allocate kernel stack");
let page_virt = VirtAddr::new(kernel_stack_base.as_u64() + (i * 0x1000) as u64);
memory::paging::map_4kb(page_virt, frame.start_address(), memory::paging::flags::KERNEL_DATA)
.expect("Failed to map kernel stack");
}
// Stack grows down, so point to top of the 4-page region
let kernel_stack_top = kernel_stack_base.as_u64() + 0x4000;
arch::x86_64::gdt::init(kernel_stack_top);
writeln!(serial, " GDT with TSS initialized").ok();
writeln!(serial, " Kernel stack at {:#x}", kernel_stack_top).ok();
// Initialize syscall interface (INT 0x80)
writeln!(serial, "").ok();
writeln!(serial, ">>> Initializing syscall interface...").ok();
arch::x86_64::syscall::init();
writeln!(serial, " INT 0x80 syscall handler installed").ok();
// Set up a dedicated stack for double fault handling (IST1)
// This ensures the double fault handler has a known-good stack even if
// the main stack is corrupted (e.g., during failed privilege transitions)
writeln!(serial, "").ok();
writeln!(serial, ">>> Setting up IST for double fault...").ok();
let ist1_stack_base = VirtAddr::new(0xFFFFFE8000020000); // Separate from kernel stack
// Allocate 4 pages (16KB) - must be enough for exception frame + handler execution
for i in 0..4 {
let frame = memory::frame::allocate_frame().expect("Failed to allocate IST1 stack");
let page_virt = VirtAddr::new(ist1_stack_base.as_u64() + (i * 0x1000) as u64);
memory::paging::map_4kb(page_virt, frame.start_address(), memory::paging::flags::KERNEL_DATA)
.expect("Failed to map IST1 stack");
}
let ist1_stack_top = ist1_stack_base.as_u64() + 0x4000; // 16KB stack
arch::x86_64::gdt::set_ist(1, ist1_stack_top);
arch::x86_64::interrupts::set_double_fault_ist(1);
writeln!(serial, " IST1 (double fault) stack at {:#x}", ist1_stack_top).ok();
// Remove identity mapping - no longer needed now that we're in higher-half // Remove identity mapping - no longer needed now that we're in higher-half
writeln!(serial, "").ok(); writeln!(serial, "").ok();
@@ -314,15 +362,162 @@ pub fn kernel_init(info: &BootInfo) -> ! {
writeln!(serial, " Identity mapping removed (PML4[0] cleared)").ok(); writeln!(serial, " Identity mapping removed (PML4[0] cleared)").ok();
} }
// Test user-mode execution
writeln!(serial, "").ok(); writeln!(serial, "").ok();
writeln!(serial, "Kernel initialization complete.").ok(); writeln!(serial, ">>> Testing user-mode execution...").ok();
writeln!(serial, "Halting CPU.").ok();
// Halt the CPU // Create a new process for user mode test
loop { let user_pid = process::create().expect("Failed to create user process");
unsafe { writeln!(serial, " Created user process {}", user_pid).ok();
core::arch::asm!("cli; hlt", options(nostack, nomem));
// Get the process's page table for mapping user pages
let user_process = process::get(user_pid).unwrap();
writeln!(serial, " Process page table: {:#x}", user_process.page_table).ok();
// Allocate frames for user code and stack
let user_code_frame = memory::frame::allocate_frame().expect("Failed to allocate user code frame");
let user_stack_frame = memory::frame::allocate_frame().expect("Failed to allocate user stack frame");
// User virtual addresses (in low memory, user-accessible)
let user_code_virt = VirtAddr::new(0x400000); // 4MB - typical user code location
let user_stack_virt = VirtAddr::new(0x800000); // 8MB - user stack base
// First switch to the user process's address space to set up its mappings
unsafe { process::switch_address_space(user_pid).expect("Failed to switch to user address space"); }
// Verify kernel stacks are accessible in user address space
// (They should be, since we copy kernel PML4 entries during process creation)
writeln!(serial, " Verifying kernel stack mappings...").ok();
if let Some(phys) = memory::paging::translate(VirtAddr::new(kernel_stack_top - 8)) {
writeln!(serial, " Kernel stack: {:#x} -> {:#x}", kernel_stack_top - 8, phys).ok();
} else {
panic!("Kernel stack not mapped in user address space!");
} }
if let Some(phys) = memory::paging::translate(VirtAddr::new(ist1_stack_top - 8)) {
writeln!(serial, " IST1 stack: {:#x} -> {:#x}", ist1_stack_top - 8, phys).ok();
} else {
panic!("IST1 stack not mapped in user address space!");
}
// Map user code page (readable, executable, user-accessible)
memory::paging::map_4kb(user_code_virt, user_code_frame.start_address(), memory::paging::flags::USER_CODE)
.expect("Failed to map user code");
writeln!(serial, " Mapped user code at {:#x}", user_code_virt).ok();
// Map user stack page (readable, writable, user-accessible)
memory::paging::map_4kb(user_stack_virt, user_stack_frame.start_address(), memory::paging::flags::USER_DATA)
.expect("Failed to map user stack");
writeln!(serial, " Mapped user stack at {:#x}", user_stack_virt).ok();
// Write a simple user program that:
// 1. Calls write(1, "Hello from user mode!\n", 22)
// 2. Calls exit(0)
let user_code_ptr = user_code_virt.as_u64() as *mut u8;
let message = b"Hello from user mode!\n";
let message_offset = 64u64; // Place message after code
unsafe {
let code: &[u8] = &[
// mov rax, 1 (WRITE syscall)
0x48, 0xc7, 0xc0, 0x01, 0x00, 0x00, 0x00,
// mov rdi, 1 (fd = stdout)
0x48, 0xc7, 0xc7, 0x01, 0x00, 0x00, 0x00,
// lea rsi, [rip + message_offset] - we'll use absolute address instead
// mov rsi, 0x400040 (message address = code_base + 64)
0x48, 0xbe,
((user_code_virt.as_u64() + message_offset) & 0xFF) as u8,
(((user_code_virt.as_u64() + message_offset) >> 8) & 0xFF) as u8,
(((user_code_virt.as_u64() + message_offset) >> 16) & 0xFF) as u8,
(((user_code_virt.as_u64() + message_offset) >> 24) & 0xFF) as u8,
(((user_code_virt.as_u64() + message_offset) >> 32) & 0xFF) as u8,
(((user_code_virt.as_u64() + message_offset) >> 40) & 0xFF) as u8,
(((user_code_virt.as_u64() + message_offset) >> 48) & 0xFF) as u8,
(((user_code_virt.as_u64() + message_offset) >> 56) & 0xFF) as u8,
// mov rdx, 22 (length)
0x48, 0xc7, 0xc2, 0x16, 0x00, 0x00, 0x00,
// int 0x80
0xcd, 0x80,
// mov rax, 0 (EXIT syscall)
0x48, 0xc7, 0xc0, 0x00, 0x00, 0x00, 0x00,
// mov rdi, 0 (exit code)
0x48, 0xc7, 0xc7, 0x00, 0x00, 0x00, 0x00,
// int 0x80
0xcd, 0x80,
// hlt (should never reach here)
0xf4,
];
// Write the code
for (i, &byte) in code.iter().enumerate() {
core::ptr::write_volatile(user_code_ptr.add(i), byte);
}
// Write the message after the code
let message_ptr = user_code_ptr.add(message_offset as usize);
for (i, &byte) in message.iter().enumerate() {
core::ptr::write_volatile(message_ptr.add(i), byte);
}
}
writeln!(serial, " Wrote user program ({} bytes code + {} bytes data)", 52, message.len()).ok();
// User stack pointer (top of stack page)
let user_stack_top = user_stack_virt.as_u64() + 0x1000;
// First, let's test that user mode works by running code in kernel
// that verifies the segments are correct
writeln!(serial, "").ok();
writeln!(serial, ">>> Testing IRETQ mechanism with kernel mode...").ok();
// Test: Do a simple kernel-to-kernel IRETQ to verify the mechanism
unsafe {
core::arch::asm!(
// Push a simple return frame for kernel mode
"push 0x10", // SS (kernel data)
"push rsp", // RSP (current stack)
"add qword ptr [rsp], 8", // Adjust for the push
"pushfq", // RFLAGS
"push 0x08", // CS (kernel code)
"lea rax, [rip + 2f]", // RIP (label 2)
"push rax",
"iretq",
"2:",
out("rax") _,
options(nostack)
);
}
writeln!(serial, " Kernel IRETQ test passed!").ok();
// Debug: Print the GDT segment descriptor values
writeln!(serial, "").ok();
writeln!(serial, ">>> Verifying GDT entries...").ok();
let user_cs = arch::x86_64::gdt::user_cs();
let user_ds = arch::x86_64::gdt::user_ds();
writeln!(serial, " USER_CS selector: {:#x}", user_cs).ok();
writeln!(serial, " USER_DS selector: {:#x}", user_ds).ok();
// Test loading user data segment while in kernel mode
// This should work: loading DPL=3 segment with RPL=3 while CPL=0
writeln!(serial, " Testing user segment load in kernel mode...").ok();
unsafe {
core::arch::asm!(
"mov ax, {0:x}",
"mov ds, ax", // This might fail with GPF if segment is invalid
"mov ax, 0x10", // Restore kernel data segment
"mov ds, ax",
in(reg) user_ds as u64,
out("rax") _,
options(nostack, preserves_flags)
);
}
writeln!(serial, " User segment load test passed!").ok();
writeln!(serial, "").ok();
writeln!(serial, ">>> Jumping to user mode (ring 3)...").ok();
writeln!(serial, " Entry: {:#x}, Stack: {:#x}", user_code_virt, user_stack_top).ok();
// Jump to user mode! (This won't return)
unsafe {
process::jump_to_user(user_code_virt.as_u64(), user_stack_top);
} }
} }

View File

@@ -444,15 +444,18 @@ pub fn remove_identity_mapping() {
// ============================================================================ // ============================================================================
/// Ensure a PML4 entry exists, creating a PDPT if necessary /// Ensure a PML4 entry exists, creating a PDPT if necessary
fn ensure_pml4_entry(pml4_idx: usize, _flags: u64) -> Result<(), PagingError> { fn ensure_pml4_entry(pml4_idx: usize, page_flags: u64) -> Result<(), PagingError> {
let entry = read_pml4(pml4_idx); let entry = read_pml4(pml4_idx);
if !entry.is_present() { if !entry.is_present() {
let frame = allocate_frame()?; let frame = allocate_frame()?;
let phys = frame.start_address(); let phys = frame.start_address();
// Link the new PDPT into the PML4 first // Link the new PDPT into the PML4 first
// Use only table flags (PRESENT | WRITABLE) for intermediate entries // For user pages, the USER bit must be set in all intermediate entries
let table_flags = flags::PRESENT | flags::WRITABLE; let mut table_flags = flags::PRESENT | flags::WRITABLE;
if page_flags & flags::USER != 0 {
table_flags |= flags::USER;
}
let new_entry = PageTableEntry::new(phys, table_flags); let new_entry = PageTableEntry::new(phys, table_flags);
write_pml4(pml4_idx, new_entry); write_pml4(pml4_idx, new_entry);
@@ -462,13 +465,18 @@ fn ensure_pml4_entry(pml4_idx: usize, _flags: u64) -> Result<(), PagingError> {
// Zero the new page table via recursive mapping // Zero the new page table via recursive mapping
// Now that PML4[pml4_idx] is set, pdpt_table_addr gives us access // Now that PML4[pml4_idx] is set, pdpt_table_addr gives us access
zero_page_table(pdpt_table_addr(pml4_idx)); zero_page_table(pdpt_table_addr(pml4_idx));
} else if page_flags & flags::USER != 0 && !entry.is_user() {
// Existing entry needs USER bit added
let mut updated = entry;
updated.set_flags(entry.flags() | flags::USER);
write_pml4(pml4_idx, updated);
} }
Ok(()) Ok(())
} }
/// Ensure a PDPT entry exists, creating a PD if necessary /// Ensure a PDPT entry exists, creating a PD if necessary
fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(), PagingError> { fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, page_flags: u64) -> Result<(), PagingError> {
ensure_pml4_entry(pml4_idx, flags)?; ensure_pml4_entry(pml4_idx, page_flags)?;
let entry = read_pdpt(pml4_idx, pdpt_idx); let entry = read_pdpt(pml4_idx, pdpt_idx);
if entry.is_huge() { if entry.is_huge() {
@@ -479,8 +487,11 @@ fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(),
let phys = frame.start_address(); let phys = frame.start_address();
// Link the new PD into the PDPT first // Link the new PD into the PDPT first
// Use only table flags for intermediate entries // For user pages, the USER bit must be set in all intermediate entries
let table_flags = flags::PRESENT | flags::WRITABLE; let mut table_flags = flags::PRESENT | flags::WRITABLE;
if page_flags & flags::USER != 0 {
table_flags |= flags::USER;
}
let new_entry = PageTableEntry::new(phys, table_flags); let new_entry = PageTableEntry::new(phys, table_flags);
write_pdpt(pml4_idx, pdpt_idx, new_entry); write_pdpt(pml4_idx, pdpt_idx, new_entry);
@@ -489,13 +500,18 @@ fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(),
// Zero the new page table via recursive mapping // Zero the new page table via recursive mapping
zero_page_table(pd_table_addr(pml4_idx, pdpt_idx)); zero_page_table(pd_table_addr(pml4_idx, pdpt_idx));
} else if page_flags & flags::USER != 0 && !entry.is_user() {
// Existing entry needs USER bit added
let mut updated = entry;
updated.set_flags(entry.flags() | flags::USER);
write_pdpt(pml4_idx, pdpt_idx, updated);
} }
Ok(()) Ok(())
} }
/// Ensure a PD entry exists, creating a PT if necessary /// Ensure a PD entry exists, creating a PT if necessary
fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64) -> Result<(), PagingError> { fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, page_flags: u64) -> Result<(), PagingError> {
ensure_pdpt_entry(pml4_idx, pdpt_idx, flags)?; ensure_pdpt_entry(pml4_idx, pdpt_idx, page_flags)?;
let entry = read_pd(pml4_idx, pdpt_idx, pd_idx); let entry = read_pd(pml4_idx, pdpt_idx, pd_idx);
if entry.is_huge() { if entry.is_huge() {
@@ -506,8 +522,11 @@ fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64)
let phys = frame.start_address(); let phys = frame.start_address();
// Link the new PT into the PD first // Link the new PT into the PD first
// Use only table flags for intermediate entries // For user pages, the USER bit must be set in all intermediate entries
let table_flags = flags::PRESENT | flags::WRITABLE; let mut table_flags = flags::PRESENT | flags::WRITABLE;
if page_flags & flags::USER != 0 {
table_flags |= flags::USER;
}
let new_entry = PageTableEntry::new(phys, table_flags); let new_entry = PageTableEntry::new(phys, table_flags);
write_pd(pml4_idx, pdpt_idx, pd_idx, new_entry); write_pd(pml4_idx, pdpt_idx, pd_idx, new_entry);
@@ -516,6 +535,11 @@ fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64)
// Zero the new page table via recursive mapping // Zero the new page table via recursive mapping
zero_page_table(pt_table_addr(pml4_idx, pdpt_idx, pd_idx)); zero_page_table(pt_table_addr(pml4_idx, pdpt_idx, pd_idx));
} else if page_flags & flags::USER != 0 && !entry.is_user() {
// Existing entry needs USER bit added
let mut updated = entry;
updated.set_flags(entry.flags() | flags::USER);
write_pd(pml4_idx, pdpt_idx, pd_idx, updated);
} }
Ok(()) Ok(())
} }

View File

@@ -219,12 +219,19 @@ fn init_page_table(pml4_phys: PhysAddr) -> Result<(), ProcessError> {
} }
// Copy kernel-space entries (256-511) from current PML4 // Copy kernel-space entries (256-511) from current PML4
// These include the recursive mapping (510) and kernel mapping (511) // EXCEPT for entry 510 (recursive mapping) which needs to point to THIS PML4
for i in 256..512 { for i in 256..512 {
if i == 510 {
// Set recursive mapping to point to this new PML4 itself
// Use PRESENT | WRITABLE flags (same as kernel recursive mapping)
let self_ref_entry = pml4_phys.as_u64() | flags::PRESENT | flags::WRITABLE;
core::ptr::write_volatile(pml4_ptr.add(i), self_ref_entry);
} else {
let entry = paging::read_pml4(i); let entry = paging::read_pml4(i);
core::ptr::write_volatile(pml4_ptr.add(i), entry.bits()); core::ptr::write_volatile(pml4_ptr.add(i), entry.bits());
} }
} }
}
// Unmap the temporary mapping (don't free the frame - it's the new page table!) // Unmap the temporary mapping (don't free the frame - it's the new page table!)
// We need to manually clear the mapping without freeing // We need to manually clear the mapping without freeing
@@ -321,3 +328,93 @@ pub fn switch_to_kernel() {
); );
} }
} }
/// Jump to user mode (ring 3)
///
/// This uses IRETQ to transition from ring 0 to ring 3. IRETQ pops
/// RIP, CS, RFLAGS, RSP, and SS from the stack; because the pushed CS
/// has RPL 3, the CPU atomically switches privilege level and stack.
///
/// # Arguments
/// * `entry` - User code entry point
/// * `stack` - User stack pointer
///
/// # Safety
/// The entry point and stack must be valid mapped addresses in user space.
/// The current process's page table must have proper user mappings.
pub unsafe fn jump_to_user(entry: u64, stack: u64) -> ! {
    use crate::arch::x86_64::gdt;
    use crate::serial::SerialPort;
    use core::fmt::Write;

    let user_cs = gdt::user_cs() as u64;
    let user_ds = gdt::user_ds() as u64;

    // Debug: Print what we're about to push
    // SAFETY: 0x3F8 is the standard COM1 I/O port.
    let mut serial = unsafe { SerialPort::new(0x3F8) };
    writeln!(serial, " IRETQ frame: SS={:#x} RSP={:#x} RFLAGS=0x202 CS={:#x} RIP={:#x}",
        user_ds, stack, user_cs, entry).ok();

    // Verify the entry point is mapped and accessible
    use crate::memory::paging;
    use crate::memory::VirtAddr;

    let entry_virt = VirtAddr::new(entry);
    if let Some((phys, size, flags)) = paging::get_mapping_info(entry_virt) {
        writeln!(serial, " Entry mapping: phys={:#x} size={:?} flags={:#x}", phys, size, flags).ok();
    } else {
        writeln!(serial, " WARNING: Entry point {:#x} is NOT MAPPED!", entry).ok();
    }

    // Verify the stack is mapped. Check one word below the top, since the
    // first user-mode push will decrement RSP before writing.
    let stack_virt = VirtAddr::new(stack - 8); // Stack will be decremented
    if let Some((phys, size, flags)) = paging::get_mapping_info(stack_virt) {
        writeln!(serial, " Stack mapping: phys={:#x} size={:?} flags={:#x}", phys, size, flags).ok();
    } else {
        writeln!(serial, " WARNING: Stack {:#x} is NOT MAPPED!", stack).ok();
    }

    // Flush TLB to ensure all page table changes (new mappings, USER bits
    // added to intermediate entries) are visible before dropping to ring 3.
    // Reloading CR3 flushes all non-global TLB entries.
    // SAFETY: writing back the value just read from CR3 is always valid.
    unsafe {
        let cr3: u64;
        core::arch::asm!("mov {}, cr3", out(reg) cr3, options(nostack, preserves_flags));
        core::arch::asm!("mov cr3, {}", in(reg) cr3, options(nostack, preserves_flags));
    }

    // IRETQ expects the stack to contain (top to bottom):
    //   [RSP+0]  RIP    - last pushed, first popped
    //   [RSP+8]  CS
    //   [RSP+16] RFLAGS
    //   [RSP+24] RSP
    //   [RSP+32] SS     - first pushed, last popped
    //
    // DS/ES/FS/GS must hold selectors valid at CPL 3 before IRETQ when
    // transitioning to ring 3. The null selector (0) is valid for data
    // segments in 64-bit mode.
    //
    // NOTE: r11 is supplied as a fixed input operand holding 0 instead of
    // being zeroed inside the template. Referencing a register in the
    // template without declaring it as an operand is undefined behavior:
    // the allocator may assign that same register to one of the `in(reg)`
    // operands below, which the template would then clobber before use.
    // `options(noreturn)` forbids output operands, so a clobber cannot be
    // declared; pinning r11 as an input is the sound alternative.
    unsafe {
        core::arch::asm!(
            // Load null selectors into the data segment registers
            "mov ds, r11w",
            "mov es, r11w",
            "mov fs, r11w",
            "mov gs, r11w",
            // Memory barrier to ensure all stores are complete
            "mfence",
            // Build IRETQ frame on stack
            "push {user_ss}",  // SS
            "push {stack}",    // RSP
            "push 0x202",      // RFLAGS (IF=1, reserved bit 1 = 1)
            "push {user_cs}",  // CS
            "push {entry}",    // RIP
            // Jump to user mode
            "iretq",
            user_cs = in(reg) user_cs,
            user_ss = in(reg) user_ds, // SS same as DS for user mode
            entry = in(reg) entry,
            stack = in(reg) stack,
            in("r11") 0u64,
            options(noreturn)
        );
    }
}