diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs
index 3c57607..0bff758 100644
--- a/src/arch/x86_64/gdt.rs
+++ b/src/arch/x86_64/gdt.rs
@@ -1,12 +1,29 @@
 //! Global Descriptor Table (GDT) for x86-64
 //!
-//! This module provides a kernel-space GDT that can be used after
-//! identity mapping is removed.
+//! This module provides the GDT with kernel and user segments, plus TSS.
+//!
+//! GDT Layout:
+//! - 0x00: Null descriptor
+//! - 0x08: Kernel code segment (ring 0)
+//! - 0x10: Kernel data segment (ring 0)
+//! - 0x18: User data segment (ring 3)
+//! - 0x20: User code segment (ring 3)
+//! - 0x28: TSS descriptor (16 bytes, spans 0x28-0x37)
 
 use core::arch::asm;
 use core::mem::size_of;
 
-/// GDT entry (segment descriptor)
+/// Segment selectors
+pub mod selectors {
+    pub const NULL: u16 = 0x00;
+    pub const KERNEL_CODE: u16 = 0x08;
+    pub const KERNEL_DATA: u16 = 0x10;
+    pub const USER_DATA: u16 = 0x18 | 3; // RPL 3
+    pub const USER_CODE: u16 = 0x20 | 3; // RPL 3
+    pub const TSS: u16 = 0x28;
+}
+
+/// GDT entry (segment descriptor) - 8 bytes
 #[repr(C, packed)]
 #[derive(Clone, Copy)]
 pub struct GdtEntry {
@@ -31,29 +48,133 @@ impl GdtEntry {
         }
     }
 
-    /// Create a 64-bit code segment descriptor
-    pub const fn code64() -> Self {
+    /// Create a 64-bit kernel code segment
+    pub const fn kernel_code() -> Self {
         Self {
             limit_low: 0xFFFF,
             base_low: 0,
             base_mid: 0,
-            access: 0x9A, // Present, ring 0, code, exec/read
-            flags_limit_high: 0xAF, // 64-bit, limit high nibble
+            access: 0x9A, // Present, DPL 0, code, exec/read
+            flags_limit_high: 0xAF, // 64-bit, limit high
             base_high: 0,
         }
     }
 
-    /// Create a data segment descriptor
-    pub const fn data() -> Self {
+    /// Create a kernel data segment
+    pub const fn kernel_data() -> Self {
         Self {
             limit_low: 0xFFFF,
             base_low: 0,
             base_mid: 0,
-            access: 0x92, // Present, ring 0, data, read/write
+            access: 0x92, // Present, DPL 0, data, read/write
             flags_limit_high: 0xCF, // 32-bit, 4KB granularity
             base_high: 0,
         }
     }
+
+    /// Create a 64-bit user code segment
+    pub const fn user_code() -> Self {
+        Self {
+            limit_low: 0xFFFF,
+            base_low: 0,
+            base_mid: 0,
+            access: 0xFA, // Present, DPL 3, code, exec/read
+            flags_limit_high: 0xAF, // 64-bit, limit high
+            base_high: 0,
+        }
+    }
+
+    /// Create a user data segment
+    pub const fn user_data() -> Self {
+        Self {
+            limit_low: 0xFFFF,
+            base_low: 0,
+            base_mid: 0,
+            access: 0xF2, // Present, DPL 3, data, read/write
+            flags_limit_high: 0xCF, // 32-bit, 4KB granularity
+            base_high: 0,
+        }
+    }
+}
+
+/// TSS descriptor (16 bytes in 64-bit mode)
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+pub struct TssDescriptor {
+    limit_low: u16,
+    base_low: u16,
+    base_mid: u8,
+    access: u8,
+    flags_limit_high: u8,
+    base_high: u8,
+    base_upper: u32,
+    reserved: u32,
+}
+
+impl TssDescriptor {
+    pub const fn null() -> Self {
+        Self {
+            limit_low: 0,
+            base_low: 0,
+            base_mid: 0,
+            access: 0,
+            flags_limit_high: 0,
+            base_high: 0,
+            base_upper: 0,
+            reserved: 0,
+        }
+    }
+
+    /// Create a TSS descriptor for the given TSS address and size
+    pub fn new(base: u64, limit: u32) -> Self {
+        Self {
+            limit_low: limit as u16,
+            base_low: base as u16,
+            base_mid: (base >> 16) as u8,
+            access: 0x89, // Present, 64-bit TSS (available)
+            flags_limit_high: ((limit >> 16) as u8) & 0x0F,
+            base_high: (base >> 24) as u8,
+            base_upper: (base >> 32) as u32,
+            reserved: 0,
+        }
+    }
+}
+
+/// Task State Segment (TSS) for x86-64
+///
+/// The TSS is used primarily for:
+/// - RSP0: Stack to use when transitioning from ring 3 to ring 0
+/// - IST: Interrupt Stack Table for specific interrupts
+#[repr(C, packed)]
+pub struct Tss {
+    reserved0: u32,
+    /// Stack pointers for privilege levels 0-2
+    pub rsp0: u64,
+    pub rsp1: u64,
+    pub rsp2: u64,
+    reserved1: u64,
+    /// Interrupt Stack Table (IST) entries 1-7
+    pub ist: [u64; 7],
+    reserved2: u64,
+    reserved3: u16,
+    /// I/O map base address
+    pub iopb: u16,
+}
+
+impl Tss {
+    pub const fn new() -> Self {
+        Self {
+            reserved0: 0,
+            rsp0: 0,
+            rsp1: 0,
+            rsp2: 0,
+            reserved1: 0,
+            ist: [0; 7],
+            reserved2: 0,
+            reserved3: 0,
+            iopb: size_of::<Tss>() as u16,
+        }
+    }
 }
 
 /// GDT pointer for LGDT instruction
@@ -63,53 +184,83 @@ pub struct GdtPointer {
     base: u64,
 }
 
-/// Number of GDT entries
-const GDT_ENTRIES: usize = 3;
-
-/// Kernel GDT with null, code, and data segments
+/// Combined GDT structure with all entries
 #[repr(C, align(16))]
 pub struct Gdt {
-    entries: [GdtEntry; GDT_ENTRIES],
+    null: GdtEntry,
+    kernel_code: GdtEntry,
+    kernel_data: GdtEntry,
+    user_data: GdtEntry,
+    user_code: GdtEntry,
+    tss: TssDescriptor,
 }
 
 impl Gdt {
     pub const fn new() -> Self {
         Self {
-            entries: [
-                GdtEntry::null(),   // 0x00: Null descriptor
-                GdtEntry::code64(), // 0x08: Kernel code segment
-                GdtEntry::data(),   // 0x10: Kernel data segment
-            ],
+            null: GdtEntry::null(),
+            kernel_code: GdtEntry::kernel_code(),
+            kernel_data: GdtEntry::kernel_data(),
+            user_data: GdtEntry::user_data(),
+            user_code: GdtEntry::user_code(),
+            tss: TssDescriptor::null(),
         }
     }
+
+    /// Set the TSS descriptor
+    pub fn set_tss(&mut self, base: u64, limit: u32) {
+        self.tss = TssDescriptor::new(base, limit);
+    }
 }
 
-/// Static kernel GDT (in higher-half memory)
-static KERNEL_GDT: Gdt = Gdt::new();
+use core::cell::UnsafeCell;
 
-/// Reload the GDT with the kernel-space GDT
+struct SyncGdt(UnsafeCell<Gdt>);
+unsafe impl Sync for SyncGdt {}
+
+struct SyncTss(UnsafeCell<Tss>);
+unsafe impl Sync for SyncTss {}
+
+/// Static kernel GDT
+static KERNEL_GDT: SyncGdt = SyncGdt(UnsafeCell::new(Gdt::new()));
+
+/// Static kernel TSS
+static KERNEL_TSS: SyncTss = SyncTss(UnsafeCell::new(Tss::new()));
+
+/// Initialize the GDT with TSS
 ///
-/// This should be called before removing identity mapping to ensure
-/// the GDT is accessible after the low memory is unmapped.
-pub fn reload() {
+/// This sets up the full GDT including user segments and TSS,
+/// then loads it into the CPU.
+pub fn init(kernel_stack: u64) {
+    let gdt = unsafe { &mut *KERNEL_GDT.0.get() };
+    let tss = unsafe { &mut *KERNEL_TSS.0.get() };
+
+    // Set up TSS with kernel stack for ring 0
+    tss.rsp0 = kernel_stack;
+
+    // Update GDT with TSS descriptor
+    let tss_addr = tss as *const Tss as u64;
+    let tss_limit = (size_of::<Tss>() - 1) as u32;
+    gdt.set_tss(tss_addr, tss_limit);
+
+    // Load GDT
+    let gdt_size = size_of::<Gdt>();
     let pointer = GdtPointer {
-        limit: (size_of::<[GdtEntry; GDT_ENTRIES]>() - 1) as u16,
-        base: KERNEL_GDT.entries.as_ptr() as u64,
+        limit: (gdt_size - 1) as u16,
+        base: gdt as *const Gdt as u64,
     };
 
     unsafe {
-        // Load new GDT
         asm!("lgdt [{}]", in(reg) &pointer, options(nostack, preserves_flags));
 
-        // Reload code segment by doing a far return
-        // Push SS, RSP, RFLAGS, CS, RIP and do IRETQ
+        // Reload code segment
         asm!(
-            "push 0x10",              // SS
+            "push 0x10",              // SS (kernel data)
             "push rsp",               // RSP
-            "add qword ptr [rsp], 8", // Adjust for the push
+            "add qword ptr [rsp], 8",
             "pushfq",                 // RFLAGS
-            "push 0x08",              // CS
-            "lea rax, [rip + 2f]",    // RIP (address of label 2)
+            "push 0x08",              // CS (kernel code)
+            "lea rax, [rip + 2f]",
            "push rax",
            "iretq",
            "2:",
@@ -119,19 +270,90 @@ pub fn reload() {
             "mov es, ax",
             "mov fs, ax",
             "mov gs, ax",
-            // SS is already set by IRETQ
             out("rax") _,
             options(preserves_flags)
         );
+
+        // Load TSS
+        asm!(
+            "ltr {0:x}",
+            in(reg) selectors::TSS,
+            options(nostack, preserves_flags)
+        );
+    }
+}
+
+/// Reload the GDT (called before removing identity mapping)
+///
+/// This is the simpler reload that doesn't reinitialize TSS.
+pub fn reload() {
+    let gdt = unsafe { &*KERNEL_GDT.0.get() };
+
+    let gdt_size = size_of::<Gdt>();
+    let pointer = GdtPointer {
+        limit: (gdt_size - 1) as u16,
+        base: gdt as *const Gdt as u64,
+    };
+
+    unsafe {
+        asm!("lgdt [{}]", in(reg) &pointer, options(nostack, preserves_flags));
+
+        // Reload segments
+        asm!(
+            "push 0x10",
+            "push rsp",
+            "add qword ptr [rsp], 8",
+            "pushfq",
+            "push 0x08",
+            "lea rax, [rip + 2f]",
+            "push rax",
+            "iretq",
+            "2:",
+            "mov ax, 0x10",
+            "mov ds, ax",
+            "mov es, ax",
+            "mov fs, ax",
+            "mov gs, ax",
+            out("rax") _,
+            options(preserves_flags)
+        );
+    }
+}
+
+/// Update TSS RSP0 (kernel stack for ring transitions)
+pub fn set_kernel_stack(stack: u64) {
+    let tss = unsafe { &mut *KERNEL_TSS.0.get() };
+    tss.rsp0 = stack;
+}
+
+/// Set an IST (Interrupt Stack Table) entry
+///
+/// IST entries are numbered 1-7 (index 0-6 in the array).
+/// These provide dedicated stacks for specific interrupt handlers.
+pub fn set_ist(ist_index: u8, stack: u64) {
+    if ist_index == 0 || ist_index > 7 {
+        return; // Invalid index
+    }
+    let tss = unsafe { &mut *KERNEL_TSS.0.get() };
+    tss.ist[(ist_index - 1) as usize] = stack;
+}
+
 /// Get the kernel code segment selector
 pub const fn kernel_cs() -> u16 {
-    0x08
+    selectors::KERNEL_CODE
 }
 
 /// Get the kernel data segment selector
 pub const fn kernel_ds() -> u16 {
-    0x10
+    selectors::KERNEL_DATA
+}
+
+/// Get the user code segment selector
+pub const fn user_cs() -> u16 {
+    selectors::USER_CODE
+}
+
+/// Get the user data segment selector
+pub const fn user_ds() -> u16 {
+    selectors::USER_DATA
 }
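The selector constants in this file follow the standard x86 encoding: descriptor index shifted left by 3, OR'd with the requested privilege level (the table-indicator bit stays 0 for GDT selectors). A quick sanity sketch of that encoding; the `selector` helper is illustrative, not part of the patch:

    // Selector layout: | index (13 bits) | TI (1 bit) | RPL (2 bits) |
    const fn selector(index: u16, rpl: u16) -> u16 {
        (index << 3) | (rpl & 0b11)
    }

    // These reproduce the constants in `selectors`:
    const _: () = assert!(selector(1, 0) == 0x08);     // KERNEL_CODE
    const _: () = assert!(selector(3, 3) == 0x18 | 3); // USER_DATA = 0x1B
    const _: () = assert!(selector(4, 3) == 0x20 | 3); // USER_CODE = 0x23
    const _: () = assert!(selector(5, 0) == 0x28);     // TSS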
diff --git a/src/arch/x86_64/interrupts.rs b/src/arch/x86_64/interrupts.rs
index a6dd88a..febb22e 100644
--- a/src/arch/x86_64/interrupts.rs
+++ b/src/arch/x86_64/interrupts.rs
@@ -123,6 +123,17 @@ impl Idt {
             0, // No IST
         );
     }
+
+    /// Set an interrupt handler with custom DPL
+    pub fn set_handler_dpl(&mut self, vector: u8, handler: u64, gate_type: GateType, dpl: u8) {
+        self.entries[vector as usize] = IdtEntry::new(
+            handler,
+            0x08, // Kernel code segment
+            gate_type,
+            dpl,
+            0, // No IST
+        );
+    }
 }
 
 /// Interrupt stack frame pushed by CPU on interrupt/exception
@@ -271,6 +282,49 @@ unsafe fn load_idt(idt: *const Idt) {
     }
 }
 
+/// Set an interrupt handler (public interface)
+///
+/// # Arguments
+/// * `vector` - Interrupt vector number (0-255)
+/// * `handler` - Handler function address
+/// * `gate_type` - Type of gate (Interrupt or Trap)
+/// * `dpl` - Descriptor Privilege Level (0 = kernel only, 3 = user callable)
+pub fn set_handler(vector: u8, handler: u64, gate_type: GateType, dpl: u8) {
+    let idt = unsafe { &mut *STATIC_IDT.0.get() };
+    idt.set_handler_dpl(vector, handler, gate_type, dpl);
+}
+
+/// Set an interrupt handler with IST (Interrupt Stack Table) support
+///
+/// # Arguments
+/// * `vector` - Interrupt vector number (0-255)
+/// * `handler` - Handler function address
+/// * `gate_type` - Type of gate (Interrupt or Trap)
+/// * `ist` - IST index (1-7), or 0 for no IST
+pub fn set_handler_with_ist(vector: u8, handler: u64, gate_type: GateType, ist: u8) {
+    let idt = unsafe { &mut *STATIC_IDT.0.get() };
+    idt.entries[vector as usize] = IdtEntry::new(
+        handler,
+        0x08, // Kernel code segment
+        gate_type,
+        0, // DPL 0 (kernel)
+        ist,
+    );
+}
+
+/// Configure the double fault handler to use an IST entry
+///
+/// This must be called after init() and after the IST stack has been
+/// set up in the TSS via gdt::set_ist().
+pub fn set_double_fault_ist(ist: u8) {
+    set_handler_with_ist(
+        vectors::DOUBLE_FAULT,
+        double_fault_handler as *const () as u64,
+        GateType::Trap,
+        ist,
+    );
+}
+
 // ============================================================================
 // Exception Handlers
 // ============================================================================
@@ -404,7 +458,6 @@ extern "C" fn rust_exception_handler(state: &ExceptionState) {
              exception_name(state.vector as u8), state.vector).ok();
     writeln!(serial, "  Error code: {:#x}", state.error_code).ok();
     writeln!(serial, "  RIP: {:#x}  CS: {:#x}", state.rip, state.cs).ok();
-    writeln!(serial, "  RSP: {:#x}  SS: {:#x}", state.rsp, state.ss).ok();
     writeln!(serial, "  RFLAGS: {:#x}", state.rflags).ok();
     writeln!(serial, "  RAX: {:#018x}  RBX: {:#018x}", state.rax, state.rbx).ok();
     writeln!(serial, "  RCX: {:#018x}  RDX: {:#018x}", state.rcx, state.rdx).ok();
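The IST plumbing spans gdt.rs and interrupts.rs, so call order matters: the stack must be installed in the TSS before the gate starts referencing it. A minimal sketch of the intended order (the function name and stack-top address are illustrative; the stack must already be mapped):

    fn setup_double_fault_stack() {
        // 1. Point TSS.IST1 at a known-good, mapped stack top.
        gdt::set_ist(1, 0xFFFF_FE80_0002_4000);
        // 2. Rebuild the #DF gate so the CPU unconditionally switches to
        //    IST1, even when RSP is garbage at the moment the fault fires.
        interrupts::set_double_fault_ist(1);
    }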
diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs
index 753524f..6a8e862 100644
--- a/src/arch/x86_64/mod.rs
+++ b/src/arch/x86_64/mod.rs
@@ -1,7 +1,9 @@
 //! x86_64 architecture support
 
-pub mod interrupts;
 pub mod gdt;
+pub mod interrupts;
+pub mod pic;
+pub mod syscall;
 
 /// Halt the CPU until the next interrupt
 #[inline]
diff --git a/src/arch/x86_64/pic.rs b/src/arch/x86_64/pic.rs
new file mode 100644
index 0000000..7343dd8
--- /dev/null
+++ b/src/arch/x86_64/pic.rs
@@ -0,0 +1,244 @@
+//! 8259 Programmable Interrupt Controller (PIC) driver
+//!
+//! The legacy PIC must be properly configured even if we plan to use the APIC,
+//! because the master's default mapping (0x08-0x0F; the slave sits at 0x70-0x77)
+//! overlaps the CPU exception vectors:
+//!
+//! - IRQ0 (Timer)    -> Vector 0x08 (Double Fault!)
+//! - IRQ1 (Keyboard) -> Vector 0x09
+//! - IRQ7 (Spurious) -> Vector 0x0F
+//!
+//! This module remaps the PICs to vectors 0x20-0x2F and provides control
+//! over interrupt masking.
+
+use core::arch::asm;
+
+/// PIC1 (master) command port
+const PIC1_CMD: u16 = 0x20;
+/// PIC1 (master) data port
+const PIC1_DATA: u16 = 0x21;
+/// PIC2 (slave) command port
+const PIC2_CMD: u16 = 0xA0;
+/// PIC2 (slave) data port
+const PIC2_DATA: u16 = 0xA1;
+
+/// ICW1: Initialization Command Word 1
+const ICW1_INIT: u8 = 0x10;
+const ICW1_ICW4: u8 = 0x01; // ICW4 needed
+
+/// ICW4: Initialization Command Word 4
+const ICW4_8086: u8 = 0x01; // 8086/88 mode
+
+/// Vector offset for PIC1 IRQs (IRQ0-7 -> vectors 0x20-0x27)
+pub const PIC1_OFFSET: u8 = 0x20;
+/// Vector offset for PIC2 IRQs (IRQ8-15 -> vectors 0x28-0x2F)
+pub const PIC2_OFFSET: u8 = 0x28;
+
+/// IRQ numbers
+pub mod irq {
+    pub const TIMER: u8 = 0;
+    pub const KEYBOARD: u8 = 1;
+    pub const CASCADE: u8 = 2; // Used internally for PIC1-PIC2 cascade
+    pub const COM2: u8 = 3;
+    pub const COM1: u8 = 4;
+    pub const LPT2: u8 = 5;
+    pub const FLOPPY: u8 = 6;
+    pub const LPT1: u8 = 7; // Also spurious IRQ
+    pub const RTC: u8 = 8;
+    pub const FREE1: u8 = 9;
+    pub const FREE2: u8 = 10;
+    pub const FREE3: u8 = 11;
+    pub const MOUSE: u8 = 12;
+    pub const FPU: u8 = 13;
+    pub const ATA_PRIMARY: u8 = 14;
+    pub const ATA_SECONDARY: u8 = 15;
+}
+
+/// Write a byte to an I/O port
+#[inline]
+unsafe fn outb(port: u16, value: u8) {
+    unsafe {
+        asm!(
+            "out dx, al",
+            in("dx") port,
+            in("al") value,
+            options(nostack, nomem, preserves_flags)
+        );
+    }
+}
+
+/// Read a byte from an I/O port
+#[inline]
+unsafe fn inb(port: u16) -> u8 {
+    let value: u8;
+    unsafe {
+        asm!(
+            "in al, dx",
+            in("dx") port,
+            out("al") value,
+            options(nostack, nomem, preserves_flags)
+        );
+    }
+    value
+}
+
+/// Small I/O delay for PIC timing requirements
+#[inline]
+unsafe fn io_wait() {
+    // Write to an unused port to create a small delay
+    // Port 0x80 is used for POST codes and is safe to write to
+    unsafe { outb(0x80, 0); }
+}
+
+/// Initialize and remap both PICs
+///
+/// This remaps:
+/// - PIC1 (IRQ 0-7) to vectors 0x20-0x27
+/// - PIC2 (IRQ 8-15) to vectors 0x28-0x2F
+///
+/// After initialization, all IRQs are masked (disabled).
+pub fn init() {
+    unsafe {
+        // Save current masks
+        let mask1 = inb(PIC1_DATA);
+        let mask2 = inb(PIC2_DATA);
+
+        // ICW1: Start initialization sequence (cascade mode, ICW4 needed)
+        outb(PIC1_CMD, ICW1_INIT | ICW1_ICW4);
+        io_wait();
+        outb(PIC2_CMD, ICW1_INIT | ICW1_ICW4);
+        io_wait();
+
+        // ICW2: Set vector offsets
+        outb(PIC1_DATA, PIC1_OFFSET);
+        io_wait();
+        outb(PIC2_DATA, PIC2_OFFSET);
+        io_wait();
+
+        // ICW3: Configure cascade
+        // Tell PIC1 that PIC2 is at IRQ2 (bit 2 = 0x04)
+        outb(PIC1_DATA, 0x04);
+        io_wait();
+        // Tell PIC2 its cascade identity (IRQ2 = 2)
+        outb(PIC2_DATA, 0x02);
+        io_wait();
+
+        // ICW4: Set 8086 mode
+        outb(PIC1_DATA, ICW4_8086);
+        io_wait();
+        outb(PIC2_DATA, ICW4_8086);
+        io_wait();
+
+        // Mask all interrupts (we'll unmask specific ones as needed)
+        outb(PIC1_DATA, 0xFF);
+        outb(PIC2_DATA, 0xFF);
+
+        // Note: We intentionally mask all interrupts rather than restoring
+        // the old masks, since we want to start with a clean slate
+        let _ = (mask1, mask2); // Suppress unused warning
+    }
+}
+
+/// Disable the PIC entirely by masking all interrupts
+///
+/// This is useful when transitioning to APIC mode.
+pub fn disable() {
+    unsafe {
+        outb(PIC1_DATA, 0xFF);
+        outb(PIC2_DATA, 0xFF);
+    }
+}
+
+/// Mask (disable) a specific IRQ
+pub fn mask_irq(irq: u8) {
+    let port = if irq < 8 { PIC1_DATA } else { PIC2_DATA };
+    let irq_bit = if irq < 8 { irq } else { irq - 8 };
+
+    unsafe {
+        let mask = inb(port) | (1 << irq_bit);
+        outb(port, mask);
+    }
+}
+
+/// Unmask (enable) a specific IRQ
+pub fn unmask_irq(irq: u8) {
+    let port = if irq < 8 { PIC1_DATA } else { PIC2_DATA };
+    let irq_bit = if irq < 8 { irq } else { irq - 8 };
+
+    unsafe {
+        let mask = inb(port) & !(1 << irq_bit);
+        outb(port, mask);
+    }
+
+    // If unmasking an IRQ on PIC2, also unmask the cascade IRQ on PIC1
+    if irq >= 8 {
+        unsafe {
+            let mask = inb(PIC1_DATA) & !(1 << irq::CASCADE);
+            outb(PIC1_DATA, mask);
+        }
+    }
+}
+
+/// Send End-Of-Interrupt (EOI) signal
+///
+/// This must be called at the end of an IRQ handler to acknowledge
+/// the interrupt and allow further interrupts.
+pub fn send_eoi(irq: u8) {
+    const EOI: u8 = 0x20;
+
+    unsafe {
+        // If IRQ came from PIC2, send EOI to both PICs
+        if irq >= 8 {
+            outb(PIC2_CMD, EOI);
+        }
+        outb(PIC1_CMD, EOI);
+    }
+}
+
+/// Check if an IRQ is a spurious IRQ
+///
+/// Spurious IRQs (IRQ7 or IRQ15) can occur due to electrical noise
+/// or race conditions. They should be checked before handling.
+pub fn is_spurious(irq: u8) -> bool {
+    const ISR_READ: u8 = 0x0B;
+
+    if irq == 7 {
+        // Check PIC1's In-Service Register
+        unsafe {
+            outb(PIC1_CMD, ISR_READ);
+            let isr = inb(PIC1_CMD);
+            // If bit 7 is not set, it's spurious
+            return (isr & 0x80) == 0;
+        }
+    } else if irq == 15 {
+        // Check PIC2's In-Service Register
+        unsafe {
+            outb(PIC2_CMD, ISR_READ);
+            let isr = inb(PIC2_CMD);
+            // If bit 7 is not set, it's spurious
+            if (isr & 0x80) == 0 {
+                // Still need to send EOI to PIC1 (for cascade)
+                outb(PIC1_CMD, 0x20);
+                return true;
+            }
+        }
+    }
+    false
+}
+
+/// Get the current IRQ mask for both PICs
+pub fn get_mask() -> u16 {
+    unsafe {
+        let mask1 = inb(PIC1_DATA) as u16;
+        let mask2 = inb(PIC2_DATA) as u16;
+        mask1 | (mask2 << 8)
+    }
+}
+
+/// Set the IRQ mask for both PICs
+pub fn set_mask(mask: u16) {
+    unsafe {
+        outb(PIC1_DATA, mask as u8);
+        outb(PIC2_DATA, (mask >> 8) as u8);
+    }
+}
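Typical usage of this driver, sketched under the assumption that a handler for IRQ7 is wired up elsewhere (both function names are illustrative):

    fn enable_keyboard() {
        pic::init();                          // remap to 0x20-0x2F, all IRQs masked
        pic::unmask_irq(pic::irq::KEYBOARD);  // enable only the lines we handle
    }

    fn irq7_handler() {
        // IRQ7 may be spurious; a spurious master IRQ must not be EOI'd.
        if pic::is_spurious(pic::irq::LPT1) {
            return;
        }
        // ... service the device ...
        pic::send_eoi(pic::irq::LPT1);
    }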
diff --git a/src/arch/x86_64/syscall.rs b/src/arch/x86_64/syscall.rs
new file mode 100644
index 0000000..0aa1c3b
--- /dev/null
+++ b/src/arch/x86_64/syscall.rs
@@ -0,0 +1,232 @@
+//! System Call Interface for x86-64
+//!
+//! This module implements system calls using INT 0x80.
+//! This is simpler than SYSCALL/SYSRET and uses the existing IDT infrastructure.
+//!
+//! Syscall Convention:
+//! - RAX = syscall number
+//! - RDI, RSI, RDX, R10, R8, R9 = arguments
+//! - RAX = return value
+
+use core::arch::asm;
+use crate::arch::x86_64::gdt;
+
+/// System call interrupt vector
+pub const SYSCALL_VECTOR: u8 = 0x80;
+
+/// System call numbers
+pub mod numbers {
+    pub const EXIT: u64 = 0;
+    pub const WRITE: u64 = 1;
+    pub const YIELD: u64 = 2;
+    pub const GETPID: u64 = 3;
+}
+
+/// Initialize the syscall interface
+///
+/// This adds the INT 0x80 handler to the IDT.
+pub fn init() {
+    use crate::arch::x86_64::interrupts::{GateType, set_handler};
+
+    // Set up INT 0x80 as a trap gate with DPL 3 (user-callable)
+    set_handler(SYSCALL_VECTOR, syscall_entry as *const () as u64, GateType::Trap, 3);
+}
+
+/// Syscall entry point
+///
+/// This is registered as the INT 0x80 handler.
+/// Stack on entry (pushed by CPU):
+/// - SS, RSP, RFLAGS, CS, RIP (in 64-bit mode, SS:RSP is pushed even
+///   when there is no privilege change)
+/// - No error code for software interrupts
+#[unsafe(naked)]
+extern "C" fn syscall_entry() {
+    core::arch::naked_asm!(
+        // No error code for software interrupts
+        // Save all registers
+        "push rax",
+        "push rbx",
+        "push rcx",
+        "push rdx",
+        "push rsi",
+        "push rdi",
+        "push rbp",
+        "push r8",
+        "push r9",
+        "push r10",
+        "push r11",
+        "push r12",
+        "push r13",
+        "push r14",
+        "push r15",
+
+        // Call Rust syscall handler
+        // First arg (RDI) = pointer to saved state
+        "mov rdi, rsp",
+        "call {handler}",
+
+        // Return value is in RAX, save it to the stack frame
+        "mov [rsp + 14*8], rax", // Overwrite saved RAX
+
+        // Restore all registers
+        "pop r15",
+        "pop r14",
+        "pop r13",
+        "pop r12",
+        "pop r11",
+        "pop r10",
+        "pop r9",
+        "pop r8",
+        "pop rbp",
+        "pop rdi",
+        "pop rsi",
+        "pop rdx",
+        "pop rcx",
+        "pop rbx",
+        "pop rax",
+
+        // Return from interrupt
+        "iretq",
+
+        handler = sym syscall_handler_rust,
+    );
+}
+
+/// Saved register state for syscall
+#[repr(C)]
+pub struct SyscallFrame {
+    pub r15: u64,
+    pub r14: u64,
+    pub r13: u64,
+    pub r12: u64,
+    pub r11: u64,
+    pub r10: u64,
+    pub r9: u64,
+    pub r8: u64,
+    pub rbp: u64,
+    pub rdi: u64,
+    pub rsi: u64,
+    pub rdx: u64,
+    pub rcx: u64,
+    pub rbx: u64,
+    pub rax: u64,
+    // CPU-pushed
+    pub rip: u64,
+    pub cs: u64,
+    pub rflags: u64,
+    pub rsp: u64,
+    pub ss: u64,
+}
+
+/// Rust syscall handler
+extern "C" fn syscall_handler_rust(frame: &SyscallFrame) -> u64 {
+    let num = frame.rax;
+    let arg1 = frame.rdi;
+    let arg2 = frame.rsi;
+    let arg3 = frame.rdx;
+    let arg4 = frame.r10;
+    let arg5 = frame.r8;
+
+    match num {
+        numbers::EXIT => {
+            use crate::serial::SerialPort;
+            use core::fmt::Write;
+            let mut serial = unsafe { SerialPort::new(0x3F8) };
+            writeln!(serial, "\n[SYSCALL] exit({})", arg1).ok();
+
+            // For now, just halt. In a real OS, we'd terminate the process
+            // and schedule another one.
+            loop {
+                unsafe { asm!("cli; hlt", options(nostack, nomem)); }
+            }
+        }
+
+        numbers::WRITE => {
+            // write(fd, buf, len) -> bytes_written
+            if arg1 == 1 || arg1 == 2 {
+                // stdout or stderr -> serial
+                use crate::serial::SerialPort;
+                let mut serial = unsafe { SerialPort::new(0x3F8) };
+
+                let buf = arg2 as *const u8;
+                let len = arg3 as usize;
+
+                // Safety: we trust the user buffer for now
+                // In a real OS, we'd validate it's in user memory
+                for i in 0..len {
+                    let c = unsafe { *buf.add(i) };
+                    serial.write_byte(c);
+                }
+
+                len as u64
+            } else {
+                u64::MAX // -1 = error
+            }
+        }
+
+        numbers::YIELD => {
+            // No-op for single process kernel
+            0
+        }
+
+        numbers::GETPID => {
+            // Return current process ID
+            crate::process::current().pid as u64
+        }
+
+        _ => {
+            // Unknown syscall
+            use crate::serial::SerialPort;
+            use core::fmt::Write;
+            let mut serial = unsafe { SerialPort::new(0x3F8) };
+            writeln!(serial, "[SYSCALL] Unknown syscall: {}", num).ok();
+            u64::MAX
+        }
+    }
+}
+
+/// Make a syscall from user mode (for testing)
+#[inline]
+pub unsafe fn syscall0(num: u64) -> u64 {
+    let ret: u64;
+    unsafe {
+        asm!(
+            "int 0x80",
+            in("rax") num,
+            lateout("rax") ret,
+            options(nostack)
+        );
+    }
+    ret
+}
+
+#[inline]
+pub unsafe fn syscall1(num: u64, arg1: u64) -> u64 {
+    let ret: u64;
+    unsafe {
+        asm!(
+            "int 0x80",
+            in("rax") num,
+            in("rdi") arg1,
+            lateout("rax") ret,
+            options(nostack)
+        );
+    }
+    ret
+}
+
+#[inline]
+pub unsafe fn syscall3(num: u64, arg1: u64, arg2: u64, arg3: u64) -> u64 {
+    let ret: u64;
+    unsafe {
+        asm!(
+            "int 0x80",
+            in("rax") num,
+            in("rdi") arg1,
+            in("rsi") arg2,
+            in("rdx") arg3,
+            lateout("rax") ret,
+            options(nostack)
+        );
+    }
+    ret
+}
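From the caller's side, the wrappers map straight onto the register convention documented at the top of the file. A user-side sketch (the function name and buffer are illustrative):

    fn user_demo() {
        let msg = b"hi\n";
        // write(fd = 1, buf, len): number in RAX, args in RDI/RSI/RDX
        let written = unsafe {
            syscall3(numbers::WRITE, 1, msg.as_ptr() as u64, msg.len() as u64)
        };
        debug_assert_eq!(written, msg.len() as u64);
        // exit(0): the handler never returns control here
        unsafe { syscall1(numbers::EXIT, 0) };
    }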
diff --git a/src/lib.rs b/src/lib.rs
index 5105755..a0922a1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -109,8 +109,15 @@ pub fn kernel_init(info: &BootInfo) -> ! {
     // Get serial port for output
     let mut serial = unsafe { SerialPort::new(0x3F8) };
 
+    // CRITICAL: Initialize and remap the PIC first!
+    // The legacy PIC's default IRQ0 (timer) maps to vector 0x08, which conflicts
+    // with the Double Fault exception. This causes spurious "double faults" when
+    // the timer fires. We remap the PIC to vectors 0x20-0x2F and mask all IRQs.
+    arch::x86_64::pic::init();
+
     writeln!(serial, "").ok();
     writeln!(serial, ">>> Entering kernel_init()").ok();
+    writeln!(serial, "    PIC remapped and masked").ok();
     writeln!(serial, "    Boot method: {:?}", info.boot_method).ok();
 
     // Report memory information from boot
@@ -295,11 +302,52 @@ pub fn kernel_init(info: &BootInfo) -> ! {
         }
     }
 
-    // Reload GDT to higher-half address before removing identity mapping
+    // Initialize GDT with TSS for user mode support
+    // We need a kernel stack for ring 0 transitions from ring 3
     writeln!(serial, "").ok();
-    writeln!(serial, ">>> Reloading GDT to higher-half...").ok();
-    arch::x86_64::gdt::reload();
-    writeln!(serial, "    GDT reloaded").ok();
+    writeln!(serial, ">>> Initializing GDT with TSS...").ok();
+
+    // Allocate a kernel stack for syscall/interrupt handling from user mode
+    // We'll use 4 pages (16KB) for the kernel stack
+    let kernel_stack_base = VirtAddr::new(0xFFFFFE8000010000); // In temp region
+
+    // Allocate and map 4 pages for the kernel stack
+    for i in 0..4 {
+        let frame = memory::frame::allocate_frame().expect("Failed to allocate kernel stack");
+        let page_virt = VirtAddr::new(kernel_stack_base.as_u64() + (i * 0x1000) as u64);
+        memory::paging::map_4kb(page_virt, frame.start_address(), memory::paging::flags::KERNEL_DATA)
+            .expect("Failed to map kernel stack");
+    }
+
+    // Stack grows down, so point to top of the 4-page region
+    let kernel_stack_top = kernel_stack_base.as_u64() + 0x4000;
+    arch::x86_64::gdt::init(kernel_stack_top);
+    writeln!(serial, "    GDT with TSS initialized").ok();
+    writeln!(serial, "    Kernel stack at {:#x}", kernel_stack_top).ok();
+
+    // Initialize syscall interface (INT 0x80)
+    writeln!(serial, "").ok();
+    writeln!(serial, ">>> Initializing syscall interface...").ok();
+    arch::x86_64::syscall::init();
+    writeln!(serial, "    INT 0x80 syscall handler installed").ok();
+
+    // Set up a dedicated stack for double fault handling (IST1)
+    // This ensures the double fault handler has a known-good stack even if
+    // the main stack is corrupted (e.g., during failed privilege transitions)
+    writeln!(serial, "").ok();
+    writeln!(serial, ">>> Setting up IST for double fault...").ok();
+    let ist1_stack_base = VirtAddr::new(0xFFFFFE8000020000); // Separate from kernel stack
+    // Allocate 4 pages (16KB) - must be enough for exception frame + handler execution
+    for i in 0..4 {
+        let frame = memory::frame::allocate_frame().expect("Failed to allocate IST1 stack");
+        let page_virt = VirtAddr::new(ist1_stack_base.as_u64() + (i * 0x1000) as u64);
+        memory::paging::map_4kb(page_virt, frame.start_address(), memory::paging::flags::KERNEL_DATA)
+            .expect("Failed to map IST1 stack");
+    }
+    let ist1_stack_top = ist1_stack_base.as_u64() + 0x4000; // 16KB stack
+    arch::x86_64::gdt::set_ist(1, ist1_stack_top);
+    arch::x86_64::interrupts::set_double_fault_ist(1);
+    writeln!(serial, "    IST1 (double fault) stack at {:#x}", ist1_stack_top).ok();
 
     // Remove identity mapping - no longer needed now that we're in higher-half
     writeln!(serial, "").ok();
@@ -314,15 +362,162 @@ pub fn kernel_init(info: &BootInfo) -> ! {
         writeln!(serial, "    Identity mapping removed (PML4[0] cleared)").ok();
     }
 
+    // Test user-mode execution
     writeln!(serial, "").ok();
-    writeln!(serial, "Kernel initialization complete.").ok();
-    writeln!(serial, "Halting CPU.").ok();
+    writeln!(serial, ">>> Testing user-mode execution...").ok();
 
-    // Halt the CPU
-    loop {
-        unsafe {
-            core::arch::asm!("cli; hlt", options(nostack, nomem));
+    // Create a new process for user mode test
+    let user_pid = process::create().expect("Failed to create user process");
+    writeln!(serial, "    Created user process {}", user_pid).ok();
+
+    // Get the process's page table for mapping user pages
+    let user_process = process::get(user_pid).unwrap();
+    writeln!(serial, "    Process page table: {:#x}", user_process.page_table).ok();
+
+    // Allocate frames for user code and stack
+    let user_code_frame = memory::frame::allocate_frame().expect("Failed to allocate user code frame");
+    let user_stack_frame = memory::frame::allocate_frame().expect("Failed to allocate user stack frame");
+
+    // User virtual addresses (in low memory, user-accessible)
+    let user_code_virt = VirtAddr::new(0x400000); // 4MB - typical user code location
+    let user_stack_virt = VirtAddr::new(0x800000); // 8MB - user stack base
+
+    // First switch to the user process's address space to set up its mappings
+    unsafe { process::switch_address_space(user_pid).expect("Failed to switch to user address space"); }
+
+    // Verify kernel stacks are accessible in user address space
+    // (They should be, since we copy kernel PML4 entries during process creation)
+    writeln!(serial, "    Verifying kernel stack mappings...").ok();
+    if let Some(phys) = memory::paging::translate(VirtAddr::new(kernel_stack_top - 8)) {
+        writeln!(serial, "    Kernel stack: {:#x} -> {:#x}", kernel_stack_top - 8, phys).ok();
+    } else {
+        panic!("Kernel stack not mapped in user address space!");
+    }
+    if let Some(phys) = memory::paging::translate(VirtAddr::new(ist1_stack_top - 8)) {
+        writeln!(serial, "    IST1 stack: {:#x} -> {:#x}", ist1_stack_top - 8, phys).ok();
+    } else {
+        panic!("IST1 stack not mapped in user address space!");
+    }
+
+    // Map user code page (readable, executable, user-accessible)
+    memory::paging::map_4kb(user_code_virt, user_code_frame.start_address(), memory::paging::flags::USER_CODE)
+        .expect("Failed to map user code");
+    writeln!(serial, "    Mapped user code at {:#x}", user_code_virt).ok();
+
+    // Map user stack page (readable, writable, user-accessible)
+    memory::paging::map_4kb(user_stack_virt, user_stack_frame.start_address(), memory::paging::flags::USER_DATA)
+        .expect("Failed to map user stack");
+    writeln!(serial, "    Mapped user stack at {:#x}", user_stack_virt).ok();
+
+    // Write a simple user program that:
+    // 1. Calls write(1, "Hello from user mode!\n", 22)
+    // 2. Calls exit(0)
+    let user_code_ptr = user_code_virt.as_u64() as *mut u8;
+    let message = b"Hello from user mode!\n";
+    let message_offset = 64u64; // Place message after code
+
+    unsafe {
+        let code: &[u8] = &[
+            // mov rax, 1 (WRITE syscall)
+            0x48, 0xc7, 0xc0, 0x01, 0x00, 0x00, 0x00,
+            // mov rdi, 1 (fd = stdout)
+            0x48, 0xc7, 0xc7, 0x01, 0x00, 0x00, 0x00,
+            // lea rsi, [rip + message_offset] - we'll use absolute address instead
+            // mov rsi, 0x400040 (message address = code_base + 64)
+            0x48, 0xbe,
+            ((user_code_virt.as_u64() + message_offset) & 0xFF) as u8,
+            (((user_code_virt.as_u64() + message_offset) >> 8) & 0xFF) as u8,
+            (((user_code_virt.as_u64() + message_offset) >> 16) & 0xFF) as u8,
+            (((user_code_virt.as_u64() + message_offset) >> 24) & 0xFF) as u8,
+            (((user_code_virt.as_u64() + message_offset) >> 32) & 0xFF) as u8,
+            (((user_code_virt.as_u64() + message_offset) >> 40) & 0xFF) as u8,
+            (((user_code_virt.as_u64() + message_offset) >> 48) & 0xFF) as u8,
+            (((user_code_virt.as_u64() + message_offset) >> 56) & 0xFF) as u8,
+            // mov rdx, 22 (length)
+            0x48, 0xc7, 0xc2, 0x16, 0x00, 0x00, 0x00,
+            // int 0x80
+            0xcd, 0x80,
+            // mov rax, 0 (EXIT syscall)
+            0x48, 0xc7, 0xc0, 0x00, 0x00, 0x00, 0x00,
+            // mov rdi, 0 (exit code)
+            0x48, 0xc7, 0xc7, 0x00, 0x00, 0x00, 0x00,
+            // int 0x80
+            0xcd, 0x80,
+            // hlt (should never reach here)
+            0xf4,
+        ];
+
+        // Write the code
+        for (i, &byte) in code.iter().enumerate() {
+            core::ptr::write_volatile(user_code_ptr.add(i), byte);
+        }
+
+        // Write the message after the code
+        let message_ptr = user_code_ptr.add(message_offset as usize);
+        for (i, &byte) in message.iter().enumerate() {
+            core::ptr::write_volatile(message_ptr.add(i), byte);
+        }
+    }
+    writeln!(serial, "    Wrote user program ({} bytes code + {} bytes data)", 50, message.len()).ok();
+
+    // User stack pointer (top of stack page)
+    let user_stack_top = user_stack_virt.as_u64() + 0x1000;
+
+    // First, let's test that user mode works by running code in kernel
+    // that verifies the segments are correct
+    writeln!(serial, "").ok();
+    writeln!(serial, ">>> Testing IRETQ mechanism with kernel mode...").ok();
+
+    // Test: Do a simple kernel-to-kernel IRETQ to verify the mechanism
+    unsafe {
+        core::arch::asm!(
+            // Push a simple return frame for kernel mode
+            "push 0x10",              // SS (kernel data)
+            "push rsp",               // RSP (current stack)
+            "add qword ptr [rsp], 8", // Adjust for the push
+            "pushfq",                 // RFLAGS
+            "push 0x08",              // CS (kernel code)
+            "lea rax, [rip + 2f]",    // RIP (label 2)
+            "push rax",
+            "iretq",
+            "2:",
+            out("rax") _,
+            options(preserves_flags)
+        );
+    }
+    writeln!(serial, "    Kernel IRETQ test passed!").ok();
+
+    // Debug: Print the GDT segment descriptor values
+    writeln!(serial, "").ok();
+    writeln!(serial, ">>> Verifying GDT entries...").ok();
+    let user_cs = arch::x86_64::gdt::user_cs();
+    let user_ds = arch::x86_64::gdt::user_ds();
+    writeln!(serial, "    USER_CS selector: {:#x}", user_cs).ok();
+    writeln!(serial, "    USER_DS selector: {:#x}", user_ds).ok();
+
+    // Test loading user data segment while in kernel mode
+    // This should work: loading a DPL=3 data segment is legal regardless
+    // of CPL, since the check only requires DPL >= max(CPL, RPL)
+    writeln!(serial, "    Testing user segment load in kernel mode...").ok();
+    unsafe {
+        core::arch::asm!(
+            "mov ax, {0:x}",
+            "mov ds, ax",   // This might fail with GPF if segment is invalid
+            "mov ax, 0x10", // Restore kernel data segment
+            "mov ds, ax",
+            in(reg) user_ds as u64,
+            out("rax") _,
+            options(nostack, preserves_flags)
+        );
+    }
+    writeln!(serial, "    User segment load test passed!").ok();
+
+    writeln!(serial, "").ok();
"").ok(); + writeln!(serial, ">>> Jumping to user mode (ring 3)...").ok(); + writeln!(serial, " Entry: {:#x}, Stack: {:#x}", user_code_virt, user_stack_top).ok(); + + // Jump to user mode! (This won't return) + unsafe { + process::jump_to_user(user_code_virt.as_u64(), user_stack_top); } } diff --git a/src/memory/paging.rs b/src/memory/paging.rs index cd5f1fe..e78a398 100644 --- a/src/memory/paging.rs +++ b/src/memory/paging.rs @@ -444,15 +444,18 @@ pub fn remove_identity_mapping() { // ============================================================================ /// Ensure a PML4 entry exists, creating a PDPT if necessary -fn ensure_pml4_entry(pml4_idx: usize, _flags: u64) -> Result<(), PagingError> { +fn ensure_pml4_entry(pml4_idx: usize, page_flags: u64) -> Result<(), PagingError> { let entry = read_pml4(pml4_idx); if !entry.is_present() { let frame = allocate_frame()?; let phys = frame.start_address(); // Link the new PDPT into the PML4 first - // Use only table flags (PRESENT | WRITABLE) for intermediate entries - let table_flags = flags::PRESENT | flags::WRITABLE; + // For user pages, the USER bit must be set in all intermediate entries + let mut table_flags = flags::PRESENT | flags::WRITABLE; + if page_flags & flags::USER != 0 { + table_flags |= flags::USER; + } let new_entry = PageTableEntry::new(phys, table_flags); write_pml4(pml4_idx, new_entry); @@ -462,13 +465,18 @@ fn ensure_pml4_entry(pml4_idx: usize, _flags: u64) -> Result<(), PagingError> { // Zero the new page table via recursive mapping // Now that PML4[pml4_idx] is set, pdpt_table_addr gives us access zero_page_table(pdpt_table_addr(pml4_idx)); + } else if page_flags & flags::USER != 0 && !entry.is_user() { + // Existing entry needs USER bit added + let mut updated = entry; + updated.set_flags(entry.flags() | flags::USER); + write_pml4(pml4_idx, updated); } Ok(()) } /// Ensure a PDPT entry exists, creating a PD if necessary -fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(), PagingError> { - ensure_pml4_entry(pml4_idx, flags)?; +fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, page_flags: u64) -> Result<(), PagingError> { + ensure_pml4_entry(pml4_idx, page_flags)?; let entry = read_pdpt(pml4_idx, pdpt_idx); if entry.is_huge() { @@ -479,8 +487,11 @@ fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(), let phys = frame.start_address(); // Link the new PD into the PDPT first - // Use only table flags for intermediate entries - let table_flags = flags::PRESENT | flags::WRITABLE; + // For user pages, the USER bit must be set in all intermediate entries + let mut table_flags = flags::PRESENT | flags::WRITABLE; + if page_flags & flags::USER != 0 { + table_flags |= flags::USER; + } let new_entry = PageTableEntry::new(phys, table_flags); write_pdpt(pml4_idx, pdpt_idx, new_entry); @@ -489,13 +500,18 @@ fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(), // Zero the new page table via recursive mapping zero_page_table(pd_table_addr(pml4_idx, pdpt_idx)); + } else if page_flags & flags::USER != 0 && !entry.is_user() { + // Existing entry needs USER bit added + let mut updated = entry; + updated.set_flags(entry.flags() | flags::USER); + write_pdpt(pml4_idx, pdpt_idx, updated); } Ok(()) } /// Ensure a PD entry exists, creating a PT if necessary -fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64) -> Result<(), PagingError> { - ensure_pdpt_entry(pml4_idx, pdpt_idx, flags)?; +fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: 
diff --git a/src/memory/paging.rs b/src/memory/paging.rs
index cd5f1fe..e78a398 100644
--- a/src/memory/paging.rs
+++ b/src/memory/paging.rs
@@ -444,15 +444,18 @@ pub fn remove_identity_mapping() {
 // ============================================================================
 
 /// Ensure a PML4 entry exists, creating a PDPT if necessary
-fn ensure_pml4_entry(pml4_idx: usize, _flags: u64) -> Result<(), PagingError> {
+fn ensure_pml4_entry(pml4_idx: usize, page_flags: u64) -> Result<(), PagingError> {
     let entry = read_pml4(pml4_idx);
 
     if !entry.is_present() {
         let frame = allocate_frame()?;
         let phys = frame.start_address();
 
         // Link the new PDPT into the PML4 first
-        // Use only table flags (PRESENT | WRITABLE) for intermediate entries
-        let table_flags = flags::PRESENT | flags::WRITABLE;
+        // For user pages, the USER bit must be set in all intermediate entries
+        let mut table_flags = flags::PRESENT | flags::WRITABLE;
+        if page_flags & flags::USER != 0 {
+            table_flags |= flags::USER;
+        }
         let new_entry = PageTableEntry::new(phys, table_flags);
         write_pml4(pml4_idx, new_entry);
 
@@ -462,13 +465,18 @@ fn ensure_pml4_entry(pml4_idx: usize, _flags: u64) -> Result<(), PagingError> {
         // Zero the new page table via recursive mapping
         // Now that PML4[pml4_idx] is set, pdpt_table_addr gives us access
         zero_page_table(pdpt_table_addr(pml4_idx));
+    } else if page_flags & flags::USER != 0 && !entry.is_user() {
+        // Existing entry needs USER bit added
+        let mut updated = entry;
+        updated.set_flags(entry.flags() | flags::USER);
+        write_pml4(pml4_idx, updated);
     }
 
     Ok(())
 }
 
 /// Ensure a PDPT entry exists, creating a PD if necessary
-fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(), PagingError> {
-    ensure_pml4_entry(pml4_idx, flags)?;
+fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, page_flags: u64) -> Result<(), PagingError> {
+    ensure_pml4_entry(pml4_idx, page_flags)?;
 
     let entry = read_pdpt(pml4_idx, pdpt_idx);
     if entry.is_huge() {
@@ -479,8 +487,11 @@ fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(),
         let phys = frame.start_address();
 
         // Link the new PD into the PDPT first
-        // Use only table flags for intermediate entries
-        let table_flags = flags::PRESENT | flags::WRITABLE;
+        // For user pages, the USER bit must be set in all intermediate entries
+        let mut table_flags = flags::PRESENT | flags::WRITABLE;
+        if page_flags & flags::USER != 0 {
+            table_flags |= flags::USER;
+        }
         let new_entry = PageTableEntry::new(phys, table_flags);
         write_pdpt(pml4_idx, pdpt_idx, new_entry);
 
@@ -489,13 +500,18 @@ fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(),
         // Zero the new page table via recursive mapping
         zero_page_table(pd_table_addr(pml4_idx, pdpt_idx));
+    } else if page_flags & flags::USER != 0 && !entry.is_user() {
+        // Existing entry needs USER bit added
+        let mut updated = entry;
+        updated.set_flags(entry.flags() | flags::USER);
+        write_pdpt(pml4_idx, pdpt_idx, updated);
     }
 
     Ok(())
 }
 
 /// Ensure a PD entry exists, creating a PT if necessary
-fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64) -> Result<(), PagingError> {
-    ensure_pdpt_entry(pml4_idx, pdpt_idx, flags)?;
+fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, page_flags: u64) -> Result<(), PagingError> {
+    ensure_pdpt_entry(pml4_idx, pdpt_idx, page_flags)?;
 
     let entry = read_pd(pml4_idx, pdpt_idx, pd_idx);
     if entry.is_huge() {
@@ -506,8 +522,11 @@ fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64)
         let phys = frame.start_address();
 
         // Link the new PT into the PD first
-        // Use only table flags for intermediate entries
-        let table_flags = flags::PRESENT | flags::WRITABLE;
+        // For user pages, the USER bit must be set in all intermediate entries
+        let mut table_flags = flags::PRESENT | flags::WRITABLE;
+        if page_flags & flags::USER != 0 {
+            table_flags |= flags::USER;
+        }
         let new_entry = PageTableEntry::new(phys, table_flags);
         write_pd(pml4_idx, pdpt_idx, pd_idx, new_entry);
 
@@ -516,6 +535,11 @@ fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64)
         // Zero the new page table via recursive mapping
         zero_page_table(pt_table_addr(pml4_idx, pdpt_idx, pd_idx));
+    } else if page_flags & flags::USER != 0 && !entry.is_user() {
+        // Existing entry needs USER bit added
+        let mut updated = entry;
+        updated.set_flags(entry.flags() | flags::USER);
+        write_pd(pml4_idx, pdpt_idx, pd_idx, updated);
     }
 
     Ok(())
 }
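The rule the three `ensure_*` helpers now implement: a user-mode access is permitted only if the USER bit is set at every level of the page walk, so intermediate tables may be upgraded to USER but are never downgraded. A sketch of the effective check (USER is bit 2 of each entry; the function is illustrative):

    // Effective user accessibility of a 4 KiB mapping:
    fn user_accessible(pml4e: u64, pdpte: u64, pde: u64, pte: u64) -> bool {
        const USER: u64 = 1 << 2;
        // ANDing the bit across all levels mirrors what the MMU requires.
        (pml4e & pdpte & pde & pte & USER) != 0
    }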
diff --git a/src/process/mod.rs b/src/process/mod.rs
index 19532e8..ba3d782 100644
--- a/src/process/mod.rs
+++ b/src/process/mod.rs
@@ -219,10 +219,17 @@ fn init_page_table(pml4_phys: PhysAddr) -> Result<(), ProcessError> {
     }
 
     // Copy kernel-space entries (256-511) from current PML4
-    // These include the recursive mapping (510) and kernel mapping (511)
+    // EXCEPT for entry 510 (recursive mapping) which needs to point to THIS PML4
     for i in 256..512 {
-        let entry = paging::read_pml4(i);
-        core::ptr::write_volatile(pml4_ptr.add(i), entry.bits());
+        if i == 510 {
+            // Set recursive mapping to point to this new PML4 itself
+            // Use PRESENT | WRITABLE flags (same as kernel recursive mapping)
+            let self_ref_entry = pml4_phys.as_u64() | flags::PRESENT | flags::WRITABLE;
+            core::ptr::write_volatile(pml4_ptr.add(i), self_ref_entry);
+        } else {
+            let entry = paging::read_pml4(i);
+            core::ptr::write_volatile(pml4_ptr.add(i), entry.bits());
+        }
     }
 }
 
@@ -321,3 +328,93 @@ pub fn switch_to_kernel() {
         );
     }
 }
+
+/// Jump to user mode (ring 3)
+///
+/// This uses IRETQ to transition from ring 0 to ring 3.
+///
+/// # Arguments
+/// * `entry` - User code entry point
+/// * `stack` - User stack pointer
+///
+/// # Safety
+/// The entry point and stack must be valid mapped addresses in user space.
+/// The current process's page table must have proper user mappings.
+pub unsafe fn jump_to_user(entry: u64, stack: u64) -> ! {
+    use crate::arch::x86_64::gdt;
+    use crate::serial::SerialPort;
+    use core::fmt::Write;
+
+    let user_cs = gdt::user_cs() as u64;
+    let user_ds = gdt::user_ds() as u64;
+
+    // Debug: Print what we're about to push
+    let mut serial = unsafe { SerialPort::new(0x3F8) };
+    writeln!(serial, "    IRETQ frame: SS={:#x} RSP={:#x} RFLAGS=0x202 CS={:#x} RIP={:#x}",
+             user_ds, stack, user_cs, entry).ok();
+
+    // Verify the entry point is mapped and accessible
+    use crate::memory::paging;
+    use crate::memory::VirtAddr;
+    let entry_virt = VirtAddr::new(entry);
+    if let Some((phys, size, flags)) = paging::get_mapping_info(entry_virt) {
+        writeln!(serial, "    Entry mapping: phys={:#x} size={:?} flags={:#x}", phys, size, flags).ok();
+    } else {
+        writeln!(serial, "    WARNING: Entry point {:#x} is NOT MAPPED!", entry).ok();
+    }
+
+    // Verify the stack is mapped
+    let stack_virt = VirtAddr::new(stack - 8); // Stack will be decremented
+    if let Some((phys, size, flags)) = paging::get_mapping_info(stack_virt) {
+        writeln!(serial, "    Stack mapping: phys={:#x} size={:?} flags={:#x}", phys, size, flags).ok();
+    } else {
+        writeln!(serial, "    WARNING: Stack {:#x} is NOT MAPPED!", stack).ok();
+    }
+
+    // Flush TLB to ensure all page table changes are visible
+    // This reloads CR3 which flushes the entire TLB
+    unsafe {
+        let cr3: u64;
+        core::arch::asm!("mov {}, cr3", out(reg) cr3, options(nostack, preserves_flags));
+        core::arch::asm!("mov cr3, {}", in(reg) cr3, options(nostack, preserves_flags));
+    }
+
+    // IRETQ expects the stack to contain (top to bottom):
+    //   [RSP+0]  RIP    - last pushed, first popped
+    //   [RSP+8]  CS
+    //   [RSP+16] RFLAGS
+    //   [RSP+24] RSP
+    //   [RSP+32] SS     - first pushed, last popped
+    //
+    // Note: DS/ES/FS/GS must not carry ring-0 selectors into ring 3;
+    // the null selector (0) is valid in 64-bit mode, so we load that.
+    unsafe {
+        core::arch::asm!(
+            // Set DS/ES/FS/GS to null using r11 - avoid clobbering input registers
+            "xor r11d, r11d",
+            "mov ds, r11w",
+            "mov es, r11w",
+            "mov fs, r11w",
+            "mov gs, r11w",
+
+            // Memory barrier to ensure all stores are complete
+            "mfence",
+
+            // Build IRETQ frame on stack
+            "push {user_ss}", // SS
+            "push {stack}",   // RSP
+            "push 0x202",     // RFLAGS (IF=1, reserved bit 1 = 1)
+            "push {user_cs}", // CS
+            "push {entry}",   // RIP
+
+            // Jump to user mode
+            "iretq",
+
+            user_cs = in(reg) user_cs,
+            user_ss = in(reg) user_ds, // SS same as DS for user mode
+            entry = in(reg) entry,
+            stack = in(reg) stack,
+            options(noreturn)
+        );
+    }
+}
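The entry-510 special case in init_page_table is what keeps the recursive mapping coherent per process: slot 510 of each PML4 must point at that PML4's own frame, not the kernel's. With that in place, every address space sees its own PML4 at the same fixed virtual address. A sketch of the arithmetic, assuming recursive slot 510 (the constant names are illustrative):

    // Walking through slot 510 four times lands on the PML4 itself.
    const R: u64 = 510;
    const PML4_VIRT: u64 = 0xFFFF_0000_0000_0000 // sign extension for indices >= 256
        | (R << 39) | (R << 30) | (R << 21) | (R << 12);
    const _: () = assert!(PML4_VIRT == 0xFFFF_FF7F_BFDF_E000);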