Mirror of https://github.com/xomboverlord/xomb.git
Add user mode execution with syscall support
- Add PIC driver to remap IRQs 0-15 to vectors 0x20-0x2F, preventing conflicts with CPU exception vectors (IRQ0/timer was hitting vector 0x08/Double Fault)
- Extend GDT with user code/data segments (ring 3) and TSS for privilege level transitions
- Add INT 0x80 syscall handler supporting write() and exit() syscalls
- Configure IST1 for the double fault handler to ensure reliable exception handling during privilege transitions
- Propagate the USER flag through all intermediate page table entries
- Add jump_to_user() for transitioning to ring 3 via IRETQ

The kernel now successfully executes user-mode programs that can print to serial output and exit cleanly via syscalls.
@@ -1,12 +1,29 @@
|
||||
//! Global Descriptor Table (GDT) for x86-64
|
||||
//!
|
||||
//! This module provides a kernel-space GDT that can be used after
|
||||
//! identity mapping is removed.
|
||||
//! This module provides the GDT with kernel and user segments, plus TSS.
|
||||
//!
|
||||
//! GDT Layout:
|
||||
//! - 0x00: Null descriptor
|
||||
//! - 0x08: Kernel code segment (ring 0)
|
||||
//! - 0x10: Kernel data segment (ring 0)
|
||||
//! - 0x18: User data segment (ring 3)
|
||||
//! - 0x20: User code segment (ring 3)
|
||||
//! - 0x28: TSS descriptor (16 bytes, spans 0x28-0x37)
|
||||
|
||||
use core::arch::asm;
|
||||
use core::mem::size_of;
|
||||
|
||||
/// GDT entry (segment descriptor)
|
||||
/// Segment selectors
|
||||
pub mod selectors {
|
||||
pub const NULL: u16 = 0x00;
|
||||
pub const KERNEL_CODE: u16 = 0x08;
|
||||
pub const KERNEL_DATA: u16 = 0x10;
|
||||
pub const USER_DATA: u16 = 0x18 | 3; // RPL 3
|
||||
pub const USER_CODE: u16 = 0x20 | 3; // RPL 3
|
||||
pub const TSS: u16 = 0x28;
|
||||
}
|
||||
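// Illustrative note (not part of this commit): a selector is the descriptor's
// byte offset into the GDT with the requested privilege level (RPL) in its low
// two bits, i.e. selector = (index << 3) | rpl. So USER_DATA = 0x18 | 3 = 0x1B
// (index 3, RPL 3) and USER_CODE = 0x20 | 3 = 0x23 (index 4, RPL 3).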
|
||||
/// GDT entry (segment descriptor) - 8 bytes
|
||||
#[repr(C, packed)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct GdtEntry {
|
||||
@@ -31,29 +48,133 @@ impl GdtEntry {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a 64-bit code segment descriptor
|
||||
pub const fn code64() -> Self {
|
||||
/// Create a 64-bit kernel code segment
|
||||
pub const fn kernel_code() -> Self {
|
||||
Self {
|
||||
limit_low: 0xFFFF,
|
||||
base_low: 0,
|
||||
base_mid: 0,
|
||||
access: 0x9A, // Present, ring 0, code, exec/read
|
||||
flags_limit_high: 0xAF, // 64-bit, limit high nibble
|
||||
access: 0x9A, // Present, DPL 0, code, exec/read
|
||||
flags_limit_high: 0xAF, // 64-bit, limit high
|
||||
base_high: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a data segment descriptor
|
||||
pub const fn data() -> Self {
|
||||
/// Create a kernel data segment
|
||||
pub const fn kernel_data() -> Self {
|
||||
Self {
|
||||
limit_low: 0xFFFF,
|
||||
base_low: 0,
|
||||
base_mid: 0,
|
||||
access: 0x92, // Present, ring 0, data, read/write
|
||||
access: 0x92, // Present, DPL 0, data, read/write
|
||||
flags_limit_high: 0xCF, // 32-bit, 4KB granularity
|
||||
base_high: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a 64-bit user code segment
|
||||
pub const fn user_code() -> Self {
|
||||
Self {
|
||||
limit_low: 0xFFFF,
|
||||
base_low: 0,
|
||||
base_mid: 0,
|
||||
access: 0xFA, // Present, DPL 3, code, exec/read
|
||||
flags_limit_high: 0xAF, // 64-bit, limit high
|
||||
base_high: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a user data segment
|
||||
pub const fn user_data() -> Self {
|
||||
Self {
|
||||
limit_low: 0xFFFF,
|
||||
base_low: 0,
|
||||
base_mid: 0,
|
||||
access: 0xF2, // Present, DPL 3, data, read/write
|
||||
flags_limit_high: 0xCF, // 32-bit, 4KB granularity
|
||||
base_high: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// TSS descriptor (16 bytes in 64-bit mode)
|
||||
#[repr(C, packed)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct TssDescriptor {
|
||||
limit_low: u16,
|
||||
base_low: u16,
|
||||
base_mid: u8,
|
||||
access: u8,
|
||||
flags_limit_high: u8,
|
||||
base_high: u8,
|
||||
base_upper: u32,
|
||||
reserved: u32,
|
||||
}
|
||||
|
||||
impl TssDescriptor {
|
||||
pub const fn null() -> Self {
|
||||
Self {
|
||||
limit_low: 0,
|
||||
base_low: 0,
|
||||
base_mid: 0,
|
||||
access: 0,
|
||||
flags_limit_high: 0,
|
||||
base_high: 0,
|
||||
base_upper: 0,
|
||||
reserved: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a TSS descriptor for the given TSS address and size
|
||||
pub fn new(base: u64, limit: u32) -> Self {
|
||||
Self {
|
||||
limit_low: limit as u16,
|
||||
base_low: base as u16,
|
||||
base_mid: (base >> 16) as u8,
|
||||
access: 0x89, // Present, 64-bit TSS (available)
|
||||
flags_limit_high: ((limit >> 16) as u8) & 0x0F,
|
||||
base_high: (base >> 24) as u8,
|
||||
base_upper: (base >> 32) as u32,
|
||||
reserved: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Task State Segment (TSS) for x86-64
|
||||
///
|
||||
/// The TSS is used primarily for:
|
||||
/// - RSP0: Stack to use when transitioning from ring 3 to ring 0
|
||||
/// - IST: Interrupt Stack Table for specific interrupts
|
||||
#[repr(C, packed)]
|
||||
pub struct Tss {
|
||||
reserved0: u32,
|
||||
/// Stack pointers for privilege levels 0-2
|
||||
pub rsp0: u64,
|
||||
pub rsp1: u64,
|
||||
pub rsp2: u64,
|
||||
reserved1: u64,
|
||||
/// Interrupt Stack Table (IST) entries 1-7
|
||||
pub ist: [u64; 7],
|
||||
reserved2: u64,
|
||||
reserved3: u16,
|
||||
/// I/O map base address
|
||||
pub iopb: u16,
|
||||
}
|
||||
|
||||
impl Tss {
|
||||
pub const fn new() -> Self {
|
||||
Self {
|
||||
reserved0: 0,
|
||||
rsp0: 0,
|
||||
rsp1: 0,
|
||||
rsp2: 0,
|
||||
reserved1: 0,
|
||||
ist: [0; 7],
|
||||
reserved2: 0,
|
||||
reserved3: 0,
|
||||
iopb: size_of::<Tss>() as u16,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GDT pointer for LGDT instruction
|
||||
@@ -63,53 +184,83 @@ pub struct GdtPointer {
|
||||
base: u64,
|
||||
}
|
||||
|
||||
/// Number of GDT entries
|
||||
const GDT_ENTRIES: usize = 3;
|
||||
|
||||
/// Kernel GDT with null, code, and data segments
|
||||
/// Combined GDT structure with all entries
|
||||
#[repr(C, align(16))]
|
||||
pub struct Gdt {
|
||||
entries: [GdtEntry; GDT_ENTRIES],
|
||||
null: GdtEntry,
|
||||
kernel_code: GdtEntry,
|
||||
kernel_data: GdtEntry,
|
||||
user_data: GdtEntry,
|
||||
user_code: GdtEntry,
|
||||
tss: TssDescriptor,
|
||||
}
|
||||
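// Illustrative sanity check (not part of this commit, and it assumes the
// post-commit field layout without the old `entries` array): the field offsets
// of `Gdt` should line up with the selector values documented above.
// Requires Rust 1.77+ for `core::mem::offset_of!`.
const _: () = {
    assert!(core::mem::offset_of!(Gdt, kernel_code) == 0x08);
    assert!(core::mem::offset_of!(Gdt, kernel_data) == 0x10);
    assert!(core::mem::offset_of!(Gdt, user_code) == 0x20);
    assert!(core::mem::offset_of!(Gdt, tss) == 0x28);
};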
|
||||
impl Gdt {
|
||||
pub const fn new() -> Self {
|
||||
Self {
|
||||
entries: [
|
||||
GdtEntry::null(), // 0x00: Null descriptor
|
||||
GdtEntry::code64(), // 0x08: Kernel code segment
|
||||
GdtEntry::data(), // 0x10: Kernel data segment
|
||||
],
|
||||
null: GdtEntry::null(),
|
||||
kernel_code: GdtEntry::kernel_code(),
|
||||
kernel_data: GdtEntry::kernel_data(),
|
||||
user_data: GdtEntry::user_data(),
|
||||
user_code: GdtEntry::user_code(),
|
||||
tss: TssDescriptor::null(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the TSS descriptor
|
||||
pub fn set_tss(&mut self, base: u64, limit: u32) {
|
||||
self.tss = TssDescriptor::new(base, limit);
|
||||
}
|
||||
}
|
||||
|
||||
/// Static kernel GDT (in higher-half memory)
|
||||
static KERNEL_GDT: Gdt = Gdt::new();
|
||||
use core::cell::UnsafeCell;
|
||||
|
||||
/// Reload the GDT with the kernel-space GDT
|
||||
struct SyncGdt(UnsafeCell<Gdt>);
|
||||
unsafe impl Sync for SyncGdt {}
|
||||
|
||||
struct SyncTss(UnsafeCell<Tss>);
|
||||
unsafe impl Sync for SyncTss {}
|
||||
|
||||
/// Static kernel GDT
|
||||
static KERNEL_GDT: SyncGdt = SyncGdt(UnsafeCell::new(Gdt::new()));
|
||||
|
||||
/// Static kernel TSS
|
||||
static KERNEL_TSS: SyncTss = SyncTss(UnsafeCell::new(Tss::new()));
|
||||
|
||||
/// Initialize the GDT with TSS
|
||||
///
|
||||
/// This should be called before removing identity mapping to ensure
|
||||
/// the GDT is accessible after the low memory is unmapped.
|
||||
pub fn reload() {
|
||||
/// This sets up the full GDT including user segments and TSS,
|
||||
/// then loads it into the CPU.
|
||||
pub fn init(kernel_stack: u64) {
|
||||
let gdt = unsafe { &mut *KERNEL_GDT.0.get() };
|
||||
let tss = unsafe { &mut *KERNEL_TSS.0.get() };
|
||||
|
||||
// Set up TSS with kernel stack for ring 0
|
||||
tss.rsp0 = kernel_stack;
|
||||
|
||||
// Update GDT with TSS descriptor
|
||||
let tss_addr = tss as *const Tss as u64;
|
||||
let tss_limit = (size_of::<Tss>() - 1) as u32;
|
||||
gdt.set_tss(tss_addr, tss_limit);
|
||||
|
||||
// Load GDT
|
||||
let gdt_size = size_of::<Gdt>();
|
||||
let pointer = GdtPointer {
|
||||
limit: (size_of::<[GdtEntry; GDT_ENTRIES]>() - 1) as u16,
|
||||
base: KERNEL_GDT.entries.as_ptr() as u64,
|
||||
limit: (gdt_size - 1) as u16,
|
||||
base: gdt as *const Gdt as u64,
|
||||
};
|
||||
|
||||
unsafe {
|
||||
// Load new GDT
|
||||
asm!("lgdt [{}]", in(reg) &pointer, options(nostack, preserves_flags));
|
||||
|
||||
// Reload code segment by doing a far return
|
||||
// Push SS, RSP, RFLAGS, CS, RIP and do IRETQ
|
||||
// Reload code segment
|
||||
asm!(
|
||||
"push 0x10", // SS
|
||||
"push 0x10", // SS (kernel data)
|
||||
"push rsp", // RSP
|
||||
"add qword ptr [rsp], 8", // Adjust for the push
|
||||
"add qword ptr [rsp], 8",
|
||||
"pushfq", // RFLAGS
|
||||
"push 0x08", // CS
|
||||
"lea rax, [rip + 2f]", // RIP (address of label 2)
|
||||
"push 0x08", // CS (kernel code)
|
||||
"lea rax, [rip + 2f]",
|
||||
"push rax",
|
||||
"iretq",
|
||||
"2:",
|
||||
@@ -119,19 +270,90 @@ pub fn reload() {
|
||||
"mov es, ax",
|
||||
"mov fs, ax",
|
||||
"mov gs, ax",
|
||||
// SS is already set by IRETQ
|
||||
out("rax") _,
|
||||
options(preserves_flags)
|
||||
);
|
||||
|
||||
// Load TSS
|
||||
asm!(
|
||||
"ltr {0:x}",
|
||||
in(reg) selectors::TSS,
|
||||
options(nostack, preserves_flags)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Reload the GDT (called before removing identity mapping)
|
||||
///
|
||||
/// This is the simpler reload that doesn't reinitialize TSS.
|
||||
pub fn reload() {
|
||||
let gdt = unsafe { &*KERNEL_GDT.0.get() };
|
||||
|
||||
let gdt_size = size_of::<Gdt>();
|
||||
let pointer = GdtPointer {
|
||||
limit: (gdt_size - 1) as u16,
|
||||
base: gdt as *const Gdt as u64,
|
||||
};
|
||||
|
||||
unsafe {
|
||||
asm!("lgdt [{}]", in(reg) &pointer, options(nostack, preserves_flags));
|
||||
|
||||
// Reload segments
|
||||
asm!(
|
||||
"push 0x10",
|
||||
"push rsp",
|
||||
"add qword ptr [rsp], 8",
|
||||
"pushfq",
|
||||
"push 0x08",
|
||||
"lea rax, [rip + 2f]",
|
||||
"push rax",
|
||||
"iretq",
|
||||
"2:",
|
||||
"mov ax, 0x10",
|
||||
"mov ds, ax",
|
||||
"mov es, ax",
|
||||
"mov fs, ax",
|
||||
"mov gs, ax",
|
||||
out("rax") _,
|
||||
options(preserves_flags)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Update TSS RSP0 (kernel stack for ring transitions)
|
||||
pub fn set_kernel_stack(stack: u64) {
|
||||
let tss = unsafe { &mut *KERNEL_TSS.0.get() };
|
||||
tss.rsp0 = stack;
|
||||
}
|
||||
|
||||
/// Set an IST (Interrupt Stack Table) entry
|
||||
///
|
||||
/// IST entries are numbered 1-7 (index 0-6 in the array).
|
||||
/// These provide dedicated stacks for specific interrupt handlers.
|
||||
pub fn set_ist(ist_index: u8, stack: u64) {
|
||||
if ist_index == 0 || ist_index > 7 {
|
||||
return; // Invalid index
|
||||
}
|
||||
let tss = unsafe { &mut *KERNEL_TSS.0.get() };
|
||||
tss.ist[(ist_index - 1) as usize] = stack;
|
||||
}
|
||||
|
||||
/// Get the kernel code segment selector
|
||||
pub const fn kernel_cs() -> u16 {
|
||||
0x08
|
||||
selectors::KERNEL_CODE
|
||||
}
|
||||
|
||||
/// Get the kernel data segment selector
|
||||
pub const fn kernel_ds() -> u16 {
|
||||
0x10
|
||||
selectors::KERNEL_DATA
|
||||
}
|
||||
|
||||
/// Get the user code segment selector
|
||||
pub const fn user_cs() -> u16 {
|
||||
selectors::USER_CODE
|
||||
}
|
||||
|
||||
/// Get the user data segment selector
|
||||
pub const fn user_ds() -> u16 {
|
||||
selectors::USER_DATA
|
||||
}
|
||||
|
||||
@@ -123,6 +123,17 @@ impl Idt {
|
||||
0, // No IST
|
||||
);
|
||||
}
|
||||
|
||||
/// Set an interrupt handler with custom DPL
|
||||
pub fn set_handler_dpl(&mut self, vector: u8, handler: u64, gate_type: GateType, dpl: u8) {
|
||||
self.entries[vector as usize] = IdtEntry::new(
|
||||
handler,
|
||||
0x08, // Kernel code segment
|
||||
gate_type,
|
||||
dpl,
|
||||
0, // No IST
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Interrupt stack frame pushed by CPU on interrupt/exception
|
||||
@@ -271,6 +282,49 @@ unsafe fn load_idt(idt: *const Idt) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set an interrupt handler (public interface)
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `vector` - Interrupt vector number (0-255)
|
||||
/// * `handler` - Handler function address
|
||||
/// * `gate_type` - Type of gate (Interrupt or Trap)
|
||||
/// * `dpl` - Descriptor Privilege Level (0 = kernel only, 3 = user callable)
|
||||
pub fn set_handler(vector: u8, handler: u64, gate_type: GateType, dpl: u8) {
|
||||
let idt = unsafe { &mut *STATIC_IDT.0.get() };
|
||||
idt.set_handler_dpl(vector, handler, gate_type, dpl);
|
||||
}
|
||||
|
||||
/// Set an interrupt handler with IST (Interrupt Stack Table) support
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `vector` - Interrupt vector number (0-255)
|
||||
/// * `handler` - Handler function address
|
||||
/// * `gate_type` - Type of gate (Interrupt or Trap)
|
||||
/// * `ist` - IST index (1-7), or 0 for no IST
|
||||
pub fn set_handler_with_ist(vector: u8, handler: u64, gate_type: GateType, ist: u8) {
|
||||
let idt = unsafe { &mut *STATIC_IDT.0.get() };
|
||||
idt.entries[vector as usize] = IdtEntry::new(
|
||||
handler,
|
||||
0x08, // Kernel code segment
|
||||
gate_type,
|
||||
0, // DPL 0 (kernel)
|
||||
ist,
|
||||
);
|
||||
}
|
||||
|
||||
/// Configure the double fault handler to use an IST entry
|
||||
///
|
||||
/// This must be called after init() and after the IST stack has been
|
||||
/// set up in the TSS via gdt::set_ist().
|
||||
pub fn set_double_fault_ist(ist: u8) {
|
||||
set_handler_with_ist(
|
||||
vectors::DOUBLE_FAULT,
|
||||
double_fault_handler as *const () as u64,
|
||||
GateType::Trap,
|
||||
ist,
|
||||
);
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Exception Handlers
|
||||
// ============================================================================
|
||||
@@ -404,7 +458,6 @@ extern "C" fn rust_exception_handler(state: &ExceptionState) {
|
||||
exception_name(state.vector as u8), state.vector).ok();
|
||||
writeln!(serial, " Error code: {:#x}", state.error_code).ok();
|
||||
writeln!(serial, " RIP: {:#x} CS: {:#x}", state.rip, state.cs).ok();
|
||||
writeln!(serial, " RSP: {:#x} SS: {:#x}", state.rsp, state.ss).ok();
|
||||
writeln!(serial, " RFLAGS: {:#x}", state.rflags).ok();
|
||||
writeln!(serial, " RAX: {:#018x} RBX: {:#018x}", state.rax, state.rbx).ok();
|
||||
writeln!(serial, " RCX: {:#018x} RDX: {:#018x}", state.rcx, state.rdx).ok();
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
//! x86_64 architecture support
|
||||
|
||||
pub mod interrupts;
|
||||
pub mod gdt;
|
||||
pub mod interrupts;
|
||||
pub mod pic;
|
||||
pub mod syscall;
|
||||
|
||||
/// Halt the CPU until the next interrupt
|
||||
#[inline]
|
||||
|
||||
src/arch/x86_64/pic.rs (new file, 244 lines)
@@ -0,0 +1,244 @@
|
||||
//! 8259 Programmable Interrupt Controller (PIC) driver
|
||||
//!
|
||||
//! The legacy PIC must be properly configured even if we plan to use the APIC,
|
||||
//! because its default IRQ mappings (0x08-0x0F, 0x70-0x77) conflict with CPU
|
||||
//! exception vectors:
|
||||
//!
|
||||
//! - IRQ0 (Timer) -> Vector 0x08 (Double Fault!)
|
||||
//! - IRQ1 (Keyboard) -> Vector 0x09
|
||||
//! - IRQ7 (Spurious) -> Vector 0x0F
|
||||
//!
|
||||
//! This module remaps the PICs to vectors 0x20-0x2F and provides control
|
||||
//! over interrupt masking.
|
||||
|
||||
use core::arch::asm;
|
||||
|
||||
/// PIC1 (master) command port
|
||||
const PIC1_CMD: u16 = 0x20;
|
||||
/// PIC1 (master) data port
|
||||
const PIC1_DATA: u16 = 0x21;
|
||||
/// PIC2 (slave) command port
|
||||
const PIC2_CMD: u16 = 0xA0;
|
||||
/// PIC2 (slave) data port
|
||||
const PIC2_DATA: u16 = 0xA1;
|
||||
|
||||
/// ICW1: Initialization Command Word 1
|
||||
const ICW1_INIT: u8 = 0x10;
|
||||
const ICW1_ICW4: u8 = 0x01; // ICW4 needed
|
||||
|
||||
/// ICW4: Initialization Command Word 4
|
||||
const ICW4_8086: u8 = 0x01; // 8086/88 mode
|
||||
|
||||
/// Vector offset for PIC1 IRQs (IRQ0-7 -> vectors 0x20-0x27)
|
||||
pub const PIC1_OFFSET: u8 = 0x20;
|
||||
/// Vector offset for PIC2 IRQs (IRQ8-15 -> vectors 0x28-0x2F)
|
||||
pub const PIC2_OFFSET: u8 = 0x28;
|
||||
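// Illustrative helper (not part of this commit): after init() below, a raw IRQ
// number maps to its IDT vector like this, e.g. IRQ0 -> 0x20, IRQ8 -> 0x28.
pub const fn irq_to_vector(irq: u8) -> u8 {
    if irq < 8 {
        PIC1_OFFSET + irq
    } else {
        PIC2_OFFSET + (irq - 8)
    }
}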
|
||||
/// IRQ numbers
|
||||
pub mod irq {
|
||||
pub const TIMER: u8 = 0;
|
||||
pub const KEYBOARD: u8 = 1;
|
||||
pub const CASCADE: u8 = 2; // Used internally for PIC1-PIC2 cascade
|
||||
pub const COM2: u8 = 3;
|
||||
pub const COM1: u8 = 4;
|
||||
pub const LPT2: u8 = 5;
|
||||
pub const FLOPPY: u8 = 6;
|
||||
pub const LPT1: u8 = 7; // Also spurious IRQ
|
||||
pub const RTC: u8 = 8;
|
||||
pub const FREE1: u8 = 9;
|
||||
pub const FREE2: u8 = 10;
|
||||
pub const FREE3: u8 = 11;
|
||||
pub const MOUSE: u8 = 12;
|
||||
pub const FPU: u8 = 13;
|
||||
pub const ATA_PRIMARY: u8 = 14;
|
||||
pub const ATA_SECONDARY: u8 = 15;
|
||||
}
|
||||
|
||||
/// Write a byte to an I/O port
|
||||
#[inline]
|
||||
unsafe fn outb(port: u16, value: u8) {
|
||||
unsafe {
|
||||
asm!(
|
||||
"out dx, al",
|
||||
in("dx") port,
|
||||
in("al") value,
|
||||
options(nostack, nomem, preserves_flags)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a byte from an I/O port
|
||||
#[inline]
|
||||
unsafe fn inb(port: u16) -> u8 {
|
||||
let value: u8;
|
||||
unsafe {
|
||||
asm!(
|
||||
"in al, dx",
|
||||
in("dx") port,
|
||||
out("al") value,
|
||||
options(nostack, nomem, preserves_flags)
|
||||
);
|
||||
}
|
||||
value
|
||||
}
|
||||
|
||||
/// Small I/O delay for PIC timing requirements
|
||||
#[inline]
|
||||
unsafe fn io_wait() {
|
||||
// Write to an unused port to create a small delay
|
||||
// Port 0x80 is used for POST codes and is safe to write to
|
||||
unsafe { outb(0x80, 0); }
|
||||
}
|
||||
|
||||
/// Initialize and remap both PICs
|
||||
///
|
||||
/// This remaps:
|
||||
/// - PIC1 (IRQ 0-7) to vectors 0x20-0x27
|
||||
/// - PIC2 (IRQ 8-15) to vectors 0x28-0x2F
|
||||
///
|
||||
/// After initialization, all IRQs are masked (disabled).
|
||||
pub fn init() {
|
||||
unsafe {
|
||||
// Save current masks
|
||||
let mask1 = inb(PIC1_DATA);
|
||||
let mask2 = inb(PIC2_DATA);
|
||||
|
||||
// ICW1: Start initialization sequence (cascade mode, ICW4 needed)
|
||||
outb(PIC1_CMD, ICW1_INIT | ICW1_ICW4);
|
||||
io_wait();
|
||||
outb(PIC2_CMD, ICW1_INIT | ICW1_ICW4);
|
||||
io_wait();
|
||||
|
||||
// ICW2: Set vector offsets
|
||||
outb(PIC1_DATA, PIC1_OFFSET);
|
||||
io_wait();
|
||||
outb(PIC2_DATA, PIC2_OFFSET);
|
||||
io_wait();
|
||||
|
||||
// ICW3: Configure cascade
|
||||
// Tell PIC1 that PIC2 is at IRQ2 (bit 2 = 0x04)
|
||||
outb(PIC1_DATA, 0x04);
|
||||
io_wait();
|
||||
// Tell PIC2 its cascade identity (IRQ2 = 2)
|
||||
outb(PIC2_DATA, 0x02);
|
||||
io_wait();
|
||||
|
||||
// ICW4: Set 8086 mode
|
||||
outb(PIC1_DATA, ICW4_8086);
|
||||
io_wait();
|
||||
outb(PIC2_DATA, ICW4_8086);
|
||||
io_wait();
|
||||
|
||||
// Mask all interrupts (we'll unmask specific ones as needed)
|
||||
outb(PIC1_DATA, 0xFF);
|
||||
outb(PIC2_DATA, 0xFF);
|
||||
|
||||
// Note: We intentionally mask all interrupts rather than restoring
|
||||
// the old masks, since we want to start with a clean slate
|
||||
let _ = (mask1, mask2); // Suppress unused warning
|
||||
}
|
||||
}
|
||||
|
||||
/// Disable the PIC entirely by masking all interrupts
|
||||
///
|
||||
/// This is useful when transitioning to APIC mode.
|
||||
pub fn disable() {
|
||||
unsafe {
|
||||
outb(PIC1_DATA, 0xFF);
|
||||
outb(PIC2_DATA, 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
/// Mask (disable) a specific IRQ
|
||||
pub fn mask_irq(irq: u8) {
|
||||
let port = if irq < 8 { PIC1_DATA } else { PIC2_DATA };
|
||||
let irq_bit = if irq < 8 { irq } else { irq - 8 };
|
||||
|
||||
unsafe {
|
||||
let mask = inb(port) | (1 << irq_bit);
|
||||
outb(port, mask);
|
||||
}
|
||||
}
|
||||
|
||||
/// Unmask (enable) a specific IRQ
|
||||
pub fn unmask_irq(irq: u8) {
|
||||
let port = if irq < 8 { PIC1_DATA } else { PIC2_DATA };
|
||||
let irq_bit = if irq < 8 { irq } else { irq - 8 };
|
||||
|
||||
unsafe {
|
||||
let mask = inb(port) & !(1 << irq_bit);
|
||||
outb(port, mask);
|
||||
}
|
||||
|
||||
// If unmasking an IRQ on PIC2, also unmask the cascade IRQ on PIC1
|
||||
if irq >= 8 {
|
||||
unsafe {
|
||||
let mask = inb(PIC1_DATA) & !(1 << irq::CASCADE);
|
||||
outb(PIC1_DATA, mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Send End-Of-Interrupt (EOI) signal
|
||||
///
|
||||
/// This must be called at the end of an IRQ handler to acknowledge
|
||||
/// the interrupt and allow further interrupts.
|
||||
pub fn send_eoi(irq: u8) {
|
||||
const EOI: u8 = 0x20;
|
||||
|
||||
unsafe {
|
||||
// If IRQ came from PIC2, send EOI to both PICs
|
||||
if irq >= 8 {
|
||||
outb(PIC2_CMD, EOI);
|
||||
}
|
||||
outb(PIC1_CMD, EOI);
|
||||
}
|
||||
}
|
||||
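// Illustrative usage sketch (not part of this commit): a hypothetical IRQ7
// handler would rule out a spurious interrupt before doing any work, and only
// acknowledge real interrupts.
fn handle_lpt1_irq_example() {
    if is_spurious(irq::LPT1) {
        return; // a spurious IRQ7 on the master PIC needs no EOI
    }
    // ... service the device here ...
    send_eoi(irq::LPT1);
}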
|
||||
/// Check if an IRQ is a spurious IRQ
|
||||
///
|
||||
/// Spurious IRQs (IRQ7 or IRQ15) can occur due to electrical noise
|
||||
/// or race conditions. They should be checked before handling.
|
||||
pub fn is_spurious(irq: u8) -> bool {
|
||||
const ISR_READ: u8 = 0x0B;
|
||||
|
||||
if irq == 7 {
|
||||
// Check PIC1's In-Service Register
|
||||
unsafe {
|
||||
outb(PIC1_CMD, ISR_READ);
|
||||
let isr = inb(PIC1_CMD);
|
||||
// If bit 7 is not set, it's spurious
|
||||
return (isr & 0x80) == 0;
|
||||
}
|
||||
} else if irq == 15 {
|
||||
// Check PIC2's In-Service Register
|
||||
unsafe {
|
||||
outb(PIC2_CMD, ISR_READ);
|
||||
let isr = inb(PIC2_CMD);
|
||||
// If bit 7 is not set, it's spurious
|
||||
if (isr & 0x80) == 0 {
|
||||
// Still need to send EOI to PIC1 (for cascade)
|
||||
outb(PIC1_CMD, 0x20);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Get the current IRQ mask for both PICs
|
||||
pub fn get_mask() -> u16 {
|
||||
unsafe {
|
||||
let mask1 = inb(PIC1_DATA) as u16;
|
||||
let mask2 = inb(PIC2_DATA) as u16;
|
||||
mask1 | (mask2 << 8)
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the IRQ mask for both PICs
|
||||
pub fn set_mask(mask: u16) {
|
||||
unsafe {
|
||||
outb(PIC1_DATA, mask as u8);
|
||||
outb(PIC2_DATA, (mask >> 8) as u8);
|
||||
}
|
||||
}
|
||||
src/arch/x86_64/syscall.rs (new file, 232 lines)
@@ -0,0 +1,232 @@
|
||||
//! System Call Interface for x86-64
|
||||
//!
|
||||
//! This module implements system calls using INT 0x80.
|
||||
//! This is simpler than SYSCALL/SYSRET and uses the existing IDT infrastructure.
|
||||
//!
|
||||
//! Syscall Convention:
|
||||
//! - RAX = syscall number
|
||||
//! - RDI, RSI, RDX, R10, R8, R9 = arguments
|
||||
//! - RAX = return value
|
||||
|
||||
use core::arch::asm;
|
||||
use crate::arch::x86_64::gdt;
|
||||
|
||||
/// System call interrupt vector
|
||||
pub const SYSCALL_VECTOR: u8 = 0x80;
|
||||
|
||||
/// System call numbers
|
||||
pub mod numbers {
|
||||
pub const EXIT: u64 = 0;
|
||||
pub const WRITE: u64 = 1;
|
||||
pub const YIELD: u64 = 2;
|
||||
pub const GETPID: u64 = 3;
|
||||
}
|
||||
|
||||
/// Initialize the syscall interface
|
||||
///
|
||||
/// This adds the INT 0x80 handler to the IDT.
|
||||
pub fn init() {
|
||||
use crate::arch::x86_64::interrupts::{GateType, set_handler};
|
||||
|
||||
// Set up INT 0x80 as a trap gate with DPL 3 (user-callable)
|
||||
set_handler(SYSCALL_VECTOR, syscall_entry as *const () as u64, GateType::Trap, 3);
|
||||
}
|
||||
|
||||
/// Syscall entry point
|
||||
///
|
||||
/// This is registered as the INT 0x80 handler.
|
||||
/// Stack on entry (pushed by CPU):
|
||||
/// - SS, RSP, RFLAGS, CS, RIP (if from ring 3)
|
||||
/// - Error code (none for INT)
|
||||
#[unsafe(naked)]
|
||||
extern "C" fn syscall_entry() {
|
||||
core::arch::naked_asm!(
|
||||
// No error code for software interrupts
|
||||
// Save all registers
|
||||
"push rax",
|
||||
"push rbx",
|
||||
"push rcx",
|
||||
"push rdx",
|
||||
"push rsi",
|
||||
"push rdi",
|
||||
"push rbp",
|
||||
"push r8",
|
||||
"push r9",
|
||||
"push r10",
|
||||
"push r11",
|
||||
"push r12",
|
||||
"push r13",
|
||||
"push r14",
|
||||
"push r15",
|
||||
|
||||
// Call Rust syscall handler
|
||||
// First arg (RDI) = pointer to saved state
|
||||
"mov rdi, rsp",
|
||||
"call {handler}",
|
||||
|
||||
// Return value is in RAX, save it to the stack frame
|
||||
"mov [rsp + 14*8], rax", // Overwrite saved RAX
|
||||
|
||||
// Restore all registers
|
||||
"pop r15",
|
||||
"pop r14",
|
||||
"pop r13",
|
||||
"pop r12",
|
||||
"pop r11",
|
||||
"pop r10",
|
||||
"pop r9",
|
||||
"pop r8",
|
||||
"pop rbp",
|
||||
"pop rdi",
|
||||
"pop rsi",
|
||||
"pop rdx",
|
||||
"pop rcx",
|
||||
"pop rbx",
|
||||
"pop rax",
|
||||
|
||||
// Return from interrupt
|
||||
"iretq",
|
||||
|
||||
handler = sym syscall_handler_rust,
|
||||
);
|
||||
}
|
||||
|
||||
/// Saved register state for syscall
|
||||
#[repr(C)]
|
||||
pub struct SyscallFrame {
|
||||
pub r15: u64,
|
||||
pub r14: u64,
|
||||
pub r13: u64,
|
||||
pub r12: u64,
|
||||
pub r11: u64,
|
||||
pub r10: u64,
|
||||
pub r9: u64,
|
||||
pub r8: u64,
|
||||
pub rbp: u64,
|
||||
pub rdi: u64,
|
||||
pub rsi: u64,
|
||||
pub rdx: u64,
|
||||
pub rcx: u64,
|
||||
pub rbx: u64,
|
||||
pub rax: u64,
|
||||
// CPU-pushed
|
||||
pub rip: u64,
|
||||
pub cs: u64,
|
||||
pub rflags: u64,
|
||||
pub rsp: u64,
|
||||
pub ss: u64,
|
||||
}
|
||||
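// Illustrative sanity check (not part of this commit): the asm stub above
// patches the saved RAX at [rsp + 14*8], which is only correct if `rax` is the
// fifteenth u64 in this frame. Assumes Rust 1.77+ for `core::mem::offset_of!`.
const _: () = assert!(core::mem::offset_of!(SyscallFrame, rax) == 14 * 8);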
|
||||
/// Rust syscall handler
|
||||
extern "C" fn syscall_handler_rust(frame: &SyscallFrame) -> u64 {
|
||||
let num = frame.rax;
|
||||
let arg1 = frame.rdi;
|
||||
let arg2 = frame.rsi;
|
||||
let arg3 = frame.rdx;
|
||||
let arg4 = frame.r10;
|
||||
let arg5 = frame.r8;
|
||||
|
||||
match num {
|
||||
numbers::EXIT => {
|
||||
use crate::serial::SerialPort;
|
||||
use core::fmt::Write;
|
||||
let mut serial = unsafe { SerialPort::new(0x3F8) };
|
||||
writeln!(serial, "\n[SYSCALL] exit({})", arg1).ok();
|
||||
|
||||
// For now, just halt. In a real OS, we'd terminate the process
|
||||
// and schedule another one.
|
||||
loop {
|
||||
unsafe { asm!("cli; hlt", options(nostack, nomem)); }
|
||||
}
|
||||
}
|
||||
|
||||
numbers::WRITE => {
|
||||
// write(fd, buf, len) -> bytes_written
|
||||
if arg1 == 1 || arg1 == 2 {
|
||||
// stdout or stderr -> serial
|
||||
use crate::serial::SerialPort;
|
||||
let mut serial = unsafe { SerialPort::new(0x3F8) };
|
||||
|
||||
let buf = arg2 as *const u8;
|
||||
let len = arg3 as usize;
|
||||
|
||||
// Safety: we trust the user buffer for now
|
||||
// In a real OS, we'd validate it's in user memory
|
||||
for i in 0..len {
|
||||
let c = unsafe { *buf.add(i) };
|
||||
serial.write_byte(c);
|
||||
}
|
||||
|
||||
len as u64
|
||||
} else {
|
||||
u64::MAX // -1 = error
|
||||
}
|
||||
}
|
||||
|
||||
numbers::YIELD => {
|
||||
// No-op for single process kernel
|
||||
0
|
||||
}
|
||||
|
||||
numbers::GETPID => {
|
||||
// Return current process ID
|
||||
crate::process::current().pid as u64
|
||||
}
|
||||
|
||||
_ => {
|
||||
// Unknown syscall
|
||||
use crate::serial::SerialPort;
|
||||
use core::fmt::Write;
|
||||
let mut serial = unsafe { SerialPort::new(0x3F8) };
|
||||
writeln!(serial, "[SYSCALL] Unknown syscall: {}", num).ok();
|
||||
u64::MAX
|
||||
}
|
||||
}
|
||||
}
|
||||
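// Illustrative only (not part of this commit): before trusting the WRITE
// buffer, a fuller kernel would check that the whole range lies in user space.
// A minimal sketch, assuming user addresses occupy the lower canonical half
// (the real check should also walk the page tables for USER mappings):
fn user_buffer_ok(ptr: u64, len: u64) -> bool {
    const USER_TOP: u64 = 0x0000_8000_0000_0000; // assumed user/kernel split
    match ptr.checked_add(len) {
        Some(end) => ptr < USER_TOP && end <= USER_TOP,
        None => false, // length overflows the address space
    }
}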
|
||||
/// Make a syscall from user mode (for testing)
|
||||
#[inline]
|
||||
pub unsafe fn syscall0(num: u64) -> u64 {
|
||||
let ret: u64;
|
||||
unsafe {
|
||||
asm!(
|
||||
"int 0x80",
|
||||
in("rax") num,
|
||||
lateout("rax") ret,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn syscall1(num: u64, arg1: u64) -> u64 {
|
||||
let ret: u64;
|
||||
unsafe {
|
||||
asm!(
|
||||
"int 0x80",
|
||||
in("rax") num,
|
||||
in("rdi") arg1,
|
||||
lateout("rax") ret,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn syscall3(num: u64, arg1: u64, arg2: u64, arg3: u64) -> u64 {
|
||||
let ret: u64;
|
||||
unsafe {
|
||||
asm!(
|
||||
"int 0x80",
|
||||
in("rax") num,
|
||||
in("rdi") arg1,
|
||||
in("rsi") arg2,
|
||||
in("rdx") arg3,
|
||||
lateout("rax") ret,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
ret
|
||||
}
|
||||
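// Illustrative usage sketch (not part of this commit): how user-mode code
// could wrap these helpers into libc-style calls using the numbers module.
pub unsafe fn sys_write(fd: u64, buf: *const u8, len: usize) -> u64 {
    unsafe { syscall3(numbers::WRITE, fd, buf as u64, len as u64) }
}

pub unsafe fn sys_exit(code: u64) -> ! {
    unsafe { syscall1(numbers::EXIT, code); }
    loop {} // the kernel never returns from EXIT
}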
src/lib.rs (215 lines)
@@ -109,8 +109,15 @@ pub fn kernel_init(info: &BootInfo) -> ! {
|
||||
// Get serial port for output
|
||||
let mut serial = unsafe { SerialPort::new(0x3F8) };
|
||||
|
||||
// CRITICAL: Initialize and remap the PIC first!
|
||||
// The legacy PIC's default IRQ0 (timer) maps to vector 0x08, which conflicts
|
||||
// with the Double Fault exception. This causes spurious "double faults" when
|
||||
// the timer fires. We remap the PIC to vectors 0x20-0x2F and mask all IRQs.
|
||||
arch::x86_64::pic::init();
|
||||
|
||||
writeln!(serial, "").ok();
|
||||
writeln!(serial, ">>> Entering kernel_init()").ok();
|
||||
writeln!(serial, " PIC remapped and masked").ok();
|
||||
writeln!(serial, " Boot method: {:?}", info.boot_method).ok();
|
||||
|
||||
// Report memory information from boot
|
||||
@@ -295,11 +302,52 @@ pub fn kernel_init(info: &BootInfo) -> ! {
|
||||
}
|
||||
}
|
||||
|
||||
// Reload GDT to higher-half address before removing identity mapping
|
||||
// Initialize GDT with TSS for user mode support
|
||||
// We need a kernel stack for ring 0 transitions from ring 3
|
||||
writeln!(serial, "").ok();
|
||||
writeln!(serial, ">>> Reloading GDT to higher-half...").ok();
|
||||
arch::x86_64::gdt::reload();
|
||||
writeln!(serial, " GDT reloaded").ok();
|
||||
writeln!(serial, ">>> Initializing GDT with TSS...").ok();
|
||||
|
||||
// Allocate a kernel stack for syscall/interrupt handling from user mode
|
||||
// We'll use 4 pages (16KB) for the kernel stack
|
||||
let kernel_stack_base = VirtAddr::new(0xFFFFFE8000010000); // In temp region
|
||||
|
||||
// Allocate and map 4 pages for the kernel stack
|
||||
for i in 0..4 {
|
||||
let frame = memory::frame::allocate_frame().expect("Failed to allocate kernel stack");
|
||||
let page_virt = VirtAddr::new(kernel_stack_base.as_u64() + (i * 0x1000) as u64);
|
||||
memory::paging::map_4kb(page_virt, frame.start_address(), memory::paging::flags::KERNEL_DATA)
|
||||
.expect("Failed to map kernel stack");
|
||||
}
|
||||
|
||||
// Stack grows down, so point to top of the 4-page region
|
||||
let kernel_stack_top = kernel_stack_base.as_u64() + 0x4000;
|
||||
arch::x86_64::gdt::init(kernel_stack_top);
|
||||
writeln!(serial, " GDT with TSS initialized").ok();
|
||||
writeln!(serial, " Kernel stack at {:#x}", kernel_stack_top).ok();
|
||||
|
||||
// Initialize syscall interface (INT 0x80)
|
||||
writeln!(serial, "").ok();
|
||||
writeln!(serial, ">>> Initializing syscall interface...").ok();
|
||||
arch::x86_64::syscall::init();
|
||||
writeln!(serial, " INT 0x80 syscall handler installed").ok();
|
||||
|
||||
// Set up a dedicated stack for double fault handling (IST1)
|
||||
// This ensures the double fault handler has a known-good stack even if
|
||||
// the main stack is corrupted (e.g., during failed privilege transitions)
|
||||
writeln!(serial, "").ok();
|
||||
writeln!(serial, ">>> Setting up IST for double fault...").ok();
|
||||
let ist1_stack_base = VirtAddr::new(0xFFFFFE8000020000); // Separate from kernel stack
|
||||
// Allocate 4 pages (16KB) - must be enough for exception frame + handler execution
|
||||
for i in 0..4 {
|
||||
let frame = memory::frame::allocate_frame().expect("Failed to allocate IST1 stack");
|
||||
let page_virt = VirtAddr::new(ist1_stack_base.as_u64() + (i * 0x1000) as u64);
|
||||
memory::paging::map_4kb(page_virt, frame.start_address(), memory::paging::flags::KERNEL_DATA)
|
||||
.expect("Failed to map IST1 stack");
|
||||
}
|
||||
let ist1_stack_top = ist1_stack_base.as_u64() + 0x4000; // 16KB stack
|
||||
arch::x86_64::gdt::set_ist(1, ist1_stack_top);
|
||||
arch::x86_64::interrupts::set_double_fault_ist(1);
|
||||
writeln!(serial, " IST1 (double fault) stack at {:#x}", ist1_stack_top).ok();
|
||||
|
||||
// Remove identity mapping - no longer needed now that we're in higher-half
|
||||
writeln!(serial, "").ok();
|
||||
@@ -314,15 +362,162 @@ pub fn kernel_init(info: &BootInfo) -> ! {
|
||||
writeln!(serial, " Identity mapping removed (PML4[0] cleared)").ok();
|
||||
}
|
||||
|
||||
// Test user-mode execution
|
||||
writeln!(serial, "").ok();
|
||||
writeln!(serial, "Kernel initialization complete.").ok();
|
||||
writeln!(serial, "Halting CPU.").ok();
|
||||
writeln!(serial, ">>> Testing user-mode execution...").ok();
|
||||
|
||||
// Halt the CPU
|
||||
loop {
|
||||
unsafe {
|
||||
core::arch::asm!("cli; hlt", options(nostack, nomem));
|
||||
// Create a new process for user mode test
|
||||
let user_pid = process::create().expect("Failed to create user process");
|
||||
writeln!(serial, " Created user process {}", user_pid).ok();
|
||||
|
||||
// Get the process's page table for mapping user pages
|
||||
let user_process = process::get(user_pid).unwrap();
|
||||
writeln!(serial, " Process page table: {:#x}", user_process.page_table).ok();
|
||||
|
||||
// Allocate frames for user code and stack
|
||||
let user_code_frame = memory::frame::allocate_frame().expect("Failed to allocate user code frame");
|
||||
let user_stack_frame = memory::frame::allocate_frame().expect("Failed to allocate user stack frame");
|
||||
|
||||
// User virtual addresses (in low memory, user-accessible)
|
||||
let user_code_virt = VirtAddr::new(0x400000); // 4MB - typical user code location
|
||||
let user_stack_virt = VirtAddr::new(0x800000); // 8MB - user stack base
|
||||
|
||||
// First switch to the user process's address space to set up its mappings
|
||||
unsafe { process::switch_address_space(user_pid).expect("Failed to switch to user address space"); }
|
||||
|
||||
// Verify kernel stacks are accessible in user address space
|
||||
// (They should be, since we copy kernel PML4 entries during process creation)
|
||||
writeln!(serial, " Verifying kernel stack mappings...").ok();
|
||||
if let Some(phys) = memory::paging::translate(VirtAddr::new(kernel_stack_top - 8)) {
|
||||
writeln!(serial, " Kernel stack: {:#x} -> {:#x}", kernel_stack_top - 8, phys).ok();
|
||||
} else {
|
||||
panic!("Kernel stack not mapped in user address space!");
|
||||
}
|
||||
if let Some(phys) = memory::paging::translate(VirtAddr::new(ist1_stack_top - 8)) {
|
||||
writeln!(serial, " IST1 stack: {:#x} -> {:#x}", ist1_stack_top - 8, phys).ok();
|
||||
} else {
|
||||
panic!("IST1 stack not mapped in user address space!");
|
||||
}
|
||||
|
||||
// Map user code page (readable, executable, user-accessible)
|
||||
memory::paging::map_4kb(user_code_virt, user_code_frame.start_address(), memory::paging::flags::USER_CODE)
|
||||
.expect("Failed to map user code");
|
||||
writeln!(serial, " Mapped user code at {:#x}", user_code_virt).ok();
|
||||
|
||||
// Map user stack page (readable, writable, user-accessible)
|
||||
memory::paging::map_4kb(user_stack_virt, user_stack_frame.start_address(), memory::paging::flags::USER_DATA)
|
||||
.expect("Failed to map user stack");
|
||||
writeln!(serial, " Mapped user stack at {:#x}", user_stack_virt).ok();
|
||||
|
||||
// Write a simple user program that:
|
||||
// 1. Calls write(1, "Hello from user mode!\n", 22)
|
||||
// 2. Calls exit(0)
|
||||
let user_code_ptr = user_code_virt.as_u64() as *mut u8;
|
||||
let message = b"Hello from user mode!\n";
|
||||
let message_offset = 64u64; // Place message after code
|
||||
|
||||
unsafe {
|
||||
let code: &[u8] = &[
|
||||
// mov rax, 1 (WRITE syscall)
|
||||
0x48, 0xc7, 0xc0, 0x01, 0x00, 0x00, 0x00,
|
||||
// mov rdi, 1 (fd = stdout)
|
||||
0x48, 0xc7, 0xc7, 0x01, 0x00, 0x00, 0x00,
|
||||
// lea rsi, [rip + message_offset] - we'll use absolute address instead
|
||||
// mov rsi, 0x400040 (message address = code_base + 64)
|
||||
0x48, 0xbe,
|
||||
((user_code_virt.as_u64() + message_offset) & 0xFF) as u8,
|
||||
(((user_code_virt.as_u64() + message_offset) >> 8) & 0xFF) as u8,
|
||||
(((user_code_virt.as_u64() + message_offset) >> 16) & 0xFF) as u8,
|
||||
(((user_code_virt.as_u64() + message_offset) >> 24) & 0xFF) as u8,
|
||||
(((user_code_virt.as_u64() + message_offset) >> 32) & 0xFF) as u8,
|
||||
(((user_code_virt.as_u64() + message_offset) >> 40) & 0xFF) as u8,
|
||||
(((user_code_virt.as_u64() + message_offset) >> 48) & 0xFF) as u8,
|
||||
(((user_code_virt.as_u64() + message_offset) >> 56) & 0xFF) as u8,
|
||||
// mov rdx, 22 (length)
|
||||
0x48, 0xc7, 0xc2, 0x16, 0x00, 0x00, 0x00,
|
||||
// int 0x80
|
||||
0xcd, 0x80,
|
||||
// mov rax, 0 (EXIT syscall)
|
||||
0x48, 0xc7, 0xc0, 0x00, 0x00, 0x00, 0x00,
|
||||
// mov rdi, 0 (exit code)
|
||||
0x48, 0xc7, 0xc7, 0x00, 0x00, 0x00, 0x00,
|
||||
// int 0x80
|
||||
0xcd, 0x80,
|
||||
// hlt (should never reach here)
|
||||
0xf4,
|
||||
];
|
||||
|
||||
// Write the code
|
||||
for (i, &byte) in code.iter().enumerate() {
|
||||
core::ptr::write_volatile(user_code_ptr.add(i), byte);
|
||||
}
|
||||
|
||||
// Write the message after the code
|
||||
let message_ptr = user_code_ptr.add(message_offset as usize);
|
||||
for (i, &byte) in message.iter().enumerate() {
|
||||
core::ptr::write_volatile(message_ptr.add(i), byte);
|
||||
}
|
||||
}
|
||||
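// For readability, the hand-assembled bytes above correspond to roughly this
// program (illustrative disassembly, not generated by this commit):
//
//     mov    rax, 1          ; WRITE syscall number
//     mov    rdi, 1          ; fd = stdout
//     movabs rsi, 0x400040   ; message address (code base + 64)
//     mov    rdx, 22         ; length of "Hello from user mode!\n"
//     int    0x80
//     mov    rax, 0          ; EXIT syscall number
//     mov    rdi, 0          ; exit code
//     int    0x80
//     hlt                    ; never reached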
writeln!(serial, " Wrote user program ({} bytes code + {} bytes data)", 52, message.len()).ok();
|
||||
|
||||
// User stack pointer (top of stack page)
|
||||
let user_stack_top = user_stack_virt.as_u64() + 0x1000;
|
||||
|
||||
// First, let's test that user mode works by running code in kernel
|
||||
// that verifies the segments are correct
|
||||
writeln!(serial, "").ok();
|
||||
writeln!(serial, ">>> Testing IRETQ mechanism with kernel mode...").ok();
|
||||
|
||||
// Test: Do a simple kernel-to-kernel IRETQ to verify the mechanism
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
// Push a simple return frame for kernel mode
|
||||
"push 0x10", // SS (kernel data)
|
||||
"push rsp", // RSP (current stack)
|
||||
"add qword ptr [rsp], 8", // Adjust for the push
|
||||
"pushfq", // RFLAGS
|
||||
"push 0x08", // CS (kernel code)
|
||||
"lea rax, [rip + 2f]", // RIP (label 2)
|
||||
"push rax",
|
||||
"iretq",
|
||||
"2:",
|
||||
out("rax") _,
|
||||
options(nostack)
|
||||
);
|
||||
}
|
||||
writeln!(serial, " Kernel IRETQ test passed!").ok();
|
||||
|
||||
// Debug: Print the GDT segment descriptor values
|
||||
writeln!(serial, "").ok();
|
||||
writeln!(serial, ">>> Verifying GDT entries...").ok();
|
||||
let user_cs = arch::x86_64::gdt::user_cs();
|
||||
let user_ds = arch::x86_64::gdt::user_ds();
|
||||
writeln!(serial, " USER_CS selector: {:#x}", user_cs).ok();
|
||||
writeln!(serial, " USER_DS selector: {:#x}", user_ds).ok();
|
||||
|
||||
// Test loading user data segment while in kernel mode
|
||||
// This should work: loading DPL=3 segment with RPL=3 while CPL=0
|
||||
writeln!(serial, " Testing user segment load in kernel mode...").ok();
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
"mov ax, {0:x}",
|
||||
"mov ds, ax", // This might fail with GPF if segment is invalid
|
||||
"mov ax, 0x10", // Restore kernel data segment
|
||||
"mov ds, ax",
|
||||
in(reg) user_ds as u64,
|
||||
out("rax") _,
|
||||
options(nostack, preserves_flags)
|
||||
);
|
||||
}
|
||||
writeln!(serial, " User segment load test passed!").ok();
|
||||
|
||||
writeln!(serial, "").ok();
|
||||
writeln!(serial, ">>> Jumping to user mode (ring 3)...").ok();
|
||||
writeln!(serial, " Entry: {:#x}, Stack: {:#x}", user_code_virt, user_stack_top).ok();
|
||||
|
||||
// Jump to user mode! (This won't return)
|
||||
unsafe {
|
||||
process::jump_to_user(user_code_virt.as_u64(), user_stack_top);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -444,15 +444,18 @@ pub fn remove_identity_mapping() {
|
||||
// ============================================================================
|
||||
|
||||
/// Ensure a PML4 entry exists, creating a PDPT if necessary
|
||||
fn ensure_pml4_entry(pml4_idx: usize, _flags: u64) -> Result<(), PagingError> {
|
||||
fn ensure_pml4_entry(pml4_idx: usize, page_flags: u64) -> Result<(), PagingError> {
|
||||
let entry = read_pml4(pml4_idx);
|
||||
if !entry.is_present() {
|
||||
let frame = allocate_frame()?;
|
||||
let phys = frame.start_address();
|
||||
|
||||
// Link the new PDPT into the PML4 first
|
||||
// Use only table flags (PRESENT | WRITABLE) for intermediate entries
|
||||
let table_flags = flags::PRESENT | flags::WRITABLE;
|
||||
// For user pages, the USER bit must be set in all intermediate entries
|
||||
let mut table_flags = flags::PRESENT | flags::WRITABLE;
|
||||
if page_flags & flags::USER != 0 {
|
||||
table_flags |= flags::USER;
|
||||
}
|
||||
let new_entry = PageTableEntry::new(phys, table_flags);
|
||||
write_pml4(pml4_idx, new_entry);
|
||||
|
||||
@@ -462,13 +465,18 @@ fn ensure_pml4_entry(pml4_idx: usize, _flags: u64) -> Result<(), PagingError> {
|
||||
// Zero the new page table via recursive mapping
|
||||
// Now that PML4[pml4_idx] is set, pdpt_table_addr gives us access
|
||||
zero_page_table(pdpt_table_addr(pml4_idx));
|
||||
} else if page_flags & flags::USER != 0 && !entry.is_user() {
|
||||
// Existing entry needs USER bit added
|
||||
let mut updated = entry;
|
||||
updated.set_flags(entry.flags() | flags::USER);
|
||||
write_pml4(pml4_idx, updated);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
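// The same "propagate USER into the intermediate table entry" logic repeats in
// ensure_pdpt_entry and ensure_pd_entry below; a small helper could factor it
// out (illustrative sketch, not part of this commit):
fn intermediate_table_flags(page_flags: u64) -> u64 {
    let mut table_flags = flags::PRESENT | flags::WRITABLE;
    if page_flags & flags::USER != 0 {
        // Every level of the walk must permit user access, otherwise the CPU
        // faults on user-mode accesses even when the leaf PTE has USER set.
        table_flags |= flags::USER;
    }
    table_flags
}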
|
||||
/// Ensure a PDPT entry exists, creating a PD if necessary
|
||||
fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(), PagingError> {
|
||||
ensure_pml4_entry(pml4_idx, flags)?;
|
||||
fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, page_flags: u64) -> Result<(), PagingError> {
|
||||
ensure_pml4_entry(pml4_idx, page_flags)?;
|
||||
|
||||
let entry = read_pdpt(pml4_idx, pdpt_idx);
|
||||
if entry.is_huge() {
|
||||
@@ -479,8 +487,11 @@ fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(),
|
||||
let phys = frame.start_address();
|
||||
|
||||
// Link the new PD into the PDPT first
|
||||
// Use only table flags for intermediate entries
|
||||
let table_flags = flags::PRESENT | flags::WRITABLE;
|
||||
// For user pages, the USER bit must be set in all intermediate entries
|
||||
let mut table_flags = flags::PRESENT | flags::WRITABLE;
|
||||
if page_flags & flags::USER != 0 {
|
||||
table_flags |= flags::USER;
|
||||
}
|
||||
let new_entry = PageTableEntry::new(phys, table_flags);
|
||||
write_pdpt(pml4_idx, pdpt_idx, new_entry);
|
||||
|
||||
@@ -489,13 +500,18 @@ fn ensure_pdpt_entry(pml4_idx: usize, pdpt_idx: usize, flags: u64) -> Result<(),
|
||||
|
||||
// Zero the new page table via recursive mapping
|
||||
zero_page_table(pd_table_addr(pml4_idx, pdpt_idx));
|
||||
} else if page_flags & flags::USER != 0 && !entry.is_user() {
|
||||
// Existing entry needs USER bit added
|
||||
let mut updated = entry;
|
||||
updated.set_flags(entry.flags() | flags::USER);
|
||||
write_pdpt(pml4_idx, pdpt_idx, updated);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensure a PD entry exists, creating a PT if necessary
|
||||
fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64) -> Result<(), PagingError> {
|
||||
ensure_pdpt_entry(pml4_idx, pdpt_idx, flags)?;
|
||||
fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, page_flags: u64) -> Result<(), PagingError> {
|
||||
ensure_pdpt_entry(pml4_idx, pdpt_idx, page_flags)?;
|
||||
|
||||
let entry = read_pd(pml4_idx, pdpt_idx, pd_idx);
|
||||
if entry.is_huge() {
|
||||
@@ -506,8 +522,11 @@ fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64)
|
||||
let phys = frame.start_address();
|
||||
|
||||
// Link the new PT into the PD first
|
||||
// Use only table flags for intermediate entries
|
||||
let table_flags = flags::PRESENT | flags::WRITABLE;
|
||||
// For user pages, the USER bit must be set in all intermediate entries
|
||||
let mut table_flags = flags::PRESENT | flags::WRITABLE;
|
||||
if page_flags & flags::USER != 0 {
|
||||
table_flags |= flags::USER;
|
||||
}
|
||||
let new_entry = PageTableEntry::new(phys, table_flags);
|
||||
write_pd(pml4_idx, pdpt_idx, pd_idx, new_entry);
|
||||
|
||||
@@ -516,6 +535,11 @@ fn ensure_pd_entry(pml4_idx: usize, pdpt_idx: usize, pd_idx: usize, flags: u64)
|
||||
|
||||
// Zero the new page table via recursive mapping
|
||||
zero_page_table(pt_table_addr(pml4_idx, pdpt_idx, pd_idx));
|
||||
} else if page_flags & flags::USER != 0 && !entry.is_user() {
|
||||
// Existing entry needs USER bit added
|
||||
let mut updated = entry;
|
||||
updated.set_flags(entry.flags() | flags::USER);
|
||||
write_pd(pml4_idx, pdpt_idx, pd_idx, updated);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -219,10 +219,17 @@ fn init_page_table(pml4_phys: PhysAddr) -> Result<(), ProcessError> {
|
||||
}
|
||||
|
||||
// Copy kernel-space entries (256-511) from current PML4
|
||||
// These include the recursive mapping (510) and kernel mapping (511)
|
||||
// EXCEPT for entry 510 (recursive mapping) which needs to point to THIS PML4
|
||||
for i in 256..512 {
|
||||
let entry = paging::read_pml4(i);
|
||||
core::ptr::write_volatile(pml4_ptr.add(i), entry.bits());
|
||||
if i == 510 {
|
||||
// Set recursive mapping to point to this new PML4 itself
|
||||
// Use PRESENT | WRITABLE flags (same as kernel recursive mapping)
|
||||
let self_ref_entry = pml4_phys.as_u64() | flags::PRESENT | flags::WRITABLE;
|
||||
core::ptr::write_volatile(pml4_ptr.add(i), self_ref_entry);
|
||||
} else {
|
||||
let entry = paging::read_pml4(i);
|
||||
core::ptr::write_volatile(pml4_ptr.add(i), entry.bits());
|
||||
}
|
||||
}
|
||||
}
|
||||
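// Why slot 510 matters (illustrative, assuming the recursive-mapping scheme
// used by the paging module): with PML4[510] pointing at the PML4 itself, the
// table is reachable at a fixed virtual address built from index 510 at every
// level, roughly:
//
//     const R: u64 = 510;
//     let pml4_virt = 0xFFFF_0000_0000_0000 // sign extension
//         | (R << 39) | (R << 30) | (R << 21) | (R << 12);
//
// If the new process's slot 510 still pointed at the kernel's PML4, any
// page-table edit made through the recursive mapping after the CR3 switch
// would silently modify the wrong address space.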
|
||||
@@ -321,3 +328,93 @@ pub fn switch_to_kernel() {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Jump to user mode (ring 3)
|
||||
///
|
||||
/// This uses IRETQ to transition from ring 0 to ring 3.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `entry` - User code entry point
|
||||
/// * `stack` - User stack pointer
|
||||
///
|
||||
/// # Safety
|
||||
/// The entry point and stack must be valid mapped addresses in user space.
|
||||
/// The current process's page table must have proper user mappings.
|
||||
pub unsafe fn jump_to_user(entry: u64, stack: u64) -> ! {
|
||||
use crate::arch::x86_64::gdt;
|
||||
use crate::serial::SerialPort;
|
||||
use core::fmt::Write;
|
||||
|
||||
let user_cs = gdt::user_cs() as u64;
|
||||
let user_ds = gdt::user_ds() as u64;
|
||||
|
||||
// Debug: Print what we're about to push
|
||||
let mut serial = unsafe { SerialPort::new(0x3F8) };
|
||||
writeln!(serial, " IRETQ frame: SS={:#x} RSP={:#x} RFLAGS=0x202 CS={:#x} RIP={:#x}",
|
||||
user_ds, stack, user_cs, entry).ok();
|
||||
|
||||
// Verify the entry point is mapped and accessible
|
||||
use crate::memory::paging;
|
||||
use crate::memory::VirtAddr;
|
||||
let entry_virt = VirtAddr::new(entry);
|
||||
if let Some((phys, size, flags)) = paging::get_mapping_info(entry_virt) {
|
||||
writeln!(serial, " Entry mapping: phys={:#x} size={:?} flags={:#x}", phys, size, flags).ok();
|
||||
} else {
|
||||
writeln!(serial, " WARNING: Entry point {:#x} is NOT MAPPED!", entry).ok();
|
||||
}
|
||||
|
||||
// Verify the stack is mapped
|
||||
let stack_virt = VirtAddr::new(stack - 8); // Stack will be decremented
|
||||
if let Some((phys, size, flags)) = paging::get_mapping_info(stack_virt) {
|
||||
writeln!(serial, " Stack mapping: phys={:#x} size={:?} flags={:#x}", phys, size, flags).ok();
|
||||
} else {
|
||||
writeln!(serial, " WARNING: Stack {:#x} is NOT MAPPED!", stack).ok();
|
||||
}
|
||||
|
||||
// Flush TLB to ensure all page table changes are visible
|
||||
// This reloads CR3 which flushes the entire TLB
|
||||
unsafe {
|
||||
let cr3: u64;
|
||||
core::arch::asm!("mov {}, cr3", out(reg) cr3, options(nostack, preserves_flags));
|
||||
core::arch::asm!("mov cr3, {}", in(reg) cr3, options(nostack, preserves_flags));
|
||||
}
|
||||
|
||||
// IRETQ expects the stack to contain (top to bottom):
|
||||
// [RSP+0] RIP - last pushed, first popped
|
||||
// [RSP+8] CS
|
||||
// [RSP+16] RFLAGS
|
||||
// [RSP+24] RSP
|
||||
// [RSP+32] SS - first pushed, last popped
|
||||
//
|
||||
// Note: DS/ES/FS/GS must be set to valid user selectors before IRETQ
|
||||
// when transitioning to ring 3. Using null (0) is valid in 64-bit mode.
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
// Set DS/ES/FS/GS to null using r11 - avoid clobbering input registers
|
||||
"xor r11d, r11d",
|
||||
"mov ds, r11w",
|
||||
"mov es, r11w",
|
||||
"mov fs, r11w",
|
||||
"mov gs, r11w",
|
||||
|
||||
// Memory barrier to ensure all stores are complete
|
||||
"mfence",
|
||||
|
||||
// Build IRETQ frame on stack
|
||||
"push {user_ss}", // SS
|
||||
"push {stack}", // RSP
|
||||
"push 0x202", // RFLAGS (IF=1, reserved bit 1 = 1)
|
||||
"push {user_cs}", // CS
|
||||
"push {entry}", // RIP
|
||||
|
||||
// Jump to user mode
|
||||
"iretq",
|
||||
|
||||
user_cs = in(reg) user_cs,
|
||||
user_ss = in(reg) user_ds, // SS same as DS for user mode
|
||||
entry = in(reg) entry,
|
||||
stack = in(reg) stack,
|
||||
options(noreturn)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||