;; VM for MJ, CS 164, Fall 05 ;; - Revised by dbindel ;; - Based on Fall 02 VM for Tiger by rjf ;; - Based loosely on code from _Paradigms of AI Programming_ ;; Copyright (c) 1991 Peter Norvig. ;; Code is stored in a vector. Execution starts at the beginning, ;; and continues until an exit instruction. ;; ;; Data is stored on the heap or in a run-time stack. There are two ;; pointers associated with a stack: the stack pointer, and the frame ;; pointer. Function arguments and local data are referred to by offset ;; from the frame pointer. ;; ;; To call a function, push two blank slots onto the stack and then ;; push all the function arguments. Then execute "call f n". ;; The address f is the destination address of the function call; ;; n is the number of arguments to the function. The call instruction ;; will save the program counter and frame pointer in the two slots ;; before the argument list, then sets the new frame pointer to the ;; start of the args list, and sets the new stack pointer to the end ;; of the args list. There is an indirect call instruction, "calli n" ;; which is identical to call except that it pops the address from the ;; top of the stack. ;; ;; Typically, one wants to reserve a few more slots for local ;; variables; the instruction "frame m" pushes zeros onto the stack ;; until there are m slots between the frame pointer and the stack ;; pointer. ;; ;; The return function pops a return value from the stack, restores ;; the old frame pointer and program counter, moves the stack pointer ;; back to before where the return address and return frame pointer ;; were stored, and pushes the return value back onto the stack. ;; ;; Opcodes are: ;; ;; lvar i - Gets the ith variable from the stack frame ;; lset i - Sets the ith variable from the stack frame ;; pop - Pops the stack ;; swap - Swaps the top two elements ;; dup i - Duplicates the ith entry from top of stack ;; addi i - Adds an immediate value to the top of stack ;; alloc - Pops a size from the stack; allocates a new array of that size ;; alen - Pops an array, pushes the length ;; mem - Pops index and array; pushes array[index]. ;; smem - Pops value, index, and array; sets array[index] = value. ;; pushi i - Push an immediate value ;; pusha i - Push an address ;; ;; jump a - Jumps to a ;; jumpz a - Pops val, jumps to a if it's zero ;; jumpn a - Pops val, jumps to a if it isn't zero ;; jumpi - Pops an address and jumps to it ;; ;; call f n - Calls function f with n arguments. ;; calli n - Calls a function with n arguments. Address popped from stack. ;; frame m - Push zeros onto the stack until there are m slots in the frame. ;; return - Returns from a call. Stack should contain just the return val. ;; ;; +, *, -, <, and - Operations on two variables (e.g. pop a, b, push a-b) ;; not - Operates on one variable ;; ;; print - Pops an integer and prints it ;; read - Reads an integer and pushes it ;; exit s - Exits with status code s ;; ;; debug - Ignored by the machine; may be used for debugging hooks ;; break - Aborts execution; may be useful for debugging ;; --- Two-pass assembler --- ;; The purpose of the assembler is to resolve the mapping between symbolic ;; addresses (like L0 or L1) and real addresses. The assemble routine ;; returns both the code with all the addresses replaced by real addresses, ;; *and* the lookup table of addresses. The second output isn't strictly ;; necessary, but it's handy for debugging. (defun label-p (inst) (symbolp inst)) (defun assemble (code) "Turn a list of instructions into a code vector" (let* ((labels (asm-first-pass code)) (bcode (asm-second-pass code labels))) (values bcode labels))) (defun asm-first-pass (code) "Return the label assoc list" (let ((length 0) (labels nil)) (dolist (inst code) (if (label-p inst) (push (cons inst length) labels) (incf length))) labels)) (defun asm-second-pass (code labels) "Put code into code-vector, adjusting for labels." (coerce (map 'list #'(lambda (inst) (if (member (car inst) '(jump jumpn jumpz pusha call)) `(,(car inst) ,(cdr (assoc (second inst) labels)) ,@(cddr inst)) inst)) (remove-if #'label-p code)) 'vector)) ;; --- Pretty-print the assembly code, before or after assembly --- ;; When we pretty-print the code before running it through the two-pass ;; assembler, we can print out all the symbolic labels. When we've ;; compiled to bytecode, we've lost that symbolic information (though ;; it may still live somewhere other than the code vector). (defun pprint-code (code &optional (stdout t)) "Print all the instructions in a code list or vector" (cond ((vectorp code) (pprint-code-vec code stdout)) ((listp code) (pprint-code-list code stdout)) (t (error "Non-code argument to pprint-code")))) (defun pprint-code-list (code &optional (stdout t)) "Pretty print the assembler code in a list." (map 'list #'(lambda (inst) (if (label-p inst) (format stdout "~%~a:" inst) (format stdout "~% ~{~a ~}" inst))) code) nil) (defun pprint-code-vec (code &optional (stdout t)) "Pretty print a code vector" (let ((counter 0)) (map 'vector #'(lambda (inst) (format stdout "~%~8d: ~{~a ~}" counter inst) (incf counter)) code)) nil) ;; --- Virtual machine state --- ;; The virtual machine state is packaged up in a structure so that we ;; can do things like abort a computation halfway through and look at ;; the machine state -- without resorting to globals. This also means ;; that you can run your code in one virtual machine and my code in ;; another, and compare the machine states at specified breakpoints. ;; Besides the usual stuff (stack, pointer registers, I/O information, ;; code vector), there is also space for some debugging stuff: the ;; labels list from the assembler, a function to be called if there's ;; a program crash, and a hook for a tracer function to be called before ;; each instruction is executed. (defstruct vm-state code ; Code vector (stack (make-array 1000 :initial-element 0)) ; Run-time stack (fp 0) ; Frame pointer (sp 0) ; Stack pointer (pc 0) ; Program counter inst ; Current instruction (stdout t) ; Output stream (stderr t) ; Error stream (stdin nil) ; Input stream labels ; Label info (for debug) crash-hook ; Fatal error handler debug-hook) ; Debugger hook (defun vm-push (item vm) "Push an item on the VM stack" (setf (aref (vm-state-stack vm) (vm-state-sp vm)) item) (incf (vm-state-sp vm))) (defun vm-pop (vm) "Pop an item from the VM stack" (decf (vm-state-sp vm)) (aref (vm-state-stack vm) (vm-state-sp vm))) (defmacro vm-frame (offset vm) "Access a location relative to the stack frame" `(aref (vm-state-stack ,vm) (+ (vm-state-fp ,vm) ,offset))) (defmacro vm-stack (offset vm) "Access a location relative to the stack frame" `(aref (vm-state-stack ,vm) (- (vm-state-sp ,vm) (1+ ,offset)))) (defun vm-restart (vm) "Reset the VM to the start of execution" (setf (vm-state-fp vm) 0) (setf (vm-state-sp vm) 0) (setf (vm-state-pc vm) 0)) (defun vm-read-int (vm) "Read an integer and push it onto the stack" (let ((result (if (vm-state-stdin vm) (read (vm-state-stdin vm)) (progn (format t "~%Enter number: ") (read))))) (vm-push (if (integerp result) result 0) vm))) ;; --- Debug utilities --- ;; If we want to view the machine state, we probably don't want to see ;; *everything* simultaneously. So the vm-state-print function shows ;; just the pertinent information: the instruction, registers, and the ;; contents of the current stack frame. ;; ;; I provide two simple tracing utilities based on the state printer. ;; The first (vm-tracer) prints the machine state and pauses after every ;; instruction. If you want to bail out, you can enter 'q'; otherwise, ;; you keep hitting enter, and the machine keeps processing instructions. ;; More useful is the vm-method-tracer utility, which automatically turns ;; tracing on when the PC passes a particular label (e.g. the start of ;; a function), and turns the tracing off again immediately after the return. ;; ;; With the combination of the vm-debug-hook and the debug function, you ;; can do pretty much anything you want. You can use this facility to write ;; a fancy debugger (gdb-vm?), or to branch off and build facilities that ;; the VM doesn't naturally have. For example, if you decide that you ;; want to be able to put in pseudo-instructions like ;; ;; (debug print msg "-- Entering tricky code --") ;; ;; then you can do it. After all, most real machines have facilities ;; something like this (though these types of things run much more slowly ;; than ordinary instructions). Why should they have all the fun? (defun vm-state-print (vm) "Print VM state" (format t "~%Inst: ~{~a ~}" (vm-state-inst vm)) (format t "~%pc: ~a" (vm-state-pc vm)) (format t "~%sp: ~a" (vm-state-sp vm)) (format t "~%fp: ~a" (vm-state-fp vm)) (format t "~%Stack frame:") (do ((i (- (vm-state-sp vm) 1) (decf i))) ((< i (vm-state-fp vm))) (format t "~% ~3d: ~a" i (aref (vm-state-stack vm) i)))) (defun vm-tracer (vm) "Simple debugger: trace step-by-step through execution" (vm-state-print vm) (if (eq (read-char) #\q) (error "Abort from trace"))) (defun vm-method-tracer (label) "Just trace from the given label until the next return" (let ((trace-on nil)) #'(lambda (vm) (when (eq (car (rassoc (- (vm-state-pc vm) 1) (vm-state-labels vm))) label) (setf trace-on t)) (when trace-on (vm-tracer vm)) (when (equal (vm-state-inst vm) '(return)) (setf trace-on nil))))) ;; --- Run virtual machine --- ;; The virtual machine code is pretty short, and the source is ;; probably the best description of the instruction set. Note that ;; the instruction execution is wrapped in a handler-case clause (the ;; Lisp equivalent of try/catch), so if you put in a malformed ;; instruction, underflow the stack, etc. then you will get a clean ;; exit with the error text and a little diagnostic info. If you ;; want, you can launch into a debugger when the system errors out; ;; this is what crash-hook is for. You can also deliberately bail ;; with an error using the "break" instruction; this may be useful for ;; breakpoint debugging. (defun vm-fetch (vm) "Fetch an instruction and update the program counter" (setf (vm-state-inst vm) (aref (vm-state-code vm) (vm-state-pc vm))) (incf (vm-state-pc vm)) (vm-state-inst vm)) (defun run-vm (vm) "Run the code in a virtual machine." (labels ((vpush (v) (vm-push v vm)) (vpop () (vm-pop vm)) (fp () (vm-state-fp vm)) (sp () (vm-state-sp vm)) (pc () (vm-state-pc vm)) (frame (i) (vm-frame i vm)) (stack (i) (vm-stack i vm)) (set-fp (fp) (setf (vm-state-fp vm) fp)) (set-sp (sp) (setf (vm-state-sp vm) sp)) (set-pc (pc) (setf (vm-state-pc vm) pc)) (set-frame (i v) (setf (vm-frame i vm) v)) (set-stack (i v) (setf (vm-stack i vm) v)) (a1 () (second (vm-state-inst vm))) (a2 () (third (vm-state-inst vm)))) (handler-case (loop (vm-fetch vm) ; Fetch instruction / update PC (when (vm-state-debug-hook vm) ; Call debugging hooks (funcall (vm-state-debug-hook vm) vm)) (case (car (vm-state-inst vm)) ;; Variable/stack manipulation instructions (lvar (vpush (frame (a1)))) (lset (set-frame (a1) (vpop))) (pop (vpop)) (dup (vpush (stack (a1)))) (swap (let ((x (vpop)) (y (vpop))) (vpush x) (vpush y))) (addi (vpush (+ (a1) (vpop)))) (alloc (vpush (make-array (vpop) :initial-element 0))) (alen (vpush (length (vpop)))) (mem (let ((ind (vpop)) (base (vpop))) (vpush (elt base ind)))) (smem (let ((val (vpop)) (ind (vpop)) (base (vpop))) (setf (elt base ind) val))) (pushi (vpush (a1))) (pusha (vpush (a1))) ;; Branching instructions: (jump (set-pc (a1))) (jumpz (if (equal 0 (vpop)) (set-pc (a1)))) (jumpn (if (not (equal 0 (vpop))) (set-pc (a1)))) (jumpi (set-pc (vpop))) ;; Function call/return instructions: (call (let ((old-pc (pc)) (old-fp (fp))) (set-pc (a1)) (set-fp (- (sp) (a2))) (set-frame -1 old-pc) (set-frame -2 old-fp))) (calli (let ((addr (vpop)) (old-pc (pc)) (old-fp (fp))) (set-pc addr) (set-fp (- (sp) (a1))) (set-frame -1 old-pc) (set-frame -2 old-fp))) (frame (while (< (sp) (+ (fp) (a1))) (vpush 0))) (return (let ((ret-val (vpop)) (ret-pc (frame -1)) (ret-fp (frame -2))) (set-sp (- (fp) 2)) (set-fp ret-fp) (set-pc ret-pc) (vpush ret-val))) ;; Arithmetic and logical operations: (+ (vpush (+ (vpop) (vpop)))) (* (vpush (* (vpop) (vpop)))) (- (vpush (- (vpop) (vpop)))) (< (vpush (if (< (vpop) (vpop)) 1 0))) (not (vpush (if (eq (vpop) 0) 1 0))) (and (let ((op1 (vpop)) (op2 (vpop))) (vpush (if (or (eq op1 0) (eq op2 0)) 0 1)))) ;; Other: (print (format (vm-state-stdout vm) "~%~A" (vpop))) (read (vm-read-int vm)) (exit (format (vm-state-stderr vm) "~%Exited with code ~a~%" (a1)) (return)) ;; Debugger instructions (debug) (break (format (vm-state-stderr vm) "~%Encountered breakpoint at ~a" (pc))) (t (error "Unknown opcode: ~a" (vm-state-inst vm))))) (error (pe) (format t "~%Caught error: ~a" pe) (if (vm-state-crash-hook vm) (funcall (vm-state-crash-hook vm) vm) (vm-state-print vm))))))