diff -urN -X dontdiff linux-2.5.33/arch/i386/config.in linux-2.5.33.ltt.lockless/arch/i386/config.in --- linux-2.5.33/arch/i386/config.in Sat Aug 31 15:04:55 2002 +++ linux-2.5.33.ltt.lockless/arch/i386/config.in Sun Sep 8 23:00:20 2002 @@ -399,6 +399,8 @@ source net/bluetooth/Config.in +source drivers/trace/Config.in + mainmenu_option next_comment comment 'Kernel hacking' if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then diff -urN -X dontdiff linux-2.5.33/arch/i386/kernel/entry.S linux-2.5.33.ltt.lockless/arch/i386/kernel/entry.S --- linux-2.5.33/arch/i386/kernel/entry.S Sat Aug 31 15:04:53 2002 +++ linux-2.5.33.ltt.lockless/arch/i386/kernel/entry.S Sun Sep 8 23:00:20 2002 @@ -233,9 +233,27 @@ testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebx) jnz syscall_trace_entry syscall_call: +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + movl syscall_entry_trace_active, %eax + cmpl $1, %eax # are we tracing system call entries + jne no_syscall_entry_trace + movl %esp, %eax # copy the stack pointer + pushl %eax # pass the stack pointer copy + call trace_real_syscall_entry + addl $4,%esp # return stack to state before pass +no_syscall_entry_trace: + movl ORIG_EAX(%esp),%eax # restore eax to it's original content +#endif call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) # store the return value syscall_exit: +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + movl syscall_exit_trace_active, %eax + cmpl $1, %eax # are we tracing system call exits + jne no_syscall_exit_trace + call trace_real_syscall_exit +no_syscall_exit_trace: +#endif cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret diff -urN -X dontdiff linux-2.5.33/arch/i386/kernel/irq.c linux-2.5.33.ltt.lockless/arch/i386/kernel/irq.c --- linux-2.5.33/arch/i386/kernel/irq.c Sat Aug 31 15:04:48 2002 +++ linux-2.5.33.ltt.lockless/arch/i386/kernel/irq.c Sun Sep 8 23:00:20 2002 @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include @@ -202,6 +204,8 @@ { int status = 1; /* 
Force the "do bottom halves" bit */ + TRACE_IRQ_ENTRY(irq, !(user_mode(regs))); + if (!(action->flags & SA_INTERRUPT)) local_irq_enable(); @@ -214,6 +218,8 @@ add_interrupt_randomness(irq); local_irq_disable(); + TRACE_IRQ_EXIT(); + return status; } diff -urN -X dontdiff linux-2.5.33/arch/i386/kernel/process.c linux-2.5.33.ltt.lockless/arch/i386/kernel/process.c --- linux-2.5.33/arch/i386/kernel/process.c Sat Aug 31 15:04:45 2002 +++ linux-2.5.33.ltt.lockless/arch/i386/kernel/process.c Sun Sep 8 23:00:20 2002 @@ -34,6 +34,8 @@ #include #include +#include + #include #include #include @@ -505,6 +507,10 @@ /* Ok, create the new process.. */ p = do_fork(flags | CLONE_VM, 0, &regs, 0, NULL); +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + if(!IS_ERR(p)) + TRACE_PROCESS(TRACE_EV_PROCESS_KTHREAD, p->pid, (int) fn); +#endif return IS_ERR(p) ? PTR_ERR(p) : p->pid; } diff -urN -X dontdiff linux-2.5.33/arch/i386/kernel/sys_i386.c linux-2.5.33.ltt.lockless/arch/i386/kernel/sys_i386.c --- linux-2.5.33/arch/i386/kernel/sys_i386.c Sat Aug 31 15:04:59 2002 +++ linux-2.5.33.ltt.lockless/arch/i386/kernel/sys_i386.c Sun Sep 8 23:00:20 2002 @@ -19,6 +19,8 @@ #include #include +#include + #include #include @@ -137,6 +139,8 @@ version = call >> 16; /* hack for backward compatibility */ call &= 0xffff; + TRACE_IPC(TRACE_EV_IPC_CALL, call, first); + switch (call) { case SEMOP: return sys_semop (first, (struct sembuf *)ptr, second); diff -urN -X dontdiff linux-2.5.33/arch/i386/kernel/traps.c linux-2.5.33.ltt.lockless/arch/i386/kernel/traps.c --- linux-2.5.33/arch/i386/kernel/traps.c Sat Aug 31 15:04:51 2002 +++ linux-2.5.33.ltt.lockless/arch/i386/kernel/traps.c Sun Sep 8 23:00:20 2002 @@ -28,6 +28,8 @@ #include #endif +#include + #ifdef CONFIG_MCA #include #include @@ -275,6 +277,82 @@ printk("Kernel BUG\n"); } +/* Trace related code */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +asmlinkage void trace_real_syscall_entry(struct pt_regs * regs) +{ + int use_depth; + int use_bounds; + int depth = 
0; + int seek_depth; + unsigned long lower_bound; + unsigned long upper_bound; + unsigned long addr; + unsigned long* stack; + trace_syscall_entry trace_syscall_event; + + /* Set the syscall ID */ + trace_syscall_event.syscall_id = (uint8_t) regs->orig_eax; + + /* Set the address in any case */ + trace_syscall_event.address = regs->eip; + + /* Are we in the kernel (This is a kernel thread)? */ + if(!(regs->xcs & 3)) + /* Don't go digging anywhere */ + goto trace_syscall_end; + + /* Get the trace configuration */ + if(trace_get_config(&use_depth, + &use_bounds, + &seek_depth, + (void*)&lower_bound, + (void*)&upper_bound) < 0) + goto trace_syscall_end; + + /* Do we have to search for an eip address range */ + if((use_depth == 1) || (use_bounds == 1)) + { + /* Start at the top of the stack (bottom address since stacks grow downward) */ + stack = (unsigned long*) regs->esp; + + /* Keep on going until we reach the end of the process' stack limit (wherever it may be) */ + while(!get_user(addr, stack)) + { + /* Does this LOOK LIKE an address in the program */ + if((addr > current->mm->start_code) + &&(addr < current->mm->end_code)) + { + /* Does this address fit the description */ + if(((use_depth == 1) && (depth == seek_depth)) + ||((use_bounds == 1) && (addr > lower_bound) && (addr < upper_bound))) + { + /* Set the address */ + trace_syscall_event.address = addr; + + /* We're done */ + goto trace_syscall_end; + } + else + /* We're one depth more */ + depth++; + } + /* Go on to the next address */ + stack++; + } + } + +trace_syscall_end: + /* Trace the event */ + trace_event(TRACE_EV_SYSCALL_ENTRY, &trace_syscall_event); +} + +asmlinkage void trace_real_syscall_exit(void) +{ + trace_event(TRACE_EV_SYSCALL_EXIT, NULL); +} +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ + spinlock_t die_lock = SPIN_LOCK_UNLOCKED; void die(const char * str, struct pt_regs * regs, long err) @@ -308,6 +386,8 @@ static void inline do_trap(int trapnr, int signr, char *str, int vm86, struct 
pt_regs * regs, long error_code, siginfo_t *info) { + TRACE_TRAP_ENTRY(trapnr, regs->eip); + if (vm86 && regs->eflags & VM_MASK) goto vm86_trap; @@ -322,6 +402,7 @@ force_sig_info(signr, info, tsk); else force_sig(signr, tsk); + TRACE_TRAP_EXIT(); return; } @@ -347,14 +428,17 @@ regs->eip = fixup; else die(str, regs, error_code); + TRACE_TRAP_EXIT(); return; } vm86_trap: { int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr); if (ret) goto trap_signal; + TRACE_TRAP_EXIT(); return; } + TRACE_TRAP_EXIT(); } #define DO_ERROR(trapnr, signr, str, name) \ @@ -414,11 +498,15 @@ current->thread.error_code = error_code; current->thread.trap_no = 13; + TRACE_TRAP_ENTRY(13, regs->eip); force_sig(SIGSEGV, current); + TRACE_TRAP_EXIT(); return; gp_in_vm86: + TRACE_TRAP_ENTRY(13, regs->eip); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + TRACE_TRAP_EXIT(); return; gp_in_kernel: @@ -478,6 +566,11 @@ { unsigned char reason = inb(0x61); +#ifndef CONFIG_SMP /* On an SMP machine NMIs are used to implement a watchdog and will hang + the machine if traced. */ + TRACE_TRAP_ENTRY(2, regs->eip); +#endif + ++nmi_count(smp_processor_id()); if (!(reason & 0xc0)) { @@ -488,10 +581,12 @@ */ if (nmi_watchdog) { nmi_watchdog_tick(regs); + TRACE_TRAP_EXIT(); return; } #endif unknown_nmi_error(reason, regs); + TRACE_TRAP_EXIT(); return; } if (reason & 0x80) @@ -506,6 +601,8 @@ inb(0x71); /* dummy */ outb(0x0f, 0x70); inb(0x71); /* dummy */ + + TRACE_TRAP_EXIT(); } /* @@ -579,7 +676,9 @@ */ info.si_addr = ((regs->xcs & 3) == 0) ? (void *)tsk->thread.eip : (void *)regs->eip; + TRACE_TRAP_ENTRY(1, regs->eip); force_sig_info(SIGTRAP, &info, tsk); + TRACE_TRAP_EXIT(); /* Disable additional traps. They'll be re-enabled when * the signal is delivered. 
@@ -591,7 +690,9 @@ return; debug_vm86: + TRACE_TRAP_ENTRY(1, regs->eip); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); + TRACE_TRAP_EXIT(); return; clear_TF: @@ -740,10 +841,12 @@ asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, long error_code) { + TRACE_TRAP_ENTRY(16, regs->eip); #if 0 /* No need to warn about this any longer. */ printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); #endif + TRACE_TRAP_EXIT(); } /* @@ -772,8 +875,10 @@ { printk("math-emulation not enabled and no coprocessor found.\n"); printk("killing %s.\n",current->comm); + TRACE_TRAP_ENTRY(7, 0); force_sig(SIGFPE,current); schedule(); + TRACE_TRAP_EXIT(); } #endif /* CONFIG_MATH_EMULATION */ @@ -805,7 +910,6 @@ "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \ } while (0) - /* * This needs to use 'idt_table' rather than 'idt', and * thus use the _nonmapped_ version of the IDT, as the diff -urN -X dontdiff linux-2.5.33/arch/i386/mm/fault.c linux-2.5.33.ltt.lockless/arch/i386/mm/fault.c --- linux-2.5.33/arch/i386/mm/fault.c Sat Aug 31 15:04:45 2002 +++ linux-2.5.33.ltt.lockless/arch/i386/mm/fault.c Sun Sep 8 23:00:20 2002 @@ -20,6 +20,8 @@ #include #include /* For unblank_screen() */ +#include + #include #include #include @@ -180,6 +182,8 @@ mm = tsk->mm; info.si_code = SEGV_MAPERR; + TRACE_TRAP_ENTRY(14, regs->eip); + /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault.. @@ -264,6 +268,7 @@ tsk->thread.screen_bitmap |= 1 << bit; } up_read(&mm->mmap_sem); + TRACE_TRAP_EXIT(); return; /* @@ -283,6 +288,7 @@ /* info.si_code has been set above */ info.si_addr = (void *)address; force_sig_info(SIGSEGV, &info, tsk); + TRACE_TRAP_EXIT(); return; } @@ -297,6 +303,7 @@ if (nr == 6) { do_invalid_op(regs, 0); + TRACE_TRAP_EXIT(); return; } } @@ -306,6 +313,7 @@ /* Are we prepared to handle this kernel fault? 
*/ if ((fixup = search_exception_table(regs->eip)) != 0) { regs->eip = fixup; + TRACE_TRAP_EXIT(); return; } @@ -379,6 +387,7 @@ /* Kernel mode? Handle exceptions or die */ if (!(error_code & 4)) goto no_context; + TRACE_TRAP_EXIT(); return; vmalloc_fault: @@ -412,6 +421,8 @@ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) goto no_context; + TRACE_TRAP_EXIT(); return; } + TRACE_TRAP_EXIT(); } diff -urN -X dontdiff linux-2.5.33/arch/mips/config.in linux-2.5.33.ltt.lockless/arch/mips/config.in --- linux-2.5.33/arch/mips/config.in Sat Aug 31 15:05:30 2002 +++ linux-2.5.33.ltt.lockless/arch/mips/config.in Sun Sep 8 23:00:20 2002 @@ -477,6 +477,8 @@ source drivers/usb/Config.in +source drivers/trace/Config.in + mainmenu_option next_comment comment 'Kernel hacking' diff -urN -X dontdiff linux-2.5.33/arch/mips/ddb5476/irq.c linux-2.5.33.ltt.lockless/arch/mips/ddb5476/irq.c --- linux-2.5.33/arch/mips/ddb5476/irq.c Sat Aug 31 15:05:31 2002 +++ linux-2.5.33.ltt.lockless/arch/mips/ddb5476/irq.c Sun Sep 8 23:00:20 2002 @@ -3,6 +3,10 @@ * * Copyright (C) 2000 Geert Uytterhoeven * Sony Software Development Center Europe (SDCE), Brussels + * + * ---- for LTT patch ---- + * Copyright (C) 2001 Takuzo O'Hara (takuzo@sm.sony.co.jp). 
+ * */ #include #include @@ -12,6 +16,8 @@ #include #include +#include + #include #include #include @@ -184,6 +190,7 @@ /* Handle the timer interrupt first */ if (mask & (1 << NILE4_INT_GPT)) { nile4_disable_irq(NILE4_INT_GPT); + TRACE_IRQ_ENTRY(nile4_to_irq(NILE4_INT_GPT), ((regs->cp0_status & ST0_KSU) == KSU_KERNEL)); do_IRQ(nile4_to_irq(NILE4_INT_GPT), regs); nile4_enable_irq(NILE4_INT_GPT); mask &= ~(1 << NILE4_INT_GPT); @@ -193,8 +200,10 @@ nile4_disable_irq(nile4_irq); if (nile4_irq == NILE4_INT_INTC) { int i8259_irq = nile4_i8259_iack(); + TRACE_IRQ_ENTRY(i8259_irq, ((regs->cp0_status & ST0_KSU) == KSU_KERNEL)); i8259_do_irq(i8259_irq, regs); } else { + TRACE_IRQ_ENTRY(nile4_to_irq(nile4_irq), ((regs->cp0_status & ST0_KSU) == KSU_KERNEL)); do_IRQ(nile4_to_irq(nile4_irq), regs); } nile4_enable_irq(nile4_irq); @@ -204,6 +213,7 @@ ddb5476_led_d3(0); ddb5476_led_hex(nesting < 16 ? nesting : 15); #endif + TRACE_IRQ_EXIT(); } void ddb_local1_irqdispatch(void) diff -urN -X dontdiff linux-2.5.33/arch/mips/kernel/irq.c linux-2.5.33.ltt.lockless/arch/mips/kernel/irq.c --- linux-2.5.33/arch/mips/kernel/irq.c Sat Aug 31 15:04:50 2002 +++ linux-2.5.33.ltt.lockless/arch/mips/kernel/irq.c Sun Sep 8 23:00:20 2002 @@ -7,6 +7,10 @@ * * Copyright (C) 1992 Linus Torvalds * Copyright (C) 1994 - 2000 Ralf Baechle + * + * ---- for LTT patch ---- + * Copyright (C) 2001 Takuzo O'Hara (takuzo@sm.sony.co.jp). 
+ * */ #include #include @@ -19,6 +23,8 @@ #include #include +#include + #include /* @@ -244,6 +250,8 @@ struct irqaction * action; unsigned int status; + TRACE_IRQ_ENTRY(irq, ((regs->cp0_status & ST0_KSU) == KSU_KERNEL)); + kstat.irqs[cpu][irq]++; spin_lock(&desc->lock); desc->handler->ack(irq); @@ -303,6 +311,8 @@ desc->handler->end(irq); spin_unlock(&desc->lock); + TRACE_IRQ_EXIT(); + if (softirq_pending(cpu)) do_softirq(); return 1; diff -urN -X dontdiff linux-2.5.33/arch/mips/kernel/scall_o32.S linux-2.5.33.ltt.lockless/arch/mips/kernel/scall_o32.S --- linux-2.5.33/arch/mips/kernel/scall_o32.S Sat Aug 31 15:05:31 2002 +++ linux-2.5.33.ltt.lockless/arch/mips/kernel/scall_o32.S Sun Sep 8 23:00:20 2002 @@ -4,7 +4,12 @@ * for more details. * * Copyright (C) 1997, 1998, 1999, 2000 by Ralf Baechle + * + * ---- for LTT patch ---- + * Copyright (C) 2001 Takuzo O'Hara (takuzo@sm.sony.co.jp). + * */ + #include #include #include @@ -48,6 +53,10 @@ bgez t0, stackargs stack_done: +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + sw a3, PT_R26(sp) # save for syscall restart + b ltt_trace_a_syscall +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ sw a3, PT_R26(sp) # save for syscall restart #error lw t0, TASK_PTRACE($28) # syscall tracing enabled? andi t0, PT_TRACESYS @@ -98,6 +107,35 @@ /* ------------------------------------------------------------------------ */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +ltt_trace_a_syscall: + SAVE_STATIC + sw t2, PT_R1(sp) + move a0, sp + jal trace_real_syscall_entry + lw t2, PT_R1(sp) + + lw a0, PT_R4(sp) # Restore argument registers + lw a1, PT_R5(sp) + lw a2, PT_R6(sp) + lw a3, PT_R7(sp) + jalr t2 + + li t0, -EMAXERRNO - 1 # error? 
+ sltu t0, t0, v0 + sw t0, PT_R7(sp) # set error flag + beqz t0, 1f + + negu v0 # error + sw v0, PT_R0(sp) # set flag for syscall restarting +1: sw v0, PT_R2(sp) # result + + jal trace_real_syscall_exit + j o32_ret_from_sys_call +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ + +/* ------------------------------------------------------------------------ */ + trace_a_syscall: SAVE_STATIC sw t2, PT_R1(sp) diff -urN -X dontdiff linux-2.5.33/arch/mips/kernel/time.c linux-2.5.33.ltt.lockless/arch/mips/kernel/time.c --- linux-2.5.33/arch/mips/kernel/time.c Sat Aug 31 15:04:59 2002 +++ linux-2.5.33.ltt.lockless/arch/mips/kernel/time.c Sun Sep 8 23:00:20 2002 @@ -368,6 +368,8 @@ { int cpu = smp_processor_id(); + TRACE_IRQ_ENTRY(irq, ((regs->cp0_status & ST0_KSU) == KSU_KERNEL)); + irq_enter(cpu, irq); kstat.irqs[cpu][irq]++; @@ -376,6 +378,8 @@ irq_exit(cpu, irq); + TRACE_IRQ_EXIT(); + if (softirq_pending(cpu)) do_softirq(); } diff -urN -X dontdiff linux-2.5.33/arch/mips/kernel/traps.c linux-2.5.33.ltt.lockless/arch/mips/kernel/traps.c --- linux-2.5.33/arch/mips/kernel/traps.c Sat Aug 31 15:04:58 2002 +++ linux-2.5.33.ltt.lockless/arch/mips/kernel/traps.c Sun Sep 8 23:00:20 2002 @@ -10,6 +10,10 @@ * * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com * Copyright (C) 2000, 01 MIPS Technologies, Inc. + * + * ---- for LTT patch ---- + * Copyright (C) 2001 Takuzo O'Hara (takuzo@sm.sony.co.jp). 
+ * */ #include #include @@ -20,6 +24,8 @@ #include #include +#include + #include #include #include @@ -34,6 +40,7 @@ #include #include #include +#include /* * Machine specific interrupt handlers @@ -186,6 +193,86 @@ } } +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +asmlinkage void trace_real_syscall_entry(struct pt_regs * regs) +{ +#if 0 + int use_depth; + int use_bounds; + int depth = 0; + int seek_depth; + unsigned long lower_bound; + unsigned long upper_bound; + unsigned long addr; + unsigned long* stack; +#endif + trace_syscall_entry trace_syscall_event; + + /* Set the syscall ID */ + trace_syscall_event.syscall_id = (uint8_t) (regs->regs[2] - __NR_Linux); /* v0 */ + + /* Set the address in any case */ + trace_syscall_event.address = regs->cp0_epc; + + /* Are we in the kernel (This is a kernel thread)? */ + if((regs->cp0_status & ST0_KSU) == KSU_KERNEL) + /* Don't go digging anywhere */ + goto trace_syscall_end; // takuzo: why? no one wants to monitor kernel threads? + +#if 0 // takuzo: I'll just do it later + /* Get the trace configuration */ + if(trace_get_config(&use_depth, + &use_bounds, + &seek_depth, + (void*)&lower_bound, + (void*)&upper_bound) < 0) + goto trace_syscall_end; + + /* Do we have to search for an eip address range */ + if((use_depth == 1) || (use_bounds == 1)) + { + /* Start at the top of the stack (bottom address since stacks grow downward) */ + stack = (unsigned long*) regs->esp; + + /* Keep on going until we reach the end of the process' stack limit (wherever it may be) */ + while(!get_user(addr, stack)) + { + /* Does this LOOK LIKE an address in the program */ + if((addr > current->mm->start_code) + &&(addr < current->mm->end_code)) + { + /* Does this address fit the description */ + if(((use_depth == 1) && (depth == seek_depth)) + ||((use_bounds == 1) && (addr > lower_bound) && (addr < upper_bound))) + { + /* Set the address */ + trace_syscall_event.address = addr; + + /* We're done */ + goto trace_syscall_end; + } + else + /* We're one 
depth more */ + depth++; + } + /* Go on to the next address */ + stack++; + } + } +#endif /* 0 */ + +trace_syscall_end: + /* Trace the event */ + trace_event(TRACE_EV_SYSCALL_ENTRY, &trace_syscall_event); +} + +asmlinkage void trace_real_syscall_exit(void) +{ + trace_event(TRACE_EV_SYSCALL_EXIT, NULL); +} + +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ + spinlock_t die_lock; extern void __die(const char * str, struct pt_regs * regs, const char *where, @@ -312,20 +399,28 @@ asmlinkage void do_ibe(struct pt_regs *regs) { +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); ibe_board_handler(regs); +// TRACE_TRAP_EXIT(); } asmlinkage void do_dbe(struct pt_regs *regs) { +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); dbe_board_handler(regs); +// TRACE_TRAP_EXIT(); } asmlinkage void do_ov(struct pt_regs *regs) { - if (compute_return_epc(regs)) - return; +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); + if (compute_return_epc(regs)) { +// TRACE_TRAP_EXIT(); + return; + } force_sig(SIGFPE, current); +// TRACE_TRAP_EXIT(); } /* @@ -333,6 +428,7 @@ */ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) { +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); if (fcr31 & FPU_CSR_UNI_X) { extern void save_fp(struct task_struct *); extern void restore_fp(struct task_struct *); @@ -366,14 +462,18 @@ if (sig) force_sig(sig, current); +// TRACE_TRAP_EXIT(); return; } - if (compute_return_epc(regs)) + if (compute_return_epc(regs)){ +// TRACE_TRAP_EXIT(); return; + } force_sig(SIGFPE, current); printk(KERN_DEBUG "Sent send SIGFPE to %s\n", current->comm); +// TRACE_TRAP_EXIT(); } static inline int get_insn_opcode(struct pt_regs *regs, unsigned int *opcode) @@ -395,6 +495,8 @@ unsigned int opcode, bcode; unsigned int *epc; +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); + epc = (unsigned int *) regs->cp0_epc + ((regs->cp0_cause & CAUSEF_BD) != 0); if (get_user(opcode, epc)) @@ -428,10 +530,12 @@ 
default: force_sig(SIGTRAP, current); } +// TRACE_TRAP_EXIT(); return; sigsegv: force_sig(SIGSEGV, current); +// TRACE_TRAP_EXIT(); } asmlinkage void do_tr(struct pt_regs *regs) @@ -493,21 +597,27 @@ if (!user_mode(regs)) BUG(); +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); if (!get_insn_opcode(regs, &opcode)) { if ((opcode & OPCODE) == LL) { simulate_ll(regs, opcode); + // TRACE_TRAP_EXIT(); return; } if ((opcode & OPCODE) == SC) { simulate_sc(regs, opcode); + // TRACE_TRAP_EXIT(); return; } } - if (compute_return_epc(regs)) + if (compute_return_epc(regs)) { + // TRACE_TRAP_EXIT(); return; + } force_sig(SIGILL, current); +// TRACE_TRAP_EXIT(); } /* @@ -623,6 +733,8 @@ void fpu_emulator_init_fpu(void); int sig; +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); + cpid = (regs->cp0_cause >> CAUSEB_CE) & 3; if (cpid != 1) goto bad_cid; @@ -631,8 +743,10 @@ goto fp_emul; regs->cp0_status |= ST0_CU1; - if (last_task_used_math == current) + if (last_task_used_math == current) { + // TRACE_TRAP_EXIT(); return; + } if (current->used_math) { /* Using the FPU again. */ lazy_fpu_switch(last_task_used_math); @@ -641,6 +755,7 @@ current->used_math = 1; } last_task_used_math = current; +// TRACE_TRAP_EXIT(); return; fp_emul: @@ -654,10 +769,12 @@ last_task_used_math = current; if (sig) force_sig(sig, current); +// TRACE_TRAP_EXIT(); return; bad_cid: force_sig(SIGILL, current); +// TRACE_TRAP_EXIT(); } asmlinkage void do_watch(struct pt_regs *regs) @@ -666,7 +783,9 @@ * We use the watch exception where available to detect stack * overflows. */ +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); show_regs(regs); +// TRACE_TRAP_EXIT(); panic("Caught WATCH exception - probably caused by stack overflow."); } @@ -684,7 +803,9 @@ * caused by a new unknown cpu type or after another deadly * hard/software error. 
*/ +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); show_regs(regs); +// TRACE_TRAP_EXIT(); panic("Caught reserved exception - should not happen."); } diff -urN -X dontdiff linux-2.5.33/arch/mips/kernel/unaligned.c linux-2.5.33.ltt.lockless/arch/mips/kernel/unaligned.c --- linux-2.5.33/arch/mips/kernel/unaligned.c Sat Aug 31 15:04:47 2002 +++ linux-2.5.33.ltt.lockless/arch/mips/kernel/unaligned.c Sun Sep 8 23:00:20 2002 @@ -8,6 +8,9 @@ * Copyright (C) 1996, 1998 by Ralf Baechle * Copyright (C) 1999 Silicon Graphics, Inc. * + * ---- for LTT patch ---- + * Copyright (C) 2001 Takuzo O'Hara (takuzo@sm.sony.co.jp). + * * This file contains exception handler for address error exception with the * special capability to execute faulting instructions in software. The * handler does not try to handle the case when the program counter points @@ -78,6 +81,8 @@ #include #include +#include + #include #include #include @@ -400,6 +405,8 @@ return; } +// TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); + /* * Did we catch a fault trying to load an instruction? * This also catches attempts to activate MIPS16 code on @@ -409,8 +416,10 @@ goto sigbus; pc = regs->cp0_epc + ((regs->cp0_cause & CAUSEF_BD) ? 4 : 0); - if (compute_return_epc(regs)) + if (compute_return_epc(regs)){ + // TRACE_TRAP_EXIT(); return; + } if ((current->thread.mflags & MF_FIXADE) == 0) goto sigbus; @@ -419,11 +428,13 @@ unaligned_instructions++; #endif +// TRACE_TRAP_EXIT(); return; sigbus: die_if_kernel ("Kernel unaligned instruction access", regs); force_sig(SIGBUS, current); +// TRACE_TRAP_EXIT(); return; } diff -urN -X dontdiff linux-2.5.33/arch/mips/mm/fault.c linux-2.5.33.ltt.lockless/arch/mips/mm/fault.c --- linux-2.5.33/arch/mips/mm/fault.c Sat Aug 31 15:04:53 2002 +++ linux-2.5.33.ltt.lockless/arch/mips/mm/fault.c Sun Sep 8 23:00:20 2002 @@ -4,6 +4,10 @@ * for more details. 
* * Copyright (C) 1995 - 2000 by Ralf Baechle + * + * ---- for LTT patch ---- + * Copyright (C) 2001 Takuzo O'Hara (takuzo@sm.sony.co.jp). + * */ #include #include @@ -19,12 +23,18 @@ #include #include +#include + #include #include #include #include #include #include +#include +#include + +#define EXC_CODE(x) ((CAUSEF_EXCCODE & (x)) >> CAUSEB_EXCCODE) #define development_version (LINUX_VERSION_CODE & 0x100) @@ -87,6 +97,10 @@ * we can handle it.. */ good_area: + // takuzo: + // I only made this to log page faults for reasonable usermode context. + // page faults in kernel, fixups, sigbuses are silently ignored... + TRACE_TRAP_ENTRY(EXC_CODE(regs->cp0_cause), regs->cp0_epc); info.si_code = SEGV_ACCERR; if (write) { @@ -116,6 +130,7 @@ } up_read(&mm->mmap_sem); +// TRACE_TRAP_EXIT(); return; /* diff -urN -X dontdiff linux-2.5.33/arch/ppc/config.in linux-2.5.33.ltt.lockless/arch/ppc/config.in --- linux-2.5.33/arch/ppc/config.in Sat Aug 31 15:04:45 2002 +++ linux-2.5.33.ltt.lockless/arch/ppc/config.in Sun Sep 8 23:00:20 2002 @@ -588,6 +588,8 @@ source lib/Config.in +source drivers/trace/Config.in + mainmenu_option next_comment comment 'Kernel hacking' diff -urN -X dontdiff linux-2.5.33/arch/ppc/kernel/entry.S linux-2.5.33.ltt.lockless/arch/ppc/kernel/entry.S --- linux-2.5.33/arch/ppc/kernel/entry.S Sat Aug 31 15:05:34 2002 +++ linux-2.5.33.ltt.lockless/arch/ppc/kernel/entry.S Sun Sep 8 23:00:20 2002 @@ -106,6 +106,32 @@ RFI #endif /* CONFIG_PPC_ISERIES */ +/* LTT stuff */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +#define TRACE_REAL_ASM_SYSCALL_ENTRY \ + addi r3,r1,STACK_FRAME_OVERHEAD; /* Put pointer to registers into r3 */ \ + mflr r29; /* Save LR */ \ + bl trace_real_syscall_entry; /* Call real trace function */ \ + mtlr r29; /* Restore LR */ \ + lwz r0,GPR0(r1); /* Restore original registers */ \ + lwz r3,GPR3(r1); \ + lwz r4,GPR4(r1); \ + lwz r5,GPR5(r1); \ + lwz r6,GPR6(r1); \ + lwz r7,GPR7(r1); \ + lwz r8,GPR8(r1); +#define TRACE_REAL_ASM_SYSCALL_EXIT \ + bl 
trace_real_syscall_exit; /* Call real trace function */ \ + lwz r0,GPR0(r1); /* Restore original registers */ \ + lwz r3,RESULT(r1); \ + lwz r4,GPR4(r1); \ + lwz r5,GPR5(r1); \ + lwz r6,GPR6(r1); \ + lwz r7,GPR7(r1); \ + lwz r8,GPR8(r1); \ + addi r9,r1,STACK_FRAME_OVERHEAD; +#endif + /* * Handle a system call. */ @@ -136,6 +162,9 @@ slwi r0,r0,2 lwzx r10,r10,r0 /* Fetch system call handler [ptr] */ mtlr r10 +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + TRACE_REAL_ASM_SYSCALL_ENTRY ; +#endif addi r9,r1,STACK_FRAME_OVERHEAD blrl /* Call handler */ .globl ret_from_syscall @@ -143,6 +172,10 @@ #ifdef SHOW_SYSCALLS bl do_show_syscall_exit #endif +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + stw r3,RESULT(r1) /* Save result */ + TRACE_REAL_ASM_SYSCALL_EXIT ; +#endif mr r6,r3 li r11,-_LAST_ERRNO cmpl 0,r3,r11 diff -urN -X dontdiff linux-2.5.33/arch/ppc/kernel/irq.c linux-2.5.33.ltt.lockless/arch/ppc/kernel/irq.c --- linux-2.5.33/arch/ppc/kernel/irq.c Sat Aug 31 15:04:53 2002 +++ linux-2.5.33.ltt.lockless/arch/ppc/kernel/irq.c Sun Sep 8 23:00:20 2002 @@ -49,6 +49,8 @@ #include #include +#include + #include #include #include @@ -427,6 +429,8 @@ int cpu = smp_processor_id(); irq_desc_t *desc = irq_desc + irq; + TRACE_IRQ_ENTRY(irq, !(user_mode(regs))); + kstat.irqs[cpu][irq]++; spin_lock(&desc->lock); ack_irq(irq); @@ -504,6 +508,8 @@ irq_desc[irq].handler->enable(irq); } spin_unlock(&desc->lock); + + TRACE_IRQ_EXIT(); } #ifndef CONFIG_PPC_ISERIES /* iSeries version is in iSeries_pic.c */ diff -urN -X dontdiff linux-2.5.33/arch/ppc/kernel/misc.S linux-2.5.33.ltt.lockless/arch/ppc/kernel/misc.S --- linux-2.5.33/arch/ppc/kernel/misc.S Sat Aug 31 15:04:56 2002 +++ linux-2.5.33.ltt.lockless/arch/ppc/kernel/misc.S Sun Sep 8 23:00:20 2002 @@ -1016,7 +1016,11 @@ * Create a kernel thread * kernel_thread(fn, arg, flags) */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +_GLOBAL(original_kernel_thread) +#else _GLOBAL(kernel_thread) +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ stwu 
r1,-16(r1) stw r30,8(r1) stw r31,12(r1) diff -urN -X dontdiff linux-2.5.33/arch/ppc/kernel/process.c linux-2.5.33.ltt.lockless/arch/ppc/kernel/process.c --- linux-2.5.33/arch/ppc/kernel/process.c Sat Aug 31 15:05:10 2002 +++ linux-2.5.33.ltt.lockless/arch/ppc/kernel/process.c Sun Sep 8 23:00:20 2002 @@ -37,6 +37,8 @@ #include #include +#include + #include #include #include @@ -297,6 +299,19 @@ show_stack((unsigned long *)regs->gpr[1]); } +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +long original_kernel_thread(int (*fn) (void *), void* arg, unsigned long flags); +long kernel_thread(int (*fn) (void *), void* arg, unsigned long flags) +{ + long retval; + + retval = original_kernel_thread(fn, arg, flags); + if (retval > 0) + TRACE_PROCESS(TRACE_EV_PROCESS_KTHREAD, retval, (int) fn); + return retval; +} +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ + void exit_thread(void) { if (last_task_used_math == current) diff -urN -X dontdiff linux-2.5.33/arch/ppc/kernel/syscalls.c linux-2.5.33.ltt.lockless/arch/ppc/kernel/syscalls.c --- linux-2.5.33/arch/ppc/kernel/syscalls.c Sat Aug 31 15:05:34 2002 +++ linux-2.5.33.ltt.lockless/arch/ppc/kernel/syscalls.c Sun Sep 8 23:00:20 2002 @@ -39,6 +39,8 @@ #include #include +#include + #include #include #include @@ -85,6 +87,8 @@ version = call >> 16; /* hack for backward compatibility */ call &= 0xffff; + TRACE_IPC(TRACE_EV_IPC_CALL, call, first); + ret = -EINVAL; switch (call) { case SEMOP: diff -urN -X dontdiff linux-2.5.33/arch/ppc/kernel/time.c linux-2.5.33.ltt.lockless/arch/ppc/kernel/time.c --- linux-2.5.33/arch/ppc/kernel/time.c Sat Aug 31 15:04:52 2002 +++ linux-2.5.33.ltt.lockless/arch/ppc/kernel/time.c Sun Sep 8 23:00:20 2002 @@ -60,6 +60,8 @@ #include #include +#include + #include #include #include @@ -161,6 +163,8 @@ if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); + TRACE_TRAP_ENTRY(regs->trap, instruction_pointer(regs)); + irq_enter(); while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) < 0) { @@ 
-215,6 +219,8 @@ ppc_md.heartbeat(); irq_exit(); + + TRACE_TRAP_EXIT(); } #endif /* CONFIG_PPC_ISERIES */ diff -urN -X dontdiff linux-2.5.33/arch/ppc/kernel/traps.c linux-2.5.33.ltt.lockless/arch/ppc/kernel/traps.c --- linux-2.5.33/arch/ppc/kernel/traps.c Sat Aug 31 15:04:52 2002 +++ linux-2.5.33.ltt.lockless/arch/ppc/kernel/traps.c Sun Sep 8 23:00:20 2002 @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include @@ -111,7 +113,9 @@ debugger(regs); die("Exception in kernel mode", regs, signr); } + TRACE_TRAP_ENTRY(regs->trap, instruction_pointer(regs)); force_sig(signr, current); + TRACE_TRAP_EXIT(); } void @@ -370,6 +374,89 @@ panic("kernel stack overflow"); } +/* Trace related code */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +asmlinkage void trace_real_syscall_entry(struct pt_regs * regs) +{ + int use_depth; + int use_bounds; + int depth = 0; + int seek_depth; + unsigned long lower_bound; + unsigned long upper_bound; + unsigned long addr; + unsigned long* stack; + trace_syscall_entry trace_syscall_event; + + /* Set the syscall ID */ + trace_syscall_event.syscall_id = (uint8_t) regs->gpr[0]; + + /* Set the address in any case */ + trace_syscall_event.address = instruction_pointer(regs); + + /* Are we in the kernel (This is a kernel thread)? 
*/ + if(!user_mode(regs)) + /* Don't go digging anywhere */ + goto trace_syscall_end; + + /* Get the trace configuration */ + if(trace_get_config(&use_depth, + &use_bounds, + &seek_depth, + (void*)&lower_bound, + (void*)&upper_bound) < 0) + goto trace_syscall_end; + + /* Do we have to search for an eip address range */ + if((use_depth == 1) || (use_bounds == 1)) + { + /* Start at the top of the stack (bottom address since stacks grow downward) */ + stack = (unsigned long*) regs->gpr[1]; + + /* Skip over first stack frame as the return address isn't valid */ + if(get_user(addr, stack)) + goto trace_syscall_end; + stack = (unsigned long*) addr; + + /* Keep on going until we reach the end of the process' stack limit (wherever it may be) */ + while(!get_user(addr, stack + 1)) /* "stack + 1", since this is where the IP is */ + { + /* Does this LOOK LIKE an address in the program */ + if((addr > current->mm->start_code) + &&(addr < current->mm->end_code)) + { + /* Does this address fit the description */ + if(((use_depth == 1) && (depth == seek_depth)) + ||((use_bounds == 1) && (addr > lower_bound) && (addr < upper_bound))) + { + /* Set the address */ + trace_syscall_event.address = addr; + + /* We're done */ + goto trace_syscall_end; + } + else + /* We're one depth more */ + depth++; + } + /* Go on to the next address */ + if(get_user(addr, stack)) + goto trace_syscall_end; + stack = (unsigned long*) addr; + } + } + +trace_syscall_end: + /* Trace the event */ + trace_event(TRACE_EV_SYSCALL_ENTRY, &trace_syscall_event); +} + +asmlinkage void trace_real_syscall_exit(void) +{ + trace_event(TRACE_EV_SYSCALL_EXIT, NULL); +} +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ + void nonrecoverable_exception(struct pt_regs *regs) { printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", diff -urN -X dontdiff linux-2.5.33/arch/ppc/mm/fault.c linux-2.5.33.ltt.lockless/arch/ppc/mm/fault.c --- linux-2.5.33/arch/ppc/mm/fault.c Sat Aug 31 15:04:54 2002 +++ 
linux-2.5.33.ltt.lockless/arch/ppc/mm/fault.c Sun Sep 8 23:00:20 2002 @@ -31,6 +31,8 @@ #include #include +#include + #include #include #include @@ -88,22 +90,28 @@ is_write = error_code & 0x02000000; #endif /* CONFIG_4xx */ + TRACE_TRAP_ENTRY(regs->trap, instruction_pointer(regs)); + #if defined(CONFIG_XMON) || defined(CONFIG_KGDB) if (debugger_fault_handler && TRAP(regs) == 0x300) { debugger_fault_handler(regs); + TRACE_TRAP_EXIT(); return; } #if !defined(CONFIG_4xx) if (error_code & 0x00400000) { /* DABR match */ - if (debugger_dabr_match(regs)) + if (debugger_dabr_match(regs)){ + TRACE_TRAP_EXIT(); return; + } } #endif /* !CONFIG_4xx */ #endif /* CONFIG_XMON || CONFIG_KGDB */ if (in_interrupt() || mm == NULL) { bad_page_fault(regs, address, SIGSEGV); + TRACE_TRAP_EXIT(); return; } down_read(&mm->mmap_sem); @@ -168,6 +176,7 @@ _tlbie(address); pte_unmap(ptep); up_read(&mm->mmap_sem); + TRACE_TRAP_EXIT(); return; } if (ptep != NULL) @@ -210,6 +219,7 @@ * -- Cort */ pte_misses++; + TRACE_TRAP_EXIT(); return; bad_area: @@ -223,10 +233,12 @@ info.si_code = code; info.si_addr = (void *) address; force_sig_info(SIGSEGV, &info, current); + TRACE_TRAP_EXIT(); return; } bad_page_fault(regs, address, SIGSEGV); + TRACE_TRAP_EXIT(); return; /* @@ -244,6 +256,7 @@ if (user_mode(regs)) do_exit(SIGKILL); bad_page_fault(regs, address, SIGKILL); + TRACE_TRAP_EXIT(); return; do_sigbus: @@ -255,6 +268,7 @@ force_sig_info (SIGBUS, &info, current); if (!user_mode(regs)) bad_page_fault(regs, address, SIGBUS); + TRACE_TRAP_EXIT(); } /* diff -urN -X dontdiff linux-2.5.33/arch/s390/config.in linux-2.5.33.ltt.lockless/arch/s390/config.in --- linux-2.5.33/arch/s390/config.in Sat Aug 31 15:04:53 2002 +++ linux-2.5.33.ltt.lockless/arch/s390/config.in Sun Sep 8 23:00:20 2002 @@ -65,6 +65,8 @@ source fs/Config.in +source drivers/trace/Config.in + mainmenu_option next_comment comment 'Kernel hacking' diff -urN -X dontdiff linux-2.5.33/arch/s390/kernel/entry.S 
linux-2.5.33.ltt.lockless/arch/s390/kernel/entry.S --- linux-2.5.33/arch/s390/kernel/entry.S Sat Aug 31 15:04:51 2002 +++ linux-2.5.33.ltt.lockless/arch/s390/kernel/entry.S Sun Sep 8 23:00:20 2002 @@ -7,6 +7,7 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Portions added by T. Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation */ #include @@ -183,6 +184,14 @@ sll %r8,2 GET_THREAD_INFO # load pointer to task_struct to R9 stosm 24(%r15),0x03 # reenable interrupts +/* call to ltt trace done here. R8 has the syscall (svc) number to trace */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) /* tjh - ltt port */ + /* add call to trace_real_syscall_entry */ + la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter + l %r1,BASED(.Ltracesysent) + basr %r14,%r1 + lm %r0,%r6,SP_R0(%r15) /* restore call clobbered regs tjh */ +#endif l %r8,sys_call_table-entry_base(%r8,%r13) # get system call addr. tm __TI_flags+3(%r9),_TIF_SYSCALL_TRACE bo BASED(sysc_tracesys) @@ -191,6 +200,13 @@ # ATTENTION: check sys_execve_glue before # changing anything here !! +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) /* tjh - ltt port * + /* add call to trace_real_syscall_exit */ + la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter + l %r1,BASED(.Ltracesysext) + basr %r14,%r1 + lm %r0,%r6,SP_R0(%r15) /* restore call clobbered regs */ +#endif sysc_return: stnsm 24(%r15),0xfc # disable I/O and ext. 
interrupts tm __TI_flags+3(%r9),_TIF_WORK_MASK @@ -896,6 +912,8 @@ .Lsigaltstack: .long sys_sigaltstack .Ltrace: .long syscall_trace .Lvfork: .long sys_vfork +.Ltracesysent: .long trace_real_syscall_entry +.Ltracesysext: .long trace_real_syscall_exit #ifdef CONFIG_SMP .Lschedtail: .long schedule_tail #endif diff -urN -X dontdiff linux-2.5.33/arch/s390/kernel/process.c linux-2.5.33.ltt.lockless/arch/s390/kernel/process.c --- linux-2.5.33/arch/s390/kernel/process.c Sat Aug 31 15:05:31 2002 +++ linux-2.5.33.ltt.lockless/arch/s390/kernel/process.c Sun Sep 8 23:00:20 2002 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -145,6 +146,10 @@ : "d" (clone_arg), "i" (__NR_clone), "i" (__NR_exit), "d" (arg), "d" (fn), "i" (__LC_KERNEL_STACK) , "i" (-STACK_FRAME_OVERHEAD) : "2", "3", "4" ); +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + if (retval > 0) + TRACE_PROCESS(TRACE_EV_PROCESS_KTHREAD, retval, (int) fn); +#endif return retval; } diff -urN -X dontdiff linux-2.5.33/arch/s390/kernel/sys_s390.c linux-2.5.33.ltt.lockless/arch/s390/kernel/sys_s390.c --- linux-2.5.33/arch/s390/kernel/sys_s390.c Sat Aug 31 15:04:57 2002 +++ linux-2.5.33.ltt.lockless/arch/s390/kernel/sys_s390.c Sun Sep 8 23:00:20 2002 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -145,6 +146,8 @@ struct ipc_kludge tmp; int ret; + TRACE_IPC(TRACE_EV_IPC_CALL, call, first); + switch (call) { case SEMOP: return sys_semop (first, (struct sembuf *)ptr, second); diff -urN -X dontdiff linux-2.5.33/arch/s390/kernel/traps.c linux-2.5.33.ltt.lockless/arch/s390/kernel/traps.c --- linux-2.5.33/arch/s390/kernel/traps.c Sat Aug 31 15:04:55 2002 +++ linux-2.5.33.ltt.lockless/arch/s390/kernel/traps.c Sun Sep 8 23:00:20 2002 @@ -5,6 +5,7 @@ * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Portions added by T. 
Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds @@ -28,6 +29,8 @@ #include #include +#include + #include #include #include @@ -275,6 +278,9 @@ static void inline do_trap(long interruption_code, int signr, char *str, struct pt_regs *regs, siginfo_t *info) { + trapid_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; + /* * We got all needed information from the lowcore and can * now safely switch on interrupts. @@ -282,6 +288,10 @@ if (regs->psw.mask & PSW_PROBLEM_STATE) local_irq_enable(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&interruption_code,sizeof(interruption_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code, (regs->psw.addr & PSW_ADDR_MASK)); + if (regs->psw.mask & PSW_PROBLEM_STATE) { struct task_struct *tsk = current; @@ -310,6 +320,7 @@ else die(str, regs, interruption_code); } + TRACE_TRAP_EXIT(); } static inline void *get_check_address(struct pt_regs *regs) @@ -407,6 +418,8 @@ { __u8 opcode[6]; __u16 *location; + trapid_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; int signal = 0; location = (__u16 *)(regs->psw.addr-S390_lowcore.pgm_ilc); @@ -418,6 +431,10 @@ if (regs->psw.mask & PSW_PROBLEM_STATE) local_irq_enable(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&interruption_code,sizeof(interruption_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code, (regs->psw.addr & PSW_ADDR_MASK)); + if (regs->psw.mask & PSW_PROBLEM_STATE) get_user(*((__u16 *) opcode), location); else @@ -458,6 +475,7 @@ else if (signal) do_trap(interruption_code, signal, "illegal operation", regs, NULL); + TRACE_TRAP_EXIT(); } @@ -468,6 +486,8 @@ { __u8 opcode[6]; __u16 *location = NULL; + trapid_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; int signal = 0; location = (__u16 *) get_check_address(regs); @@ -478,6 +498,10 @@ */ if 
(regs->psw.mask & PSW_PROBLEM_STATE) local_irq_enable(); + + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&interruption_code,sizeof(interruption_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code, (regs->psw.addr & PSW_ADDR_MASK)); if (regs->psw.mask & PSW_PROBLEM_STATE) { get_user(*((__u16 *) opcode), location); @@ -522,6 +546,7 @@ do_trap(interruption_code, signal, "specification exception", regs, &info); } + TRACE_TRAP_EXIT(); } #else DO_ERROR_INFO(SIGILL, "specification exception", specification_exception, @@ -531,6 +556,8 @@ asmlinkage void data_exception(struct pt_regs * regs, long interruption_code) { __u16 *location; + trapid_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; int signal = 0; location = (__u16 *) get_check_address(regs); @@ -542,6 +569,10 @@ if (regs->psw.mask & PSW_PROBLEM_STATE) local_irq_enable(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&interruption_code,sizeof(interruption_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code, (regs->psw.addr & PSW_ADDR_MASK)); + if (MACHINE_HAS_IEEE) __asm__ volatile ("stfpc %0\n\t" : "=m" (current->thread.fp_regs.fpc)); @@ -617,6 +648,7 @@ do_trap(interruption_code, signal, "data exception", regs, &info); } + TRACE_TRAP_EXIT(); } @@ -671,6 +703,11 @@ void handle_per_exception(struct pt_regs *regs) { + trapid_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+6,&S390_lowcore.pgm_code,2); /* copy the interrupt code */ + TRACE_TRAP_ENTRY(ltt_interruption_code,(regs->psw.addr & PSW_ADDR_MASK)); if(regs->psw.mask&PSW_PROBLEM_STATE) { per_struct *per_info=&current->thread.per_info; @@ -687,5 +724,91 @@ /* Hopefully switching off per tracing will help us survive */ regs->psw.mask &= ~PSW_PER_MASK; } + TRACE_TRAP_EXIT(); } +/* ltt - Trace related code */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +asmlinkage void 
trace_real_syscall_entry(struct pt_regs * regs) +{ + int use_depth; + int use_bounds; + int depth = 0; + int seek_depth; + unsigned long lower_bound; + unsigned long upper_bound; + unsigned long addr; + unsigned long* stack; + unsigned long temp_stack; + trace_syscall_entry trace_syscall_event; + /* Set the syscall ID */ + /* Register 8 is setup just prior to the call */ + /* This instruction is just following linkage */ + /* so it's ok. If moved and chance of R8 being */ + /* clobbered, would need to dig it out of the stack */ + __asm__ volatile( + " stc 8,%0\n\t" + : "=m" (trace_syscall_event.syscall_id)); + /* get the psw address */ + trace_syscall_event.address = regs->psw.addr; + /* and off the hi-order bit */ + trace_syscall_event.address &= PSW_ADDR_MASK; + if(!(user_mode(regs))) /* if kernel mode, return */ + goto trace_syscall_end; + /* Get the trace configuration - if none, return */ + if(trace_get_config(&use_depth, + &use_bounds, + &seek_depth, + (void*)&lower_bound, + (void*)&upper_bound) < 0) + goto trace_syscall_end; + /* Do we have to search for an instruction pointer address range */ + if((use_depth == 1) || (use_bounds == 1)) + { + /* Start at the top of the stack */ + /* stack pointer is register 15 */ + stack = (unsigned long*) regs->gprs[15]; /* stack pointer */ + /* Keep on going until we reach the end of the process' stack limit */ + do + { + get_user(addr,stack+14); /* get the program address +0x38 */ + /* and off the hi-order bit */ + addr &= PSW_ADDR_MASK; + /* Does this LOOK LIKE an address in the program */ + if ((addr > current->mm->start_code) + &&(addr < current->mm->end_code)) + { + /* Does this address fit the description */ + if(((use_depth == 1) && (depth == seek_depth)) + ||((use_bounds == 1) && (addr > lower_bound) + && (addr < upper_bound))) + { + /* Set the address */ + trace_syscall_event.address = addr; + /* We're done */ + goto trace_syscall_end; + } + else + /* We're one depth more */ + depth++; + } + /* Go on to the next 
address */ + get_user(temp_stack,stack); /* get contents of stack */ + temp_stack &= PSW_ADDR_MASK; /* and off hi order bit */ + stack = (unsigned long *)temp_stack; /* move into stack */ + /* stack may or may not go to zero when end hit */ + /* using 0x7fffffff-_STK_LIM to validate that the address is */ + /* within the range of a valid stack address */ + /* If outside that range, exit the loop, stack end must have */ + /* been hit. */ + } while (stack >= (unsigned long *)(0x7fffffff-_STK_LIM)); + } +trace_syscall_end: + /* Trace the event */ + trace_event(TRACE_EV_SYSCALL_ENTRY, &trace_syscall_event); +} +asmlinkage void trace_real_syscall_exit(void) +{ + trace_event(TRACE_EV_SYSCALL_EXIT, NULL); +} +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ diff -urN -X dontdiff linux-2.5.33/arch/s390/mm/fault.c linux-2.5.33.ltt.lockless/arch/s390/mm/fault.c --- linux-2.5.33/arch/s390/mm/fault.c Sat Aug 31 15:05:31 2002 +++ linux-2.5.33.ltt.lockless/arch/s390/mm/fault.c Sun Sep 8 23:00:20 2002 @@ -5,6 +5,7 @@ * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Hartmut Penner (hp@de.ibm.com) * Ulrich Weigand (uweigand@de.ibm.com) + * Portions added by T. 
Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation * * Derived from "arch/i386/mm/fault.c" * Copyright (C) 1995 Linus Torvalds @@ -25,6 +26,7 @@ #include #include #include +#include #include #include @@ -154,6 +156,8 @@ int user_address; unsigned long fixup; int si_code = SEGV_MAPERR; + trapid_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; tsk = current; mm = tsk->mm; @@ -201,6 +205,9 @@ */ local_irq_enable(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&error_code,sizeof(error_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code,(regs->psw.addr & PSW_ADDR_MASK)); down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -247,6 +254,7 @@ } up_read(&mm->mmap_sem); + TRACE_TRAP_EXIT(); return; /* @@ -261,6 +269,7 @@ tsk->thread.prot_addr = address; tsk->thread.trap_no = error_code; force_sigsegv(regs, error_code, si_code, address); + TRACE_TRAP_EXIT(); return; } @@ -268,6 +277,7 @@ /* Are we prepared to handle this kernel fault? 
Handle exceptions or die */ if (!(regs->psw.mask & PSW_PROBLEM_STATE)) goto no_context; + + TRACE_TRAP_EXIT(); } void do_protection_exception(struct pt_regs *regs, unsigned long error_code) diff -urN -X dontdiff linux-2.5.33/arch/sh/config.in linux-2.5.33.ltt.lockless/arch/sh/config.in --- linux-2.5.33/arch/sh/config.in Sat Aug 31 15:05:23 2002 +++ linux-2.5.33.ltt.lockless/arch/sh/config.in Sun Sep 8 23:00:20 2002 @@ -358,6 +358,8 @@ fi endmenu +source drivers/trace/Config.in + mainmenu_option next_comment comment 'Kernel hacking' @@ -366,6 +368,7 @@ if [ "$CONFIG_SH_STANDARD_BIOS" = "y" ]; then bool 'Early printk support' CONFIG_SH_EARLY_PRINTK fi + endmenu source security/Config.in diff -urN -X dontdiff linux-2.5.33/arch/sh/kernel/entry.S linux-2.5.33.ltt.lockless/arch/sh/kernel/entry.S --- linux-2.5.33/arch/sh/kernel/entry.S Sat Aug 31 15:04:57 2002 +++ linux-2.5.33.ltt.lockless/arch/sh/kernel/entry.S Sun Sep 8 23:00:20 2002 @@ -370,6 +370,20 @@ mov.l r10, @r14 ! set syscall_nr STI() ! +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + ! TODO: for i386 this code only happens when not ptrace'd + mov r15, r4 ! pass pt_regs* as first arg + mov.l __trsen, r11 ! Call trace_real_syscall_entry() + jsr @r11 ! (will chomp R[0-7]) + nop + ! Reload R4-R7 from kernel stack + mov.l @(OFF_R4,r15), r4 ! arg0 + mov.l @(OFF_R5,r15), r5 + mov.l @(OFF_R6,r15), r6 + mov.l @(OFF_R7,r15), r7 ! arg3 + mov.l @(OFF_R3,r15), r3 ! syscall_nr +#endif + stc k_current, r11 #error mov.l @(tsk_ptrace,r11), r10 ! Is current PTRACE_SYSCALL'd? #error mov #PT_TRACESYS, r11 @@ -421,6 +435,14 @@ ! In case of trace syscall_ret_trace: mov.l r0, @(OFF_R0,r15) ! save the return value + +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + ! TODO: for i386 this code only happens when not ptrace'd + mov.l __trsex, r1 ! Call trace_real_syscall_exit() + jsr @r1 + nop +#endif + mov.l __syscall_trace, r1 mova ret_from_syscall, r0 jmp @r1 ! 
Call syscall_trace() which notifies superior @@ -504,6 +526,14 @@ .long syscall_ret_trace __syscall_ret: .long syscall_ret + +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +__trsen: + .long trace_real_syscall_entry +__trsex: + .long trace_real_syscall_exit +#endif + __INV_IMASK: .long 0xffffff0f ! ~(IMASK) @@ -536,6 +566,14 @@ #endif syscall_ret: mov.l r0, @(OFF_R0,r15) ! save the return value + +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + ! TODO: for i386 this code only happens when not ptrace'd + mov.l __trsex2, r1 ! Call trace_real_syscall_exit() + jsr @r1 + nop +#endif + /* fall through */ ENTRY(ret_from_syscall) @@ -563,6 +601,11 @@ #error .long do_signal __irq_stat: .long irq_stat +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +__trsex2: + .long trace_real_syscall_exit +#endif + .align 2 restore_all: diff -urN -X dontdiff linux-2.5.33/arch/sh/kernel/irq.c linux-2.5.33.ltt.lockless/arch/sh/kernel/irq.c --- linux-2.5.33/arch/sh/kernel/irq.c Sat Aug 31 15:04:55 2002 +++ linux-2.5.33.ltt.lockless/arch/sh/kernel/irq.c Sun Sep 8 23:00:20 2002 @@ -30,6 +30,8 @@ #include #include +#include + #include #include #include @@ -127,6 +129,12 @@ irq_enter(cpu, irq); +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + if (irq != TIMER_IRQ) { /* avoid double-reporting the timer IRQ */ + TRACE_IRQ_ENTRY(irq, !(user_mode(regs))); + } +#endif + status = 1; /* Force the "do bottom halves" bit */ if (!(action->flags & SA_INTERRUPT)) @@ -143,6 +151,12 @@ irq_exit(cpu, irq); +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + if (irq != TIMER_IRQ) { /* avoid double-reporting the timer IRQ */ + TRACE_IRQ_EXIT(); + } +#endif + return status; } diff -urN -X dontdiff linux-2.5.33/arch/sh/kernel/process.c linux-2.5.33.ltt.lockless/arch/sh/kernel/process.c --- linux-2.5.33/arch/sh/kernel/process.c Sat Aug 31 15:05:30 2002 +++ linux-2.5.33.ltt.lockless/arch/sh/kernel/process.c Sun Sep 8 23:00:20 2002 @@ -16,6 +16,8 @@ #include #include +#include + #include #include #include @@ -138,7 +140,16 @@ : "i" 
(__NR_exit), "r" (__sc3), "r" (__sc4), "r" (__sc5), "r" (__sc8), "r" (__sc9) : "memory", "t"); - return __sc0; +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + { + volatile unsigned long retval = __sc0; + if (retval > 0) + TRACE_PROCESS(TRACE_EV_PROCESS_KTHREAD, retval, (int) fn); + return retval; + } +#else + return __sc0; +#endif } /* diff -urN -X dontdiff linux-2.5.33/arch/sh/kernel/sys_sh.c linux-2.5.33.ltt.lockless/arch/sh/kernel/sys_sh.c --- linux-2.5.33/arch/sh/kernel/sys_sh.c Sat Aug 31 15:05:23 2002 +++ linux-2.5.33.ltt.lockless/arch/sh/kernel/sys_sh.c Sun Sep 8 23:00:20 2002 @@ -21,6 +21,8 @@ #include #include +#include + #include #include @@ -139,6 +141,8 @@ version = call >> 16; /* hack for backward compatibility */ call &= 0xffff; + TRACE_IPC(TRACE_EV_IPC_CALL, call, first); + if (call <= SEMCTL) switch (call) { case SEMOP: diff -urN -X dontdiff linux-2.5.33/arch/sh/kernel/traps.c linux-2.5.33.ltt.lockless/arch/sh/kernel/traps.c --- linux-2.5.33/arch/sh/kernel/traps.c Sat Aug 31 15:04:47 2002 +++ linux-2.5.33.ltt.lockless/arch/sh/kernel/traps.c Sun Sep 8 23:00:20 2002 @@ -25,6 +25,8 @@ #include #include +#include + #include #include #include @@ -42,7 +44,9 @@ sti(); \ tsk->thread.error_code = error_code; \ tsk->thread.trap_no = trapnr; \ + TRACE_TRAP_ENTRY(trapnr, regs.pc); \ force_sig(signr, tsk); \ + TRACE_TRAP_EXIT(); \ die_if_no_fixup(str,&regs,error_code); \ } @@ -464,6 +468,8 @@ asm volatile("stc r2_bank,%0": "=r" (error_code)); + TRACE_TRAP_ENTRY(error_code >> 5, regs->pc); + oldfs = get_fs(); if (user_mode(regs)) { @@ -487,8 +493,10 @@ tmp = handle_unaligned_access(instruction, regs); set_fs(oldfs); - if (tmp==0) - return; /* sorted */ + if (tmp==0) { + TRACE_TRAP_EXIT(); + return; /* sorted */ + } uspace_segv: printk(KERN_NOTICE "Killing process \"%s\" due to unaligned access\n", current->comm); @@ -509,6 +517,7 @@ handle_unaligned_access(instruction, regs); set_fs(oldfs); } + TRACE_TRAP_EXIT(); } DO_ERROR(12, SIGILL, "reserved instruction", 
reserved_inst, current) @@ -586,3 +595,78 @@ { printk("Backtrace not yet implemented for SH.\n"); } + +/* Trace related code */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +asmlinkage void trace_real_syscall_entry(struct pt_regs * regs) +{ + int use_depth; + int use_bounds; + int depth = 0; + int seek_depth; + unsigned long lower_bound; + unsigned long upper_bound; + unsigned long addr; + unsigned long* stack; + trace_syscall_entry trace_syscall_event; + + /* Set the syscall ID */ + trace_syscall_event.syscall_id = (uint8_t) regs->regs[REG_REG0+3]; + + /* Set the address in any case */ + trace_syscall_event.address = regs->pc; + + /* Are we in the kernel (This is a kernel thread)? */ + if(!user_mode(regs)) + /* Don't go digining anywhere */ + goto trace_syscall_end; + + /* Get the trace configuration */ + if(trace_get_config(&use_depth, &use_bounds, &seek_depth, + (void*)&lower_bound, (void*)&upper_bound) < 0) + goto trace_syscall_end; + + /* Do we have to search for an eip address range */ + if((use_depth == 1) || (use_bounds == 1)) + { + /* Start at the top of the stack (bottom address since stacks grow downward) */ + stack = (unsigned long*) regs->regs[REG_REG15]; + + /* Keep on going until we reach the end of the process' stack limit (wherever it may be) */ + while(!get_user(addr, stack)) + { + /* Does this LOOK LIKE an address in the program */ + /* TODO: does this work with shared libraries?? 
- Greg Banks */ + if((addr > current->mm->start_code) &&(addr < current->mm->end_code)) + { + /* Does this address fit the description */ + if(((use_depth == 1) && (depth == seek_depth)) + ||((use_bounds == 1) && (addr > lower_bound) + && (addr < upper_bound))) + { + /* Set the address */ + trace_syscall_event.address = addr; + + /* We're done */ + goto trace_syscall_end; + } + else + /* We're one depth more */ + depth++; + } + /* Go on to the next address */ + stack++; + } + } + +trace_syscall_end: + /* Trace the event */ + trace_event(TRACE_EV_SYSCALL_ENTRY, &trace_syscall_event); +} + +asmlinkage void trace_real_syscall_exit(void) +{ + trace_event(TRACE_EV_SYSCALL_EXIT, NULL); +} +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ + diff -urN -X dontdiff linux-2.5.33/arch/sh/mm/fault.c linux-2.5.33.ltt.lockless/arch/sh/mm/fault.c --- linux-2.5.33/arch/sh/mm/fault.c Sat Aug 31 15:04:54 2002 +++ linux-2.5.33.ltt.lockless/arch/sh/mm/fault.c Sun Sep 8 23:00:20 2002 @@ -20,6 +20,8 @@ #include #include +#include + #include #include #include @@ -98,6 +100,14 @@ tsk = current; mm = tsk->mm; +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + { + unsigned long trapnr; + asm volatile("stc r2_bank,%0": "=r" (trapnr)); + TRACE_TRAP_ENTRY(trapnr >> 5, regs->pc); /* trap 4,5 or 6 */ + } +#endif + /* * If we're in an interrupt or have no user * context, we must not take the fault.. @@ -149,6 +159,7 @@ } up_read(&mm->mmap_sem); + TRACE_TRAP_EXIT(); return; /* @@ -162,6 +173,7 @@ tsk->thread.address = address; tsk->thread.error_code = writeaccess; force_sig(SIGSEGV, tsk); + TRACE_TRAP_EXIT(); return; } @@ -170,6 +182,7 @@ fixup = search_exception_table(regs->pc); if (fixup != 0) { regs->pc = fixup; + TRACE_TRAP_EXIT(); return; } @@ -231,6 +244,8 @@ /* Kernel mode? 
Handle exceptions or die */ if (!user_mode(regs)) goto no_context; + + TRACE_TRAP_EXIT(); } /* diff -urN -X dontdiff linux-2.5.33/drivers/Makefile linux-2.5.33.ltt.lockless/drivers/Makefile --- linux-2.5.33/drivers/Makefile Sat Aug 31 15:05:31 2002 +++ linux-2.5.33.ltt.lockless/drivers/Makefile Sun Sep 8 23:00:20 2002 @@ -41,5 +41,6 @@ obj-$(CONFIG_BLUEZ) += bluetooth/ obj-$(CONFIG_HOTPLUG_PCI) += hotplug/ obj-$(CONFIG_ISDN_BOOL) += isdn/ +obj-$(CONFIG_TRACE) += trace/ include $(TOPDIR)/Rules.make diff -urN -X dontdiff linux-2.5.33/drivers/s390/cio/cio.c linux-2.5.33.ltt.lockless/drivers/s390/cio/cio.c --- linux-2.5.33/drivers/s390/cio/cio.c Sat Aug 31 15:05:35 2002 +++ linux-2.5.33.ltt.lockless/drivers/s390/cio/cio.c Sun Sep 8 23:00:20 2002 @@ -18,6 +18,8 @@ #include #include +#include + #include #include #include @@ -1002,9 +1004,11 @@ } irq_enter (cpu, irq); + TRACE_IRQ_ENTRY(irq, !(((regs).psw.mask&PSW_PROBLEM_STATE) != 0)); s390irq_spin_lock (irq); s390_process_IRQ (irq); s390irq_spin_unlock (irq); + TRACE_IRQ_EXIT(); irq_exit (cpu, irq); } diff -urN -X dontdiff linux-2.5.33/drivers/s390/s390mach.c linux-2.5.33.ltt.lockless/drivers/s390/s390mach.c --- linux-2.5.33/drivers/s390/s390mach.c Sat Aug 31 15:05:36 2002 +++ linux-2.5.33.ltt.lockless/drivers/s390/s390mach.c Sun Sep 8 23:00:20 2002 @@ -5,12 +5,14 @@ * S390 version * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Ingo Adlung (adlung@de.ibm.com) + * Portions added by T. 
Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation */ #include #include #include #include +#include #ifdef CONFIG_SMP #include #endif @@ -152,10 +154,20 @@ { int crw_count; mcic_t mcic; + trapid_t ltt_interruption_code; + uint32_t ltt_old_psw; DBG(KERN_INFO "s390_do_machine_check : starting ...\n"); memcpy(&mcic, &S390_lowcore.mcck_interruption_code, sizeof (__u64)); + memcpy( &ltt_interruption_code, + &S390_lowcore.mcck_interruption_code, + sizeof(__u64)); + memcpy( &ltt_old_psw, + &S390_lowcore.mcck_old_psw, + sizeof(uint32_t)); + ltt_old_psw &= PSW_ADDR_MASK; + TRACE_TRAP_ENTRY(ltt_interruption_code,ltt_old_psw); if (mcic.mcc.mcd.sd) /* system damage */ s390_handle_damage("received system damage machine check\n"); diff -urN -X dontdiff linux-2.5.33/drivers/trace/Config.help linux-2.5.33.ltt.lockless/drivers/trace/Config.help --- linux-2.5.33/drivers/trace/Config.help Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/drivers/trace/Config.help Sun Sep 8 23:00:20 2002 @@ -0,0 +1,36 @@ +Kernel events tracing support +CONFIG_TRACE + It is possible for the kernel to log important events to a tracing + driver. Doing so enables the use of the generated traces in order + to reconstruct the dynamic behavior of the kernel, and hence the + whole system. + + The tracing process contains 4 parts : + 1) The logging of events by key parts of the kernel. + 2) The trace driver that keeps the events in a data buffer. + 3) A trace daemon that opens the trace driver and is notified + every time there is a certain quantity of data to read + from the trace driver (using SIG_IO). + 4) A trace event data decoder that reads the accumulated data + and formats it in a human-readable format. + + If you say Y or M here, the first part of the tracing process will + always take place. That is, critical parts of the kernel will call + upon the kernel tracing function. The data generated doesn't go + any further until a trace driver registers itself as such with the + kernel. 
Therefore, if you answer Y, then the driver will be part of + the kernel and the events will always proceed onto the driver and + if you say M, then the events will only proceed onto the driver when + its module is loaded. Note that events aren't logged in the driver + until the profiling daemon opens the device, configures it and + issues the "start" command through ioctl(). + + The impact of a fully functional system (kernel event logging + + driver event copying + active trace daemon) is 2.5% for core events. + This means that for a task that took 100 seconds on a normal system, it + will take 102.5 seconds on a traced system. This is very low compared + to other profiling or tracing methods. + + For more information on kernel tracing, the trace daemon or the event + decoder, please check the following address : + http://www.opersys.com/LTT diff -urN -X dontdiff linux-2.5.33/drivers/trace/Config.in linux-2.5.33.ltt.lockless/drivers/trace/Config.in --- linux-2.5.33/drivers/trace/Config.in Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/drivers/trace/Config.in Sun Sep 8 23:00:20 2002 @@ -0,0 +1,4 @@ +mainmenu_option next_comment +comment 'Kernel tracing' +tristate 'Kernel events tracing support' CONFIG_TRACE +endmenu diff -urN -X dontdiff linux-2.5.33/drivers/trace/Makefile linux-2.5.33.ltt.lockless/drivers/trace/Makefile --- linux-2.5.33/drivers/trace/Makefile Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/drivers/trace/Makefile Sun Sep 8 23:11:34 2002 @@ -0,0 +1,21 @@ +# +# Makefile for the kernel tracing drivers. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now inherited from the +# parent makes.. 
+# + +O_TARGET := built-in.o + +# for tracing kernel hook +export-objs := tracer.o + +# Is it loaded as a module or as part of the kernel +obj-$(CONFIG_TRACE) = tracer.o + +include $(TOPDIR)/Rules.make + diff -urN -X dontdiff linux-2.5.33/drivers/trace/tracer.c linux-2.5.33.ltt.lockless/drivers/trace/tracer.c --- linux-2.5.33/drivers/trace/tracer.c Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/drivers/trace/tracer.c Sun Sep 8 23:22:21 2002 @@ -0,0 +1,2199 @@ +/***************************************************************** + * File : tracer.c + * Description : + * Contains the code for the kernel tracing driver (tracer + * for short). + * Author : + * Karim Yaghmour (karim@opersys.com) + * Date : + * 03/12/01, Added user event support. + * 05/01/01, Modified PPC bit manipulation functions for + * x86 compatibility. (andy_lowe@mvista.com) + * 15/11/00, Finally fixed memory allocation and remapping + * method. Now using BTTV-driver-inspired code. + * 13/03/00, Modified tracer so that the daemon mmaps the + * tracer's buffers in it's address space rather + * than use "read". + * 26/01/00, Added support for standardized buffer sizes and + * extensibility of events. + * 01/10/99, Modified tracer in order to used double-buffering. + * 28/09/99, Adding tracer configuration support. + * 09/09/99, Chaging the format of an event record in order to + * reduce the size of the traces. + * 04/03/99, Initial typing. + * Note : + * The sizes of the variables used to store the details of an + * event are planned for a system who gets at least one clock + * tick every 10milli-seconds. There has to be at least one + * event every 2^32-1 microseconds, otherwise the size of the + * variable holding the time doesn't work anymore. 
+ *****************************************************************/ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "tracer.h" + +/* Module information */ +MODULE_AUTHOR("Karim Yaghmour (karim@opersys.com)"); +MODULE_DESCRIPTION("Linux Trace Toolkit (LTT) kernel tracing driver"); +MODULE_LICENSE("GPL"); + +/* Driver */ +static int sMajorNumber; /* Major number of the tracer */ +static int sOpenCount; /* Number of times device is open */ +/* Locking */ +static int sTracLock; /* Tracer lock used to lock primary buffer */ +static spinlock_t sSpinLock; /* Spinlock in order to lock kernel */ +/* Daemon */ +static int sSignalSent; /* A signal has been sent to the daemon */ +static struct task_struct* sDaemonTaskStruct; /* Task structure of the tracer daemon */ +/* Tracer configuration */ +static int sTracerStarted; /* Is the tracer started */ +static trace_event_mask sTracedEvents; /* Bit-field of events being traced */ +static trace_event_mask sLogEventDetailsMask; /* Log the details of the events mask */ +static int sLogCPUID; /* Log the CPUID associated with each event */ +static int sUseSyscallEIPBounds; /* Use adress bounds to fetch the EIP where call is made */ +static int sLowerEIPBoundSet; /* The lower bound EIP has been set */ +static int sUpperEIPBoundSet; /* The upper bound EIP has been set */ +static void* sLowerEIPBound; /* The lower bound EIP */ +static void* sUpperEIPBound; /* The upper bound EIP */ +static int sTracingPID; /* Tracing only the events for one pid */ +static int sTracingPGRP; /* Tracing only the events for one process group */ +static int sTracingGID; /* Tracing only the events for one gid */ +static int sTracingUID; /* Tracing only the events for one uid */ +static pid_t sTracedPID; /* PID being traced */ +static pid_t sTracedPGRP; /* Process group being traced */ +static gid_t sTracedGID; /* GID being 
traced */ +static uid_t sTracedUID; /* UID being traced */ +static int sSyscallEIPDepthSet; /* The call depth at which to fetch EIP has been set */ +static int sSyscallEIPDepth; /* The call depth at which to fetch the EIP */ +/* Event data buffers */ +static int sBufReadComplete; /* Number of buffers completely filled */ +static int sSizeReadIncomplete; /* Quantity of data read from incomplete buffers */ +static int sEventsLost; /* Number of events lost because of lack of buffer space */ +static u32 sBufSize; /* Buffer sizes */ +static u32 sAllocSize; /* Size of buffers allocated */ +static u32 sBufferID; /* Unique buffer ID */ +static char* sTracBuf = NULL; /* Trace buffer */ +static char* sWritBuf = NULL; /* Buffer used for writting */ +static char* sReadBuf = NULL; /* Buffer used for reading */ +static char* sWritBufEnd; /* End of write buffer */ +static char* sReadBufEnd; /* End of read buffer */ +static char* sWritPos; /* Current position for writting */ +static char* sReadLimit; /* Limit at which read should stop */ +static char* sWritLimit; /* Limit at which write should stop */ +static int sUseLocking; /* Holds command from daemon */ +static u32 sBufnoBits; /* Holds command from daemon */ +static u32 sBufOffsetBits; /* Holds command from daemon */ +static int sBuffersFull; /* All-buffers-full boolean */ +static u32 sLastEventIndex; /* For full-buffers state */ +static struct timeval sLastEventTimeStamp; /* For full-buffers state */ + +/* Time */ +static struct timeval sBufferStartTime; /* The time at which the buffer was started */ + +/* Large data components allocated at load time */ +static char *sUserEventData = NULL; /* The data associated with a user event */ + +/* The size of the structures used to describe the events */ +static int sEventStructSize[TRACE_EV_MAX + 1] = +{ + sizeof(trace_start) /* TRACE_START */ , + sizeof(trace_syscall_entry) /* TRACE_SYSCALL_ENTRY */ , + 0 /* TRACE_SYSCALL_EXIT */ , + sizeof(trace_trap_entry) /* TRACE_TRAP_ENTRY */ , 
+ 0 /* TRACE_TRAP_EXIT */ , + sizeof(trace_irq_entry) /* TRACE_IRQ_ENTRY */ , + 0 /* TRACE_IRQ_EXIT */ , + sizeof(trace_schedchange) /* TRACE_SCHEDCHANGE */ , + 0 /* TRACE_KERNEL_TIMER */ , + sizeof(trace_soft_irq) /* TRACE_SOFT_IRQ */ , + sizeof(trace_process) /* TRACE_PROCESS */ , + sizeof(trace_file_system) /* TRACE_FILE_SYSTEM */ , + sizeof(trace_timer) /* TRACE_TIMER */ , + sizeof(trace_memory) /* TRACE_MEMORY */ , + sizeof(trace_socket) /* TRACE_SOCKET */ , + sizeof(trace_ipc) /* TRACE_IPC */ , + sizeof(trace_network) /* TRACE_NETWORK */ , + sizeof(trace_buffer_start) /* TRACE_BUFFER_START */ , + 0 /* TRACE_BUFFER_END */ , + sizeof(trace_new_event) /* TRACE_NEW_EVENT */ , + sizeof(trace_custom) /* TRACE_CUSTOM */ , + sizeof(trace_change_mask) /* TRACE_CHANGE_MASK */ +}; + +/* The file operations available for the tracer */ +static struct file_operations sTracerFileOps = +{ + owner: THIS_MODULE, + ioctl: tracer_ioctl, + mmap: tracer_mmap, + open: tracer_open, + release: tracer_release, + fsync: tracer_fsync, +}; + +/* The global per-buffer control data structure, shared between the tracing + driver and the trace daemon via ioctl. */ +static struct buffer_control sBufferControl; + +/* Space reserved for TRACE_EV_BUFFER_START */ +static u32 sStartReserve = TRACER_FIRST_EVENT_SIZE; +/* Space reserved for TRACE_EV_BUFFER_END event + sizeof lost word, which + though the sizeof lost word isn't necessarily contiguous with rest of + event (it's always at the end of the buffer) is included here for code + clarity. 
*/
+static u32 sEndReserve = TRACER_LAST_EVENT_SIZE;
+
+/************************************************************************************************************/
+/************************************** Code inspired from BTTV driver **************************************/
+/************************************************************************************************************/
+/* Round x up to a whole number of pages (inspired by rtai/shmem); this
+   presumes PAGE_MASK clears the in-page offset bits.  The expansion is
+   parenthesized as a whole so that the macro can be embedded in a larger
+   expression without operator-precedence surprises. */
+#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE)
+
+/* Here we want the physical address of the memory.
+ * This is used when initializing the contents of the
+ * area and marking the pages as reserved.
+ */
+static inline unsigned long kvirt_to_pa(unsigned long adr)
+{
+	unsigned long kva, ret;
+
+	kva = (unsigned long) page_address(vmalloc_to_page((void *) adr));
+	kva |= adr & (PAGE_SIZE - 1);	/* restore the offset */
+	ret = __pa(kva);
+	return ret;
+}
+
+/* Allocate size bytes with vmalloc_32(), zero them and mark every page
+ * backing the area as reserved.
+ * Returns the start of the area, or NULL if the allocation failed.
+ */
+static void *rvmalloc(unsigned long size)
+{
+	void *mem;
+	unsigned long adr;
+
+	mem = vmalloc_32(size);
+	if (!mem)
+		return NULL;
+
+	memset(mem, 0, size);	/* Clear the ram out, no junk to the user */
+	adr = (unsigned long) mem;
+	/* size is unsigned: compare it as a signed value (as rvfree() does)
+	   so that a size which is not a multiple of PAGE_SIZE ends the loop
+	   after its last partial page instead of wrapping around. */
+	while ((long) size > 0) {
+		mem_map_reserve(vmalloc_to_page((void *) adr));
+		adr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	return mem;
+}
+
+/* Counterpart of rvmalloc(): unreserve every page of the area and free it.
+ * A NULL mem is silently ignored.
+ */
+static void rvfree(void *mem, unsigned long size)
+{
+	unsigned long adr;
+
+	if (!mem)
+		return;
+
+	adr = (unsigned long) mem;
+	while ((long) size > 0) {
+		mem_map_unreserve(vmalloc_to_page((void *) adr));
+		adr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+	vfree(mem);
+}
+
+/* Map size bytes of the area starting at start_pos into vma, beginning at
+ * address adr, one page at a time.
+ * Returns 0 on success, -EAGAIN as soon as remap_page_range() fails.
+ */
+static int tracer_mmap_region(struct vm_area_struct *vma,
+			      const char *adr,
+			      const char *start_pos,
+			      unsigned long size)
+{
+	unsigned long start = (unsigned long) adr;
+	unsigned long page, pos;
+
+	pos = (unsigned long) start_pos;
+	/* Signed comparison for the same reason as in rvmalloc(): size is
+	   unsigned and is decremented a whole page at a time. */
+	while ((long) size > 0) {
+		page = kvirt_to_pa(pos);
+		if (remap_page_range(vma, start, page, PAGE_SIZE, PAGE_SHARED))
+			return -EAGAIN;
+		start += PAGE_SIZE;
+		pos += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+	return 0;
+}
+
+/************************************************************************************************************/ +/************************************************************************************************************/ +/************************************************************************************************************/ + +/************************************************************** + * Macro : tracer_write_to_buffer() + * Description : + * Writes data to the destination buffer and updates the + * begining the buffer write position. + **************************************************************/ +#define tracer_write_to_buffer(DEST, SRC, SIZE) \ +do\ +{\ + memcpy(DEST, SRC, SIZE);\ + DEST += SIZE;\ +} while(0); + +/*** Lockless scheme functions ***/ + +/* Initialize buffer control structure for new tracing run. Sanity of param + values should be checked by caller. i.e. bufno_bits and offset_bits must + reflect sane buffer sizes/numbers. */ +static void init_buffer_control(struct buffer_control * pmBC, + int pmUseLockless, + u8 pmBufnoBits, + u8 pmOffsetBits) +{ + unsigned i; + + if((pmBC->using_lockless = pmUseLockless) == TRUE) { + pmBC->index = sStartReserve; + pmBC->bufno_bits = pmBufnoBits; + pmBC->n_buffers = TRACE_MAX_BUFFER_NUMBER(pmBufnoBits); + pmBC->offset_bits = pmOffsetBits; + pmBC->offset_mask = TRACE_BUFFER_OFFSET_MASK(pmOffsetBits); + pmBC->index_mask = (1UL << (pmBufnoBits + pmOffsetBits)) - 1; + + pmBC->buffers_produced = pmBC->buffers_consumed = 0; + + /* When a new buffer is switched to, TRACE_BUFFER_SIZE is + subtracted from its fill_count in order to initialize it + to the empty state. The reason it's done this way is + because an intervening event may have already been written + to the buffer while we were in the process of switching and + thus blindly initializing to 0 would erase that event. 
+ The first buffer is initialized to 0 and the others are + initialized to TRACE_BUFFER_SIZE because the very first + buffer we ever see won't be initialized in that way by + the switching code and since there's never been an event, + we know it should be 0 and that it must be explicitly + initialized that way before logging begins. sStartReserve + is is factored into the end-of-buffer processing, so isn't + added to the fill counts here, except for the first. */ + atomic_set(&pmBC->fill_count[0], (int)sStartReserve); + for(i = 1; i < TRACER_MAX_BUFFERS; i++) { + atomic_set(&pmBC->fill_count[i], (int)TRACE_BUFFER_SIZE(pmOffsetBits)); + } + + /* All buffers are empty at this point */ + sBuffersFull = FALSE; + } +} + +/* These inline atomic functions wrap the linux versions in order to + implement the interface we want as well as to ensure memory barriers. */ + +/* If *ptr is still what we think it is, atomically assign nval to it and + return a boolean indicating TRUE if the new value was stored, FALSE + otherwise. + + Pseudocode for this operation: + + if(*ptr == oval) { + *ptr = nval; + return TRUE; + } else { + return FALSE; + } + + We need barriers before and after because despite what the locking + guide states, all atomic operations in Linux apparently don't act + as memory barriers. 
+*/
+inline int compare_and_store_volatile(volatile u32 *ptr,
+				      u32 oval,
+				      u32 nval)
+{
+	u32 prev;	/* value cmpxchg() reports having found at *ptr */
+
+	barrier();	/* NOTE(review): barrier() is presumably a compiler-only barrier - confirm it provides the ordering intended here */
+	prev = cmpxchg(ptr, oval, nval);
+	barrier();
+
+	return (prev == oval);	/* non-zero when *ptr still held oval, i.e. nval was stored */
+}
+
+/* Atomically set the value in ptr to nval, with memory barriers.
+   nval is converted to int before being handed to atomic_set(). */
+inline void atomic_set_volatile(atomic_t *ptr,
+				u32 nval)
+{
+	barrier();
+	atomic_set(ptr, (int)nval);
+	barrier();
+}
+
+/* Atomically add val to the value at ptr, with memory barriers.
+   val is converted to int before being handed to atomic_add(). */
+inline void atomic_add_volatile(atomic_t *ptr, u32 val)
+{
+	barrier();
+	atomic_add((int)val, ptr);
+	barrier();
+}
+
+/* Atomically subtract val from the value at ptr, with memory barriers.
+   Unlike the set/add wrappers above, val is declared signed (s32) here. */
+inline void atomic_sub_volatile(atomic_t *ptr, s32 val)
+{
+	barrier();
+	atomic_sub((int)val, ptr);
+	barrier();
+}
+
+/* Atomically add len to the fill_count of the buffer containing index.
+   The buffer number is derived from index with the offset_bits currently
+   stored in sBufferControl. */
+static inline void trace_commit(u32 index, u32 len)
+{
+	u32 bufno = TRACE_BUFFER_NUMBER_GET(index, sBufferControl.offset_bits);
+	atomic_add_volatile(&sBufferControl.fill_count[bufno], len);
+}
+
+/* Write start event at the beginning of the buffer containing new_index.
*/
+static inline void write_start_buffer_event(u32 pmIndex, struct timeval pmTime)
+{
+	trace_buffer_start lStartEvent;			/* Payload of the start-of-buffer event */
+	u8 lID = TRACE_EV_BUFFER_START;			/* Event type written first */
+	trace_time_delta lDelta = 0;			/* A buffer starts at delta zero */
+	uint16_t lTotalSize;				/* Size of the whole record, trailer included */
+	char *lCursor;					/* Next byte to write in the trace area */
+
+	/* The record is laid out as: type, time delta, payload, total size */
+	lTotalSize = sizeof(lID)
+	    + sizeof(lDelta)
+	    + sizeof(lStartEvent)
+	    + sizeof(lTotalSize);
+
+	/* Dropping the offset bits of the index yields the first byte of
+	   the buffer the index belongs to */
+	lCursor = sTracBuf + TRACE_BUFFER_OFFSET_CLEAR(pmIndex,
+						       sBufferControl.offset_mask);
+
+	/* Every started buffer gets the next buffer ID */
+	lStartEvent.ID = ++sBufferID;
+	lStartEvent.Time = pmTime;
+
+	/* Event type */
+	tracer_write_to_buffer(lCursor, &lID, sizeof(lID));
+
+	/* Time delta */
+	tracer_write_to_buffer(lCursor, &lDelta, sizeof(lDelta));
+
+	/* Event payload */
+	tracer_write_to_buffer(lCursor, &lStartEvent, sizeof(lStartEvent));
+
+	/* Trailing record length */
+	tracer_write_to_buffer(lCursor, &lTotalSize, sizeof(lTotalSize));
+}
+
+/* Write end buffer event at index.
*/ +static inline void write_end_buffer_event(u32 pmIndex, struct timeval pmTime) +{ + u8 lEventID; /* Event ID of last event */ + u8 lCPUID; /* CPUID of currently runing process */ + trace_time_delta lTimeDelta; /* The time elapsed between now and the last event */ + char* lWritPos; /* Current position for writing */ + + lWritPos = sTracBuf + pmIndex; + + /* Write the CPUID to the tracing buffer, if required */ + if (sLogCPUID == TRUE) { + lCPUID = smp_processor_id(); + tracer_write_to_buffer(lWritPos, + &lCPUID, + sizeof(lCPUID)); + } + /* Write event type to tracing buffer */ + lEventID = TRACE_EV_BUFFER_END; + tracer_write_to_buffer(lWritPos, + &lEventID, + sizeof(lEventID)); + + /* Write event time delta to tracing buffer */ + lTimeDelta = 0; + tracer_write_to_buffer(lWritPos, + &lTimeDelta, + sizeof(lTimeDelta)); +} + +/* Write the lost size to end of buffer containing index. */ +static inline void write_lost_size(u32 pmIndex, u32 pmSizeLost) +{ + char* lWritBufEnd; /* End of buffer */ + + /* Get end of buffer by clearing offset and adding buffer size */ + lWritBufEnd = sTracBuf + TRACE_BUFFER_OFFSET_CLEAR(pmIndex, sBufferControl.offset_mask) + TRACE_BUFFER_SIZE(sBufferControl.offset_bits); + + /* Write size lost at the end of the buffer */ + *((u32 *) (lWritBufEnd - sizeof(pmSizeLost))) = pmSizeLost; +} + +/* This function must be called from within a lock, because it increments + buffers_produced */ +static inline void finalize_buffer(u32 pmEndIndex, u32 pmSizeLost, struct timeval *pmTimestamp) +{ + /* Write end buffer event as last event in old buffer. */ + write_end_buffer_event(pmEndIndex, *pmTimestamp); + /* In any buffer switch, we need to write out the lost size, + which can be 0. */ + write_lost_size(pmEndIndex, pmSizeLost); + /* Add the size lost and end event size to fill_count so that + the old buffer won't be seen as incomplete. 
*/ + trace_commit(pmEndIndex, pmSizeLost); + /* Every finalized buffer means a produced buffer */ + sBufferControl.buffers_produced++; +} + +/* Called when tracing is stopped, to finish processing last buffer. Note + that there will always be space for the last event in the current buffer. */ +static inline void finalize_lockless_trace(void) +{ + u32 lEventsEnd; /* Index of end of last event */ + u32 lSizeLost; /* Bytes after end of last event */ + unsigned long int lFlags; /* CPU flags for lock */ + + /* Find index of end of last event */ + lEventsEnd = TRACE_BUFFER_OFFSET_GET(sBufferControl.index, sBufferControl.offset_mask); + /* Size lost in buffer is the unused space after end of last event + and end of buffer. */ + lSizeLost = TRACE_BUFFER_SIZE(sBufferControl.offset_bits) - lEventsEnd; + + /* Lock the kernel */ + spin_lock_irqsave(&sSpinLock, lFlags); + /* Write end event etc. and increment buffers_produced. The + time used here is what the locking version uses as well. */ + finalize_buffer(sBufferControl.index & sBufferControl.index_mask, lSizeLost, &sBufferStartTime); + /* Unlock the kernel */ + spin_unlock_irqrestore(&sSpinLock, lFlags); +} + +/* Determine whether the event should be discarded. The return value contains + the result flags. 
+
+   Return value is an ORed combination of:
+
+   LTT_EVENT_DISCARD_NONE - event should not be discarded
+   LTT_BUFFER_SWITCH - buffer switch occurred
+   LTT_EVENT_DISCARD - event should be discarded (all buffers are full)
+   LTT_EVENT_TOO_LONG - event won't fit into even an empty buffer
+*/
+static inline int discard_check(u32 pmOldIndex,	/* index sampled by the caller; everything after it is lost on overrun */
+				u32 pmLen,	/* length of the event to be logged */
+				struct timeval *pmTimestamp)	/* only written on the overrun path below */
+{
+	u32 lBuffersReady;	/* buffers produced but not yet consumed */
+	u32 lOffsetMask = sBufferControl.offset_mask;
+	u8 lOffsetBits = sBufferControl.offset_bits;
+	u32 lIndexMask = sBufferControl.index_mask;
+	u32 lSizeLost;	/* bytes given up at the end of the last buffer */
+	unsigned long int lFlags;	/* CPU flags for lock */
+
+	/* Check whether the event is larger than a buffer.  This test is
+	   done without taking the lock. */
+	if(pmLen >= TRACE_BUFFER_SIZE(sBufferControl.offset_bits)) {
+		return LTT_EVENT_DISCARD | LTT_EVENT_TOO_LONG;
+	}
+
+	/* Lock the kernel */
+	spin_lock_irqsave(&sSpinLock, lFlags);
+	/* We're already overrun, nothing left to do */
+	if(sBuffersFull == TRUE) {
+		/* Unlock the kernel */
+		spin_unlock_irqrestore(&sSpinLock, lFlags);
+
+		return LTT_EVENT_DISCARD;
+	}
+
+	lBuffersReady = sBufferControl.buffers_produced - sBufferControl.buffers_consumed;
+	/* If this happens, we've been pushed to the edge of the last
+	   available buffer which means we need to finalize it and increment
+	   buffers_produced.  However, we don't want to allow
+	   sBufferControl.index to be actually pushed to full or beyond,
+	   otherwise we'd just be wrapping around and allowing subsequent
+	   events to overwrite good buffers.  It is true that there may not
+	   be enough space for this event, but there could be space for
+	   subsequent smaller event(s).  It doesn't matter if they write
+	   themselves, because here we say that anything after the old_index
+	   passed in to this function is lost, even if other events have or
+	   will reserve space in this last buffer.  Nor can any other event
+	   reserve space in buffers following this one, until at least one
+	   buffer is consumed by the daemon. */
+	if(lBuffersReady == sBufferControl.n_buffers - 1) {
+		/* We set this flag so we only do this once per overrun */
+		sBuffersFull = TRUE;
+		/* Get the time of the event */
+		do_gettimeofday(pmTimestamp);
+		/* Size lost is everything after old_index */
+		lSizeLost = TRACE_BUFFER_SIZE(lOffsetBits) - TRACE_BUFFER_OFFSET_GET(pmOldIndex, lOffsetMask);
+		/* Write end event and lost size.  This increases the buffer's
+		   fill_count by the lost size, which is important later when
+		   we add the deferred size. */
+		finalize_buffer(pmOldIndex & lIndexMask, lSizeLost, pmTimestamp);
+		/* We need to add the lost size to old index, but we can't
+		   do it now, or we'd roll index over and allow new events,
+		   so we defer it until a buffer is free.  Note however that
+		   the fill_count does get incremented by lost size, which is
+		   important later when we start logging again.  The index
+		   and timestamp are remembered here for that purpose. */
+		sLastEventIndex = pmOldIndex;
+		sLastEventTimeStamp = *pmTimestamp;
+		/* Unlock the kernel */
+		spin_unlock_irqrestore(&sSpinLock, lFlags);
+
+		/* We lose this event */
+		return LTT_BUFFER_SWITCH | LTT_EVENT_DISCARD;
+	}
+	/* Unlock the kernel */
+	spin_unlock_irqrestore(&sSpinLock, lFlags);
+
+	/* Nothing untoward happened */
+	return LTT_EVENT_DISCARD_NONE;
+}
+
+/* Called by trace_reserve if the length of the event being logged would
+   most likely cause a 'buffer switch'.  The value of the variable pointed
+   to by 'index' will contain the index actually reserved by this function.
+   The boolean return value indicates whether there actually was a buffer
+   switch (not inevitable in certain cases).
+ + Return value is an ORed combination of: + + LTT_BUFFER_SWITCH_NONE - no buffer switch occurred + LTT_EVENT_DISCARD_NONE - event should not be discarded + LTT_BUFFER_SWITCH - buffer switch occurred + LTT_EVENT_DISCARD - event should be discarded (all buffers are full) + LTT_EVENT_TOO_LONG - event won't fit into even an empty buffer +*/ +static inline int trace_reserve_slow(u32 pmOldIndex, /* needed for overruns */ + u32 pmLen, + u32 *pmIndex, + struct timeval *pmTimestamp) +{ + u32 lNewIndex, lOffset, lNewBufno; + unsigned long int lFlags; /* CPU flags for lock */ + u32 lOffsetMask = sBufferControl.offset_mask; + u8 lOffsetBits = sBufferControl.offset_bits; + u32 lIndexMask = sBufferControl.index_mask; + u32 lSizeLost = sEndReserve; /* size lost always includes end event */ + int lDiscardEvent; + int lBufferSwitched = LTT_BUFFER_SWITCH_NONE; + + /* We don't get here unless the event might cause a buffer switch */ + + /* First check whether conditions exist do discard the event */ + lDiscardEvent = discard_check(pmOldIndex, pmLen, pmTimestamp); + if(lDiscardEvent != LTT_EVENT_DISCARD_NONE) + return lDiscardEvent; + + /* If we're here, we still have free buffers to reserve from */ + + /* Do this until we reserve a spot for the event */ + do { + /* Yeah, we're re-using a param variable, is that bad form? */ + pmOldIndex = sBufferControl.index; + /* We're here because the event + ending reserve space would + overflow or exactly fill old buffer. Calculate new index + again. */ + lNewIndex = pmOldIndex + pmLen; + /* We only care about the offset part of the new index */ + lOffset = TRACE_BUFFER_OFFSET_GET(lNewIndex + sEndReserve, lOffsetMask); + /* If we would actually overflow and not exactly fill the old + buffer, we reserve the first slot (after adding a buffer + start event) in the new one. */ + if((lOffset < pmLen) && (lOffset > 0)) { + /* This is an overflow, not an exact fit. 
The + reserved index is just after the space reserved for + the start event in the new buffer. */ + *pmIndex = TRACE_BUFFER_OFFSET_CLEAR(lNewIndex + sEndReserve, lOffsetMask) + sStartReserve; + /* Now the next free space is at the reserved index + plus the length of this event. */ + lNewIndex = *pmIndex + pmLen; + } else if (lOffset < pmLen) { + /* We'll exactly fill the old buffer, so our reserved + index is still in the old buffer and our new index + is in the new one + sStartReserve */ + *pmIndex = pmOldIndex; + lNewIndex = TRACE_BUFFER_OFFSET_CLEAR(lNewIndex + sEndReserve, lOffsetMask) + sStartReserve; + } else { + /* another event has actually pushed us into a new + buffer since we were called. */ + *pmIndex = pmOldIndex; + } + + /* Get the time of the event */ + do_gettimeofday(pmTimestamp); + } while (!compare_and_store_volatile(&sBufferControl.index, + pmOldIndex, lNewIndex)); + + /* Once we're successful in saving a new_index as the authoritative + new global buffer control index, finish the buffer switch + processing. */ + + /* Mask off the high bits outside of our reserved index */ + *pmIndex &= lIndexMask; + + /* At this point, our indices are set in stone, so we can safely + write our start and end events and lost count to our buffers. + The first test here could fail if between the time reserve_slow + was called and we got a reserved slot, we slept and someone else + did the buffer switch already. */ + if(lOffset < pmLen) { /* Event caused a buffer switch. */ + if(lOffset > 0) { /* We didn't exactly fill the old buffer */ + /* Set the size lost value in the old buffer. That + value is len+sEndReserve-offset-sEndReserve, + i.e. sEndReserve cancels itself out. */ + lSizeLost += pmLen - lOffset; + } else { /* We exactly filled the old buffer */ + /* Since we exactly filled the old buffer, the index + we write the end event to is after the space + reserved for this event. 
*/ + pmOldIndex += pmLen; + } + + /* Lock the kernel */ + spin_lock_irqsave(&sSpinLock, lFlags); + /* Write end event etc. and increment buffers_produced. */ + finalize_buffer(pmOldIndex & lIndexMask, lSizeLost, pmTimestamp); + /* If we're here, we had a normal buffer switch and need to + update the start buffer time before writing the event. + The start buffer time is the same as the event time for the + event reserved, and lTimeDelta of 0 but that also appears + to be the case in the locking version as well. */ + sBufferStartTime = *pmTimestamp; + /* Unlock the kernel */ + spin_unlock_irqrestore(&sSpinLock, lFlags); + + /* new_index is always valid here, since it's set correctly + if offset < len + sEndReserve, and we don't get here + unless that's true. The issue would be that if we didn't + actually switch buffers, new_index would be too large by + sEndReserve bytes. */ + write_start_buffer_event(lNewIndex & lIndexMask, *pmTimestamp); + /* We initialize the new buffer by subtracting + TRACE_BUFFER_SIZE rather than directly initializing to + sStartReserve in case events have been already been added + to the new buffer under us. We subtract space for the start + buffer event from buffer size to leave room for the start + buffer event we just wrote. */ + lNewBufno = TRACE_BUFFER_NUMBER_GET(lNewIndex & lIndexMask, lOffsetBits); + atomic_sub_volatile(&sBufferControl.fill_count[lNewBufno], TRACE_BUFFER_SIZE(lOffsetBits) - sStartReserve); + /* We need to check whether fill_count is less than the + sStartReserve. If this test is true, it means that + subtracting the buffer size underflowed fill_count i.e. + fill_count represents an incomplete buffer. Any any case, + we're completely fubared and don't have any choice but to + start the new buffer out fresh. 
*/ + if(atomic_read(&sBufferControl.fill_count[lNewBufno]) < sStartReserve) { + atomic_set_volatile(&sBufferControl.fill_count[lNewBufno], sStartReserve); + } + + /* If we're here, there must have been a buffer switch */ + lBufferSwitched = LTT_BUFFER_SWITCH; + } + + return lBufferSwitched; +} + +/* Reserve a space in a buffer for len bytes. The value of the variable + pointed to by 'index' will contain the index actually reserved by this + function. + + Return value is an ORed combination of: + + LTT_BUFFER_SWITCH_NONE - no buffer switch occurred + LTT_EVENT_DISCARD_NONE - event should not be discarded + LTT_BUFFER_SWITCH - buffer switch occurred + LTT_EVENT_DISCARD - event should be discarded (all buffers are full) + LTT_EVENT_TOO_LONG - event won't fit into even an empty buffer +*/ +static inline int trace_reserve(u32 pmLen, + u32 *pmIndex, + struct timeval *pmTimestamp) +{ + u32 lOldIndex, lNewIndex, lOffset; + u32 lOffsetMask = sBufferControl.offset_mask; + + /* Do this until we reserve a spot for the event */ + do { + lOldIndex = sBufferControl.index; + /* If adding len + sEndReserve to the old index doesn't put us + into a new buffer, this is what the new index would be. */ + lNewIndex = lOldIndex + pmLen; + lOffset = TRACE_BUFFER_OFFSET_GET(lNewIndex + sEndReserve, lOffsetMask); + /* If adding the length reserved for the end buffer event and + lost count to the new index would put us into a new buffer, + we need to do a buffer switch. If in between now and the + buffer switch another event that does fit comes in, no + problem because we check again in the slow version. In + either case, there will always be room for the end event + in the old buffer. The trick in this test is that adding + a length that would carry into the non-offset bits of the + index results in the offset portion being smaller than the + length that was added. */ + if(lOffset < pmLen) { + /* We would roll over into a new buffer, need to do + buffer switch processing. 
*/ + return trace_reserve_slow(lOldIndex, pmLen, pmIndex, pmTimestamp); + } + /* Get the time of the event */ + do_gettimeofday(pmTimestamp); + } while (!compare_and_store_volatile(&sBufferControl.index, + lOldIndex, lNewIndex)); + + /* Once we're successful in saving a new_index as the authoritative + new global buffer control index, we can return old_index, the + successfully reserved index. */ + + /* Return the reserved index value */ + *pmIndex = lOldIndex & sBufferControl.index_mask; + + return LTT_BUFFER_SWITCH_NONE; /* No buffer switch occurred */ +} + +/* Reserve space for an event, write the event and signal the daemon if it + caused a buffer switch. */ +int lockless_write_event(u8 pmEventID, + void *pmEventStruct, + uint16_t pmDataSize, + u8 pmCPUID, + void *pmVarDataBeg, + int pmVarDataLen) +{ + u32 lReservedIndex; + struct timeval lTime; + trace_time_delta lTimeDelta; /* The time elapsed between now and the last event */ + struct siginfo lSigInfo; /* Signal information */ + int lReserveRC; + char* lWritPos; /* Current position for writing */ + int lRC = 0; + + /* Reserve space for the event. If the space reserved is in a new + buffer, note that fact. */ + lReserveRC = trace_reserve((u32)pmDataSize, + &lReservedIndex, &lTime); + + /* Exact lost event count isn't important to anyone, so this is OK. */ + if(lReserveRC & LTT_EVENT_DISCARD) { + sEventsLost++; + } + + /* We don't write the event, but we still need to signal */ + if((lReserveRC & LTT_BUFFER_SWITCH) && + (lReserveRC & LTT_EVENT_DISCARD)) { + lRC = -ENOMEM; + goto send_buffer_switch_signal; + } + + /* no buffer space left, discard event. */ + if((lReserveRC & LTT_EVENT_DISCARD) || + (lReserveRC & LTT_EVENT_TOO_LONG)) { + /* return value for trace() */ + return -ENOMEM; + } + + /* The position we write to in the trace memory area is simply the + beginning of trace memory plus the index we just reserved. 
*/ + lWritPos = sTracBuf + lReservedIndex; + /* Compute the time delta between this event and the time at which + this buffer was started */ + lTimeDelta = (lTime.tv_sec - sBufferStartTime.tv_sec) * 1000000 + + (lTime.tv_usec - sBufferStartTime.tv_usec); + + /* Write the CPUID to the tracing buffer, if required */ + if ((sLogCPUID == TRUE) && (pmEventID != TRACE_EV_START) && (pmEventID != TRACE_EV_BUFFER_START)) + tracer_write_to_buffer(lWritPos, + &pmCPUID, + sizeof(pmCPUID)); + + /* Write event type to tracing buffer */ + tracer_write_to_buffer(lWritPos, + &pmEventID, + sizeof(pmEventID)); + + /* Write event time delta to tracing buffer */ + tracer_write_to_buffer(lWritPos, + &lTimeDelta, + sizeof(lTimeDelta)); + + /* Do we log event details */ + if (ltt_test_bit(pmEventID, &sLogEventDetailsMask)) { + /* Write event structure */ + tracer_write_to_buffer(lWritPos, + pmEventStruct, + sEventStructSize[pmEventID]); + + /* Write string if any */ + if (pmVarDataLen) + tracer_write_to_buffer(lWritPos, + pmVarDataBeg, + pmVarDataLen); + } + /* Write the length of the event description */ + tracer_write_to_buffer(lWritPos, + &pmDataSize, + sizeof(pmDataSize)); + + /* We've written the event - update the fill_count for the buffer. */ + + trace_commit(lReservedIndex, (u32)pmDataSize); + +send_buffer_switch_signal: + + /* Signal the daemon if we switched buffers */ + if(lReserveRC & LTT_BUFFER_SWITCH) + { + /* Setup signal information */ + lSigInfo.si_signo = SIGIO; + lSigInfo.si_errno = 0; + lSigInfo.si_code = SI_KERNEL; + +#if 0 + /* DEBUG */ + printk("<1> Sending SIGIO to %d \n", sDaemonTaskStruct->pid); +#endif + /* Signal the tracing daemon */ + send_sig_info(SIGIO, &lSigInfo, sDaemonTaskStruct); + } + + return lRC; +} + +/************************************************************** + * Function : trace() + * Description : Tracing function per se. 
+ * Parameters :
+ *   pmEventID, ID of event as defined in linux/trace.h
+ *   pmEventStruct, struct describing the event
+ * Return values :
+ *   0, if everything went OK (event got registered), or if the
+ *      event is not selected by the event mask or by the
+ *      PID/PGRP/GID/UID filters and was therefore skipped.
+ *   -ENODEV, no tracing daemon opened the driver.
+ *   -ENOMEM, no more memory to store events.
+ *   -EBUSY, tracer not started yet.
+ *   When the lockless scheme is in use, whatever
+ *   lockless_write_event() returns.
+ * Note :
+ *   The kernel has to be locked here because trace() could
+ *   be called from an interrupt handling routine and from
+ *   a process service routine.
+ *   For TRACE_EV_SCHEDCHANGE the "in" field of the event struct
+ *   is rewritten in place from a task pointer to that task's PID.
+ **************************************************************/
+int trace(u8 pmEventID,
+	  void *pmEventStruct)
+{
+	int lVarDataLen = 0;		/* Length of variable length data to be copied, if any */
+	void *lVarDataBeg = NULL;	/* Beginning of variable length data to be copied */
+	int lSendSignal = FALSE;	/* Should the daemon be summoned */
+	int lIsStartEvent;		/* Is this a trace-start or buffer-start event */
+	int lLogCPUID;			/* Is the CPUID recorded for this event */
+	u8 lCPUID = 0;			/* CPUID of currently running process; stays 0 when not recorded */
+	uint16_t lDataSize;		/* Size of tracing data */
+	struct siginfo lSigInfo;	/* Signal information */
+	struct timeval lTime;		/* Event time */
+	unsigned long int lFlags;	/* CPU flags for lock */
+	trace_time_delta lTimeDelta;	/* The time elapsed between now and the start of the buffer */
+	struct task_struct *pIncomingProcess = NULL;	/* Pointer to incoming process */
+
+	/* Is there a tracing daemon */
+	if (sDaemonTaskStruct == NULL)
+		return -ENODEV;
+
+	/* Is this the exit of a process? */
+	if ((pmEventID == TRACE_EV_PROCESS) &&
+	    (pmEventStruct != NULL) &&
+	    ((((trace_process *) pmEventStruct)->event_sub_id) == TRACE_EV_PROCESS_EXIT))
+		trace_destroy_owners_events(current->pid);
+
+	/* The two start events get special treatment all along: they pass even
+	   before the tracer is started, bypass the ID filters and never carry
+	   a CPUID. Evaluate the test once. */
+	lIsStartEvent = (pmEventID == TRACE_EV_START) || (pmEventID == TRACE_EV_BUFFER_START);
+
+	/* Do we trace the event */
+	if ((sTracerStarted != TRUE) && !lIsStartEvent)
+		return -EBUSY;
+
+	/* Are we monitoring this event */
+	if (!ltt_test_bit(pmEventID, &sTracedEvents))
+		return 0;
+
+	/* Always let the start event pass, whatever the IDs */
+	if (!lIsStartEvent) {
+		/* Is this a scheduling change */
+		if (pmEventID == TRACE_EV_SCHEDCHANGE) {
+			/* Get pointer to incoming process */
+			pIncomingProcess = (struct task_struct *) (((trace_schedchange *) pmEventStruct)->in);
+
+			/* Set PID information in schedchange event */
+			(((trace_schedchange *) pmEventStruct)->in) = pIncomingProcess->pid;
+		}
+		/* For each active filter: if the current task does not match, the event
+		   is only kept when it is the scheduling change bringing in a matching task */
+
+		/* Are we monitoring a particular process */
+		if ((sTracingPID == TRUE) && (current->pid != sTracedPID)) {
+			if ((pIncomingProcess == NULL) || (pIncomingProcess->pid != sTracedPID))
+				return 0;
+		}
+		/* Are we monitoring a particular process group */
+		if ((sTracingPGRP == TRUE) && (current->pgrp != sTracedPGRP)) {
+			if ((pIncomingProcess == NULL) || (pIncomingProcess->pgrp != sTracedPGRP))
+				return 0;
+		}
+		/* Are we monitoring the processes of a given group of users */
+		if ((sTracingGID == TRUE) && (current->egid != sTracedGID)) {
+			if ((pIncomingProcess == NULL) || (pIncomingProcess->egid != sTracedGID))
+				return 0;
+		}
+		/* Are we monitoring the processes of a given user */
+		if ((sTracingUID == TRUE) && (current->euid != sTracedUID)) {
+			if ((pIncomingProcess == NULL) || (pIncomingProcess->euid != sTracedUID))
+				return 0;
+		}
+	}
+	/* Compute size of tracing data */
+	lDataSize = sizeof(pmEventID) + sizeof(lTimeDelta) + sizeof(lDataSize);
+
+	/* Do we log the event details */
+	if (ltt_test_bit(pmEventID, &sLogEventDetailsMask)) {
+		/* Update the size of the data entry */
+		lDataSize += sEventStructSize[pmEventID];
+
+		/* Some events have variable length */
+		switch (pmEventID) {
+		/* Is there a file name in this */
+		case TRACE_EV_FILE_SYSTEM:
+			if ((((trace_file_system *) pmEventStruct)->event_sub_id == TRACE_EV_FILE_SYSTEM_EXEC)
+			    || (((trace_file_system *) pmEventStruct)->event_sub_id == TRACE_EV_FILE_SYSTEM_OPEN)) {
+				/* Remember the string's beginning and update size variables */
+				lVarDataBeg = ((trace_file_system *) pmEventStruct)->file_name;
+				lVarDataLen = ((trace_file_system *) pmEventStruct)->event_data2 + 1;
+				lDataSize += (uint16_t) lVarDataLen;
+			}
+			break;
+
+		/* Logging of a custom event */
+		case TRACE_EV_CUSTOM:
+			lVarDataBeg = ((trace_custom *) pmEventStruct)->data;
+			lVarDataLen = ((trace_custom *) pmEventStruct)->data_size;
+			lDataSize += (uint16_t) lVarDataLen;
+			break;
+
+		/* All other events are fixed-size */
+		default:
+			break;
+		}
+	}
+
+	/* Do we record the CPUID. Decide once so that the size accounted for
+	   here and the bytes written further down always agree. */
+	lLogCPUID = (sLogCPUID == TRUE) && !lIsStartEvent;
+	if (lLogCPUID) {
+		/* Remember the CPUID */
+		lCPUID = smp_processor_id();
+
+		/* Update the size of the data entry */
+		lDataSize += sizeof(lCPUID);
+	}
+
+/* Lock-free event-writing isn't available without cmpxchg */
+#ifdef __HAVE_ARCH_CMPXCHG
+	/* If we're using the lockless scheme, we preempt the default path
+	   here - nothing after this point in this function will be executed.
+	   This used to be implemented as a kernel hook, and will be again
+	   when/if kernel hooks are accepted into the kernel.
+	   lCPUID is always initialized at this point (0 when not recorded). */
+	if(sBufferControl.using_lockless)
+		return lockless_write_event(pmEventID,
+					    pmEventStruct,
+					    lDataSize,
+					    lCPUID,
+					    lVarDataBeg,
+					    lVarDataLen);
+#endif
+
+	/* Lock the kernel */
+	spin_lock_irqsave(&sSpinLock, lFlags);
+
+	/* The following time calculations have to be done within the spinlock because
+	   otherwise the event order could be inverted. */
+
+	/* Get the time of the event */
+	do_gettimeofday(&lTime);
+
+	/* Compute the time delta between this event and the time at which this buffer was started */
+	lTimeDelta = (lTime.tv_sec - sBufferStartTime.tv_sec) * 1000000
+	    + (lTime.tv_usec - sBufferStartTime.tv_usec);
+
+	/* Is there enough space left in the write buffer */
+	if (sWritPos + lDataSize > sWritLimit) {
+		/* Have we already switched buffers and informed the daemon of it */
+		if (sSignalSent == TRUE) {
+			/* We've lost another event */
+			sEventsLost++;
+
+			/* Bye, bye, now */
+			spin_unlock_irqrestore(&sSpinLock, lFlags);
+			return -ENOMEM;
+		}
+		/* We need to inform the daemon */
+		lSendSignal = TRUE;
+
+		/* Switch buffers */
+		tracer_switch_buffers(lTime);
+
+		/* Recompute the time delta since sBufferStartTime has changed because of the buffer change */
+		lTimeDelta = (lTime.tv_sec - sBufferStartTime.tv_sec) * 1000000
+		    + (lTime.tv_usec - sBufferStartTime.tv_usec);
+	}
+	/* Write the CPUID to the tracing buffer, if required */
+	if (lLogCPUID) {
+		tracer_write_to_buffer(sWritPos,
+				       &lCPUID,
+				       sizeof(lCPUID));
+	}
+
+	/* Write event type to tracing buffer */
+	tracer_write_to_buffer(sWritPos,
+			       &pmEventID,
+			       sizeof(pmEventID));
+
+	/* Write event time delta to tracing buffer */
+	tracer_write_to_buffer(sWritPos,
+			       &lTimeDelta,
+			       sizeof(lTimeDelta));
+
+	/* Do we log event details */
+	if (ltt_test_bit(pmEventID, &sLogEventDetailsMask)) {
+		/* Write event structure */
+		tracer_write_to_buffer(sWritPos,
+				       pmEventStruct,
+				       sEventStructSize[pmEventID]);
+
+		/* Write string if any */
+		if (lVarDataLen) {
+			tracer_write_to_buffer(sWritPos,
+					       lVarDataBeg,
+					       lVarDataLen);
+		}
+	}
+	/* Write the length of the event description */
+	tracer_write_to_buffer(sWritPos,
+			       &lDataSize,
+			       sizeof(lDataSize));
+
+	/* Remember, while still holding the lock, that a signal is about to be sent */
+	if (lSendSignal == TRUE)
+		sSignalSent = TRUE;
+
+	/* Unlock the kernel */
+	spin_unlock_irqrestore(&sSpinLock, lFlags);
+
+	/* Should the tracing daemon be notified */
+	if (lSendSignal == TRUE) {
+		/* Setup signal information. Zero the whole structure first so that
+		   the fields not set below never carry uninitialized stack bytes
+		   into the signal code. */
+		memset(&lSigInfo, 0, sizeof(lSigInfo));
+		lSigInfo.si_signo = SIGIO;
+		lSigInfo.si_errno = 0;
+		lSigInfo.si_code = SI_KERNEL;
+
+		/* DEBUG */
+#if 0
+		printk("<1> Sending SIGIO to %d \n", sDaemonTaskStruct->pid);
+#endif
+
+		/* Signal the tracing daemon */
+		send_sig_info(SIGIO, &lSigInfo, sDaemonTaskStruct);
+	}
+
+	return 0;
+}
+
+/*************************************************************
+ * Function : tracer_switch_buffers()
+ * Description :
+ *   Make the current write buffer the one to be read and make
+ *   the old read buffer the one to be written to. Set the tracer
+ *   variables in consequence.
+ * Parameters :
+ *   pmTime, current time
+ * Return values :
+ *   NONE
+ * Note :
+ *   This should be called from within a spin_lock.
+ *************************************************************/
+void tracer_switch_buffers(struct timeval pmTime)
+{
+	char *lTempBuf;		/* Temporary buffer pointer (used to swap sReadBuf and sWritBuf) */
+	char *lTempBufEnd;	/* Temporary buffer end pointer (used to swap the buffer ends) */
+	char *lInitWritPos;	/* Write position on entry, before the end event is appended */
+	u8 lEventID;		/* Event ID being written: buffer end first, buffer start afterwards */
+	u8 lCPUID;		/* CPUID of currently running process */
+	uint16_t lDataSize;	/* Size of the buffer-start record written into the new buffer */
+	u32 lSizeLost;		/* Size delta between last event and end of buffer */
+	trace_time_delta lTimeDelta;	/* Time delta field; written as 0 for both records */
+	trace_buffer_start lStartBufferEvent;	/* Start of the new buffer event */
+
+	/* Remember initial write position */
+	lInitWritPos = sWritPos;
+
+	/* Write the end event at the current write position of the buffer:
+	   optional CPUID, event ID TRACE_EV_BUFFER_END, then a zero time delta.
+	   NOTE(review): tracer_write_to_buffer() is defined elsewhere; it
+	   presumably also advances its first argument (sWritPos) - confirm. */
+
+	/* Write the CPUID to the tracing buffer, if required */
+	if (sLogCPUID == TRUE) {
+		lCPUID = smp_processor_id();
+		tracer_write_to_buffer(sWritPos,
+				       &lCPUID,
+				       sizeof(lCPUID));
+	}
+	/* Write event type to tracing buffer */
+	lEventID = TRACE_EV_BUFFER_END;
+	tracer_write_to_buffer(sWritPos,
+			       &lEventID,
+			       sizeof(lEventID));
+
+	/* Write event time delta to tracing buffer */
+	lTimeDelta = 0;
+	tracer_write_to_buffer(sWritPos,
+			       &lTimeDelta,
+			       sizeof(lTimeDelta));
+
+	/* Get size lost: distance from the write position on entry to the end of the buffer */
+	lSizeLost = sWritBufEnd - lInitWritPos;
+
+	/* Write size lost at the end of the buffer (in its last sizeof(u32) bytes) */
+	*((u32 *) (sWritBufEnd - sizeof(lSizeLost))) = lSizeLost;
+
+	/* Switch buffers */
+	lTempBuf = sReadBuf;
+	sReadBuf = sWritBuf;
+	sWritBuf = lTempBuf;
+
+	/* Set buffer ends */
+	lTempBufEnd = sReadBufEnd;
+	sReadBufEnd = sWritBufEnd;
+	sWritBufEnd = lTempBufEnd;
+
+	/* Set read limit */
+	sReadLimit = sReadBufEnd;
+
+	/* Set write limit: keep TRACER_LAST_EVENT_SIZE bytes in reserve at the end of the buffer */
+	sWritLimit = sWritBufEnd - TRACER_LAST_EVENT_SIZE;
+
+	/* Set write position */
+	sWritPos = sWritBuf;
+
+	/* Increment buffer ID */
+	sBufferID++;
+
+	/* Set the time of beginning of this buffer */
+	sBufferStartTime = pmTime;
+
+	/* Write the start of buffer event */
+	lStartBufferEvent.ID = sBufferID;
+	lStartBufferEvent.Time = pmTime;
+
+	/* Write event type to tracing buffer (no CPUID is written for the start event) */
+	lEventID = TRACE_EV_BUFFER_START;
+	tracer_write_to_buffer(sWritPos,
+			       &lEventID,
+			       sizeof(lEventID));
+
+	/* Write event time delta to tracing buffer */
+	lTimeDelta = 0;
+	tracer_write_to_buffer(sWritPos,
+			       &lTimeDelta,
+			       sizeof(lTimeDelta));
+
+	/* Write event structure */
+	tracer_write_to_buffer(sWritPos,
+			       &lStartBufferEvent,
+			       sizeof(lStartBufferEvent));
+
+	/* Compute the data size: event ID + time delta + event structure + the size field itself */
+	lDataSize = sizeof(lEventID)
+	    + sizeof(lTimeDelta)
+	    + sizeof(lStartBufferEvent)
+	    + sizeof(lDataSize);
+
+	/* Write the length of the event description */
+	tracer_write_to_buffer(sWritPos,
+			       &lDataSize,
+			       sizeof(lDataSize));
+}
+
+/* Continue a trace that's been temporarily stopped because all buffers were
+   full (lockless scheme). Called from tracer_ioctl() on TRACER_DATA_COMITTED,
+   with sSpinLock held, once the daemon reports that it consumed at least one
+   buffer while sBuffersFull was set.
+   Steps, in order: reset the fill count of the freed buffer, trim from the
+   last buffer's fill count whatever was written after the stop, rewrite the
+   end-of-buffer event, timestamp and write the start event of the freed
+   buffer, move sBufferControl.index there and finally clear sBuffersFull. */
+static inline void continue_trace(void)
+{
+	int lDiscardSize;		/* Bytes reserved in the last buffer after tracing stopped */
+	u32 lLastEventBufno;		/* Buffer number extracted from sLastEventIndex */
+	u32 lLastBufferLostSize;	/* Buffer size minus the offset of the last event */
+	u32 lLastEventOffset;		/* Offset extracted from sLastEventIndex */
+	u32 lNewIndex;			/* Index at which the freed buffer starts being filled */
+
+	/* A buffer's been consumed, and as we've been waiting around at the
+	   end of the last one produced, the one after that must now be free */
+	int lFreedBufno = sBufferControl.buffers_produced % sBufferControl.n_buffers;
+	/* Start the new buffer out at the beginning */
+	atomic_set_volatile(&sBufferControl.fill_count[lFreedBufno], sStartReserve);
+	/* In the all-buffers-full case, sBufferControl.index is frozen at the
+	   position of the first event that would have caused a buffer switch.
+	   However, the fill_count for that buffer is not frozen and reflects
+	   not only the lost size calculated at that point, but also any
+	   smaller events that managed to write themselves at the end of the
+	   last buffer (because there's technically still space at the end,
+	   though it and all those contained events will be erased here).
+	   Here we try to salvage if possible that last buffer, but to do
+	   that, we need to subtract those pesky smaller events that managed
+	   to get in. If after all that, another small event manages to
+	   sneak in in the time it takes us to do this, well, we concede and
+	   the daemon will toss that buffer. It's not the end of the world
+	   if that happens, since that buffer actually marked the start of a
+	   bunch of lost events which continues until a buffer is freed. */
+
+	/* Get the bufno and offset of the buffer containing the last event
+	   logged before we had to stop for a buffer-full condition. */
+	lLastEventOffset = TRACE_BUFFER_OFFSET_GET(sLastEventIndex, sBufferControl.offset_mask);
+	lLastEventBufno = TRACE_BUFFER_NUMBER_GET(sLastEventIndex, sBufferControl.offset_bits);
+	/* We also need to know the lost size we wrote to that buffer when we
+	   stopped */
+	lLastBufferLostSize = TRACE_BUFFER_SIZE(sBufferControl.offset_bits) - lLastEventOffset;
+	/* Since the time we stopped, some smaller events probably reserved
+	   space and wrote themselves in, the sizes of which would have been
+	   reflected in the fill_count. The total size of these events is
+	   calculated here. Offset plus lost size add up to the buffer size,
+	   so this is the fill count minus one full buffer size. The u32
+	   operands make the subtraction unsigned; the result is then stored
+	   in a signed int, which is what the "> 0" test below relies on. */
+	lDiscardSize = atomic_read(&sBufferControl.fill_count[lLastEventBufno]) - lLastEventOffset - lLastBufferLostSize;
+	/* If there were events written after we stopped, subtract those from
+	   the fill_count. If that doesn't fix things, the buffer either is
+	   really incomplete, or another event snuck in, and we'll just stop
+	   now and say we did what we could for it. */
+	if(lDiscardSize > 0) {
+		atomic_sub_volatile(&sBufferControl.fill_count[lLastEventBufno], lDiscardSize);
+	}
+
+	/* Since our end buffer event probably got trounced, rewrite it in old
+	   buffer. */
+	write_end_buffer_event(sLastEventIndex & sBufferControl.index_mask, sLastEventTimeStamp);
+	/* We also need to update the buffer start time and write the start
+	   event for the next buffer, since we couldn't do it until now */
+	do_gettimeofday(&sBufferStartTime);
+	/* The current buffer control index is hanging around near the end of
+	   the last buffer. So we add the buffer size and clear the offset to
+	   get to the beginning of the newly freed buffer, then skip the
+	   sStartReserve bytes the fill count was preset with above. */
+	lNewIndex = sBufferControl.index + TRACE_BUFFER_SIZE(sBufferControl.offset_bits);
+	lNewIndex = TRACE_BUFFER_OFFSET_CLEAR(lNewIndex, sBufferControl.offset_mask) + sStartReserve;
+	write_start_buffer_event(lNewIndex & sBufferControl.index_mask, sBufferStartTime);
+
+	/* Fixing up sBufferControl.index is simpler. Since a buffer has been
+	   consumed, there's now at least one buffer free, and we can continue.
+	   We start off the next buffer in a fresh state. Since nothing else
+	   can be meaningfully updating the buffer control index, we can safely
+	   do that here. 'Meaningfully' means that there may be cases of
+	   smaller events managing to update the index in the last buffer but
+	   they're essentially erased by the lost size of that buffer when
+	   sBuffersFull was set. We need to restart the index at the beginning
+	   of the next available buffer before turning off sBuffersFull, and
+	   avoid an erroneous buffer switch. */
+	sBufferControl.index = lNewIndex;
+	/* Now we can continue reserving events */
+	sBuffersFull = FALSE;
+}
+
+/*************************************************************
+ * Function : tracer_ioctl()
+ * Description : "Ioctl" file op
+ * Parameters :
+ *   pmInode, the inode associated with the device
+ *   pmFile, file structure given to the acting process
+ *   pmCmd, command given by the caller
+ *   pmArg, arguments to the command
+ * Return values :
+ *   >0, In case the caller requested the number of events
+ *       lost.
+ *   0, Everything went OK
+ *   -ENOSYS, no such command
+ *   -EINVAL, tracer not properly configured
+ *   -EBUSY, tracer can't be reconfigured while in operation
+ *   -ENOMEM, no more memory
+ *   -EFAULT, unable to access user space memory
+ * Note :
+ *   In the future, this function should check to make sure
+ *   that it's the server that make thes ioctl.
+ *************************************************************/
+int tracer_ioctl(struct inode *pmInode,
+		 struct file *pmFile,
+		 unsigned int pmCmd,
+		 unsigned long pmArg)
+{
+	int lRetValue;			/* Function return value */
+	int lDevMinor;			/* Device minor number */
+	int lNewUserEventID;		/* ID of newly created user event */
+	trace_start lStartEvent;	/* Event marking the beginning of the trace */
+	unsigned long int lFlags;	/* CPU flags for lock */
+	trace_custom lUserEvent;	/* The user event to be logged */
+	trace_change_mask lTraceMask;	/* Event mask */
+	trace_new_event lNewUserEvent;	/* The event to be created for the user */
+	trace_buffer_start lStartBufferEvent;	/* Start of the new buffer event */
+
+	/* Get device's minor number (0 is the daemon channel, 1 the user-event channel) */
+	lDevMinor = minor(pmInode->i_rdev) & 0x0f;
+
+	/* If the tracer is started, the daemon can't modify the configuration */
+	if ((lDevMinor == 0)
+	    && (sTracerStarted == TRUE) && (pmCmd != TRACER_STOP) && (pmCmd != TRACER_DATA_COMITTED) && (pmCmd != TRACER_GET_BUFFER_CONTROL))
+		return -EBUSY;
+
+	/* Only some operations are permitted to user processes trying to log events */
+	if ((lDevMinor == 1)
+	    && (pmCmd != TRACER_CREATE_USER_EVENT)
+	    && (pmCmd != TRACER_DESTROY_USER_EVENT)
+	    && (pmCmd != TRACER_TRACE_USER_EVENT)
+	    && (pmCmd != TRACER_SET_EVENT_MASK)
+	    && (pmCmd != TRACER_GET_EVENT_MASK))
+		return -ENOSYS;
+
+	/* Depending on the command executed */
+	switch (pmCmd) {
+	/* Start the tracer */
+	case TRACER_START:
+		/* Initialize buffer control regardless of scheme in use */
+		init_buffer_control(&sBufferControl,
+				    !sUseLocking,	/* using_lockless */
+				    sBufnoBits,		/* bufno_bits, 2**n */
+				    sBufOffsetBits);	/* offset_bits, 2**n */
+
+		/* Check if the device has been properly set up: an EIP depth and EIP
+		   bounds are mutually exclusive, bounds need both ends, and PID and
+		   PGRP filtering cannot be combined */
+		if (((sUseSyscallEIPBounds == TRUE)
+		     && (sSyscallEIPDepthSet == TRUE))
+		    || ((sUseSyscallEIPBounds == TRUE)
+			&& ((sLowerEIPBoundSet != TRUE)
+			    || (sUpperEIPBoundSet != TRUE)))
+		    || ((sTracingPID == TRUE)
+			&& (sTracingPGRP == TRUE)))
+			return -EINVAL;
+
+		/* Set the kernel-side trace configuration */
+		if (trace_set_config(trace,
+				     sSyscallEIPDepthSet,
+				     sUseSyscallEIPBounds,
+				     sSyscallEIPDepth,
+				     sLowerEIPBound,
+				     sUpperEIPBound) < 0)
+			return -EINVAL;
+
+		/* Always log the start event and the buffer start event */
+		ltt_set_bit(TRACE_EV_BUFFER_START, &sTracedEvents);
+		ltt_set_bit(TRACE_EV_BUFFER_START, &sLogEventDetailsMask);
+		ltt_set_bit(TRACE_EV_START, &sTracedEvents);
+		ltt_set_bit(TRACE_EV_START, &sLogEventDetailsMask);
+		ltt_set_bit(TRACE_EV_CHANGE_MASK, &sTracedEvents);
+		ltt_set_bit(TRACE_EV_CHANGE_MASK, &sLogEventDetailsMask);
+
+		/* Get the time of start */
+		do_gettimeofday(&sBufferStartTime);
+
+		/* Set the buffer start event description */
+		lStartBufferEvent.ID = sBufferID;
+		lStartBufferEvent.Time = sBufferStartTime;
+
+		/* Set the start event description */
+		lStartEvent.MagicNumber = TRACER_MAGIC_NUMBER;
+		lStartEvent.ArchType = TRACE_ARCH_TYPE;
+		lStartEvent.ArchVariant = TRACE_ARCH_VARIANT;
+		lStartEvent.SystemType = TRACE_SYS_TYPE_VANILLA_LINUX;
+		lStartEvent.MajorVersion = TRACER_VERSION_MAJOR;
+		lStartEvent.MinorVersion = TRACER_VERSION_MINOR;
+		lStartEvent.BufferSize = sBufSize;
+		lStartEvent.EventMask = sTracedEvents;
+		lStartEvent.DetailsMask = sLogEventDetailsMask;
+		lStartEvent.LogCPUID = sLogCPUID;
+
+		/* Trace the buffer start event */
+		if(sBufferControl.using_lockless == TRUE) {
+			write_start_buffer_event(sBufferControl.index & sBufferControl.index_mask, sBufferStartTime);
+		} else {
+			trace(TRACE_EV_BUFFER_START, &lStartBufferEvent);
+		}
+
+		/* Trace the start event */
+		trace(TRACE_EV_START, &lStartEvent);
+
+		/* Start tapping into Linux's syscall flow */
+		syscall_entry_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_ENTRY, &sTracedEvents);
+		syscall_exit_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_EXIT, &sTracedEvents);
+
+		/* We can start tracing */
+		sTracerStarted = TRUE;
+
+		/* Reregister custom trace events created earlier */
+		trace_reregister_custom_events();
+		break;
+
+	/* Stop the tracer */
+	case TRACER_STOP:
+		/* Stop tracing */
+		/* We don't log new events, but old lockless ones can finish */
+		sTracerStarted = FALSE;
+
+		/* Stop interrupting the normal flow of system calls */
+		syscall_entry_trace_active = 0;
+		syscall_exit_trace_active = 0;
+
+		/* Make sure the last buffer touched is finalized */
+		if(sBufferControl.using_lockless) {
+			/* Write end buffer event as last event in old buf. */
+			finalize_lockless_trace();
+			break;
+		}		/* Else locking scheme */
+
+		/* Acquire the lock to avoid SMP case of where another CPU is writing a trace
+		   while buffer is being switched */
+		spin_lock_irqsave(&sSpinLock, lFlags);
+
+		/* Switch the buffers to ensure that the end of the buffer mark is set (time isn't important) */
+		tracer_switch_buffers(sBufferStartTime);
+
+		/* Release lock */
+		spin_unlock_irqrestore(&sSpinLock, lFlags);
+		break;
+
+	/* Set the tracer to the default configuration */
+	case TRACER_CONFIG_DEFAULT:
+		tracer_set_default_config();
+		break;
+
+	/* Set the memory buffers the daemon wants us to use */
+	case TRACER_CONFIG_MEMORY_BUFFERS:
+		/* Is the given size "reasonable" */
+		if (sUseLocking == TRUE) {
+			if (pmArg < TRACER_MIN_BUF_SIZE)
+				return -EINVAL;
+		} else {
+			if ((pmArg < TRACER_LOCKLESS_MIN_BUF_SIZE) ||
+			    (pmArg > TRACER_LOCKLESS_MAX_BUF_SIZE))
+				return -EINVAL;
+		}
+
+		/* Set the buffer's size */
+		return tracer_set_buffer_size(pmArg);
+
+	/* Set the number of memory buffers the daemon wants us to use */
+	case TRACER_CONFIG_N_MEMORY_BUFFERS:
+		/* Is the given number "reasonable" (only meaningful for the lockless scheme) */
+		if ((sUseLocking == TRUE) || (pmArg < TRACER_MIN_BUFFERS) ||
+		    (pmArg > TRACER_MAX_BUFFERS))
+			return -EINVAL;
+
+		/* Set the number of buffers */
+		return tracer_set_n_buffers(pmArg);
+
+	/* Set locking scheme the daemon wants us to use */
+	case TRACER_CONFIG_USE_LOCKING:
+#ifndef __HAVE_ARCH_CMPXCHG
+		/* Lock-free scheme not supported on this platform. Refuse the
+		   request before touching sUseLocking so that a rejected call
+		   cannot leave the tracer configured for the lock-free scheme. */
+		if (pmArg == 0)
+			return -EINVAL;
+#endif
+		/* Set the locking scheme in a global for later. The flag is
+		   normalized because it is tested elsewhere both as
+		   "sUseLocking == TRUE" and as "!sUseLocking"; a raw value such
+		   as 2 would make those two tests disagree. */
+		sUseLocking = (pmArg != 0) ? TRUE : FALSE;
+		break;
+
+	/* Trace the given events */
+	case TRACER_CONFIG_EVENTS:
+		if (copy_from_user(&sTracedEvents, (void *) pmArg, sizeof(sTracedEvents)))
+			return -EFAULT;
+		break;
+
+	/* Record the details of the event, or not */
+	case TRACER_CONFIG_DETAILS:
+		if (copy_from_user(&sLogEventDetailsMask, (void *) pmArg, sizeof(sLogEventDetailsMask)))
+			return -EFAULT;
+		break;
+
+	/* Record the CPUID associated with the event */
+	case TRACER_CONFIG_CPUID:
+		sLogCPUID = TRUE;
+		break;
+
+	/* Trace only one process */
+	case TRACER_CONFIG_PID:
+		sTracingPID = TRUE;
+		sTracedPID = pmArg;
+		break;
+
+	/* Trace only the given process group */
+	case TRACER_CONFIG_PGRP:
+		sTracingPGRP = TRUE;
+		sTracedPGRP = pmArg;
+		break;
+
+	/* Trace the processes of a given group of users */
+	case TRACER_CONFIG_GID:
+		sTracingGID = TRUE;
+		sTracedGID = pmArg;
+		break;
+
+	/* Trace the processes of a given user */
+	case TRACER_CONFIG_UID:
+		sTracingUID = TRUE;
+		sTracedUID = pmArg;
+		break;
+
+	/* Set the call depth at which the EIP should be fetched on syscall */
+	case TRACER_CONFIG_SYSCALL_EIP_DEPTH:
+		sSyscallEIPDepthSet = TRUE;
+		sSyscallEIPDepth = pmArg;
+		break;
+
+	/* Set the lowerbound address from which EIP is recorded on syscall */
+	case TRACER_CONFIG_SYSCALL_EIP_LOWER:
+		/* We are using bounds for fetching the EIP where syscall was made */
+		sUseSyscallEIPBounds = TRUE;
+
+		/* Set the lower bound */
+		sLowerEIPBound = (void *) pmArg;
+
+		/* The lower bound has been set */
+		sLowerEIPBoundSet = TRUE;
+		break;
+
+	/* Set the upperbound address from which EIP is recorded on syscall */
+	case TRACER_CONFIG_SYSCALL_EIP_UPPER:
+		/* We are using bounds for fetching the EIP where syscall was made */
+		sUseSyscallEIPBounds = TRUE;
+
+		/* Set the upper bound */
+		sUpperEIPBound = (void *) pmArg;
+
+		/* The upper bound has been set */
+		sUpperEIPBoundSet = TRUE;
+		break;
+
+	/* The daemon has committed the last trace */
+	case TRACER_DATA_COMITTED:
+#if 0
+		/* DEBUG */
+		printk("Tracer: Data has been committed \n");
+#endif
+		/* The lockless version doesn't use sSignalSent. pmArg is the
+		   number of buffers the daemon has told us it just consumed.
+		   Add that to the global count. */
+		if(sBufferControl.using_lockless) {
+			/* Lock the kernel */
+			spin_lock_irqsave(&sSpinLock, lFlags);
+			/* We consumed some buffers, note it. */
+			sBufferControl.buffers_consumed += (u32)pmArg;
+			/* If we were full, we no longer are */
+			if(sBuffersFull && ((u32)pmArg > 0)) {
+				continue_trace();
+			}
+			/* Unlock the kernel */
+			spin_unlock_irqrestore(&sSpinLock, lFlags);
+			break;
+		}		/* Else locking version below */
+
+		/* Safely set the signal sent flag to FALSE */
+		spin_lock_irqsave(&sSpinLock, lFlags);
+		sSignalSent = FALSE;
+		spin_unlock_irqrestore(&sSpinLock, lFlags);
+		break;
+
+	/* Get the number of events lost */
+	case TRACER_GET_EVENTS_LOST:
+		return sEventsLost;
+
+	/* Create a user event */
+	case TRACER_CREATE_USER_EVENT:
+		/* Copy the information from user space */
+		if (copy_from_user(&lNewUserEvent, (void *) pmArg, sizeof(lNewUserEvent)))
+			return -EFAULT;
+
+		/* Create the event */
+		lNewUserEventID = trace_create_owned_event(lNewUserEvent.type,
+							   lNewUserEvent.desc,
+							   lNewUserEvent.format_type,
+							   lNewUserEvent.form,
+							   current->pid);
+
+		/* Forward trace_create_owned_event()'s error code */
+		if (lNewUserEventID < 0)
+			return lNewUserEventID;
+
+		/* Set the event ID */
+		lNewUserEvent.id = lNewUserEventID;
+
+		/* Copy the event information back to user space */
+		if (copy_to_user((void *) pmArg, &lNewUserEvent, sizeof(lNewUserEvent))) {
+			/* Since we were unable to tell the user about the event, destroy it */
+			trace_destroy_event(lNewUserEventID);
+			return -EFAULT;
+		}
+		break;
+
+	/* Destroy a user event */
+	case TRACER_DESTROY_USER_EVENT:
+		/* Pass on the user's request */
+		trace_destroy_event((int) pmArg);
+		break;
+
+	/* Trace a user event */
+	case TRACER_TRACE_USER_EVENT:
+		/* Copy the information from user space */
+		if (copy_from_user(&lUserEvent, (void *) pmArg, sizeof(lUserEvent)))
+			return -EFAULT;
+
+		/* data_size comes straight from user space: refuse anything that
+		   does not fit in the staging buffer instead of overrunning it.
+		   NOTE(review): this relies on sUserEventData being a fixed-size
+		   array declared in this file (its declaration is outside this
+		   hunk) - confirm; if it is a pointer, compare against the size
+		   of its allocation instead. */
+		if (lUserEvent.data_size > sizeof(sUserEventData))
+			return -EINVAL;
+
+		/* Copy the user event data */
+		if (copy_from_user(sUserEventData, lUserEvent.data, lUserEvent.data_size))
+			return -EFAULT;
+
+		/* Log the raw event */
+		lRetValue = trace_raw_event(lUserEvent.id,
+					    lUserEvent.data_size,
+					    sUserEventData);
+
+		/* Has the operation failed */
+		if (lRetValue < 0)
+			/* Forward trace_raw_event()'s error code */
+			return lRetValue;
+		break;
+
+	/* Set event mask */
+	case TRACER_SET_EVENT_MASK:
+		/* Copy the information from user space */
+		if (copy_from_user(&(lTraceMask.mask), (void *) pmArg, sizeof(lTraceMask.mask)))
+			return -EFAULT;
+
+		/* Trace the event */
+		lRetValue = trace(TRACE_EV_CHANGE_MASK, &lTraceMask);
+
+		/* Change the event mask. (This has to be done second or else we may lose the
+		   information if the user decides to stop logging "change mask" events) */
+		memcpy(&sTracedEvents, &(lTraceMask.mask), sizeof(lTraceMask.mask));
+		syscall_entry_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_ENTRY, &sTracedEvents);
+		syscall_exit_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_EXIT, &sTracedEvents);
+
+		/* Always trace the buffer start, the trace start and the change mask */
+		ltt_set_bit(TRACE_EV_BUFFER_START, &sTracedEvents);
+		ltt_set_bit(TRACE_EV_START, &sTracedEvents);
+		ltt_set_bit(TRACE_EV_CHANGE_MASK, &sTracedEvents);
+
+		/* Forward trace()'s error code */
+		return lRetValue;
+
+	/* Get event mask */
+	case TRACER_GET_EVENT_MASK:
+		/* Copy the information to user space */
+		if (copy_to_user((void *) pmArg, &sTracedEvents, sizeof(sTracedEvents)))
+			return -EFAULT;
+		break;
+
+	/* Get buffer control data */
+	case TRACER_GET_BUFFER_CONTROL:
+		/* We can't copy_to_user() with a lock held (accessing user
+		   memory may cause a page fault), so buffers_produced may
+		   actually be larger than what the daemon sees when this
+		   snapshot is taken. This isn't a problem because the
+		   daemon will get a chance to read the new buffer the next
+		   time it's signaled. */
+		/* Copy the buffer control information to user space */
+		if(copy_to_user((void *) pmArg, &sBufferControl, sizeof(sBufferControl)))
+			return -EFAULT;
+		break;
+
+	/* Unknown command */
+	default:
+		return -ENOSYS;
+	}
+
+	return 0;
+}
+
+/*************************************************************
+ * Function : tracer_mmap()
+ * Description : "Mmap" file op
+ * Parameters :
+ *   pmFile, file structure given to the acting process
+ *   pmVmArea, Virtual memory area description structure
+ * Return values :
+ *   -EACCES, permission denied (caller is not the trace daemon)
+ *   otherwise the value returned by tracer_mmap_region(),
+ *   documented here as 0 if ok and -EAGAIN when remap failed
+ ************************************************************/
+int tracer_mmap(struct file *pmFile,
+		struct vm_area_struct *pmVmArea)
+{
+	int lRetValue;		/* Function's return value */
+
+	/* Only the trace daemon is allowed access to mmap */
+	if (current != sDaemonTaskStruct)
+		return -EACCES;
+
+	/* Remap trace buffer into the process's memory space.
+	   NOTE(review): neither sTracBuf != NULL nor the requested length is
+	   checked here; presumably tracer_mmap_region() or the daemon
+	   guarantees both - confirm. */
+	lRetValue = tracer_mmap_region(pmVmArea,
+				       (char *) pmVmArea->vm_start,
+				       sTracBuf,
+				       pmVmArea->vm_end - pmVmArea->vm_start);
+
+#if 0
+	printk("Tracer: Trace buffer virtual address => 0x%08X \n", (u32) sTracBuf);
+	printk("Tracer: Trace buffer physical address => 0x%08X \n", (u32) virt_to_phys(sTracBuf));
+	printk("Tracer: Trace buffer virtual address in daemon space => 0x%08X \n", (u32) pmVmArea->vm_start);
+	printk("Tracer: Trace buffer physical address in daemon space => 0x%08X \n", (u32) virt_to_phys((void *) pmVmArea->vm_start));
+#endif
+
+	return lRetValue;
+}
+
+/*************************************************************
+ * Function : tracer_open()
+ * Description : "Open" file op
+ * Parameters :
+ *   pmInode, the inode associated with the device
+ *   pmFile, file structure given to the acting
process
+ * Return values :
+ *   0, everything went OK
+ *   -ENODEV, no such device.
+ *   -EBUSY, daemon channel (minor number 0) already in use.
+ * Note :
+ *   Only an open of minor 0 registers the caller as the tracing
+ *   daemon and resets the configuration. Opens of minor 1 (user
+ *   processes logging user events) only take a reference.
+ ************************************************************/
+int tracer_open(struct inode *pmInode,
+		struct file *pmFile)
+{
+	int lDevMinor = minor(pmInode->i_rdev) & 0x0f;	/* Device minor number */
+
+	/* Only minor number 0 and 1 are used */
+	if (lDevMinor > 1)
+		return -ENODEV;
+
+	/* Is this the daemon's channel (minor number 0) */
+	if (lDevMinor == 0) {
+		/* Is there another process trying to open the daemon's channel.
+		   The test is on the daemon itself rather than on the open
+		   count: user processes holding minor 1 open must neither be
+		   mistaken for a daemon nor lock a new daemon out. */
+		if (sDaemonTaskStruct != NULL)
+			return -EBUSY;
+
+		/* Fetch the task structure of the process that opened the device */
+		sDaemonTaskStruct = current;
+
+		/* Reset the default configuration since this is the daemon and he will complete the setup */
+		tracer_set_default_config();
+
+#if 0
+		/* DEBUG */
+		printk("<1>Process %d opened the tracing device \n", sDaemonTaskStruct->pid);
+#endif
+	}
+
+	/* Lock the device (one reference per successful open, whatever the minor) */
+	sOpenCount++;
+
+#ifdef MODULE
+	/* Increment module usage */
+	MOD_INC_USE_COUNT;
+#endif
+
+	return 0;
+}
+
+/*************************************************************
+ * Function : tracer_release()
+ * Description : "Release" file op
+ * Parameters :
+ *   pmInode, the inode associated with the device
+ *   pmFile, file structure given to the acting process
+ * Return values :
+ *   0, everything went OK
+ * Note :
+ *   It is assumed that if the tracing daemon dies, exits
+ *   or simply stops existing, the kernel or "someone" will
+ *   call tracer_release. Otherwise, we're in trouble ...
+ *************************************************************/
+int tracer_release(struct inode *pmInode,
+		   struct file *pmFile)
+{
+	/* Minor number of the device node being closed */
+	int lMinor = minor(pmInode->i_rdev) & 0x0f;
+
+	/* A plain user process (any minor other than 0) only gives its
+	   reference back; the daemon channel tears the whole tracer down. */
+	if (lMinor == 0) {
+		/* Report the events that could not be recorded, if any */
+		if (sEventsLost > 0)
+			printk(KERN_ALERT "Tracer: Lost %d events \n", sEventsLost);
+
+		/* There is no daemon anymore */
+		sDaemonTaskStruct = NULL;
+
+		/* Give the trace buffers back, if there are any */
+		if (sTracBuf != NULL)
+			rvfree(sTracBuf, sAllocSize);
+
+		/* Forget every pointer into the buffers */
+		sTracBuf = NULL;
+		sWritBuf = NULL;
+		sReadBuf = NULL;
+		sWritBufEnd = NULL;
+		sReadBufEnd = NULL;
+		sWritPos = NULL;
+		sReadLimit = NULL;
+		sWritLimit = NULL;
+		sUseLocking = FALSE;
+
+		/* Back to the default configuration, with the tracer stopped */
+		tracer_set_default_config();
+		sTracerStarted = FALSE;
+
+		/* Clear the read accounting and the lost-event counter */
+		sBufReadComplete = 0;
+		sSizeReadIncomplete = 0;
+		sEventsLost = 0;
+
+		/* No signal is pending towards a daemon */
+		sSignalSent = FALSE;
+	}
+
+	/* Unlock the device: drop the reference taken at open time */
+	sOpenCount--;
+
+#ifdef MODULE
+	/* Decrement module usage */
+	MOD_DEC_USE_COUNT;
+#endif
+
+	return 0;
+}
+
+/*************************************************************
+ * Function : tracer_fsync()
+ * Description : "Fsync" file op
+ * Parameters :
+ *   pmFile, file structure given to the acting process
+ *   pmDEntry, dentry associated with file
+ * Return values :
+ *   0, everything went OK
+ *   -EACCES, permission denied
+ * Note :
+ *   We need to lock the modifications of the values because
+ *   they are read and written by trace().
+ *   Sonia: don't forget me, I am still yours....
+ *************************************************************/
+int tracer_fsync(struct file *pmFile,
+		 struct dentry *pmDEntry,
+		 int pmDataSync)
+{
+	unsigned long int lIrqState;	/* Interrupt state saved by the spinlock */
+
+	/* fsync is reserved to the trace daemon */
+	if (sDaemonTaskStruct != current)
+		return -EACCES;
+
+	/* Everything below is shared with trace(), hence the lock */
+	spin_lock_irqsave(&sSpinLock, lIrqState);
+
+	/* Rewind the write position to the start of the write buffer */
+	sWritPos = sWritBuf;
+
+	/* Nothing is readable anymore: the read limit goes back to the start of the read buffer */
+	sReadLimit = sReadBuf;
+
+	/* Clear the read accounting and the lost-event counter */
+	sBufReadComplete = 0;
+	sSizeReadIncomplete = 0;
+	sEventsLost = 0;
+
+	/* The daemon may be signaled again */
+	sSignalSent = FALSE;
+
+	spin_unlock_irqrestore(&sSpinLock, lIrqState);
+
+	return 0;
+}
+
+/*************************************************************
+ * Function : tracer_set_buffer_size()
+ * Description :
+ *   Sets the size of the buffers containing the trace data.
+ *   For the lockless version, if the value isn't a power of
+ *   two, it will be converted to the next lower power of 2
+ *   and multiplied by the number of buffers previously
+ *   specified (default 2). For the locking version, the
+ *   value will be rounded up to the next page boundary.
+ *   NOTE(review): the body visible below rejects a lockless size
+ *   that is not a power of two with -EINVAL instead of rounding
+ *   it down - confirm which behavior is intended.
+ * Parameters :
+ *   pmSize, Size of buffers
+ * Return values :
+ *   0, Size setting went OK
+ *   -ENOMEM, unable to get a hold of memory for tracer
+ *************************************************************/
+int tracer_set_buffer_size(int pmSize)
+{
+	int lSizeAlloc;
+	/* sBufnoBits must have already been set before we do this.
*/ + int lNBuffers = TRACE_MAX_BUFFER_NUMBER(sBufnoBits); + + if(sUseLocking == TRUE) { + /* Set size to allocate (= pmSize * 2) and fix it's size to be on a page boundary */ + lSizeAlloc = FIX_SIZE(pmSize << 1); + } else { /* Calculate power-of-2 buffer size */ + if(hweight32(pmSize) != 1) /* Invalid if # set bits != 1 */ + return -EINVAL; + + /* Find position of one and only set bit */ + sBufOffsetBits = ffs(pmSize) - 1; + /* Calculate total size of buffers */ + lSizeAlloc = pmSize * lNBuffers; + /* Sanity check */ + if(lSizeAlloc > TRACER_LOCKLESS_MAX_TOTAL_BUF_SIZE) + return -EINVAL; + } + + /* Free the current buffers, if any */ + if (sTracBuf != NULL) + rvfree(sTracBuf, sAllocSize); + + /* Allocate space for the tracing buffers */ + if ((sTracBuf = (char *) rvmalloc(lSizeAlloc)) == NULL) + return -ENOMEM; + +#if 1 /* DEBUG - init all of buffer with easy-to-spot default values */ + { + int i; + for(i=0; i> (offset_bits)) +#define TRACE_BUFFER_OFFSET_GET(index, mask) ((index) & (mask)) +#define TRACE_BUFFER_OFFSET_CLEAR(index, mask) ((index) & ~(mask)) + +/* Flags returned by trace_reserve/trace_reserve_slow */ +#define LTT_BUFFER_SWITCH_NONE 0x00 +#define LTT_EVENT_DISCARD_NONE 0x00 +#define LTT_BUFFER_SWITCH 0x01 +#define LTT_EVENT_DISCARD 0x02 +#define LTT_EVENT_TOO_LONG 0x04 + +/* Structure used for communicating buffer info between tracer and daemon + for lock-free tracing. This is a per-buffer (CPU, etc.) data structure. */ +struct buffer_control +{ + int using_lockless; + + u32 index; + u8 bufno_bits; + u32 n_buffers; /* cached value */ + u8 offset_bits; + u32 offset_mask; /* cached value */ + u32 index_mask; /* cached value */ + + u32 buffers_produced; + u32 buffers_consumed; + /* atomic_t has only 24 usable bits, limiting us to 16M buffers */ + atomic_t fill_count[TRACER_MAX_BUFFERS]; +}; + +/* If cmpxchg isn't defined for the architecture, we don't want to + generate a link error - the locking scheme will still be available. 
*/ +#ifndef __HAVE_ARCH_CMPXCHG +#define cmpxchg(p,o,n) 0 +#endif + +extern __inline__ int ltt_set_bit(int nr, void *addr) +{ + unsigned char *p = addr; + unsigned char mask = 1 << (nr & 7); + unsigned char old; + + p += nr >> 3; + old = *p; + *p |= mask; + + return ((old & mask) != 0); +} + +extern __inline__ int ltt_clear_bit(int nr, void *addr) +{ + unsigned char *p = addr; + unsigned char mask = 1 << (nr & 7); + unsigned char old; + + p += nr >> 3; + old = *p; + *p &= ~mask; + + return ((old & mask) != 0); +} + +extern __inline__ int ltt_test_bit(int nr, void *addr) +{ + unsigned char *p = addr; + unsigned char mask = 1 << (nr & 7); + + p += nr >> 3; + + return ((*p & mask) != 0); +} + +/* Function prototypes */ +int trace + (u8, + void *); +void tracer_switch_buffers + (struct timeval); +int tracer_ioctl + (struct inode *, + struct file *, + unsigned int, + unsigned long); +int tracer_mmap + (struct file *, + struct vm_area_struct *); +int tracer_open + (struct inode *, + struct file *); +int tracer_release + (struct inode *, + struct file *); +int tracer_fsync + (struct file *, + struct dentry *, + int); +#ifdef MODULE +void tracer_exit + (void); +#endif /* #ifdef MODULE */ +int tracer_set_buffer_size + (int); +int tracer_set_n_buffers + (int); +int tracer_set_default_config + (void); +int tracer_init + (void); +#endif /* _TRACER_H */ diff -urN -X dontdiff linux-2.5.33/fs/buffer.c linux-2.5.33.ltt.lockless/fs/buffer.c --- linux-2.5.33/fs/buffer.c Sat Aug 31 15:05:27 2002 +++ linux-2.5.33.ltt.lockless/fs/buffer.c Sun Sep 8 23:00:20 2002 @@ -34,6 +34,7 @@ #include #include #include +#include #include static void invalidate_bh_lrus(void); @@ -135,6 +136,7 @@ get_bh(bh); add_wait_queue(wq, &wait); do { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_BUF_WAIT_START, 0, 0, NULL); blk_run_queues(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!buffer_locked(bh)) @@ -142,6 +144,7 @@ schedule(); } while (buffer_locked(bh)); tsk->state = TASK_RUNNING; + 
TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_BUF_WAIT_END, 0, 0, NULL); remove_wait_queue(wq, &wait); put_bh(bh); } diff -urN -X dontdiff linux-2.5.33/fs/exec.c linux-2.5.33.ltt.lockless/fs/exec.c --- linux-2.5.33/fs/exec.c Sat Aug 31 15:04:57 2002 +++ linux-2.5.33.ltt.lockless/fs/exec.c Sun Sep 8 23:00:20 2002 @@ -41,6 +41,8 @@ #include #include +#include + #include #include #include @@ -916,6 +918,11 @@ if (IS_ERR(file)) return retval; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_EXEC, + 0, + file->f_dentry->d_name.len, + file->f_dentry->d_name.name); + bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); diff -urN -X dontdiff linux-2.5.33/fs/ioctl.c linux-2.5.33.ltt.lockless/fs/ioctl.c --- linux-2.5.33/fs/ioctl.c Sat Aug 31 15:04:52 2002 +++ linux-2.5.33.ltt.lockless/fs/ioctl.c Sun Sep 8 23:00:20 2002 @@ -10,6 +10,8 @@ #include #include +#include + #include #include @@ -65,6 +67,11 @@ goto out; } + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_IOCTL, + fd, + cmd, + NULL); + lock_kernel(); switch (cmd) { case FIOCLEX: diff -urN -X dontdiff linux-2.5.33/fs/open.c linux-2.5.33.ltt.lockless/fs/open.c --- linux-2.5.33/fs/open.c Sat Aug 31 15:04:48 2002 +++ linux-2.5.33.ltt.lockless/fs/open.c Sun Sep 8 23:00:20 2002 @@ -19,6 +19,8 @@ #include #include +#include + #include #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) @@ -801,6 +803,10 @@ error = PTR_ERR(f); if (IS_ERR(f)) goto out_error; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_OPEN, + fd, + f->f_dentry->d_name.len, + f->f_dentry->d_name.name); fd_install(fd, f); } out: @@ -867,6 +873,10 @@ filp = files->fd[fd]; if (!filp) goto out_unlock; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_CLOSE, + fd, + 0, + NULL); files->fd[fd] = NULL; FD_CLR(fd, files->close_on_exec); __put_unused_fd(files, fd); diff -urN -X dontdiff linux-2.5.33/fs/read_write.c linux-2.5.33.ltt.lockless/fs/read_write.c --- linux-2.5.33/fs/read_write.c Sat Aug 31 15:05:28 2002 +++ 
linux-2.5.33.ltt.lockless/fs/read_write.c Sun Sep 8 23:00:20 2002 @@ -13,6 +13,8 @@ #include #include +#include + #include struct file_operations generic_ro_fops = { @@ -133,6 +135,10 @@ if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_SEEK, + fd, + offset, + NULL); fput(file); bad: return retval; @@ -163,6 +169,11 @@ offset = llseek(file, ((loff_t) offset_high << 32) | offset_low, origin); + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_SEEK, + fd, + offset, + NULL); + retval = (int)offset; if (offset >= 0) { retval = -EFAULT; @@ -229,6 +240,10 @@ file = fget(fd); if (file) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_READ, + fd, + count, + NULL); ret = vfs_read(file, buf, count, &file->f_pos); fput(file); } @@ -243,6 +258,10 @@ file = fget(fd); if (file) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_WRITE, + fd, + count, + NULL); ret = vfs_write(file, buf, count, &file->f_pos); fput(file); } @@ -261,6 +280,10 @@ file = fget(fd); if (file) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_READ, + fd, + count, + NULL); ret = vfs_read(file, buf, count, &pos); fput(file); } @@ -279,6 +302,10 @@ file = fget(fd); if (file) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_WRITE, + fd, + count, + NULL); ret = vfs_write(file, buf, count, &pos); fput(file); } @@ -405,6 +432,10 @@ file = fget(fd); if (!file) goto bad_file; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_READ, + fd, + count, + NULL); if (file->f_op && (file->f_mode & FMODE_READ) && (file->f_op->readv || file->f_op->read)) { ret = security_ops->file_permission (file, MAY_READ); @@ -428,6 +459,10 @@ file = fget(fd); if (!file) goto bad_file; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_WRITE, + fd, + count, + NULL); if (file->f_op && (file->f_mode & FMODE_WRITE) && (file->f_op->writev || file->f_op->write)) { ret = security_ops->file_permission (file, MAY_WRITE); diff -urN -X dontdiff linux-2.5.33/fs/select.c linux-2.5.33.ltt.lockless/fs/select.c --- 
linux-2.5.33/fs/select.c Sat Aug 31 15:04:47 2002 +++ linux-2.5.33.ltt.lockless/fs/select.c Sun Sep 8 23:00:20 2002 @@ -21,6 +21,8 @@ #include #include +#include + #include #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) @@ -194,6 +196,10 @@ file = fget(i); mask = POLLNVAL; if (file) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_SELECT, + i /* The fd*/, + __timeout, + NULL); mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) mask = file->f_op->poll(file, wait); @@ -368,6 +374,10 @@ struct file * file = fget(fd); mask = POLLNVAL; if (file != NULL) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_POLL, + fd, + 0, + NULL); mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) mask = file->f_op->poll(file, *pwait); diff -urN -X dontdiff linux-2.5.33/include/asm-i386/trace.h linux-2.5.33.ltt.lockless/include/asm-i386/trace.h --- linux-2.5.33/include/asm-i386/trace.h Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/include/asm-i386/trace.h Sun Sep 8 23:00:20 2002 @@ -0,0 +1,15 @@ +/* + * linux/include/asm-i386/trace.h + * + * Copyright (C) 2002, Karim Yaghmour + * + * i386 definitions for tracing system + */ + +#include + +/* Current arch type */ +#define TRACE_ARCH_TYPE TRACE_ARCH_TYPE_I386 + +/* Current variant type */ +#define TRACE_ARCH_VARIANT TRACE_ARCH_VARIANT_NONE diff -urN -X dontdiff linux-2.5.33/include/asm-mips/trace.h linux-2.5.33.ltt.lockless/include/asm-mips/trace.h --- linux-2.5.33/include/asm-mips/trace.h Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/include/asm-mips/trace.h Sun Sep 8 23:00:20 2002 @@ -0,0 +1,15 @@ +/* + * linux/include/asm-mips/trace.h + * + * Copyright (C) 2002, Karim Yaghmour + * + * MIPS definitions for tracing system + */ + +#include + +/* Current arch type */ +#define TRACE_ARCH_TYPE TRACE_ARCH_TYPE_MIPS + +/* Current variant type */ +#define TRACE_ARCH_VARIANT TRACE_ARCH_VARIANT_NONE diff -urN -X dontdiff linux-2.5.33/include/asm-ppc/trace.h linux-2.5.33.ltt.lockless/include/asm-ppc/trace.h --- 
linux-2.5.33/include/asm-ppc/trace.h Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/include/asm-ppc/trace.h Sun Sep 8 23:00:20 2002 @@ -0,0 +1,30 @@ +/* + * linux/include/asm-ppc/trace.h + * + * Copyright (C) 2002, Karim Yaghmour + * + * PowerPC definitions for tracing system + */ + +#include +#include + +/* Current arch type */ +#define TRACE_ARCH_TYPE TRACE_ARCH_TYPE_PPC + +/* PowerPC variants */ +#define TRACE_ARCH_VARIANT_PPC_4xx 1 /* 4xx systems (IBM embedded series) */ +#define TRACE_ARCH_VARIANT_PPC_6xx 2 /* 6xx/7xx/74xx/8260/POWER3 systems (desktop flavor) */ +#define TRACE_ARCH_VARIANT_PPC_8xx 3 /* 8xx systems (Motorola embedded series) */ +#define TRACE_ARCH_VARIANT_PPC_ISERIES 4 /* iSeries systems */ + +/* Current variant type; stays undefined if none of the CONFIG options below is set */ +#if defined(CONFIG_4xx) +#define TRACE_ARCH_VARIANT TRACE_ARCH_VARIANT_PPC_4xx +#elif defined(CONFIG_6xx) +#define TRACE_ARCH_VARIANT TRACE_ARCH_VARIANT_PPC_6xx +#elif defined(CONFIG_8xx) +#define TRACE_ARCH_VARIANT TRACE_ARCH_VARIANT_PPC_8xx +#elif defined(CONFIG_PPC_ISERIES) +#define TRACE_ARCH_VARIANT TRACE_ARCH_VARIANT_PPC_ISERIES +#endif diff -urN -X dontdiff linux-2.5.33/include/asm-s390/trace.h linux-2.5.33.ltt.lockless/include/asm-s390/trace.h --- linux-2.5.33/include/asm-s390/trace.h Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/include/asm-s390/trace.h Sun Sep 8 23:00:20 2002 @@ -0,0 +1,15 @@ +/* + * linux/include/asm-s390/trace.h + * + * Copyright (C) 2002, Karim Yaghmour + * + * S/390 definitions for tracing system + */ + +#include + +/* Current arch type */ +#define TRACE_ARCH_TYPE TRACE_ARCH_TYPE_S390 + +/* Current variant type */ +#define TRACE_ARCH_VARIANT TRACE_ARCH_VARIANT_NONE diff -urN -X dontdiff linux-2.5.33/include/asm-sh/trace.h linux-2.5.33.ltt.lockless/include/asm-sh/trace.h --- linux-2.5.33/include/asm-sh/trace.h Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/include/asm-sh/trace.h Sun Sep 8 23:00:20 2002 @@ -0,0 +1,15 @@ +/* + * linux/include/asm-sh/trace.h + * + *
Copyright (C) 2002, Karim Yaghmour + * + * SuperH definitions for tracing system + */ + +#include + +/* Current arch type */ +#define TRACE_ARCH_TYPE TRACE_ARCH_TYPE_SH + +/* Current variant type */ +#define TRACE_ARCH_VARIANT TRACE_ARCH_VARIANT_NONE diff -urN -X dontdiff linux-2.5.33/include/linux/trace.h linux-2.5.33.ltt.lockless/include/linux/trace.h --- linux-2.5.33/include/linux/trace.h Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/include/linux/trace.h Sun Sep 8 23:00:20 2002 @@ -0,0 +1,450 @@ +/* + * linux/include/linux/trace.h + * + * Copyright (C) 1999-2002 Karim Yaghmour (karim@opersys.com) + * + * This contains the necessary definitions for tracing the + * the system. + */ + +#ifndef _LINUX_TRACE_H +#define _LINUX_TRACE_H + +#include +#include +#include + +/* Is kernel tracing enabled */ +#if defined(CONFIG_TRACE) || defined(CONFIG_TRACE_MODULE) + +/* Structure packing within the trace */ +#if LTT_UNPACKED_STRUCTS +#define LTT_PACKED_STRUCT +#else /* if LTT_UNPACKED_STRUCTS */ +#define LTT_PACKED_STRUCT __attribute__ ((packed)) +#endif /* if LTT_UNPACKED_STRUCTS */ + +/* The prototype of the tracer call (EventID, *EventStruct) */ +typedef int (*tracer_call) (u8, void *); + +/* This structure contains all the information needed to be known + about the tracing module. 
*/ +struct tracer { + tracer_call trace; /* The tracing routine itself */ + + int fetch_syscall_eip_use_depth; /* Use the given depth */ + int fetch_syscall_eip_use_bounds; /* Find eip in bounds */ + int syscall_eip_depth; /* Call depth at which eip is fetched */ + void *syscall_lower_eip_bound; /* Lower eip bound */ + void *syscall_upper_eip_bound; /* Higher eip bound */ +}; + +/* Maximal size a custom event can have */ +#define CUSTOM_EVENT_MAX_SIZE 8192 + +/* String length limits for custom events creation */ +#define CUSTOM_EVENT_TYPE_STR_LEN 20 +#define CUSTOM_EVENT_DESC_STR_LEN 100 +#define CUSTOM_EVENT_FORM_STR_LEN 256 +#define CUSTOM_EVENT_FINAL_STR_LEN 200 + +/* Type of custom event formats */ +#define CUSTOM_EVENT_FORMAT_TYPE_NONE 0 +#define CUSTOM_EVENT_FORMAT_TYPE_STR 1 +#define CUSTOM_EVENT_FORMAT_TYPE_HEX 2 +#define CUSTOM_EVENT_FORMAT_TYPE_XML 3 +#define CUSTOM_EVENT_FORMAT_TYPE_IBM 4 + +/* Architecture types */ +#define TRACE_ARCH_TYPE_I386 1 /* i386 system */ +#define TRACE_ARCH_TYPE_PPC 2 /* PPC system */ +#define TRACE_ARCH_TYPE_SH 3 /* SH system */ +#define TRACE_ARCH_TYPE_S390 4 /* S/390 system */ +#define TRACE_ARCH_TYPE_MIPS 5 /* MIPS system */ + +/* Standard definitions for variants */ +#define TRACE_ARCH_VARIANT_NONE 0 /* Main architecture implementation */ + +/* Global trace flags */ +extern unsigned int syscall_entry_trace_active; +extern unsigned int syscall_exit_trace_active; + +/* The functions to the tracer management code */ +int register_tracer + (tracer_call /* The tracer function */ ); +int unregister_tracer + (tracer_call /* The tracer function */ ); +int trace_set_config + (tracer_call /* The tracer function */ , + int /* Use depth to fetch eip */ , + int /* Use bounds to fetch eip */ , + int /* Detph to fetch eip */ , + void * /* Lower bound eip address */ , + void * /* Upper bound eip address */ ); +int trace_register_callback + (tracer_call /* The callback to add */ , + u8 /* The event ID targeted */ ); +int 
trace_unregister_callback + (tracer_call /* The callback to remove */ , + u8 /* The event ID targeted */ ); +int trace_get_config + (int * /* Use depth to fetch eip */ , + int * /* Use bounds to fetch eip */ , + int * /* Detph to fetch eip */ , + void ** /* Lower bound eip address */ , + void ** /* Upper bound eip address */ ); +int trace_create_event + (char * /* String describing event type */ , + char * /* String to format standard event description */ , + int /* Type of formatting used to log event data */ , + char * /* Data specific to format */ ); +int trace_create_owned_event + (char * /* String describing event type */ , + char * /* String to format standard event description */ , + int /* Type of formatting used to log event data */ , + char * /* Data specific to format */ , + pid_t /* PID of event's owner */ ); +void trace_destroy_event + (int /* The event ID given by trace_create_event() */ ); +void trace_destroy_owners_events + (pid_t /* The PID of the process' who's events are to be deleted */ ); +void trace_reregister_custom_events + (void); +int trace_std_formatted_event + (int /* The event ID given by trace_create_event() */ , + ... 
/* The parameters to be printed out in the event string */ ); +int trace_raw_event + (int /* The event ID given by trace_create_event() */ , + int /* The size of the raw data */ , + void * /* Pointer to the raw event data */ ); +int trace_event + (u8 /* Event ID (as defined in this header file) */ , + void * /* Structure describing the event */ ); + +/* Generic function */ +static inline void TRACE_EVENT(u8 event_id, void* data) +{ + trace_event(event_id, data); +} + +/* Traced events */ +#define TRACE_EV_START 0 /* This is to mark the trace's start */ +#define TRACE_EV_SYSCALL_ENTRY 1 /* Entry in a given system call */ +#define TRACE_EV_SYSCALL_EXIT 2 /* Exit from a given system call */ +#define TRACE_EV_TRAP_ENTRY 3 /* Entry in a trap */ +#define TRACE_EV_TRAP_EXIT 4 /* Exit from a trap */ +#define TRACE_EV_IRQ_ENTRY 5 /* Entry in an irq */ +#define TRACE_EV_IRQ_EXIT 6 /* Exit from an irq */ +#define TRACE_EV_SCHEDCHANGE 7 /* Scheduling change */ +#define TRACE_EV_KERNEL_TIMER 8 /* The kernel timer routine has been called */ +#define TRACE_EV_SOFT_IRQ 9 /* Hit key part of soft-irq management */ +#define TRACE_EV_PROCESS 10 /* Hit key part of process management */ +#define TRACE_EV_FILE_SYSTEM 11 /* Hit key part of file system */ +#define TRACE_EV_TIMER 12 /* Hit key part of timer management */ +#define TRACE_EV_MEMORY 13 /* Hit key part of memory management */ +#define TRACE_EV_SOCKET 14 /* Hit key part of socket communication */ +#define TRACE_EV_IPC 15 /* Hit key part of System V IPC */ +#define TRACE_EV_NETWORK 16 /* Hit key part of network communication */ + +#define TRACE_EV_BUFFER_START 17 /* Mark the begining of a trace buffer */ +#define TRACE_EV_BUFFER_END 18 /* Mark the ending of a trace buffer */ +#define TRACE_EV_NEW_EVENT 19 /* New event type */ +#define TRACE_EV_CUSTOM 20 /* Custom event */ + +#define TRACE_EV_CHANGE_MASK 21 /* Change in event mask */ + +/* Number of traced events */ +#define TRACE_EV_MAX TRACE_EV_CHANGE_MASK + +/* Structures and 
macros for events */ +/* TRACE_SYSCALL_ENTRY */ +typedef struct _trace_syscall_entry { + u8 syscall_id; /* Syscall entry number in entry.S */ + u32 address; /* Address from which call was made */ +} LTT_PACKED_STRUCT trace_syscall_entry; + +/* TRACE_TRAP_ENTRY */ +#ifndef __s390__ +typedef struct _trace_trap_entry { + u16 trap_id; /* Trap number */ + u32 address; /* Address where trap occurred */ +} LTT_PACKED_STRUCT trace_trap_entry; +static inline void TRACE_TRAP_ENTRY(u16 trap_id, u32 address) +#else +typedef u64 trapid_t; +typedef struct _trace_trap_entry { + trapid_t trap_id; /* Trap number */ + u32 address; /* Address where trap occurred */ +} LTT_PACKED_STRUCT trace_trap_entry; +static inline void TRACE_TRAP_ENTRY(trapid_t trap_id, u32 address) +#endif +{ + trace_trap_entry trap_event; + + trap_event.trap_id = trap_id; + trap_event.address = address; + + trace_event(TRACE_EV_TRAP_ENTRY, &trap_event); +} + +/* TRACE_TRAP_EXIT */ +static inline void TRACE_TRAP_EXIT(void) +{ + trace_event(TRACE_EV_TRAP_EXIT, NULL); +} + +/* TRACE_IRQ_ENTRY */ +typedef struct _trace_irq_entry { + u8 irq_id; /* IRQ number (held in 8 bits) */ + u8 kernel; /* Are we executing kernel code */ +} LTT_PACKED_STRUCT trace_irq_entry; +static inline void TRACE_IRQ_ENTRY(u8 irq_id, u8 in_kernel) +{ + trace_irq_entry irq_entry; + + irq_entry.irq_id = irq_id; + irq_entry.kernel = in_kernel; + + trace_event(TRACE_EV_IRQ_ENTRY, &irq_entry); +} + +/* TRACE_IRQ_EXIT */ +static inline void TRACE_IRQ_EXIT(void) +{ + trace_event(TRACE_EV_IRQ_EXIT, NULL); +} + +/* TRACE_SCHEDCHANGE */ +typedef struct _trace_schedchange { + u32 out; /* Outgoing process' pid */ + u32 in; /* Incoming process (filled with the task pointer, not a pid) */ + u32 out_state; /* Outgoing process' state */ +} LTT_PACKED_STRUCT trace_schedchange; +static inline void TRACE_SCHEDCHANGE(task_t * task_out, task_t * task_in) +{ + trace_schedchange sched_event; + + sched_event.out = (u32) task_out->pid; + sched_event.in = (u32) task_in; /* NOTE(review): task pointer cast to u32, while out is a pid - confirm the tracer resolves it */ + sched_event.out_state = (u32) task_out->state; + + 
trace_event(TRACE_EV_SCHEDCHANGE, &sched_event); +} + +/* TRACE_SOFT_IRQ */ +#define TRACE_EV_SOFT_IRQ_BOTTOM_HALF 1 /* Conventional bottom-half */ +#define TRACE_EV_SOFT_IRQ_SOFT_IRQ 2 /* Real soft-irq */ +#define TRACE_EV_SOFT_IRQ_TASKLET_ACTION 3 /* Tasklet action */ +#define TRACE_EV_SOFT_IRQ_TASKLET_HI_ACTION 4 /* Tasklet hi-action */ +typedef struct _trace_soft_irq { + u8 event_sub_id; /* Soft-irq event Id */ + u32 event_data; /* Data associated with event */ +} LTT_PACKED_STRUCT trace_soft_irq; +static inline void TRACE_SOFT_IRQ(u8 ev_id, u32 data) +{ + trace_soft_irq soft_irq_event; + + soft_irq_event.event_sub_id = ev_id; + soft_irq_event.event_data = data; + + trace_event(TRACE_EV_SOFT_IRQ, &soft_irq_event); +} + +/* TRACE_PROCESS */ +#define TRACE_EV_PROCESS_KTHREAD 1 /* Creation of a kernel thread */ +#define TRACE_EV_PROCESS_FORK 2 /* A fork or clone occured */ +#define TRACE_EV_PROCESS_EXIT 3 /* An exit occured */ +#define TRACE_EV_PROCESS_WAIT 4 /* A wait occured */ +#define TRACE_EV_PROCESS_SIGNAL 5 /* A signal has been sent */ +#define TRACE_EV_PROCESS_WAKEUP 6 /* Wake up a process */ +typedef struct _trace_process { + u8 event_sub_id; /* Process event ID */ + u32 event_data1; /* Data associated with event */ + u32 event_data2; +} LTT_PACKED_STRUCT trace_process; +static inline void TRACE_PROCESS(u8 ev_id, u32 data1, u32 data2) +{ + trace_process proc_event; + + proc_event.event_sub_id = ev_id; + proc_event.event_data1 = data1; + proc_event.event_data2 = data2; + + trace_event(TRACE_EV_PROCESS, &proc_event); +} + +/* TRACE_FILE_SYSTEM */ +#define TRACE_EV_FILE_SYSTEM_BUF_WAIT_START 1 /* Starting to wait for a data buffer */ +#define TRACE_EV_FILE_SYSTEM_BUF_WAIT_END 2 /* End to wait for a data buffer */ +#define TRACE_EV_FILE_SYSTEM_EXEC 3 /* An exec occured */ +#define TRACE_EV_FILE_SYSTEM_OPEN 4 /* An open occured */ +#define TRACE_EV_FILE_SYSTEM_CLOSE 5 /* A close occured */ +#define TRACE_EV_FILE_SYSTEM_READ 6 /* A read occured */ +#define 
TRACE_EV_FILE_SYSTEM_WRITE 7 /* A write occured */ +#define TRACE_EV_FILE_SYSTEM_SEEK 8 /* A seek occured */ +#define TRACE_EV_FILE_SYSTEM_IOCTL 9 /* An ioctl occured */ +#define TRACE_EV_FILE_SYSTEM_SELECT 10 /* A select occured */ +#define TRACE_EV_FILE_SYSTEM_POLL 11 /* A poll occured */ +typedef struct _trace_file_system { + u8 event_sub_id; /* File system event ID */ + u32 event_data1; /* Event data */ + u32 event_data2; /* Event data 2 */ + char *file_name; /* Name of file operated on */ +} LTT_PACKED_STRUCT trace_file_system; +static inline void TRACE_FILE_SYSTEM(u8 ev_id, u32 data1, u32 data2, const unsigned char *file_name) +{ + trace_file_system fs_event; + + fs_event.event_sub_id = ev_id; + fs_event.event_data1 = data1; + fs_event.event_data2 = data2; + fs_event.file_name = (char*) file_name; + + trace_event(TRACE_EV_FILE_SYSTEM, &fs_event); +} + +/* TRACE_TIMER */ +#define TRACE_EV_TIMER_EXPIRED 1 /* Timer expired */ +#define TRACE_EV_TIMER_SETITIMER 2 /* Setting itimer occurred */ +#define TRACE_EV_TIMER_SETTIMEOUT 3 /* Setting sched timeout occurred */ +typedef struct _trace_timer { + u8 event_sub_id; /* Timer event ID */ + u8 event_sdata; /* Short data */ + u32 event_data1; /* Data associated with event */ + u32 event_data2; +} LTT_PACKED_STRUCT trace_timer; +static inline void TRACE_TIMER(u8 ev_id, u8 sdata, u32 data1, u32 data2) +{ + trace_timer timer_event; + + timer_event.event_sub_id = ev_id; + timer_event.event_sdata = sdata; + timer_event.event_data1 = data1; + timer_event.event_data2 = data2; + + trace_event(TRACE_EV_TIMER, &timer_event); +} + +/* TRACE_MEMORY */ +#define TRACE_EV_MEMORY_PAGE_ALLOC 1 /* Allocating pages */ +#define TRACE_EV_MEMORY_PAGE_FREE 2 /* Freing pages */ +#define TRACE_EV_MEMORY_SWAP_IN 3 /* Swaping pages in */ +#define TRACE_EV_MEMORY_SWAP_OUT 4 /* Swaping pages out */ +#define TRACE_EV_MEMORY_PAGE_WAIT_START 5 /* Start to wait for page */ +#define TRACE_EV_MEMORY_PAGE_WAIT_END 6 /* End to wait for page */ +typedef 
struct _trace_memory { + u8 event_sub_id; /* Memory event ID */ + u32 event_data; /* Data associated with event */ +} LTT_PACKED_STRUCT trace_memory; +static inline void TRACE_MEMORY(u8 ev_id, u32 data) +{ + trace_memory memory_event; + + memory_event.event_sub_id = ev_id; + memory_event.event_data = data; + + trace_event(TRACE_EV_MEMORY, &memory_event); +} + +/* TRACE_SOCKET */ +#define TRACE_EV_SOCKET_CALL 1 /* A socket call occured */ +#define TRACE_EV_SOCKET_CREATE 2 /* A socket has been created */ +#define TRACE_EV_SOCKET_SEND 3 /* Data was sent to a socket */ +#define TRACE_EV_SOCKET_RECEIVE 4 /* Data was read from a socket */ +typedef struct _trace_socket { + u8 event_sub_id; /* Socket event ID */ + u32 event_data1; /* Data associated with event */ + u32 event_data2; /* Data associated with event */ +} LTT_PACKED_STRUCT trace_socket; +static inline void TRACE_SOCKET(u8 ev_id, u32 data1, u32 data2) +{ + trace_socket socket_event; + + socket_event.event_sub_id = ev_id; + socket_event.event_data1 = data1; + socket_event.event_data2 = data2; + + trace_event(TRACE_EV_SOCKET, &socket_event); +} + +/* TRACE_IPC */ +#define TRACE_EV_IPC_CALL 1 /* A System V IPC call occured */ +#define TRACE_EV_IPC_MSG_CREATE 2 /* A message queue has been created */ +#define TRACE_EV_IPC_SEM_CREATE 3 /* A semaphore was created */ +#define TRACE_EV_IPC_SHM_CREATE 4 /* A shared memory segment has been created */ +typedef struct _trace_ipc { + u8 event_sub_id; /* IPC event ID */ + u32 event_data1; /* Data associated with event */ + u32 event_data2; /* Data associated with event */ +} LTT_PACKED_STRUCT trace_ipc; +static inline void TRACE_IPC(u8 ev_id, u32 data1, u32 data2) +{ + trace_ipc ipc_event; + + ipc_event.event_sub_id = ev_id; + ipc_event.event_data1 = data1; + ipc_event.event_data2 = data2; + + trace_event(TRACE_EV_IPC, &ipc_event); +} + +/* TRACE_NETWORK */ +#define TRACE_EV_NETWORK_PACKET_IN 1 /* A packet came in */ +#define TRACE_EV_NETWORK_PACKET_OUT 2 /* A packet was sent 
*/ +typedef struct _trace_network { + u8 event_sub_id; /* Network event ID */ + u32 event_data; /* Event data */ +} LTT_PACKED_STRUCT trace_network; +static inline void TRACE_NETWORK(u8 ev_id, u32 data) +{ + trace_network net_event; + + net_event.event_sub_id = ev_id; + net_event.event_data = data; + + trace_event(TRACE_EV_NETWORK, &net_event); +} + +/* Custom declared events */ +/* ***WARNING*** These structures should never be used as is, use the provided custom + event creation and logging functions. */ +typedef struct _trace_new_event { + /* Basics */ + u32 id; /* Custom event ID */ + char type[CUSTOM_EVENT_TYPE_STR_LEN]; /* Event type description */ + char desc[CUSTOM_EVENT_DESC_STR_LEN]; /* Detailed event description */ + + /* Custom formatting */ + u32 format_type; /* Type of formatting */ + char form[CUSTOM_EVENT_FORM_STR_LEN]; /* Data specific to format */ +} LTT_PACKED_STRUCT trace_new_event; +typedef struct _trace_custom { + u32 id; /* Event ID */ + u32 data_size; /* Size of data recorded by event */ + void *data; /* Data recorded by event */ +} LTT_PACKED_STRUCT trace_custom; + +/* TRACE_CHANGE_MASK */ +typedef u64 trace_event_mask; /* The event mask type */ +typedef struct _trace_change_mask { + trace_event_mask mask; /* Event mask */ +} LTT_PACKED_STRUCT trace_change_mask; + +#else /* Kernel is configured without tracing */ +#define TRACE_EVENT(ID, DATA) +#define TRACE_TRAP_ENTRY(ID, EIP) +#define TRACE_TRAP_EXIT() +#define TRACE_IRQ_ENTRY(ID, KERNEL) +#define TRACE_IRQ_EXIT() +#define TRACE_SCHEDCHANGE(OUT, IN) +#define TRACE_SOFT_IRQ(ID, DATA) +#define TRACE_PROCESS(ID, DATA1, DATA2) +#define TRACE_FILE_SYSTEM(ID, DATA1, DATA2, FILE_NAME) +#define TRACE_TIMER(ID, SDATA, DATA1, DATA2) +#define TRACE_MEMORY(ID, DATA) +#define TRACE_SOCKET(ID, DATA1, DATA2) +#define TRACE_IPC(ID, DATA1, DATA2) +#define TRACE_NETWORK(ID, DATA) +#endif /* defined(CONFIG_TRACE) || defined(CONFIG_TRACE_MODULE) */ + +#endif /* _LINUX_TRACE_H */ diff -urN -X dontdiff 
linux-2.5.33/ipc/msg.c linux-2.5.33.ltt.lockless/ipc/msg.c --- linux-2.5.33/ipc/msg.c Sat Aug 31 15:04:46 2002 +++ linux-2.5.33.ltt.lockless/ipc/msg.c Sun Sep 8 23:00:20 2002 @@ -25,6 +25,8 @@ #include #include "util.h" +#include + /* sysctl: */ int msg_ctlmax = MSGMAX; int msg_ctlmnb = MSGMNB; @@ -300,6 +302,7 @@ msg_unlock(id); } up(&msg_ids.sem); + TRACE_IPC(TRACE_EV_IPC_MSG_CREATE, ret, msgflg); return ret; } diff -urN -X dontdiff linux-2.5.33/ipc/sem.c linux-2.5.33.ltt.lockless/ipc/sem.c --- linux-2.5.33/ipc/sem.c Sat Aug 31 15:04:57 2002 +++ linux-2.5.33.ltt.lockless/ipc/sem.c Sun Sep 8 23:00:20 2002 @@ -66,6 +66,7 @@ #include #include "util.h" +#include #define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id)) #define sem_unlock(id) ipc_unlock(&sem_ids,id) @@ -183,6 +184,7 @@ } up(&sem_ids.sem); + TRACE_IPC(TRACE_EV_IPC_SEM_CREATE, err, semflg); return err; } diff -urN -X dontdiff linux-2.5.33/ipc/shm.c linux-2.5.33.ltt.lockless/ipc/shm.c --- linux-2.5.33/ipc/shm.c Sat Aug 31 15:05:23 2002 +++ linux-2.5.33.ltt.lockless/ipc/shm.c Sun Sep 8 23:00:20 2002 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "util.h" @@ -245,6 +246,7 @@ shm_unlock(id); } up(&shm_ids.sem); + TRACE_IPC(TRACE_EV_IPC_SHM_CREATE, err, shmflg); return err; } diff -urN -X dontdiff linux-2.5.33/kernel/Makefile linux-2.5.33.ltt.lockless/kernel/Makefile --- linux-2.5.33/kernel/Makefile Sat Aug 31 15:04:52 2002 +++ linux-2.5.33.ltt.lockless/kernel/Makefile Sun Sep 8 23:00:20 2002 @@ -10,7 +10,7 @@ O_TARGET := kernel.o export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o \ - printk.o platform.o suspend.o dma.o + printk.o platform.o suspend.o dma.o trace.o obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ module.o exit.o itimer.o time.o softirq.o resource.o \ @@ -34,4 +34,8 @@ CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer endif +ifdef CONFIG_TRACE +obj-y += trace.o +endif + include $(TOPDIR)/Rules.make diff -urN -X dontdiff 
linux-2.5.33/kernel/exit.c linux-2.5.33.ltt.lockless/kernel/exit.c --- linux-2.5.33/kernel/exit.c Sat Aug 31 15:04:55 2002 +++ linux-2.5.33.ltt.lockless/kernel/exit.c Sun Sep 8 23:00:20 2002 @@ -20,6 +20,8 @@ #include #include +#include + #include #include #include @@ -582,6 +584,8 @@ acct_process(code); __exit_mm(tsk); + TRACE_PROCESS(TRACE_EV_PROCESS_EXIT, 0, 0); + sem_exit(); __exit_files(tsk); __exit_fs(tsk); @@ -641,6 +645,8 @@ if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; + TRACE_PROCESS(TRACE_EV_PROCESS_WAIT, pid, 0); + add_wait_queue(¤t->wait_chldexit,&wait); repeat: flag = 0; diff -urN -X dontdiff linux-2.5.33/kernel/fork.c linux-2.5.33.ltt.lockless/kernel/fork.c --- linux-2.5.33/kernel/fork.c Sat Aug 31 15:04:49 2002 +++ linux-2.5.33.ltt.lockless/kernel/fork.c Sun Sep 8 23:00:20 2002 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -907,6 +908,8 @@ if (p->ptrace & PT_PTRACED) send_sig(SIGSTOP, p, 1); + TRACE_PROCESS(TRACE_EV_PROCESS_FORK, p->pid, 0); + wake_up_forked_process(p); /* do this last */ ++total_forks; if (clone_flags & CLONE_VFORK) diff -urN -X dontdiff linux-2.5.33/kernel/itimer.c linux-2.5.33.ltt.lockless/kernel/itimer.c --- linux-2.5.33/kernel/itimer.c Sat Aug 31 15:04:58 2002 +++ linux-2.5.33.ltt.lockless/kernel/itimer.c Sun Sep 8 23:00:20 2002 @@ -10,6 +10,8 @@ #include #include +#include + #include int do_getitimer(int which, struct itimerval *value) @@ -67,6 +69,8 @@ struct task_struct * p = (struct task_struct *) __data; unsigned long interval; + TRACE_TIMER(TRACE_EV_TIMER_EXPIRED, 0, 0, 0); + send_sig(SIGALRM, p, 1); interval = p->it_real_incr; if (interval) { @@ -86,6 +90,7 @@ j = timeval_to_jiffies(&value->it_value); if (ovalue && (k = do_getitimer(which, ovalue)) < 0) return k; + TRACE_TIMER(TRACE_EV_TIMER_SETITIMER, which, i, j); switch (which) { case ITIMER_REAL: del_timer_sync(¤t->real_timer); diff -urN -X dontdiff linux-2.5.33/kernel/sched.c 
linux-2.5.33.ltt.lockless/kernel/sched.c --- linux-2.5.33/kernel/sched.c Sat Aug 31 15:04:53 2002 +++ linux-2.5.33.ltt.lockless/kernel/sched.c Sun Sep 8 23:00:20 2002 @@ -29,6 +29,7 @@ #include #include #include +#include /* * Convert user-nice values [ -20 ... 0 ... 19 ] @@ -404,6 +405,8 @@ long old_state; runqueue_t *rq; + TRACE_PROCESS(TRACE_EV_PROCESS_WAKEUP, p->pid, p->state); + repeat_lock_task: rq = task_rq_lock(p, &flags); old_state = p->state; @@ -1007,8 +1010,11 @@ if (likely(prev != next)) { rq->nr_switches++; rq->curr = next; - + prepare_arch_switch(rq, next); + + TRACE_SCHEDCHANGE(prev, next); + prev = context_switch(prev, next); barrier(); rq = this_rq(); diff -urN -X dontdiff linux-2.5.33/kernel/signal.c linux-2.5.33.ltt.lockless/kernel/signal.c --- linux-2.5.33/kernel/signal.c Sat Aug 31 15:04:57 2002 +++ linux-2.5.33.ltt.lockless/kernel/signal.c Sun Sep 8 23:00:20 2002 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -572,6 +573,8 @@ if (sig < SIGRTMIN && sigismember(&t->pending.signal, sig)) goto out; + TRACE_PROCESS(TRACE_EV_PROCESS_SIGNAL, sig, t->pid); + ret = deliver_signal(sig, info, t); out: spin_unlock_irqrestore(&t->sigmask_lock, flags); diff -urN -X dontdiff linux-2.5.33/kernel/softirq.c linux-2.5.33.ltt.lockless/kernel/softirq.c --- linux-2.5.33/kernel/softirq.c Sat Aug 31 15:04:52 2002 +++ linux-2.5.33.ltt.lockless/kernel/softirq.c Sun Sep 8 23:00:20 2002 @@ -18,6 +18,7 @@ #include #include #include +#include /* - No shared variables, all the data are CPU local. 
@@ -85,8 +86,10 @@ h = softirq_vec; do { - if (pending & 1) + if (pending & 1) { + TRACE_SOFT_IRQ(TRACE_EV_SOFT_IRQ_SOFT_IRQ, (h - softirq_vec)); h->action(h); + } h++; pending >>= 1; } while (pending); @@ -194,6 +197,9 @@ if (!atomic_read(&t->count)) { if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); + + TRACE_SOFT_IRQ(TRACE_EV_SOFT_IRQ_TASKLET_ACTION, (unsigned long) (t->func)); + t->func(t->data); tasklet_unlock(t); continue; @@ -227,6 +233,9 @@ if (!atomic_read(&t->count)) { if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); + + TRACE_SOFT_IRQ(TRACE_EV_SOFT_IRQ_TASKLET_HI_ACTION, (unsigned long) (t->func)); + t->func(t->data); tasklet_unlock(t); continue; @@ -290,8 +299,10 @@ if (!spin_trylock(&global_bh_lock)) goto resched; - if (bh_base[nr]) + if (bh_base[nr]){ + TRACE_SOFT_IRQ(TRACE_EV_SOFT_IRQ_BOTTOM_HALF, (nr)); bh_base[nr](); + } hardirq_endlock(); spin_unlock(&global_bh_lock); diff -urN -X dontdiff linux-2.5.33/kernel/timer.c linux-2.5.33.ltt.lockless/kernel/timer.c --- linux-2.5.33/kernel/timer.c Sat Aug 31 15:04:54 2002 +++ linux-2.5.33.ltt.lockless/kernel/timer.c Sun Sep 8 23:00:20 2002 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -659,6 +660,7 @@ void timer_bh(void) { + TRACE_EVENT(TRACE_EV_KERNEL_TIMER, NULL); update_times(); run_timer_list(); } @@ -788,6 +790,7 @@ static void process_timeout(unsigned long __data) { + TRACE_TIMER(TRACE_EV_TIMER_EXPIRED, 0, 0, 0); wake_up_process((task_t *)__data); } diff -urN -X dontdiff linux-2.5.33/kernel/trace.c linux-2.5.33.ltt.lockless/kernel/trace.c --- linux-2.5.33/kernel/trace.c Wed Dec 31 16:00:00 1969 +++ linux-2.5.33.ltt.lockless/kernel/trace.c Sun Sep 8 23:00:20 2002 @@ -0,0 +1,631 @@ +/* + * linux/kernel/trace.c + * + * (C) Copyright 1999, 2000, 2001, 2002 - Karim Yaghmour (karym@opersys.com) + * + * This code is distributed under the GPL license + * + * Tracing management + * + */ + +#include /* For __init */ +#include /* Tracing definitions */ 
+#include /* Miscellaneous error codes */
+#include /* NULL */
+#include /* kmalloc() */
+#include /* EXPORT_SYMBOL */
+#include /* pid_t */
+
+/* Global variables. These flags are exported at the end of this file and
+are compared against 1 by the system call entry/exit paths before the
+system call trace hooks are invoked. */
+unsigned int syscall_entry_trace_active = 0;
+unsigned int syscall_exit_trace_active = 0;
+
+/* Local variables */
+static int tracer_registered = 0;	/* Is there a tracer registered (1 = yes, 0 = no) */
+struct tracer *tracer = NULL;	/* The registered tracer; allocated by register_tracer(), freed by unregister_tracer() */
+
+/* Registration lock. This lock avoids a race condition in case a tracer is
+removed while an event is being traced. trace_event() takes it for reading;
+register_tracer() and unregister_tracer() take it for writing. */
+rwlock_t tracer_register_lock = RW_LOCK_UNLOCKED;
+
+/* Trace callback table entry: one node of a singly-linked list of callbacks */
+struct trace_callback_table_entry {
+	tracer_call callback;	/* Function called when the event occurs */
+
+	struct trace_callback_table_entry *next;	/* Next callback for the same event, NULL at the end */
+};
+
+/* Trace callback table. The slot for event ID n is trace_callback_table[n - 1];
+each slot acts as a list head whose "next" chain holds the registered callbacks. */
+struct trace_callback_table_entry trace_callback_table[TRACE_EV_MAX];
+
+/* Custom event description: one node of the doubly-linked circular list */
+struct custom_event_desc {
+	trace_new_event event;	/* Event ID, type/description/format strings and format type */
+
+	pid_t owner_pid;	/* PID of the event's owner (0 if none) */
+
+	struct custom_event_desc *next;
+	struct custom_event_desc *prev;
+};
+
+/* Next event ID to be used (set to TRACE_EV_MAX + 1 by trace_init()) */
+int next_event_id;
+
+/* Circular list of custom events. custom_events points at custom_events_head,
+which serves as the list's sentinel node (see trace_init()). */
+struct custom_event_desc custom_events_head;
+struct custom_event_desc *custom_events;
+
+/* Circular list lock. This is a classic lock that provides for atomic access
+to the circular list.
*/ +rwlock_t custom_list_lock = RW_LOCK_UNLOCKED; + +/********************************************************************* + * Register the tracer to the kernel + * Return values : + * 0, all is OK + * -EBUSY, there already is a registered tracer + * -ENOMEM, couldn't allocate memory + *********************************************************************/ +int register_tracer(tracer_call pm_trace_function) +{ + unsigned long l_flags; + + if (tracer_registered == 1) + return -EBUSY; + + /* Allocate memory for the tracer */ + if ((tracer = (struct tracer *) kmalloc(sizeof(struct tracer), GFP_ATOMIC)) == NULL) + return -ENOMEM; + + /* Safely register the new tracer */ + write_lock_irqsave(&tracer_register_lock, l_flags); + tracer_registered = 1; + tracer->trace = pm_trace_function; + write_unlock_irqrestore(&tracer_register_lock, l_flags); + + /* Initialize the tracer settings */ + tracer->fetch_syscall_eip_use_bounds = 0; + tracer->fetch_syscall_eip_use_depth = 0; + + return 0; +} + +/********************************************************************* + * Unregister the currently registered tracer + * Return values : + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer + * -ENXIO, unregestering wrong tracer + *********************************************************************/ +int unregister_tracer(tracer_call pm_trace_function) +{ + unsigned long l_flags; + + if (tracer_registered == 0) + return -ENOMEDIUM; + + write_lock_irqsave(&tracer_register_lock, l_flags); + + /* Is it the tracer that was registered */ + if (tracer->trace == pm_trace_function) + /* There isn't any tracer in here */ + tracer_registered = 0; + else { + write_unlock_irqrestore(&tracer_register_lock, l_flags); + return -ENXIO; + } + + /* Free the memory used by the tracing structure */ + kfree(tracer); + tracer = NULL; + + write_unlock_irqrestore(&tracer_register_lock, l_flags); + + return 0; +} + +/********************************************************************* + * Set the 
tracing configuration + * Parameters : + * pm_trace_function, the trace function. + * pm_fetch_syscall_use_depth, Use depth to fetch eip + * pm_fetch_syscall_use_bounds, Use bounds to fetch eip + * pm_syscall_eip_depth, Detph to fetch eip + * pm_syscall_lower_bound, Lower bound eip address + * pm_syscall_upper_bound, Upper bound eip address + * Return values : + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer + * -ENXIO, wrong tracer + * -EINVAL, invalid configuration + *********************************************************************/ +int trace_set_config(tracer_call pm_trace_function, + int pm_fetch_syscall_use_depth, + int pm_fetch_syscall_use_bounds, + int pm_syscall_eip_depth, + void *pm_syscall_lower_bound, + void *pm_syscall_upper_bound) +{ + if (tracer_registered == 0) + return -ENOMEDIUM; + + /* Is this the tracer that is already registered */ + if (tracer->trace != pm_trace_function) + return -ENXIO; + + /* Is this a valid configuration */ + if ((pm_fetch_syscall_use_depth && pm_fetch_syscall_use_bounds) + || (pm_syscall_lower_bound > pm_syscall_upper_bound) + || (pm_syscall_eip_depth < 0)) + return -EINVAL; + + /* Set the configuration */ + tracer->fetch_syscall_eip_use_depth = pm_fetch_syscall_use_depth; + tracer->fetch_syscall_eip_use_bounds = pm_fetch_syscall_use_bounds; + tracer->syscall_eip_depth = pm_syscall_eip_depth; + tracer->syscall_lower_eip_bound = pm_syscall_lower_bound; + tracer->syscall_upper_eip_bound = pm_syscall_upper_bound; + + return 0; +} + +/********************************************************************* + * Get the tracing configuration + * Parameters : + * pm_fetch_syscall_use_depth, Use depth to fetch eip + * pm_fetch_syscall_use_bounds, Use bounds to fetch eip + * pm_syscall_eip_depth, Detph to fetch eip + * pm_syscall_lower_bound, Lower bound eip address + * pm_syscall_upper_bound, Upper bound eip address + * Return values : + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer + 
*********************************************************************/ +int trace_get_config(int *pm_fetch_syscall_use_depth, + int *pm_fetch_syscall_use_bounds, + int *pm_syscall_eip_depth, + void **pm_syscall_lower_bound, + void **pm_syscall_upper_bound) +{ + if (tracer_registered == 0) + return -ENOMEDIUM; + + /* Get the configuration */ + *pm_fetch_syscall_use_depth = tracer->fetch_syscall_eip_use_depth; + *pm_fetch_syscall_use_bounds = tracer->fetch_syscall_eip_use_bounds; + *pm_syscall_eip_depth = tracer->syscall_eip_depth; + *pm_syscall_lower_bound = tracer->syscall_lower_eip_bound; + *pm_syscall_upper_bound = tracer->syscall_upper_eip_bound; + + return 0; +} + +/********************************************************************* + * Register a callback function to be called on occurence + * of given event + * Parameters : + * pm_trace_function, the callback function. + * pm_event_id, the event ID to be monitored. + * Return values : + * 0, all is OK + * -ENOMEM, unable to allocate memory for callback + *********************************************************************/ +int trace_register_callback(tracer_call pm_trace_function, + u8 pm_event_id) +{ + struct trace_callback_table_entry *p_tct_entry; + + /* Search for an empty entry in the callback table */ + for (p_tct_entry = &(trace_callback_table[pm_event_id - 1]); + p_tct_entry->next != NULL; + p_tct_entry = p_tct_entry->next); + + /* Allocate a new callback */ + if ((p_tct_entry->next = kmalloc(sizeof(struct trace_callback_table_entry), GFP_ATOMIC)) == NULL) + return -ENOMEM; + + /* Setup the new callback */ + p_tct_entry->next->callback = pm_trace_function; + p_tct_entry->next->next = NULL; + + return 0; +} + +/********************************************************************* + * UnRegister a callback function. + * Parameters : + * pm_trace_function, the callback function. + * pm_event_id, the event ID that had to be monitored. 
+ * Return values :
+ *   0, all is OK
+ *   -ENOMEDIUM, no such callback registered
+ *********************************************************************/
+int trace_unregister_callback(tracer_call pm_trace_function,
+			      u8 pm_event_id)
+{
+	struct trace_callback_table_entry *p_tct_entry;
+	struct trace_callback_table_entry *p_temp_entry;
+
+	/* The table has TRACE_EV_MAX slots indexed by (event ID - 1); nothing
+	   can be registered for an ID outside of that range */
+	if ((pm_event_id < 1) || (pm_event_id > TRACE_EV_MAX))
+		return -ENOMEDIUM;
+
+	/* Search for the callback in the callback table. On exit p_tct_entry
+	   is the entry PRECEDING the match, or the last entry of the list if
+	   there is no match. */
+	for (p_tct_entry = &(trace_callback_table[pm_event_id - 1]);
+	     ((p_tct_entry->next != NULL) && (p_tct_entry->next->callback != pm_trace_function));
+	     p_tct_entry = p_tct_entry->next);
+
+	/* Did we find anything. p_tct_entry itself is never NULL here; a
+	   failed search shows as a NULL successor. */
+	if (p_tct_entry->next == NULL)
+		return -ENOMEDIUM;
+
+	/* Unlink the callback entry we found, then free it */
+	p_temp_entry = p_tct_entry->next;
+	p_tct_entry->next = p_temp_entry->next;
+	kfree(p_temp_entry);
+
+	return 0;
+}
+
+/*********************************************************************
+ * Create a new traceable event type
+ * Parameters :
+ *   pm_event_type, string describing event type
+ *   pm_event_desc, string used for standard formatting
+ *   pm_format_type, type of formatting used to log event data
+ *   pm_format_data, data specific to format
+ *   pm_owner_pid, PID of event's owner (0 if none)
+ * Return values :
+ *   New Event ID if all is OK
+ *   -ENOMEM, Unable to allocate new event
+ *********************************************************************/
+int _trace_create_event(char *pm_event_type,
+			char *pm_event_desc,
+			int pm_format_type,
+			char *pm_format_data,
+			pid_t pm_owner_pid)
+{
+	struct custom_event_desc *p_new_event;
+
+	/* Create event */
+	if ((p_new_event = (struct custom_event_desc *) kmalloc(sizeof(struct custom_event_desc), GFP_ATOMIC)) == NULL)
+		return -ENOMEM;
+
+	/* Initialize event properties */
+	p_new_event->event.type[0] = '\0';
+	p_new_event->event.desc[0] = '\0';
+	p_new_event->event.form[0] = '\0';
+
+	/* Set basic event properties */
+	if (pm_event_type != NULL)
+		strncpy(p_new_event->event.type, pm_event_type,
CUSTOM_EVENT_TYPE_STR_LEN);
+	if (pm_event_desc != NULL)
+		strncpy(p_new_event->event.desc, pm_event_desc, CUSTOM_EVENT_DESC_STR_LEN);
+	if (pm_format_data != NULL)
+		strncpy(p_new_event->event.form, pm_format_data, CUSTOM_EVENT_FORM_STR_LEN);
+
+	/* Ensure that strings are bound: strncpy() leaves the destination
+	   unterminated when the source fills the whole buffer */
+	p_new_event->event.type[CUSTOM_EVENT_TYPE_STR_LEN - 1] = '\0';
+	p_new_event->event.desc[CUSTOM_EVENT_DESC_STR_LEN - 1] = '\0';
+	p_new_event->event.form[CUSTOM_EVENT_FORM_STR_LEN - 1] = '\0';
+
+	/* Set format type */
+	p_new_event->event.format_type = pm_format_type;
+
+	/* Set event's owner */
+	p_new_event->owner_pid = pm_owner_pid;
+
+	write_lock(&custom_list_lock);
+
+	/* Give the new event a unique event ID. This is done under the list
+	   lock because the read-then-increment of next_event_id is not
+	   atomic: two concurrent creators could otherwise get the same ID. */
+	p_new_event->event.id = next_event_id;
+	next_event_id++;
+
+	/* Insert new event at the tail of the event list */
+	p_new_event->next = custom_events;
+	p_new_event->prev = custom_events->prev;
+	custom_events->prev->next = p_new_event;
+	custom_events->prev = p_new_event;
+	write_unlock(&custom_list_lock);
+
+	/* Log the event creation event */
+	trace_event(TRACE_EV_NEW_EVENT, &(p_new_event->event));
+
+	return p_new_event->event.id;
+}
+
+/* Create a new traceable event type that has no owner (owner PID 0).
+   Parameters and return values are those of _trace_create_event(). */
+int trace_create_event(char *pm_event_type,
+		       char *pm_event_desc,
+		       int pm_format_type,
+		       char *pm_format_data)
+{
+	return _trace_create_event(pm_event_type, pm_event_desc, pm_format_type, pm_format_data, 0);
+}
+
+/* Create a new traceable event type owned by pm_owner_pid.
+   Parameters and return values are those of _trace_create_event(). */
+int trace_create_owned_event(char *pm_event_type,
+			     char *pm_event_desc,
+			     int pm_format_type,
+			     char *pm_format_data,
+			     pid_t pm_owner_pid)
+{
+	return _trace_create_event(pm_event_type, pm_event_desc, pm_format_type, pm_format_data, pm_owner_pid);
+}
+
+/*********************************************************************
+ * Destroy a created event type
+ * Parameters :
+ *   pm_event_id, the Id returned by trace_create_event()
+ * Return values :
+ *   NONE
+ *********************************************************************/
+void trace_destroy_event(int pm_event_id)
+{
+	struct custom_event_desc *p_event_desc;
+
+	
write_lock(&custom_list_lock);
+
+	/* Find the event to destroy in the event description list */
+	for (p_event_desc = custom_events->next;
+	     p_event_desc != custom_events;
+	     p_event_desc = p_event_desc->next)
+		if (p_event_desc->event.id == pm_event_id)
+			break;
+
+	/* If we found something (the walk stopped before coming back to the list head) */
+	if (p_event_desc != custom_events) {
+		/* Remove the event from the list */
+		p_event_desc->next->prev = p_event_desc->prev;
+		p_event_desc->prev->next = p_event_desc->next;
+
+		/* Free the memory used by this event */
+		kfree(p_event_desc);
+	}
+	write_unlock(&custom_list_lock);
+}
+
+/*********************************************************************
+ * Destroy an owner's events
+ * Parameters :
+ *   pm_owner_pid, the PID of the owner whose events are to be deleted.
+ * Return values :
+ *   NONE
+ *********************************************************************/
+void trace_destroy_owners_events(pid_t pm_owner_pid)
+{
+	struct custom_event_desc *p_temp_event;
+	struct custom_event_desc *p_event_desc;
+
+	write_lock(&custom_list_lock);
+
+	/* Start at the first event in the list */
+	p_event_desc = custom_events->next;
+
+	/* Find all events belonging to the PID */
+	while (p_event_desc != custom_events) {
+		p_temp_event = p_event_desc->next;	/* Saved now: p_event_desc may be freed below */
+
+		/* Does this event belong to the same owner */
+		if (p_event_desc->owner_pid == pm_owner_pid) {
+			/* Remove the event from the list */
+			p_event_desc->next->prev = p_event_desc->prev;
+			p_event_desc->prev->next = p_event_desc->next;
+
+			/* Free the memory used by this event */
+			kfree(p_event_desc);
+		}
+		p_event_desc = p_temp_event;
+	}
+
+	write_unlock(&custom_list_lock);
+}
+
+/*********************************************************************
+ * Relog the declarations of custom events. This is necessary to make
+ * sure that even though the event creation might not have taken place
+ * during a previous trace, that all custom events be part of all traces.
+ * Hence, if a custom event occurs during a new trace, we can be sure
+ * that its definition will also be part of the trace.
+ * Parameters :
+ *   NONE
+ * Return values :
+ *   NONE
+ *********************************************************************/
+void trace_reregister_custom_events(void)
+{
+	struct custom_event_desc *p_event_desc;
+
+	read_lock(&custom_list_lock);
+
+	/* Log an event creation for every description in the list; the
+	   return value of trace_event() is ignored here */
+	for (p_event_desc = custom_events->next;
+	     p_event_desc != custom_events;
+	     p_event_desc = p_event_desc->next)
+		trace_event(TRACE_EV_NEW_EVENT, &(p_event_desc->event));
+
+	read_unlock(&custom_list_lock);
+}
+
+/*********************************************************************
+ * Trace a formatted event. The description string given when the event
+ * was created is used as the printf-like format string.
+ * Parameters :
+ *   pm_event_id, the event Id provided upon creation
+ *   ..., printf-like data that will be used to fill the event string.
+ * Return values :
+ *   Trace fct return code if OK.
+ *   -ENOMEDIUM, there is no registered tracer or event doesn't exist.
+ *********************************************************************/
+int trace_std_formatted_event(int pm_event_id,...)
+{
+	int l_string_size;	/* Size of the string output by vsnprintf() */
+	char l_string[CUSTOM_EVENT_FINAL_STR_LEN];	/* Final formatted string */
+	va_list l_var_arg_list;	/* Variable argument list */
+	trace_custom l_custom;
+	struct custom_event_desc *p_event_desc;
+
+	read_lock(&custom_list_lock);
+
+	/* Find the event description matching this event */
+	for (p_event_desc = custom_events->next;
+	     p_event_desc != custom_events;
+	     p_event_desc = p_event_desc->next)
+		if (p_event_desc->event.id == pm_event_id)
+			break;
+
+	/* If we haven't found anything */
+	if (p_event_desc == custom_events) {
+		read_unlock(&custom_list_lock);
+
+		return -ENOMEDIUM;
+	}
+	/* Set custom event Id */
+	l_custom.id = pm_event_id;
+
+	/* Initialize variable argument list access */
+	va_start(l_var_arg_list, pm_event_id);
+
+	/* Print the description out to the temporary buffer. The output is
+	   bounded to the buffer size: the expanded string depends on the
+	   caller's arguments and an unbounded vsprintf() could overrun the
+	   on-stack buffer. The description is read while the list lock is
+	   still held so the event cannot be destroyed underneath us. */
+	l_string_size = vsnprintf(l_string, sizeof(l_string), p_event_desc->event.desc, l_var_arg_list);
+
+	read_unlock(&custom_list_lock);
+
+	/* Facilitate return to caller */
+	va_end(l_var_arg_list);
+
+	/* vsnprintf() may report the length the full string would have had;
+	   clamp it to what actually fits and make sure it is terminated */
+	if (l_string_size < 0)
+		l_string_size = 0;
+	else if (l_string_size >= (int) sizeof(l_string))
+		l_string_size = (int) sizeof(l_string) - 1;
+	l_string[l_string_size] = '\0';
+
+	/* Set the size of the event (string plus terminating NUL) */
+	l_custom.data_size = (u32) (l_string_size + 1);
+
+	/* Set the pointer to the event data */
+	l_custom.data = l_string;
+
+	/* Log the custom event */
+	return trace_event(TRACE_EV_CUSTOM, &l_custom);
+}
+
+/*********************************************************************
+ * Trace a raw event
+ * Parameters :
+ *   pm_event_id, the event Id provided upon creation
+ *   pm_event_size, the size of the data provided
+ *   pm_event_data, data buffer describing event
+ * Return values :
+ *   Trace fct return code if OK.
+ *   -ENOMEDIUM, there is no registered tracer or event doesn't exist.
+ *********************************************************************/
+int trace_raw_event(int pm_event_id, int pm_event_size, void *pm_event_data)
+{
+	trace_custom l_custom;
+	struct custom_event_desc *p_event_desc;
+
+	/* A negative size would pass the upper-bound test below and then turn
+	   into a huge unsigned size once cast to u32; refuse it with -EINVAL */
+	if (pm_event_size < 0)
+		return -EINVAL;
+
+	read_lock(&custom_list_lock);
+
+	/* Find the event description matching this event */
+	for (p_event_desc = custom_events->next;
+	     p_event_desc != custom_events;
+	     p_event_desc = p_event_desc->next)
+		if (p_event_desc->event.id == pm_event_id)
+			break;
+
+	read_unlock(&custom_list_lock);
+
+	/* If we haven't found anything. Only the pointer value is compared
+	   here; the description itself is not accessed after the unlock. */
+	if (p_event_desc == custom_events)
+		return -ENOMEDIUM;
+
+	/* Set custom event Id */
+	l_custom.id = pm_event_id;
+
+	/* Set the data size, truncating oversized events to the maximum */
+	if (pm_event_size <= CUSTOM_EVENT_MAX_SIZE)
+		l_custom.data_size = (u32) pm_event_size;
+	else
+		l_custom.data_size = (u32) CUSTOM_EVENT_MAX_SIZE;
+
+	/* Set the pointer to the event data */
+	l_custom.data = pm_event_data;
+
+	/* Log the custom event */
+	return trace_event(TRACE_EV_CUSTOM, &l_custom);
+}
+
+/*********************************************************************
+ * Trace an event
+ * Parameters :
+ *   pm_event_id, the event's ID (check out trace.h)
+ *   pm_event_struct, the structure describing the event
+ * Return values :
+ *   Trace fct return code if OK.
+ *   -ENOMEDIUM, there is no registered tracer
+ *********************************************************************/
+int trace_event(u8 pm_event_id,
+		void *pm_event_struct)
+{
+	int l_ret_value;
+	struct trace_callback_table_entry *p_tct_entry;
+
+	/* The read lock keeps the tracer from being unregistered and freed
+	   while its trace function is running */
+	read_lock(&tracer_register_lock);
+
+	/* Is there a tracer registered */
+	if (tracer_registered != 1)
+		l_ret_value = -ENOMEDIUM;
+	else
+		/* Call the tracer */
+		l_ret_value = tracer->trace(pm_event_id, pm_event_struct);
+
+	read_unlock(&tracer_register_lock);
+
+	/* Is this a native event with a slot in the callback table. The table
+	   is indexed by (event ID - 1), so ID 0 must be excluded as well:
+	   it would otherwise read trace_callback_table[-1]. */
+	if ((pm_event_id >= 1) && (pm_event_id <= TRACE_EV_MAX)) {
+		/* Call all the callbacks linked to this event (callbacks run
+		   whether or not a tracer is registered) */
+		for (p_tct_entry = trace_callback_table[pm_event_id - 1].next;
+		     p_tct_entry != NULL;
+		     p_tct_entry = p_tct_entry->next)
+			p_tct_entry->callback(pm_event_id, pm_event_struct);
+	}
+	return l_ret_value;
+}
+
+/*********************************************************************
+ * Initialize trace facility
+ * Parameters :
+ *   NONE
+ * Return values :
+ *   0, if everything went ok.
+ *********************************************************************/
+static int __init trace_init(void)
+{
+	int i;
+
+	/* Initialize callback table: every slot starts with no callback and
+	   an empty callback list */
+	for (i = 0; i < TRACE_EV_MAX; i++) {
+		trace_callback_table[i].callback = NULL;
+		trace_callback_table[i].next = NULL;
+	}
+
+	/* Initialize next event ID to be used: custom event IDs start right
+	   after the last native event ID */
+	next_event_id = TRACE_EV_MAX + 1;
+
+	/* Initialize custom events list: an empty circular list is the head
+	   node pointing at itself in both directions */
+	custom_events = &custom_events_head;
+	custom_events->next = custom_events;
+	custom_events->prev = custom_events;
+
+	return 0;
+}
+
+module_init(trace_init);
+
+/* Export symbols so that they are visible from outside this file */
+EXPORT_SYMBOL(register_tracer);
+EXPORT_SYMBOL(unregister_tracer);
+EXPORT_SYMBOL(trace_set_config);
+EXPORT_SYMBOL(trace_get_config);
+EXPORT_SYMBOL(trace_register_callback);
+EXPORT_SYMBOL(trace_unregister_callback);
+EXPORT_SYMBOL(trace_create_event);
+EXPORT_SYMBOL(trace_create_owned_event);
+EXPORT_SYMBOL(trace_destroy_event);
+EXPORT_SYMBOL(trace_destroy_owners_events);
+EXPORT_SYMBOL(trace_reregister_custom_events);
+EXPORT_SYMBOL(trace_std_formatted_event);
+EXPORT_SYMBOL(trace_raw_event);
+EXPORT_SYMBOL(trace_event);
+
+EXPORT_SYMBOL(syscall_entry_trace_active);
+EXPORT_SYMBOL(syscall_exit_trace_active);
diff -urN -X dontdiff linux-2.5.33/mm/filemap.c linux-2.5.33.ltt.lockless/mm/filemap.c --- linux-2.5.33/mm/filemap.c Sat Aug 31 15:04:55 2002 +++ linux-2.5.33.ltt.lockless/mm/filemap.c Sun Sep 8 23:00:20 2002 @@ -23,6 +23,7 @@ #include #include #include +#include /* * This is needed for the following functions: * - try_to_release_page @@ -634,10 +635,12 @@ set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!test_bit(bit_nr, &page->flags)) break; + TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_WAIT_START, 0); sync_page(page); schedule(); } while (test_bit(bit_nr, &page->flags)); __set_task_state(tsk, TASK_RUNNING); + TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_WAIT_END, 0); remove_wait_queue(waitqueue, &wait); } EXPORT_SYMBOL(wait_on_page_bit); diff -urN -X
dontdiff linux-2.5.33/mm/memory.c linux-2.5.33.ltt.lockless/mm/memory.c --- linux-2.5.33/mm/memory.c Sat Aug 31 15:05:03 2002 +++ linux-2.5.33.ltt.lockless/mm/memory.c Sun Sep 8 23:00:20 2002 @@ -45,6 +45,9 @@ #include #include +#include +#include + #include #include #include @@ -1157,6 +1160,7 @@ spin_unlock(&mm->page_table_lock); page = lookup_swap_cache(entry); if (!page) { + TRACE_MEMORY(TRACE_EV_MEMORY_SWAP_IN, address); swapin_readahead(entry); page = read_swap_cache_async(entry); if (!page) { diff -urN -X dontdiff linux-2.5.33/mm/page_alloc.c linux-2.5.33.ltt.lockless/mm/page_alloc.c --- linux-2.5.33/mm/page_alloc.c Sat Aug 31 15:04:48 2002 +++ linux-2.5.33.ltt.lockless/mm/page_alloc.c Sun Sep 8 23:00:20 2002 @@ -23,6 +23,7 @@ #include #include #include +#include unsigned long totalram_pages; unsigned long totalhigh_pages; @@ -98,6 +99,8 @@ ClearPageDirty(page); BUG_ON(page_count(page) != 0); + TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_FREE, order); + if (unlikely(current->flags & PF_FREE_PAGES)) { if (!current->nr_local_pages && !in_interrupt()) { list_add(&page->list, ¤t->local_pages); @@ -434,6 +437,7 @@ page = alloc_pages(gfp_mask, order); if (!page) return 0; + TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_ALLOC, order); return (unsigned long) page_address(page); } diff -urN -X dontdiff linux-2.5.33/mm/page_io.c linux-2.5.33.ltt.lockless/mm/page_io.c --- linux-2.5.33/mm/page_io.c Sat Aug 31 15:04:49 2002 +++ linux-2.5.33.ltt.lockless/mm/page_io.c Sun Sep 8 23:00:20 2002 @@ -18,6 +18,7 @@ #include #include /* for block_sync_page() */ #include +#include #include static struct bio * @@ -95,6 +96,7 @@ kstat.pswpout++; SetPageWriteback(page); unlock_page(page); + TRACE_MEMORY(TRACE_EV_MEMORY_SWAP_OUT, (unsigned long) page); submit_bio(WRITE, bio); out: return ret; diff -urN -X dontdiff linux-2.5.33/net/core/dev.c linux-2.5.33.ltt.lockless/net/core/dev.c --- linux-2.5.33/net/core/dev.c Sat Aug 31 15:05:23 2002 +++ linux-2.5.33.ltt.lockless/net/core/dev.c Sun Sep 8 23:00:20 2002 
@@ -105,10 +105,12 @@ #include #include #include +#include #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO) #include /* Note : will define WIRELESS_EXT */ #include #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */ + #ifdef CONFIG_PLIP extern int plip_init(void); #endif @@ -1007,6 +1009,8 @@ goto out; } + TRACE_NETWORK(TRACE_EV_NETWORK_PACKET_OUT, skb->protocol); + /* Grab device queue */ spin_lock_bh(&dev->queue_lock); q = dev->qdisc; @@ -1440,6 +1444,8 @@ netdev_rx_stat[smp_processor_id()].total++; + TRACE_NETWORK(TRACE_EV_NETWORK_PACKET_IN, skb->protocol); + #ifdef CONFIG_NET_FASTROUTE if (skb->pkt_type == PACKET_FASTROUTE) { netdev_rx_stat[smp_processor_id()].fastroute_deferred_out++; diff -urN -X dontdiff linux-2.5.33/net/socket.c linux-2.5.33.ltt.lockless/net/socket.c --- linux-2.5.33/net/socket.c Sat Aug 31 15:04:55 2002 +++ linux-2.5.33.ltt.lockless/net/socket.c Sun Sep 8 23:00:20 2002 @@ -75,6 +75,8 @@ #include #include +#include + #if defined(CONFIG_KMOD) && defined(CONFIG_NET) #include #endif @@ -518,6 +520,8 @@ int err; struct scm_cookie scm; + TRACE_SOCKET(TRACE_EV_SOCKET_SEND, sock->type, size); + err = scm_send(sock, msg, &scm); if (err >= 0) { err = sock->ops->sendmsg(sock, msg, size, &scm); @@ -532,6 +536,8 @@ memset(&scm, 0, sizeof(scm)); + TRACE_SOCKET(TRACE_EV_SOCKET_RECEIVE, sock->type, size); + size = sock->ops->recvmsg(sock, msg, size, flags, &scm); if (size >= 0) scm_recv(sock, msg, &scm, flags); @@ -927,6 +933,8 @@ if (retval < 0) goto out_release; + TRACE_SOCKET(TRACE_EV_SOCKET_CREATE, retval, type); + out: /* It may be already another descriptor 8) Not kernel problem. */ return retval; @@ -1564,6 +1572,8 @@ a0=a[0]; a1=a[1]; + + TRACE_SOCKET(TRACE_EV_SOCKET_CALL, call, a0); switch(call) {