一文弄懂printf函数从用户态到内核态的执行流程

您所在的位置:网站首页 printf中的赋值语句会执行吗为什么 一文弄懂printf函数从用户态到内核态的执行流程

一文弄懂printf函数从用户态到内核态的执行流程

2024-07-13 02:43:32| 来源: 网络整理| 查看: 265

目录 1.简介2.示例代码3.程序执行初探4.用户态处理流程5.内核态处理流程5.1. 软中断处理5.2 系统调用返回5.3 系统调用处理5.4 stdout重定向到console5.5 tty及sstar uart驱动5.6 sstar uart dma发送线程 6 问:为什么printf打印不会卡?7.参考文献

1.简介

我们经常使用C库的printf函数,花时间整理一下从用户态到内核态的整个流程,涉及libc、系统调用、tty驱动、console等多个方面(其中,跟踪的驱动部分代码是sigmastar的,视用户实际使用的平台而定)。文章略长,请耐心阅读哈~

由于作者水平有限,如有纰漏,请帮忙指正,谢谢~

2.示例代码

使用最简单的代码作为示例。

#include #include int main() { printf("hello world!\n"); return 0; } 3.程序执行初探

gcc编译上述程序后,使用strace命令可以跟踪程序的系统调用流程。可以看到,程序执行需要依赖C库。整个执行流程大致如下:Hello_world可执行程序通过execve加载到内存后,libc.so等动态库通过mmap加载到内存映射区,最终通过write系统调用将“hello world!”输出到屏幕,程序执行完成退出。

$ strace ./hello_world execve("./hello_world", ["./hello_world"], [/ 44 vars /]) = 0 brk(0) = 0x12bd000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfadc000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=70625, ...}) = 0 mmap(NULL, 70625, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f48bfaca000 close(3) = 0 open("/lib64/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\356\1\0\0\0\0\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=1924768, ...}) = 0 mmap(NULL, 3750184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f48bf529000 mprotect(0x7f48bf6b4000, 2093056, PROT_NONE) = 0 mmap(0x7f48bf8b3000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x18a000) = 0x7f48bf8b3000 mmap(0x7f48bf8b9000, 14632, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f48bf8b9000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfac9000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfac8000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfac7000 arch_prctl(ARCH_SET_FS, 0x7f48bfac8700) = 0 mprotect(0x7f48bf8b3000, 16384, PROT_READ) = 0 mprotect(0x7f48bfadd000, 4096, PROT_READ) = 0 munmap(0x7f48bfaca000, 70625) = 0 fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 2), ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f48bfadb000 write(1, "hello world!\n", 13hello world! ) = 13 exit_group(0) = ? +++ exited with 0 +++ 4.用户态处理流程

printf的实现是在C库,通过stdout打印。

int printf(const char * __restrict format, ...) { va_list arg; int rv; va_start(arg, format); rv = vfprintf(stdout, format, arg); va_end(arg); return rv; }

vfprintf函数主要是处理和校验打印格式,调用关系如下:

Vfprintf: PUTC-> putc_unlocked-> __PUTC_UNLOCKED->…-> __PUTC_UNLOCKED_MACRO: __fputc_unlocked

__fputc_unlocked函数部分实现如下:

//buffer还没满 if (__STDIO_STREAM_BUFFER_SIZE(stream)) { //添加到缓冲区 __STDIO_STREAM_BUFFER_ADD(stream, ((unsigned char) c)); if (__STDIO_STREAM_IS_LBF(stream)) { //遇到'\n'则直接进行commit buffer。 if ((((unsigned char) c) == '\n') && __STDIO_COMMIT_WRITE_BUFFER(stream)) { / Commit failed! / __STDIO_STREAM_BUFFER_UNADD(stream); / Undo the write! / goto BAD; } } } else { //buffer满了,则直接进行write。 unsigned char uc = (unsigned char) c; if (! __stdio_WRITE(stream, &uc, 1)) { goto BAD; } }

上文的__STDIO_COMMIT_WRITE_BUFFER 和__stdio_WRITE最终都会调用到write系统调用陷入到内核态继续执行。

static inline ssize_t __WRITE(FILE stream, const char buf, size_t bufsize) { __STDIO_STREAM_CUSTOM_WRITE_FUNC(stream, buf, bufsize); return write(stream->__filedes, buf, bufsize); }

这里的write调用实际是__libc_write,各种宏定义展开如下:

PSEUDO (__libc_write, write, 3) ret PSEUDO_END (__libc_write) #define PSEUDO(name, syscall_name, args) \ .text; \ ENTRY (name); \ DO_CALL (syscall_name, args); \ cmn r0, $4096; #undef DO_CALL #if defined(__ARM_EABI__) #define DO_CALL(syscall_name, args) \ DOARGS_##args \ mov ip, r7; \ ldr r7, =SYS_ify (syscall_name); \ //r7记录系统调用号 swi 0x0; \ //产生软中断 mov r7, ip; \ UNDOARGS_##args #else #define DO_CALL(syscall_name, args) \ DOARGS_##args \ swi SYS_ify (syscall_name); \ UNDOARGS_##args #endif #define SYS_ify(syscall_name) (__NR_##syscall_name) //在内核src\arch\arm\include\uapi\asm\unistd.h中的定义如下: #if defined(__thumb__) || defined(__ARM_EABI__) #define __NR_SYSCALL_BASE 0 #else #define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE #endif #define __NR_write (__NR_SYSCALL_BASE+ 4)

调用时先处理参数,接着通过r7记录系统调用号(我使用的内核支持__ARM_EABI__(#define CONFIG_AEABI 1),write的系统调用号为4),执行swi 0x0从用户态陷入到内核态。 至此,用户态流程处理完成。

5.内核态处理流程 5.1. 软中断处理

上节说到产生软中断后,内核态会跳转到中断向量处执行。可以看到,通过指令ldrcc pc, [tbl, scno, lsl #2]执行系统调用,通过ret_fast_syscall来返回。

ENTRY(vector_swi) //执行系统调用前先保存用户态18个寄存器,PT_REGS_SIZE = 72,sizeof(struct pt_regs),分//别是r0-r15、cspr、spsr sub sp, sp, #PT_REGS_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr THUMB( mov r8, sp ) THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr mrs r8, spsr @ called from non-FIQ mode, so ok. str lr, [sp, #S_PC] @ Save calling PC //进入内核态之前先保存CPSR,返回到用户态时从SPSR中恢复 str r8, [sp, #S_PSR] @ Save CPSR str r0, [sp, #S_OLD_R0] @ Save OLD_R0 zero_fp alignment_trap r10, ip, __cr_alignment enable_irq ct_user_exit get_thread_info tsk /* * Get the system call number. */ #if defined(CONFIG_OABI_COMPAT) ... #elif defined(CONFIG_AEABI) /* * Pure EABI user space always put syscall number into scno (r7). */ #elif defined(CONFIG_ARM_THUMB) / Legacy ABI only, possibly thumb mode. / tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in USER( ldreq scno, [lr, #-4] ) #else … #endif uaccess_disable tbl //加载系统调用表基地址 adr tbl, sys_call_table @ load syscall table pointer #if defined(CONFIG_OABI_COMPAT) /* * If the swi argument is zero, this is an EABI call and we do nothing. * * If this is an old ABI call, get the syscall number into scno and * get the old ABI syscall table address. */ … #elif !defined(CONFIG_AEABI) bic scno, scno, #0xff000000 @ mask off SWI op-code eor scno, scno, #__NR_SYSCALL_BASE @ check OS number #endif local_restart: ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing stmdb sp!, {r4, r5} @ push fifth and sixth args tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? 
bne __sys_trace cmp scno, #NR_syscalls @ check upper syscall limit //通过__ret_fast_syscall返回 badr lr, __ret_fast_syscall @ return address //通过系统调用表基地址tbl+系统调用好scno,执行系统调用函数 ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF 2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back bcs arm_syscall mov why, #0 @ no longer a real syscall b sys_ni_syscall @ not private func ENDPROC(vector_swi) 5.2 系统调用返回

上一小节看到,系统调用执行完成返回到__ret_fast_syscall:

ret_fast_syscall: __ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK bne fast_work_pending / perform architecture specific actions before user return / arch_ret_to_user r1, lr restore_user_regs fast = 1, offset = S_OFF UNWIND(.fnend ) ENDPROC(ret_fast_syscall) fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 / fall through to work_pending / slow_work_pending: mov r0, sp @ 'regs' mov r2, why @ 'syscall' bl do_work_pending //见下 cmp r0, #0 beq no_work_pending movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) ldmia sp, {r0 - r6} @ have to reload r0 - r6 b local_restart @ ... and off we go no_work_pending: asm_trace_hardirqs_on save = 0 / perform architecture specific actions before user return / arch_ret_to_user r1, lr //恢复用户态的寄存器 ct_user_enter save = 0 restore_user_regs fast = 0, offset = 0

在返回用户态前,do_work_pending主要检查是否需要重新调度,以及是否有待处理(pending)的信号。

asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { /* * The assembly code enters us with IRQs off, but it hasn't * informed the tracing code of that for efficiency reasons. * Update the trace code with the current status. */ trace_hardirqs_off(); do { //检查是否需要重新调用 if (likely(thread_flags & _TIF_NEED_RESCHED)) { schedule(); } else { if (unlikely(!user_mode(regs))) return 0; local_irq_enable(); //有未处理的信号 if (thread_flags & _TIF_SIGPENDING) { int restart = do_signal(regs, syscall); if (unlikely(restart)) { /* * Restart without handlers. * Deal with it without leaving * the kernel space. */ return restart; } syscall = 0; } … } local_irq_disable(); thread_flags = current_thread_info()->flags; } while (thread_flags & _TIF_WORK_MASK); return 0; } 5.3 系统调用处理

系统调用write实际调用的是sys_write,在内核代码中无法直接搜到,因为它是通过宏定义拼接的,跟踪宏展开中name字段就可以看到最终是sys_write函数,在内核编译生成的System.map也可以搜到sys_write符号:

define __NR_write 64 __SYSCALL(__NR_write, sys_write) #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) #define SYSCALL_DEFINEx(x, sname, ...) \ SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(SyS##name)))); \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ __MAP(x,__SC_TEST,__VA_ARGS__); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))

sys_write函数的具体实现如下:

SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if(f.file) { loff_t pos = file_pos_read(f.file); ret = vfs_write(f.file, buf, count, &pos); if(ret >= 0) file_pos_write(f.file, pos); fdput_pos(f); } return ret; }

vfs_write函数调用如下:

vfs_write __vfs_write file->f_op->write(file, p, count, pos); //这里的实际执行函数是redirected_tty_write 5.4 stdout重定向到console

查看程序的fd,可以看到fd 0、1和2都是重定向到/dev/console。

# 679为程序pid ls /proc/679/fd lrwx------ 1 64 2 -> /dev/console lrwx------ 1 64 1 -> /dev/console lrwx------ 1 64 0 -> /dev/console

内核启动时创建init进程(pid=1):

start_kernel rest_init /* * We need to spawn init first so that it obtains pid 1, however * the init task will end up wanting to create kthreads, which, if * we schedule it before we create kthreadd, will OOPS. */ kernel_thread(kernel_init, NULL, CLONE_FS);

init进程打开/dev/console作为标准输入输出。

kernel_init kernel_init_freeable / Open the /dev/console on the rootfs, this should never fail / if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) .magic = TTY_LDISC_MAGIC, .name = "n_tty", .open = n_tty_open, .close = n_tty_close, .flush_buffer = n_tty_flush_buffer, .read = n_tty_read, .write = n_tty_write, .ioctl = n_tty_ioctl, .set_termios = n_tty_set_termios, .poll = n_tty_poll, .receive_buf = n_tty_receive_buf, .write_wakeup = n_tty_write_wakeup, .receive_buf2 = n_tty_receive_buf2, };

注释2: 我这里跟踪的是sigmastar平台的uart驱动。 内核启动时会调用uart驱动模块的init函数,即ms_uart_module_init。

ms_uart_module_init uart_register_driver(&ms_uart_driver); tty_set_operations(normal, &uart_ops); platform_driver_register(&ms_uart_platform_driver); static struct uart_driver ms_uart_driver = { .owner = THIS_MODULE, .driver_name = "ms_uart", .dev_name = "ttyS", .nr = 8, .cons = &ms_uart_console, }; static struct console ms_uart_console = { .name = MS_CONSOLE_DEV, .write = ms_uart_console_write, .setup = ms_uart_console_setup, .flags = CON_PRINTBUFFER, .device = uart_console_device, .data = &ms_uart_driver, .index = -1, #if CONSOLE_DMA .match = ms_uart_console_match, #endif }; static const struct tty_operations uart_ops = { .open = uart_open, .close = uart_close, .write = uart_write, .put_char = uart_put_char, .flush_chars = uart_flush_chars, .write_room = uart_write_room, … }; static struct platform_driver ms_uart_platform_driver = { .remove = ms_uart_remove, .probe = ms_uart_probe, … };

在platform_driver_register中,会调用probe函数ms_uart_probe注册console ttyS0,调用关系如下:

ms_uart_module_init platform_driver_register—> __platform_driver_register driver_register bus_add_driver driver_attach bus_for_each_dev __driver_attach driver_probe_device really_probe ret = dev->bus->probe(dev); //这里实际调用的就是上面注册的 ms_uart_probe uart_add_one_port uart_configure_port register_console print console [ttyS0] enabled 5.6 sstar uart dma发送线程

需要关注一个内核线程urdma_tx_thread:内核启动初始化platform设备时,会调用ms_uart_probe创建该tx线程。

ms_uart_probe //设置DMA的tx和rx缓冲区(页对齐) mp->urdma->rx_urdma_size = PAGE_ALIGN(UR2DMA_RX_BUF_LENGTH); mp->urdma->tx_urdma_size = PAGE_ALIGN(UR2DMA_TX_BUF_LENGTH); //启动一个内核线程输出打印 mp->urdma_task = kthread_run(urdma_tx_thread,(void *)&mp->port,"urdma_tx_thread"); ret = uart_add_one_port(&ms_uart_driver, &mp->port); uart_configure_port //boot参数dh_keyboard在此生效 register_console ms_uart_console_setup //设置波特率等参数

urdma_tx_thread实现如下:

static int urdma_tx_thread(void *arg) { struct uart_port p = (struct uart_port )arg; struct circ_buf *xmit; while(!kthread_should_stop()){ //等待中断唤醒返回 wait_event_interruptible(urdma_wait, urdma_conditions); urdma_conditions = 0; xmit = &p->state->xmit; if (uart_circ_empty(xmit) || uart_tx_stopped(p)) { ms_uart_stop_tx(p); } if (uart_circ_chars_pending(xmit)) { //环形缓冲区有数据,则将数据拷贝到驱动 URDMA_StartTx(p); }else { //环形缓冲区数据满了 //调用n_tty_write_wakeup,发送SIGIO信号通知driver有output data uart_write_wakeup(p); } } return 0; }

驱动加载的时候会调用uart_ops中的open接口,实现如下:

uart_open tty_port_open port->ops->activate(port, tty); //实际为uart_port_activate uart_port_activate uart_startup uart_port_startup uport->ops->startup(uport) //实际为ms_uart_startup ms_uart_startup //此处注册了uart的中断处理函数ms_uart_interrupt request_irq(mp->urdma->urdma_irq, ms_uart_interrupt, IRQF_SHARED, "ms_serial_dma",p);

ms_uart_interrupt函数实现如下:

static irqreturn_t ms_uart_interrupt(s32 irq, void *dev_id) { … if(mp->use_dma) { u8 status = URDMA_GetInterruptStatus(p); if(status & URDMA_INTR_STATUS_RX) { … } else if(status & URDMA_INTR_STATUS_TX) { //有tx_mcu_intr中断,则wakeup URDMA_TxClearInterrupt(p); urdma_conditions = 1; wake_up_interruptible(&urdma_wait); } … }

在收到urdma_wait的唤醒中断时,urdma_tx_thread会被唤醒,如果环形缓冲区数据满了,则通知驱动中断程序取数据并输出到串口上。 至此,整个printf打印流程完成。

6 问:为什么printf打印不会卡?

答:printf打印不会卡最根本的原因在于printf打印是异步的。数据从用户态的C库中的缓冲区到内核态的write调用,接着拷贝到tty的xmit环形缓冲区,这个过程是同步的,执行完成返回。在此过程中,主要涉及内存拷贝动作,没有其他耗时的操作。 剩下的过程是异步执行:当有tx_mcu_intr中断时,在内核线程urdma_tx_thread中从tty的环形缓冲区拷贝到驱动设备的私有数据,当数据满的时候,发送SIGIO信号通知driver有output data。而驱动收到信号后将数据输出打印到串口中。

7.参考文献

http://blog.chinaunix.net/uid-29401328-id-4866781.html https://www.cnblogs.com/pengdonglin137/p/3878316.html https://www.cnblogs.com/cslunatic/p/3655970.html



【本文地址】

公司简介

联系我们

今日新闻


点击排行

实验室常用的仪器、试剂和
说到实验室常用到的东西,主要就分为仪器、试剂和耗
不用再找了,全球10大实验
01、赛默飞世尔科技(热电)Thermo Fisher Scientif
三代水柜的量产巅峰T-72坦
作者:寞寒最近,西边闹腾挺大,本来小寞以为忙完这
通风柜跟实验室通风系统有
说到通风柜跟实验室通风,不少人都纠结二者到底是不
集消毒杀菌、烘干收纳为一
厨房是家里细菌较多的地方,潮湿的环境、没有完全密
实验室设备之全钢实验台如
全钢实验台是实验室家具中较为重要的家具之一,很多

推荐新闻


图片新闻

实验室药品柜的特性有哪些
实验室药品柜是实验室家具的重要组成部分之一,主要
小学科学实验中有哪些教学
计算机 计算器 一般 打孔器 打气筒 仪器车 显微镜
实验室各种仪器原理动图讲
1.紫外分光光谱UV分析原理:吸收紫外光能量,引起分
高中化学常见仪器及实验装
1、可加热仪器:2、计量仪器:(1)仪器A的名称:量
微生物操作主要设备和器具
今天盘点一下微生物操作主要设备和器具,别嫌我啰嗦
浅谈通风柜使用基本常识
 众所周知,通风柜功能中最主要的就是排气功能。在

专题文章

    CopyRight 2018-2019 实验室设备网 版权所有 win10的实时保护怎么永久关闭