引言

__libc_open是glibc中对openat系统调用的封装:针对给定的file路径打开文件,并返回一个文件描述符fd。它可以创建文件、创建临时文件或打开已有文件。很多IO接口函数都会用到它,我们来一起看看它的实现原理。

源码分析

代码参考:glibc/sysdeps/unix/sysv/linux/open.c

28 /* Open FILE with access OFLAG.  If O_CREAT or O_TMPFILE is in OFLAG,
29 a third argument is the file protection. */
30 int
31 __libc_open (const char *file, int oflag, ...)
32 {
33 int mode = 0;
34
35 if (__OPEN_NEEDS_MODE (oflag))
36 {
37 va_list arg;
38 va_start (arg, oflag);
39 mode = va_arg (arg, int);
40 va_end (arg);
41 }
42
43 return SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);
44 }

首先检查oflag中是否包含O_CREAT或O_TMPFILE,以决定是否需要读取第三个参数mode(用来指定新建文件的访问权限位)。

__OPEN_NEEDS_MODE宏判定

具体实现里面就是通过位运算检测oflag中是否包含__O_TMPFILE或O_CREAT

//glibc/io/fcntl.h
37 /* Detect if open needs mode as a third argument (or for openat as a fourth
38 argument). */
39 #ifdef __O_TMPFILE
40 # define __OPEN_NEEDS_MODE(oflag) \
41 (((oflag) & O_CREAT) != 0 || ((oflag) & __O_TMPFILE) == __O_TMPFILE)
42 #else
43 # define __OPEN_NEEDS_MODE(oflag) (((oflag) & O_CREAT) != 0)
44 #endif

SYSCALL_CANCEL

这个宏最终还是通过INLINE_SYSCALL_CALL完成系统调用,其余部分是线程取消(cancellation)相关的处理,这里我们就不展开了。

//glibc/sysdeps/unix/sysdep.h
111 #if IS_IN (rtld)
112 /* All cancellation points are compiled out in the dynamic loader. */
113 # define NO_SYSCALL_CANCEL_CHECKING 1
114 #else
115 # define NO_SYSCALL_CANCEL_CHECKING SINGLE_THREAD_P
116 #endif
117
118 #define SYSCALL_CANCEL(...) \
119 ({ \
120 long int sc_ret; \
121 if (NO_SYSCALL_CANCEL_CHECKING) \
122 sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \
123 else \
124 { \
125 int sc_cancel_oldtype = LIBC_CANCEL_ASYNC (); \
126 sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \
127 LIBC_CANCEL_RESET (sc_cancel_oldtype); \
128 } \
129 sc_ret; \
130 })

INLINE_SYSCALL_CALL

这里就是对syscall的相关封装了,实际上就是要从我们之前传入的 ​​SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);​​解析出具体需要执行的汇编指令

103 /* Issue a syscall defined by syscall number plus any other argument
104 required. Any error will be handled using arch defined macros and errno
105 will be set accordingly.
106 It is similar to INLINE_SYSCALL macro, but without the need to pass the
107 expected argument number as second parameter. */
108 #define INLINE_SYSCALL_CALL(...) \
109 __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)

宏展开之后 ​​__INLINE_SYSCALL_DISP (__INLINE_SYSCALL, openat, AT_FDCWD, file, oflag, mode)​

100 #define __INLINE_SYSCALL_DISP(b,...) \
101 __SYSCALL_CONCAT (b,__INLINE_SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)

展开之后: ​​__SYSCALL_CONCAT (__INLINE_SYSCALL, __INLINE_SYSCALL_NARGS(openat, AT_FDCWD, file, oflag, mode)) (openat, AT_FDCWD, file, oflag, mode)​

__INLINE_SYSCALL_NARGS

先具体分析__INLINE_SYSCALL_NARGS的宏展开方式:

98 #define __INLINE_SYSCALL_NARGS(...) \
99   __INLINE_SYSCALL_NARGS_X (__VA_ARGS__,7,6,5,4,3,2,1,0,)

展开如下,在原有参数后面增加了8个数字

​__INLINE_SYSCALL_NARGS_X (openat, AT_FDCWD, file, oflag, mode,7,6,5,4,3,2,1,0,)​

继续展开,可以看到这个宏取的是第九个参数,对应上面这个表达式,也就是4。由此可以推出:如果输入有1个参数,返回0;输入有2个参数,返回1;依次类推,输入有8个参数,返回7。也就是说,返回值是去掉系统调用名之后的实际参数个数。

97 #define __INLINE_SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n

__SYSCALL_CONCAT

实际上就是把两个记号(token)拼接成一个新的记号,“##”是宏中的记号拼接运算符。之所以多包一层__SYSCALL_CONCAT_X,是为了让参数先完成宏展开,再进行拼接。

27 #define __SYSCALL_CONCAT_X(a,b)     a##b
28 #define __SYSCALL_CONCAT(a,b)       __SYSCALL_CONCAT_X (a, b)

至此,我们的宏展开就应该是这样的

​__INLINE_SYSCALL4(openat, AT_FDCWD, file, oflag, mode)​

__INLINE_SYSCALL4

对应的定义如下:就应该被展开为​​INLINE_SYSCALL (openat, 4, AT_FDCWD, file, oflag, mode)​

80 #define __INLINE_SYSCALL0(name) \
81 INLINE_SYSCALL (name, 0)
82 #define __INLINE_SYSCALL1(name, a1) \
83 INLINE_SYSCALL (name, 1, a1)
84 #define __INLINE_SYSCALL2(name, a1, a2) \
85 INLINE_SYSCALL (name, 2, a1, a2)
86 #define __INLINE_SYSCALL3(name, a1, a2, a3) \
87 INLINE_SYSCALL (name, 3, a1, a2, a3)
88 #define __INLINE_SYSCALL4(name, a1, a2, a3, a4) \
89 INLINE_SYSCALL (name, 4, a1, a2, a3, a4)
90 #define __INLINE_SYSCALL5(name, a1, a2, a3, a4, a5) \
91 INLINE_SYSCALL (name, 5, a1, a2, a3, a4, a5)
92 #define __INLINE_SYSCALL6(name, a1, a2, a3, a4, a5, a6) \
93 INLINE_SYSCALL (name, 6, a1, a2, a3, a4, a5, a6)
94 #define __INLINE_SYSCALL7(name, a1, a2, a3, a4, a5, a6, a7) \
95 INLINE_SYSCALL (name, 7, a1, a2, a3, a4, a5, a6, a7)

INLINE_SYSCALL

封装调用了INTERNAL_SYSCALL宏

​INTERNAL_SYSCALL (openat, 4, AT_FDCWD, file, oflag, mode)​

38 /* Define a macro which expands into the inline wrapper code for a system
39 call. It sets the errno and returns -1 on a failure, or the syscall
40 return value otherwise. */
41 #undef INLINE_SYSCALL
42 #define INLINE_SYSCALL(name, nr, args...) \
43 ({ \
44 long int sc_ret = INTERNAL_SYSCALL (name, nr, args); \
45 __glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (sc_ret)) \
46 ? SYSCALL_ERROR_LABEL (INTERNAL_SYSCALL_ERRNO (sc_ret)) \
47 : sc_ret; \
48 })

INTERNAL_SYSCALL

这一步的实现因平台而异,这里以x86_64平台为例。上面的调用会被依次展开为:

​internal_syscall4 (SYS_ify (openat), AT_FDCWD, file, oflag, mode)​

​internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)​

//glibc/sysdeps/unix/sysv/linux/x86_64/sysdep.h
29 /* For Linux we can use the system call table in the header file
30 /usr/include/asm/unistd.h
31 of the kernel. But these symbols do not follow the SYS_* syntax
32 so we have to redefine the `SYS_ify' macro here. */
33 #undef SYS_ify
34 #define SYS_ify(syscall_name) __NR_##syscall_name
233 #undef INTERNAL_SYSCALL
234 #define INTERNAL_SYSCALL(name, nr, args...) \
235 internal_syscall##nr (SYS_ify (name), args)

最终我们的调用为internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)

依次将4个参数装入寄存器rdi、rsi、rdx、r10中,系统调用号则通过输入约束"0"与输出约束"=a"共用rax寄存器;然后执行syscall指令完成调用,返回值同样从rax中取回。这里用到了GCC的内联汇编(asm)语法。

301 #undef internal_syscall4
302 #define internal_syscall4(number, arg1, arg2, arg3, arg4) \
303 ({ \
304 unsigned long int resultvar; \
305 TYPEFY (arg4, __arg4) = ARGIFY (arg4); \
306 TYPEFY (arg3, __arg3) = ARGIFY (arg3); \
307 TYPEFY (arg2, __arg2) = ARGIFY (arg2); \
308 TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
309 register TYPEFY (arg4, _a4) asm ("r10") = __arg4; \
310 register TYPEFY (arg3, _a3) asm ("rdx") = __arg3; \
311 register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
312 register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
313 asm volatile ( \
314 "syscall\n\t" \
315 : "=a" (resultvar) \
316 : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
317 : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
318 (long int) resultvar; \
319 })

我们再看一下该架构下参数最多的宏(x86_64架构的系统调用最多只支持6个参数):

344 #undef internal_syscall6
345 #define internal_syscall6(number, arg1, arg2, arg3, arg4, arg5, arg6) \
346 ({ \
347 unsigned long int resultvar; \
348 TYPEFY (arg6, __arg6) = ARGIFY (arg6); \
349 TYPEFY (arg5, __arg5) = ARGIFY (arg5); \
350 TYPEFY (arg4, __arg4) = ARGIFY (arg4); \
351 TYPEFY (arg3, __arg3) = ARGIFY (arg3); \
352 TYPEFY (arg2, __arg2) = ARGIFY (arg2); \
353 TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
354 register TYPEFY (arg6, _a6) asm ("r9") = __arg6; \
355 register TYPEFY (arg5, _a5) asm ("r8") = __arg5; \
356 register TYPEFY (arg4, _a4) asm ("r10") = __arg4; \
357 register TYPEFY (arg3, _a3) asm ("rdx") = __arg3; \
358 register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
359 register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
360 asm volatile ( \
361 "syscall\n\t" \
362 : "=a" (resultvar) \
363 : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
364 "r" (_a5), "r" (_a6) \
365 : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
366 (long int) resultvar; \
367 })

寄存器的使用中参数依次装入rdi,rsi,rdx,r10,r8,r9

支持最多7个参数的只有如下的架构:

riscv架构:glibc/sysdeps/unix/sysv/linux/riscv/sysdep.h 分别使用a0到a6寄存器装参数

323 # define internal_syscall7(number, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \                                                                       
324 ({ \
325 long int _sys_result; \
326 long int _arg0 = (long int) (arg0); \
327 long int _arg1 = (long int) (arg1); \
328 long int _arg2 = (long int) (arg2); \
329 long int _arg3 = (long int) (arg3); \
330 long int _arg4 = (long int) (arg4); \
331 long int _arg5 = (long int) (arg5); \
332 long int _arg6 = (long int) (arg6); \
333 \
334 { \
335 register long int __a7 asm ("a7") = number; \
336 register long int __a0 asm ("a0") = _arg0; \
337 register long int __a1 asm ("a1") = _arg1; \
338 register long int __a2 asm ("a2") = _arg2; \
339 register long int __a3 asm ("a3") = _arg3; \
340 register long int __a4 asm ("a4") = _arg4; \
341 register long int __a5 asm ("a5") = _arg5; \
342 register long int __a6 asm ("a6") = _arg6; \
343 __asm__ volatile ( \
344 "scall\n\t" \
345 : "+r" (__a0) \
346 : "r" (__a7), "r" (__a1), "r" (__a2), "r" (__a3), \
347 "r" (__a4), "r" (__a5), "r" (__a6) \
348 : __SYSCALL_CLOBBERS); \
349 _sys_result = __a0; \
350 } \
351 _sys_result; \
352 })

mips架构:glibc/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h

315 #define internal_syscall7(v0_init, input, number, err,          \
316 arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
317 ({ \
318 union __mips_syscall_return _sc_ret; \
319 _sc_ret.val = __mips_syscall7 ((long int) (arg1), \
320 (long int) (arg2), \
321 (long int) (arg3), \
322 (long int) (arg4), \
323 (long int) (arg5), \
324 (long int) (arg6), \
325 (long int) (arg7), \
326 (long int) (number)); \
327 _sc_ret.reg.v1 != 0 ? -_sc_ret.reg.v0 : _sc_ret.reg.v0; \
328 })

总结

从最开始的​​SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode)​​宏,依次转换为

​internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)​​,然后调用汇编实现底层syscall操作。