cstdio中的文件访问函数

stdio.h中定义了一系列文件访问函数(fopen,fclose,fflush,freopen,setbuf,setvbuf),接下来我们一起来分析一下fopen对应的源码实现。

  • fopen:打开文件
  • fclose:关闭文件
  • fflush:刷新文件流
  • freopen:重新打开文件流(不同的文件或访问模式)
  • setbuf:设置stream buf
  • setvbuf:改变stream buf

打开文件函数fopen

给定指定的文件名和对应的访问mode,返回对应的文件流对象FILE指针。

FILE * fopen ( const char * filename, const char * mode );

函数入口分析

对应代码位置:glibc/include/stdio.h

可以看到实际上内部的实现实际都是委托到__fopen_internal,这里有两种版本的函数,一种是32位,一种是64位,两者的调用区别在于__fopen_internal的第三个参数。

183 extern FILE *_IO_new_fopen (const char*, const char*);
184 # define fopen(fname, mode) _IO_new_fopen (fname, mode)
//glibc/libio/iofopen.c
83 FILE *
84 _IO_new_fopen (const char *filename, const char *mode)
85 {
86 return __fopen_internal (filename, mode, 1);
87 }

93 # if !defined O_LARGEFILE || O_LARGEFILE == 0
94 weak_alias (_IO_new_fopen, _IO_fopen64)
95 weak_alias (_IO_new_fopen, fopen64)
96 # endif
//glibc/libio/iofopen64.c
34 FILE *
35 _IO_fopen64 (const char *filename, const char *mode)
36 {
37 return __fopen_internal (filename, mode, 0);
38 }

int is32标识是否是32位,所以在_IO_new_fopen中调用时默认传1,在_IO_fopen64中调用时默认传0。

55 FILE *
56 __fopen_internal (const char *filename, const char *mode, int is32)
57 {

__fopen_internal函数

基本的函数逻辑与《C++学习---_IO_new_fdopen函数原理分析学习》基本一致:先为局部结构体指针new_f分配内存,再调用_IO_no_init,_IO_JUMPS,_IO_new_file_init_internal初始化其中的变量。

因为只知道当前的文件名,不知道fd,所以需要调用_IO_file_fopen进行后续的操作,如果失败,则需要unlink,释放内存,然后返回NULL。

55 FILE *
56 __fopen_internal (const char *filename, const char *mode, int is32)
57 {
58 struct locked_FILE
59 {
60 struct _IO_FILE_plus fp;
61 #ifdef _IO_MTSAFE_IO
62 _IO_lock_t lock;
63 #endif
64 struct _IO_wide_data wd;
65 } *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));
66
67 if (new_f == NULL)
68 return NULL;
69 #ifdef _IO_MTSAFE_IO
70 new_f->fp.file._lock = &new_f->lock;
71 #endif
72 _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);
73 _IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
74 _IO_new_file_init_internal (&new_f->fp);
75 if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL)
76 return __fopen_maybe_mmap (&new_f->fp.file);
77
78 _IO_un_link (&new_f->fp);
79 free (new_f);
80 return NULL;
81 }

_IO_file_fopen函数

​_IO_file_fopen ((FILE *) new_f, filename, mode, is32)​

注意:上面的函数调用过程中将new_f指针从locked_FILE转为了FILE,这样做是合法的,因为locked_FILE中第一个变量_IO_FILE_plus的首个变量即是FILE,实际上这样做使得new_f指针的访问被截断,只能访问前面FILE中的内容。

//glibc/libio/libioP.h
324 struct _IO_FILE_plus
325 {
326 FILE file;
327 const struct _IO_jump_t *vtable;
328 };

这里做了符号映射,实际调用_IO_file_fopen被映射为_IO_new_file_fopen

//glibc/libio/fileops.c
1422 versioned_symbol (libc, _IO_new_file_fopen, _IO_file_fopen, GLIBC_2_1);

_IO_new_file_fopen函数

1.入参及局部变量准备

不多赘述,准备与fopen相关的变量

210 FILE *
211 _IO_new_file_fopen (FILE *fp, const char *filename, const char *mode,
212 int is32not64)
213 {
214 int oflags = 0, omode;
215 int read_write;
216 int oprot = 0666;
217 int i;
218 FILE *result;
219 const char *cs;
220 const char *last_recognized;

2.如果文件已经打开,则返回0(即NULL)

判断方式也很简单,查看fp的_fileno是否被赋值,正常打开一次之后,该值将被赋值为对应的fd

222   if (_IO_file_is_open (fp))
223 return 0;

565 #define _IO_file_is_open(__fp) ((__fp)->_fileno != -1)

3.解析文件打开的mode

omode记录File access modes:只读/只写/读写

oflags记录文件open的参数:

  • O_CREAT:Create file if it doesn't exist
  • O_TRUNC:Truncate file to zero length
  • O_APPEND:Writes append to the file
  • O_EXCL:Fail if file already exists
  • O_CLOEXEC:Set close_on_exec

read_write记录读写参数:

  • _IO_NO_READS:Reading not allowed
  • _IO_NO_WRITES:Writing not allowed
  • _IO_IS_APPENDING:追加模式

fp->_flags2记录第二个flags信息:

  • _IO_FLAGS2_MMAP:使用mmap
  • _IO_FLAGS2_NOTCANCEL:不取消模式
  • _IO_FLAGS2_CLOEXEC:close_on_exec

last_recognized记录最后检测到的模式。

224   switch (*mode)
225 {
226 case 'r':
227 omode = O_RDONLY;
228 read_write = _IO_NO_WRITES;
229 break;
230 case 'w':
231 omode = O_WRONLY;
232 oflags = O_CREAT|O_TRUNC;
233 read_write = _IO_NO_READS;
234 break;
235 case 'a':
236 omode = O_WRONLY;
237 oflags = O_CREAT|O_APPEND;
238 read_write = _IO_NO_READS|_IO_IS_APPENDING;
239 break;
240 default:
241 __set_errno (EINVAL);
242 return NULL;
243 }
244 last_recognized = mode;
245 for (i = 1; i < 7; ++i)
246 {
247 switch (*++mode)
248 {
249 case '\0':
250 break;
251 case '+':
252 omode = O_RDWR;
253 read_write &= _IO_IS_APPENDING;
254 last_recognized = mode;
255 continue;
256 case 'x':
257 oflags |= O_EXCL;
258 last_recognized = mode;
259 continue;
260 case 'b':
261 last_recognized = mode;
262 continue;
263 case 'm':
264 fp->_flags2 |= _IO_FLAGS2_MMAP;
265 continue;
266 case 'c':
267 fp->_flags2 |= _IO_FLAGS2_NOTCANCEL;
268 continue;
269 case 'e':
270 oflags |= O_CLOEXEC;
271 fp->_flags2 |= _IO_FLAGS2_CLOEXEC;
272 continue;
273 default:
274 /* Ignore. */
275 continue;
276 }
277 break;
278 }

4.调用_IO_file_open打开文件

注意,这里大部分参数都是传入的,或者刚解析出来的,只有oprot是前文定义的局部变量int oprot = 0666,即新建文件时请求的权限为rw-rw-rw-(实际权限还会受进程umask的影响),表示:

文件拥有者对该文件拥有读写权限,但没有执行权限;文件拥有者所在组的其他成员对该文件拥有读写权限,但没有执行权限;其他用户对该文件也拥有读写权限,但没有执行权限。

调用_IO_file_open的流程中大致可以分为如下几步:

  • 根据flags2决定是调用__open_nocancel还是__open;

这里的__open实际上就是系统函数__libc_open,参考《C++学习---__libc_open函数的原理》:
47 weak_alias (__libc_open, __open)
__open_nocancel的原理类似,参见:glibc/sysdeps/unix/sysv/linux/open_nocancel.c

  • 调用_IO_mask_flags设定对应的flags;
  • 针对append模式,移动文件指针到_IO_seek_end;
  • 将打开后的fp link到_IO_list_all上
280   result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
281 is32not64);

179 FILE *
180 _IO_file_open (FILE *fp, const char *filename, int posix_mode, int prot,
181 int read_write, int is32not64)
182 {
183 int fdesc;
184 if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
185 fdesc = __open_nocancel (filename,
186 posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
187 else
188 fdesc = __open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
189 if (fdesc < 0)
190 return NULL;
191 fp->_fileno = fdesc;
192 _IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
193 /* For append mode, send the file offset to the end of the file. Don't
194 update the offset cache though, since the file handle is not active. */
195 if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
196 == (_IO_IS_APPENDING | _IO_NO_READS))
197 {
198 off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
199 if (new_pos == _IO_pos_BAD && errno != ESPIPE)
200 {
201 __close_nocancel (fdesc);
202 return NULL;
203 }
204 }
205 _IO_link_in ((struct _IO_FILE_plus *) fp);
206 return fp;
207 }

5.查看打开的文件是否需要特殊转换

这里主要是针对宽字符进行相关的处理和模式设置,详细的内容就不赘述了,具体细节与正常的打开流程基本一致,最后设置宽字符的字符处理虚函数表_wide_vtable。

283   if (result != NULL)
284 {
285 /* Test whether the mode string specifies the conversion. */
286 cs = strstr (last_recognized + 1, ",ccs=");
287 if (cs != NULL)
288 {
289 /* Yep. Load the appropriate conversions and set the orientation
290 to wide. */
...
347 /* From now on use the wide character callback functions. */
348 _IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;
349
350 /* Set the mode now. */
351 result->_mode = 1;
352 }
353 }

6.返回result,即FILE*指针

210 FILE *
211 _IO_new_file_fopen (FILE *fp, const char *filename, const char *mode,
212 int is32not64)
213 {
...
280 result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
281 is32not64);
...
355 return result;
356 }

__fopen_maybe_mmap函数

针对flags2设置了_IO_FLAGS2_MMAP且文件以只读方式(如"r",即flags中带有_IO_NO_WRITES)打开的情况:因为不需要修改原文件内容,所以有可能直接通过mmap映射文件内容来读取。这里并不立即决定是否使用mmap,而是先把fp的虚函数表替换为maybe_mmap类型的函数表,等到第一次读取时再选择mmap或普通文件操作,并相应地重设虚函数表。

33 FILE *
34 __fopen_maybe_mmap (FILE *fp)
35 {
36 #if _G_HAVE_MMAP
37 if ((fp->_flags2 & _IO_FLAGS2_MMAP) && (fp->_flags & _IO_NO_WRITES))
38 {
39 /* Since this is read-only, we might be able to mmap the contents
40 directly. We delay the decision until the first read attempt by
41 giving it a jump table containing functions that choose mmap or
42 vanilla file operations and reset the jump table accordingly. */
43
44 if (fp->_mode <= 0)
45 _IO_JUMPS_FILE_plus (fp) = &_IO_file_jumps_maybe_mmap;
46 else
47 _IO_JUMPS_FILE_plus (fp) = &_IO_wfile_jumps_maybe_mmap;
48 fp->_wide_data->_wide_vtable = &_IO_wfile_jumps_maybe_mmap;
49 }
50 #endif
51 return fp;
52 }

总结

通过一层层的函数调用和参数检查准备,最后调用_IO_file_open打开文件,返回文件流指针。在此过程中也会针对一些特殊情况(如追加模式、只读模式)做参数调整或虚函数表的重新赋值,尽可能提升程序的效率。