fopen

在前人的基础上做一下总结和探索

执行链

__fopen_internal(iofopen.c)->_IO_no_init->  _IO_new_file_init_internal (&new_f->fp);
_IO_new_file_fopen->_IO_file_open

执行细节

  • 标志位:

    在执行过程中将oflags |omode作为系统调用的模式参数,read_write参数则作为flags传递个iofile结构体

  • 光标定位(通过判断flag进行):

    /* For append mode, send the file offset to the end of the file.  Don't
    update the offset cache though, since the file handle is not active. */
    if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
    == (_IO_IS_APPENDING | _IO_NO_READS))
    {
    off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
    if (new_pos == _IO_pos_BAD && errno != ESPIPE)
    {
    __close_nocancel (fdesc);
    return NULL;
    }
    }

总结来说,打开一个文件会先分配文件结构体,也就是iofile对象,之后会初始化iofile结构体,如一些flag的置位,虚表的赋值等等,然后会去处理用户输入的模式字段,来进行posix模式和flag的置位,也就是读,写,执行,追加。之后会利用系统调用open打开文件并返还文件描述符,最后完成文件结构的flag置位即可。

亟待解决

宽字符处理

fread

执行链

_IO_fread(iofread.c)->_IO_file_xsgetn(fileops.c)->_IO_doallocbuf(genops.c)->_IO_file_doallocate(filedoalloc.c)->_IO_new_file_underflow(fileops.c)

函数分析

1.判断有无缓冲区来分配缓冲区,如果没有缓冲区,会释放掉save缓冲区

2.维护一组指针来进行读操作,为: _IO_read_end和_IO_read_ptr和_IO_read_base

3.总的来说就是如果缓冲区有数据,那么就把缓冲区中的数据copy到指定位置然后更新指针,然后通过判断剩余读写的数量来决定是把文件中的数据读到缓冲区中,还是直接通过系统调用来读取数据,也会判断是否需要刷新缓冲区。

4.主要的操作就是维护标志位和指针的位置,会有一些列安全检查。

fread

size_t
_IO_fread (void *buf, size_t size, size_t count, FILE *fp)
{
size_t bytes_requested = size * count;
size_t bytes_read;
CHECK_FILE (fp, 0);
if (bytes_requested == 0)
return 0;
_IO_acquire_lock (fp);
bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
_IO_release_lock (fp);
return bytes_requested == bytes_read ? count : bytes_read / size;
}
size_t
_IO_file_xsgetn (FILE *fp, void *data, size_t n)
{
size_t want, have;
ssize_t count;
char *s = data;

want = n;

if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}

while (want > 0)
{
have = fp->_IO_read_end - fp->_IO_read_ptr;
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;
want = 0;
}
else
{
if (have > 0)
{
s = __mempcpy (s, fp->_IO_read_ptr, have);
want -= have;
fp->_IO_read_ptr += have;
}

/* Check for backup and repeat */
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
continue;
}

/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
{
if (__underflow (fp) == EOF)
break;

continue;
}

/* These must be set before the sysread as we might longjmp out
waiting for input. */
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);

/* Try to maintain alignment: read a whole number of blocks. */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}

count = _IO_SYSREAD (fp, s, count);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;

break;
}

s += count;
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
}
}

return n - want;
}

/* Allocate a file buffer, or switch to unbuffered I/O. Streams for
TTY devices default to line buffered. */
int
_IO_file_doallocate (FILE *fp)
{
size_t size;
char *p;
struct __stat64_t64 st;

size = BUFSIZ;
if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0)
{
if (S_ISCHR (st.st_mode))
{
/* Possibly a tty. */
if (
#ifdef DEV_TTY_P
DEV_TTY_P (&st) ||
#endif
local_isatty (fp->_fileno))
fp->_flags |= _IO_LINE_BUF;
}
#if defined _STATBUF_ST_BLKSIZE
if (st.st_blksize > 0 && st.st_blksize < BUFSIZ)
size = st.st_blksize;
#endif
}
p = malloc (size);
if (__glibc_unlikely (p == NULL))
return EOF;
_IO_setb (fp, p, p + size, 1);
return 1;
}



int
_IO_new_file_underflow (FILE *fp)
{
ssize_t count;

/* C99 requires EOF to be "sticky". */
if (fp->_flags & _IO_EOF_SEEN)
return EOF;

if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;

if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}

/* FIXME This can/should be moved to genops ?? */
if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED))
{
/* We used to flush all line-buffered stream. This really isn't
required by any standard. My recollection is that
traditional Unix systems did this for stdout. stderr better
not be line buffered. So we do just that here
explicitly. --drepper */
_IO_acquire_lock (stdout);

if ((stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF))
== (_IO_LINKED | _IO_LINE_BUF))
_IO_OVERFLOW (stdout, EOF);

_IO_release_lock (stdout);
}

_IO_switch_to_get_mode (fp);

/* This is very tricky. We have to adjust those
pointers before we call _IO_SYSREAD () since
we may longjump () out while waiting for
input. Those pointers may be screwed up. H.J. */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;

count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN, count = 0;
}
fp->_IO_read_end += count;
if (count == 0)
{
/* If a stream is read to EOF, the calling application may switch active
handles. As a result, our offset cache would no longer be valid, so
unset it. */
fp->_offset = _IO_pos_BAD;
return EOF;
}
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
return *(unsigned char *) fp->_IO_read_ptr;
}

fwrite

总结下来就是一件事,维护指针

首先分配缓冲区,得到bufbase,bufend,接下来就是向其中写入数据,并找准时机刷新,但是可能会因为不确定因素报错入:目标文件无法写入等等。

需要明白writebase,writeend,writeptr这一组指针是怎么维护的即可。

也有缓冲区过小直接调用系统调用的情形。。。。。。

大神点评:

从图中可以看到fwrite的主要实现在_IO_new_file_xsputn中,整体流程包含四个部分:

  1. 首先判断输出缓冲区还有多少剩余,如果有剩余则将目标输出数据拷贝至输出缓冲区(一般第二次调用才会用上)。
  2. 如果输出缓冲区没有剩余(输出缓冲区未建立也是没有剩余)或输出缓冲区不够则调用_IO_OVERFLOW建立输出缓冲区或刷新输出缓冲区。
  3. 输出缓冲区刷新后判断剩余的目标输出数据是否超过块的size,如果超过块的size,则不通过输出缓冲区直接以块为单位,使用new_do_write输出目标数据。
  4. 如果按块输出数据后还剩下一点数据则调用_IO_default_xsputn将数据拷贝至输出缓冲区。

fwrite

size_t
_IO_fwrite (const void *buf, size_t size, size_t count, FILE *fp)
{
size_t request = size * count;
size_t written = 0;
CHECK_FILE (fp, 0);
if (request == 0)
return 0;
_IO_acquire_lock (fp);
if (_IO_vtable_offset (fp) != 0 || _IO_fwide (fp, -1) == -1)
written = _IO_sputn (fp, (const char *) buf, request);
_IO_release_lock (fp);
/* We have written all of the input in case the return value indicates
this or EOF is returned. The latter is a special case where we
simply did not manage to flush the buffer. But the data is in the
buffer and therefore written as far as fwrite is concerned. */
if (written == request || written == EOF)
return count;
else
return written / size;
}
size_t
_IO_new_file_xsputn (FILE *f, const void *data, size_t n)
{
const char *s = (const char *) data;
size_t to_do = n;
int must_flush = 0;
size_t count = 0;

if (n <= 0)
return 0;
/* This is an optimized implementation.
If the amount to be written straddles a block boundary
(or the filebuf is unbuffered), use sys_write directly. */

/* First figure out how much space is available in the buffer. */
if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
{
count = f->_IO_buf_end - f->_IO_write_ptr;
if (count >= n)
{
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
else if (f->_IO_write_end > f->_IO_write_ptr)
count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */

/* Then fill the buffer. */
if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
to_do -= count;
}
if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF)
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;

/* Try to maintain alignment: write a whole number of blocks. */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);

if (do_write)
{
count = new_do_write (f, s, do_write);
to_do -= count;
if (count < do_write)
return n - to_do;
}

/* Now write out the remainder. Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do);
}
return n - to_do;
}
int
_IO_new_file_overflow (FILE *f, int ch)
{
if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
{
f->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
/* If currently reading or no buffer allocated. */
if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
{
/* Allocate a buffer if needed. */
if (f->_IO_write_base == NULL)
{
_IO_doallocbuf (f);
_IO_setg (f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
}
/* Otherwise must be currently reading.
If _IO_read_ptr (and hence also _IO_read_end) is at the buffer end,
logically slide the buffer forwards one block (by setting the
read pointers to all point at the beginning of the block). This
makes room for subsequent output.
Otherwise, set the read pointers to _IO_read_end (leaving that
alone, so it can continue to correspond to the external position). */
if (__glibc_unlikely (_IO_in_backup (f)))
{
size_t nbackup = f->_IO_read_end - f->_IO_read_ptr;
_IO_free_backup_area (f);
f->_IO_read_base -= MIN (nbackup,
f->_IO_read_base - f->_IO_buf_base);
f->_IO_read_ptr = f->_IO_read_base;
}

if (f->_IO_read_ptr == f->_IO_buf_end)
f->_IO_read_end = f->_IO_read_ptr = f->_IO_buf_base;
f->_IO_write_ptr = f->_IO_read_ptr;
f->_IO_write_base = f->_IO_write_ptr;
f->_IO_write_end = f->_IO_buf_end;
f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;

f->_flags |= _IO_CURRENTLY_PUTTING;
if (f->_mode <= 0 && f->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
f->_IO_write_end = f->_IO_write_ptr;
}
if (ch == EOF)
return _IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base);
if (f->_IO_write_ptr == f->_IO_buf_end ) /* Buffer is really full */
if (_IO_do_flush (f) == EOF)
return EOF;
*f->_IO_write_ptr++ = ch;
if ((f->_flags & _IO_UNBUFFERED)
|| ((f->_flags & _IO_LINE_BUF) && ch == '\n'))
if (_IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base) == EOF)
return EOF;
return (unsigned char) ch;
}
int
_IO_new_do_write (FILE *fp, const char *data, size_t to_do)
{
return (to_do == 0
|| (size_t) new_do_write (fp, data, to_do) == to_do) ? 0 : EOF;
}
libc_hidden_ver (_IO_new_do_write, _IO_do_write)
static size_t
new_do_write (FILE *fp, const char *data, size_t to_do)
{
size_t count;
if (fp->_flags & _IO_IS_APPENDING)
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD;
else if (fp->_IO_read_end != fp->_IO_write_base)
{
off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1);
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
count = _IO_SYSWRITE (fp, data, to_do);
if (fp->_cur_column && count)
fp->_cur_column = _IO_adjust_column (fp->_cur_column - 1, data, count) + 1;
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_buf_base;
fp->_IO_write_end = (fp->_mode <= 0
&& (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
? fp->_IO_buf_base : fp->_IO_buf_end);
return count;
}
size_t
_IO_default_xsputn (FILE *f, const void *data, size_t n)
{
const char *s = (char *) data;
size_t more = n;
if (more <= 0)
return 0;
for (;;)
{
/* Space available. */
if (f->_IO_write_ptr < f->_IO_write_end)
{
size_t count = f->_IO_write_end - f->_IO_write_ptr;
if (count > more)
count = more;
if (count > 20)
{
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
}
else if (count)
{
char *p = f->_IO_write_ptr;
ssize_t i;
for (i = count; --i >= 0; )
*p++ = *s++;
f->_IO_write_ptr = p;
}
more -= count;
}
if (more == 0 || _IO_OVERFLOW (f, (unsigned char) *s++) == EOF)
break;
more--;
}
return n - more;
}

参考文献

https://www.anquanke.com/post/id/177958

https://www.anquanke.com/post/id/177910

https://tttang.com/archive/1279/#toc_0x00