buffer size 用户传入的buffer size大小
struct file_ra_state *ra
unsigned long max_pages = ra->ra_pages;
/*
* oversize read
*/
if (req_size > max_pages)
goto initial_readahead;
readahead window
根据bdi→ra_pages、bdi→io_pages、buffer size等几个参数会得到一个max_pages上限值,这个值就决定了最终预读的窗口的上限(ondemand_readahead会用到这个函数)。计算公式如下:
/*
* Compute the initial readahead window size.
*
* size: requested size; it is first rounded up to a power of two via
*       roundup_pow_of_two().
* max:  upper bound for the returned window.
* (Units are presumably pages, as the surrounding notes describe - confirm
* against the caller.)
*
* With r = the rounded-up size:
*   r <= max / 32  ->  r * 4   (small request)
*   r <= max / 4   ->  r * 2   (medium request)
*   otherwise      ->  max     (large request)
*
* Example with max = 32: size 1-2 -> 4, size 3-4 -> 8, size 5-8 -> 16,
* size > 8 -> 32.
*/
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
unsigned long newsize = roundup_pow_of_two(size);
if (newsize <= max / 32)
newsize = newsize * 4;
else if (newsize <= max / 4)
newsize = newsize * 2;
else
newsize = max;
return newsize;
}
ra->start = offset;
ra->size = get_init_ra_size(req_size, max_pages);
ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
总的来说原则就是:先把buffer size向上取整到2的幂,如果结果不超过max_pages/32(小buffer),就放大四倍;如果不超过max_pages/4(稍大一点的buffer),就放大二倍;再大的就直接用max_pages了。
bdi→ra_pages 每一个后端设备(backing device)都会有一个预读大小,如果是物理设备则可以通过sysfs接口调整,默认128KB;fuse则是通过FUSE_INIT来设置
# s_bdi->ra_pages 默认是128K、ra_pages是fuse daemon返回的,两者取最小值,
# 因此这里最大就是128K了
sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
fc->sb->s_bdi->ra_pages =
min(fc->sb->s_bdi->ra_pages, ra_pages);
这个值影响了预读的行为,当我们通过fadvise调整预读窗口的时候,会根据bdi→ra_pages来调整,调整规则如下:
case POSIX_FADV_NORMAL:
file->f_ra.ra_pages = bdi->ra_pages;
case POSIX_FADV_SEQUENTIAL:
file->f_ra.ra_pages = bdi->ra_pages * 2;
bdi→io_pages 设备最大允许的io pages大小,默认是128K,物理设备在调整max_sectors时会同步调整io pages,fuse这种虚拟设备io pages为0
q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
上面四个参数决定了实际下发到后端设备的io size,算法如下:
根据buffer size、max_pages、bdi→io_pages来计算最终的max_pages
/*
* Initialise the readahead state of a file.
*
* ra:      readahead state to fill in.
* mapping: address space of the file; its host inode is used to look up
*          the backing device.
*
* NOTE(review): the return type is not visible in this excerpt.
*/
file_ra_state_init(struct file_ra_state *ra,
struct address_space *mapping)
{
/* Start from the readahead size of the backing device of the host inode. */
ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
/* -1 presumably marks "no previous read position yet" - confirm. */
ra->prev_pos = -1;
}
struct file_ra_state *ra
// Defaults to 128K initially; physical devices can tune ra_pages, and it can also be changed via fadvise
unsigned long max_pages = ra->ra_pages;
/*
* If the request exceeds the readahead window, allow the read to
* be up to the optimal hardware IO size
*/
if (req_size > max_pages && bdi->io_pages > max_pages)
max_pages = min(req_size, bdi->io_pages);
可以看到当buffer size超过128K的时候,并且io_pages也大于128K的情况,最终从io_pages和buffer size两者之间取最小值,因此这里的max_pages最大不会超过io_pages的大小
get_init_ra_size 根据buffer size和max_pages(上面计算出来的max_pages)计算初始预读窗口大小:窗口大小随buffer size增大而增大,但不会超过max_pages(即函数参数max)。
/*
* Compute the initial readahead window size.
*
* size: requested size; it is first rounded up to a power of two via
*       roundup_pow_of_two().
* max:  upper bound for the returned window.
* (Units are presumably pages, as the surrounding notes describe - confirm
* against the caller.)
*
* With r = the rounded-up size:
*   r <= max / 32  ->  r * 4   (small request)
*   r <= max / 4   ->  r * 2   (medium request)
*   otherwise      ->  max     (large request)
*
* Example with max = 32: size 1-2 -> 4, size 3-4 -> 8, size 5-8 -> 16,
* size > 8 -> 32.
*/
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
unsigned long newsize = roundup_pow_of_two(size);
if (newsize <= max / 32)
newsize = newsize * 4;
else if (newsize <= max / 4)
newsize = newsize * 2;
else
newsize = max;
return newsize;
}
这个点我们可以调整的就是bdi→ra_pages的值,或者是通过fadvise来调整了,可以调整max的上限值,buffer size无论多大都不影响这里的max上限,只会影响下限。对于fuse来说,bdi→ra_pages是没办法调整的,最大就是128K。原因如下:
# s_bdi->ra_pages 默认是128K、ra_pages是fuse daemon返回的,两者取最小值,
# 因此这里最大就是128K了
sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
fc->sb->s_bdi->ra_pages =
min(fc->sb->s_bdi->ra_pages, ra_pages);