虚函数效率真的低吗？

2021-02-17 · 科学

要做比较，最起码要有一个公平的比较场景。

说虚函数效率低，那要看跟谁比：如果跟普通函数比，普通函数的调用目标在编译期就已确定，编译器可以做内联展开，CPU也更容易做指令预取和分支预测，当然虚函数慢。

但是，虚函数的运行期多态是用在这个场景的吗？你都能在编译期知道实际调用哪个函数了，还叫作运行期多态吗？其实，如果编译器在编译时能够识别出虚函数的实际调用目标，一样可以绕开虚表直接调用目标函数（即所谓的去虚化，devirtualization），这时候，甚至包括内联等优化手段也都是可以用的，和普通成员函数完全一样的待遇。

所以，换一个比法：在实现同样功能需求下，虚函数真的比其它实现运行期多态（运行时才决定调用目标）的方式要慢吗？

例如，我随手摘两个Linux内核的数据结构，看看经典的纯C代码是怎么解决这类问题的：

struct file { union { struct llist_node fu_llist ; struct rcu_head fu_rcuhead ; } f_u ; struct path f_path ; struct inode * f_inode ; /* cached value */ const struct file_operations * f_op ; /* * Protects f_ep_links, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock ; enum rw_hint f_write_hint ; atomic_long_t f_count ; unsigned int f_flags ; fmode_t f_mode ; struct mutex f_pos_lock ; loff_t f_pos ; struct fown_struct f_owner ; const struct cred * f_cred ; struct file_ra_state f_ra ; u64 f_version ; #ifdef CONFIG_SECURITY void * f_security ; #endif /* needed for tty driver, and maybe others */ void * private_data ; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links ; struct list_head f_tfile_llink ; #endif /* #ifdef CONFIG_EPOLL */ struct address_space * f_mapping ; errseq_t f_wb_err ; errseq_t f_sb_err ; /* for syncfs */ } __randomize_layout __attribute__ (( aligned ( 4 ))); /* lest something weird decides that 2 is OK */

其中，file_operations的定义是这样的：

/*
 * struct file_operations, as quoted from the Linux kernel headers.
 *
 * Apart from `owner` and `mmap_supported_flags`, every member is a function
 * pointer. A struct file refers to one of these tables through its
 * `const struct file_operations *f_op` member, so calling an operation means
 * loading the pointer from the table and making an indirect call.
 *
 * Preprocessor directives are kept on their own lines; a directive that
 * appears in the middle of a line is not valid C.
 */
struct file_operations {
    struct module *owner;
    loff_t (*llseek) (struct file *, loff_t, int);
    ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
    ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
    ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
    ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
    int (*iopoll)(struct kiocb *kiocb, bool spin);
    int (*iterate) (struct file *, struct dir_context *);
    int (*iterate_shared) (struct file *, struct dir_context *);
    __poll_t (*poll) (struct file *, struct poll_table_struct *);
    long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
    long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
    int (*mmap) (struct file *, struct vm_area_struct *);
    unsigned long mmap_supported_flags;
    int (*open) (struct inode *, struct file *);
    int (*flush) (struct file *, fl_owner_t id);
    int (*release) (struct inode *, struct file *);
    int (*fsync) (struct file *, loff_t, loff_t, int datasync);
    int (*fasync) (int, struct file *, int);
    int (*lock) (struct file *, int, struct file_lock *);
    ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
                         loff_t *, int);
    unsigned long (*get_unmapped_area)(struct file *, unsigned long,
                                       unsigned long, unsigned long,
                                       unsigned long);
    int (*check_flags)(int);
    int (*flock) (struct file *, int, struct file_lock *);
    ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
                            loff_t *, size_t, unsigned int);
    ssize_t (*splice_read)(struct file *, loff_t *,
                           struct pipe_inode_info *, size_t, unsigned int);
    int (*setlease)(struct file *, long, struct file_lock **, void **);
    long (*fallocate)(struct file *file, int mode, loff_t offset,
                      loff_t len);
    void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
    unsigned (*mmap_capabilities)(struct file *);
#endif
    ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
                               loff_t, size_t, unsigned int);
    loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
                               struct file *file_out, loff_t pos_out,
                               loff_t len, unsigned int remap_flags);
    int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;

也就是说，在「一切皆文件」的Linux内核里，它的实现手段本质上和C++的vptr+vtable是完全等价的：struct file里的f_op指针相当于vptr，它指向的file_operations函数指针表相当于vtable，调用时照样是查表跳转。所谓的「虚函数慢」的因素，在这里照样一个不落。

如果有谁不服气，觉得Linux在这里的实现方案不够精巧、效率不够高，可以自己提一个方案出来试试看。