虛擬函式效率真的低嗎？

2021-02-17科學

要做比較，最起碼要有一個公平的比較場景。

說虛擬函式效率低，那要看跟誰比：如果跟普通函數比，普通函數的呼叫目標在編譯期就已確定，CPU可以做指令預取和分支預測，編譯器還可以做行內展開，當然虛擬函式慢。

但是，虛擬函式的RTTI（這裏泛指執行期才決定呼叫目標的動態分派）是用在這個場景的嗎？你都能在編譯期知道實際呼叫哪個函數了，還叫作RTTI嗎？其實，如果編譯器在編譯時能夠辨識出虛擬函式的實際呼叫目標，一樣可以繞開虛表直接呼叫目標函數（即去虛擬化，devirtualization）。這時候，甚至包括行內等最佳化手段也都是可以用的，和普通成員函數完全一樣的待遇。

所以，換一個比法：在實作同樣功能需求下，虛擬函式真的比其它實作RTTI的方式要慢嗎？

例如說我隨手摘兩個linux內核的數據結構，看看經典的純C程式碼是怎麽解決這類問題的：

struct file { union { struct llist_node fu_llist ; struct rcu_head fu_rcuhead ; } f_u ; struct path f_path ; struct inode * f_inode ; /* cached value */ const struct file_operations * f_op ; /* * Protects f_ep_links, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock ; enum rw_hint f_write_hint ; atomic_long_t f_count ; unsigned int f_flags ; fmode_t f_mode ; struct mutex f_pos_lock ; loff_t f_pos ; struct fown_struct f_owner ; const struct cred * f_cred ; struct file_ra_state f_ra ; u64 f_version ; #ifdef CONFIG_SECURITY void * f_security ; #endif /* needed for tty driver, and maybe others */ void * private_data ; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links ; struct list_head f_tfile_llink ; #endif /* #ifdef CONFIG_EPOLL */ struct address_space * f_mapping ; errseq_t f_wb_err ; errseq_t f_sb_err ; /* for syncfs */ } __randomize_layout __attribute__ (( aligned ( 4 ))); /* lest something weird decides that 2 is OK */

其中，file_operations的定義是這樣的：

/*
 * struct file_operations, as quoted from the Linux kernel.
 *
 * A table of function pointers, one slot per file operation. Apart from
 * `owner` and `mmap_supported_flags`, every member is a function pointer.
 * struct file reaches this table through its `f_op` member.
 *
 * Each preprocessor directive sits on its own line; a directive is only
 * recognised when '#' is the first token on the line.
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek)(struct file *, loff_t, int);
	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter)(struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter)(struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, bool spin);
	int (*iterate)(struct file *, struct dir_context *);
	int (*iterate_shared)(struct file *, struct dir_context *);
	__poll_t (*poll)(struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long);
	long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
	int (*mmap)(struct file *, struct vm_area_struct *);
	unsigned long mmap_supported_flags;
	int (*open)(struct inode *, struct file *);
	int (*flush)(struct file *, fl_owner_t id);
	int (*release)(struct inode *, struct file *);
	int (*fsync)(struct file *, loff_t, loff_t, int datasync);
	int (*fasync)(int, struct file *, int);
	int (*lock)(struct file *, int, struct file_lock *);
	ssize_t (*sendpage)(struct file *, struct page *, int, size_t,
			    loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
					   unsigned long, unsigned long,
					   unsigned long);
	int (*check_flags)(int);
	int (*flock)(struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
				loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *,
			       struct pipe_inode_info *, size_t, unsigned int);
	int (*setlease)(struct file *, long, struct file_lock **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
				   loff_t, size_t, unsigned int);
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;

也就是說，在「一切皆檔案」的Linux內核裏，它的實作手段本質上和C++的vptr+vtable是完全等價的，照樣是查表跳轉。所謂的「虛擬函式慢」的因素，在這裏照樣一個不落。

如果有誰不服氣，覺得Linux在這裏的實作方案不夠精巧、效率不夠高，可以自己提一個方案出來試試看？