要做比较,最起码要有一个公平的比较场景。
说虚函数效率低,那要看跟谁比:如果跟普通函数比,编译器对普通函数可以做预加载,可以做分支预测,可以做内联展开,当然虚函数慢。
但是,虚函数的RTTI(这里泛指"运行期才能确定实际调用目标"的动态分派)是用在这个场景的吗?你都能在编译期知道实际调用哪个函数了,还叫作RTTI吗?其实,如果编译器在编译时能够识别出虚函数的实际调用目标,一样可以绕开虚表直接调用目标函数(即所谓的去虚化,devirtualization),这时候,甚至包括内联等优化手段也都是可以用的,和普通成员函数完全一样的待遇。
所以,换一个比法:在实现同样功能需求下,虚函数真的比其它实现RTTI的方式要慢吗?
例如说我随手摘两个linux内核的数据结构,看看经典的纯C代码是怎么解决这类问题的:
/*
 * struct file, as quoted from the Linux kernel.
 *
 * The f_op member is a pointer to a constant struct file_operations,
 * i.e. a table of function pointers; calls made through it are
 * indirect calls resolved at run time.
 */
struct file {
	union {
		struct llist_node	fu_llist;
		struct rcu_head		fu_rcuhead;
	} f_u;
	struct path		f_path;
	struct inode		*f_inode;	/* cached value */
	const struct file_operations	*f_op;

	/*
	 * Protects f_ep_links, f_flags.
	 * Must not be taken from IRQ context.
	 */
	spinlock_t		f_lock;
	enum rw_hint		f_write_hint;
	atomic_long_t		f_count;
	unsigned int		f_flags;
	fmode_t			f_mode;
	struct mutex		f_pos_lock;
	loff_t			f_pos;
	struct fown_struct	f_owner;
	const struct cred	*f_cred;
	struct file_ra_state	f_ra;

	u64			f_version;
#ifdef CONFIG_SECURITY
	void			*f_security;
#endif
	/* needed for tty driver, and maybe others */
	void			*private_data;

#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct list_head	f_ep_links;
	struct list_head	f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
	struct address_space	*f_mapping;
	errseq_t		f_wb_err;
	errseq_t		f_sb_err;	/* for syncfs */
} __randomize_layout
  __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
其中,file_operations的定义是这样的:
/*
 * struct file_operations, as quoted from the Linux kernel.
 *
 * Apart from owner and mmap_supported_flags, every member is a function
 * pointer, and most of them take the struct file * they operate on.
 * An object carries a pointer to one such table and its operations are
 * invoked indirectly through it.
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek) (struct file *, loff_t, int);
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, bool spin);
	int (*iterate) (struct file *, struct dir_context *);
	int (*iterate_shared) (struct file *, struct dir_context *);
	__poll_t (*poll) (struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	unsigned long mmap_supported_flags;
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*fasync) (int, struct file *, int);
	int (*lock) (struct file *, int, struct file_lock *);
	ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
			     loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
					   unsigned long, unsigned long,
					   unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
				loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *,
			       struct pipe_inode_info *, size_t, unsigned int);
	int (*setlease)(struct file *, long, struct file_lock **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
				   loff_t, size_t, unsigned int);
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;
也就是说,在「一切皆文件」的Linux内核里,它的实现手段本质上和C++的vptr+vtable是完全等价的,照样是查表跳转。所谓的「虚函数慢」的因素,在这里照样一个不落。
如果有谁不服气,觉得linux在这里的实现方案不够精巧效率不够高,可以自己提一个方案出来试试看?