要做比較,最起碼要有一個公平的比較場景。
說虛擬函式效率低,那要看跟誰比:如果跟普通函數比,編譯器對普通函數可以做預載入,可以做分支預測,可以做行內展開,當然虛擬函式慢。
但是,虛擬函式的RTTI(這裡指執行期才決定呼叫目標的動態分派)是用在這個場景的嗎?你都能在編譯期知道實際呼叫哪個函數了,還叫作RTTI嗎?其實,如果編譯器在編譯時能夠辨識出虛擬函式的實際呼叫目標,一樣可以繞開虛表直接呼叫目標函數,這時候,甚至包括行內展開等最佳化手段也都是可以用的,和普通成員函數完全一樣的待遇。
所以,換一個比法:在實作同樣功能需求下,虛擬函式真的比其它實作RTTI的方式要慢嗎?
例如說,我隨手摘兩個Linux內核的資料結構,看看經典的純C程式碼是怎麼解決這類問題的:
/*
 * struct file -- one of the two Linux kernel structures quoted here.
 *
 * The member that matters for the dispatch discussion is f_op: a pointer
 * to a constant struct file_operations, which is itself a table of
 * function pointers.
 *
 * Member order, preprocessor guards and attributes are reproduced exactly.
 */
struct file {
	union {
		struct llist_node fu_llist;
		struct rcu_head fu_rcuhead;
	} f_u;
	struct path f_path;
	struct inode *f_inode;	/* cached value */
	const struct file_operations *f_op;

	/*
	 * Protects f_ep_links, f_flags.
	 * Must not be taken from IRQ context.
	 */
	spinlock_t f_lock;
	enum rw_hint f_write_hint;
	atomic_long_t f_count;
	unsigned int f_flags;
	fmode_t f_mode;
	struct mutex f_pos_lock;
	loff_t f_pos;
	struct fown_struct f_owner;
	const struct cred *f_cred;
	struct file_ra_state f_ra;

	u64 f_version;
#ifdef CONFIG_SECURITY
	void *f_security;
#endif
	/* needed for tty driver, and maybe others */
	void *private_data;

#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct list_head f_ep_links;
	struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
	struct address_space *f_mapping;
	errseq_t f_wb_err;
	errseq_t f_sb_err;	/* for syncfs */
} __randomize_layout
  __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
其中,file_operations的定義是這樣的:
/*
 * struct file_operations -- a table of function pointers.
 *
 * struct file refers to one of these tables through its
 * `const struct file_operations *f_op` member, so calling an operation
 * means loading the table pointer, loading the slot, and making an
 * indirect call.
 *
 * Apart from `owner` and `mmap_supported_flags`, which are plain data,
 * every member is a function pointer. Member order, parameter types and
 * the parameter names that were present are reproduced exactly.
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek)(struct file *, loff_t, int);
	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter)(struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter)(struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, bool spin);
	int (*iterate)(struct file *, struct dir_context *);
	int (*iterate_shared)(struct file *, struct dir_context *);
	__poll_t (*poll)(struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long);
	long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
	int (*mmap)(struct file *, struct vm_area_struct *);
	unsigned long mmap_supported_flags;
	int (*open)(struct inode *, struct file *);
	int (*flush)(struct file *, fl_owner_t id);
	int (*release)(struct inode *, struct file *);
	int (*fsync)(struct file *, loff_t, loff_t, int datasync);
	int (*fasync)(int, struct file *, int);
	int (*lock)(struct file *, int, struct file_lock *);
	ssize_t (*sendpage)(struct file *, struct page *, int, size_t,
			    loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
					   unsigned long, unsigned long,
					   unsigned long);
	int (*check_flags)(int);
	int (*flock)(struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
				loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *,
			       struct pipe_inode_info *, size_t, unsigned int);
	int (*setlease)(struct file *, long, struct file_lock **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
				   loff_t, size_t, unsigned int);
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;
也就是說,在「一切皆檔案」的Linux內核裏,它的實作手段本質上和C++的vptr+vtable是完全等價的,照樣是查表跳轉。所謂的「虛擬函式慢」的因素,在這裏照樣一個不落。
如果有誰不服氣,覺得Linux在這裏的實作方案不夠精巧、效率不夠高,可以自己提一個方案出來試試看?