linux4.12内核 bridge简介
Bridge是什么?
Bridge(桥)是 Linux 上用来做 TCP/IP 二层协议交换的设备,与现实世界中的交换机功能相似。Bridge 设备实例可以和 Linux 上其他网络设备实例连接,即 attach 一个从设备,类似于在现实世界中的交换机和一个用户终端之间连接一根网线。当有数据到达时,Bridge 会根据报文中的 MAC 信息进行广播、转发、丢弃处理。
Bridge 设备工作过程。
Bridge重要数据结构
structnet_bridge
{
spinlock_t lock;
spinlock_t hash_lock;
struct list_head port_list;
struct net_device *dev;
struct pcpu_sw_netstats __percpu *stats;
/* These fields are accessed on eachpacket */
#ifdefCONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_enabled;
u8 vlan_stats_enabled;
__be16 vlan_proto;
u16 default_pvid;
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
struct hlist_head hash[BR_HASH_SIZE];
#ifIS_ENABLED(CONFIG_BRIDGE_NETFILTER)
union {
struct rtable fake_rtable;
struct rt6_info fake_rt6_info;
};
bool nf_call_iptables;
bool nf_call_ip6tables;
bool nf_call_arptables;
#endif
u16 group_fwd_mask;
u16 group_fwd_mask_required;
/* STP */
bridge_id designated_root;
bridge_id bridge_id;
u32 root_path_cost;
unsigned char topology_change;
unsigned char topology_change_detected;
u16 root_port;
unsigned long max_age;
unsigned long hello_time;
unsigned long forward_delay;
unsigned long ageing_time;
unsigned long bridge_max_age;
unsigned long bridge_hello_time;
unsigned long bridge_forward_delay;
unsigned long bridge_ageing_time;
u8 group_addr[ETH_ALEN];
bool group_addr_set;
enum {
BR_NO_STP, /* no spanning tree */
BR_KERNEL_STP, /* old STP in kernel */
BR_USER_STP, /* new RSTP in userspace */
} stp_enabled;
#ifdefCONFIG_BRIDGE_IGMP_SNOOPING
unsigned char multicast_router;
u8 multicast_disabled:1;
u8 multicast_querier:1;
u8 multicast_query_use_ifaddr:1;
u8 has_ipv6_addr:1;
u8 multicast_stats_enabled:1;
u32 hash_elasticity;
u32 hash_max;
u32 multicast_last_member_count;
u32 multicast_startup_query_count;
u8 multicast_igmp_version;
unsigned long multicast_last_member_interval;
unsigned long multicast_membership_interval;
unsigned long multicast_querier_interval;
unsigned long multicast_query_interval;
unsigned long multicast_query_response_interval;
unsigned long multicast_startup_query_interval;
spinlock_t multicast_lock;
struct net_bridge_mdb_htable __rcu *mdb;
struct hlist_head router_list;
struct timer_list multicast_router_timer;
struct bridge_mcast_other_query ip4_other_query;
struct bridge_mcast_own_query ip4_own_query;
struct bridge_mcast_querier ip4_querier;
struct bridge_mcast_stats __percpu *mcast_stats;
#ifIS_ENABLED(CONFIG_IPV6)
struct bridge_mcast_other_query ip6_other_query;
struct bridge_mcast_own_query ip6_own_query;
struct bridge_mcast_querier ip6_querier;
u8 multicast_mld_version;
#endif/* IS_ENABLED(CONFIG_IPV6) */
#endif
struct timer_list hello_timer;
struct timer_list tcn_timer;
struct timer_list topology_change_timer;
struct delayed_work gc_work;
struct kobject *ifobj;
u32 auto_cnt;
#ifdefCONFIG_NET_SWITCHDEV
int offload_fwd_mark;
#endif
}
/*此结构是桥下面的port结构*/
structnet_bridge_port
{
struct net_bridge *br; /*指向所属的桥*/
struct net_device *dev; /*指向本port对应的实际网络设备*/
struct list_head list; /* port双向循环链表 */
/* STP */ /*STP相关 */
u8 priority;
u8 state;
u16 port_no; /* port编号,1-1023*/
unsigned char topology_change_ack;
unsigned char config_pending;
port_id port_id;
port_id designated_port;
bridge_id designated_root;
bridge_id designated_bridge;
u32 path_cost;
u32 designated_cost;
struct timer_list forward_delay_timer;
struct timer_list hold_timer;
struct timer_list message_age_timer;
struct kobject kobj;
struct rcu_head rcu;
unsigned long flags;
#defineBR_HAIRPIN_MODE 0x00000001
#ifdefCONFIG_BRIDGE_IGMP_SNOOPING
u32 multicast_startup_queries_sent;
unsigned char multicast_router;
struct timer_list multicast_router_timer;
struct timer_list multicast_query_timer;
struct hlist_head mglist;
struct hlist_node rlist;
#endif
#ifdefCONFIG_SYSFS
char sysfs_name[IFNAMSIZ];
#endif
};
/*转发表项结构*/
structnet_bridge_fdb_entry
{
struct hlist_node hlist;
struct net_bridge_port *dst; /*目的MAC对应的出口port*/
struct rcu_head rcu;
unsigned long ageing_timer; /* 超时值*/
mac_addr addr; /*目的MAC*/
unsigned char is_local; /* addr是本地网口的MAC*/
unsigned char is_static; /*静态的转发表项,不会超时,所有的本地转发表项都是静态的*/
};
Bridge数据结构关系图
Bridge模块初始化
在br_init函数中初始化了一个钩子函数:
br_ioctl_hook= br_ioctl_deviceless_stub; /* 用来处理用户层的ioctl的函数,主要是添加/删除桥等操作*/
Bridge的添加和删除
1. 添加
br_ioctl_deviceless_stub->br_add_bridge
2. 删除
br_ioctl_deviceless_stub->br_del_bridge
Port的添加和删除
1. 添加
rtnetlink_rcv_msg-> br_add_slave-> br_add_if
br_add_if主要步骤
new_nbp;/*初始化port的相关成员,并将port添加到br的port链表中*/
br_fdb_insert/*根据port创建一个新的转发表项,并加入到br的转发表中*/
netdev_rx_handler_register/*将桥端口收包处理函数br_handle_frame 注册至其对应dev中*/
2. 删除
rtnetlink_rcv_msg -> br_del_slave -> br_del_if
Bridge接收和转发报文流程
图-1 Bridge报文接收和转发处理
1. 在dev.c的__netif_receive_skb_core函数中会调用skb->dev->rx_handler,即br_handle_frame函数
2. 在br_handle_frame中首先做一些常规检测
3. 如果收包port是学习或转发状态,则调用br_fdb_update在port对应的桥的转发表中建立转发项,转发项的addr是源mac地址,这样下次如果有到此地址的数据则通过查询转发表直接从对应的port发送即可
4. 如果port的状态是转发状态,则先处理桥上的钩子,然后调用br_handle_frame_finish完成转发
5. 在br_handle_frame_finish中,如果桥对应的虚拟设备是混杂模式,调用br_pass_frame_up将skb的dev改为桥的虚拟设备,skb的包类型改为到本地的包,然后调用netif_receive_skb重新进入收包流程
6. 如果是多播包则调用br_multicast_flood进行多播。
7. 通过查找转发表,如果是到本机的包则送到本机,如果不是到本机的包则调用br_forward完成转发
8. 在br_forward中会调用NF_BR_FORWARD链上注册的hook函数,在该hook函数里会调用br_nf_forward_ip,再调用NF_INET_FORWARD上注册的hook函数。
9. 如果在单播转发表没有找到对应的转发项或者多播没有找到对应的多播转发项列表或者广播则调用br_flood_forward进行广播。
Bridge发送报文流程
1. 当要发送数据时会调用dev设备的hard_start_xmit函数,从而调用桥设备的br_dev_xmit
2. 在br_dev_xmit中会根据目的地址判断,如果是单播包就根据目的mac在转发表中查找对应的转发项,查找到转发项则调用br_forward,并更改源MAC地址为出口port的MAC,skb的dev改为出口port的dev,调用br_dev_queue_push_xmit发送,最终再次调用dev的hard_start_xmit发送。
3. 如果是多播则查找多播组转发项列表,查找到则调用br_multicast_flood多播发送。
4. 如果是广播或者单播未找到转发项或者多播未找到转发项列表,则调用br_flood发送。