NAT44-ED features及节点图

VPP配置SNAT,内网接口GigabitEthernet2/0/0,外网接口GigabitEthernet3/0/0,NAT之后的地址为GigabitEthernet3/0/0接口的地址。

vpp# set interface state GigabitEthernet2/0/0 up
vpp# set interface state GigabitEthernet3/0/0 up
vpp#
vpp# set interface ip address GigabitEthernet2/0/0 30.1.1.1/24
vpp# set interface ip address GigabitEthernet3/0/0 192.168.1.203/24
vpp#
vpp# nat44 plugin enable
vpp# set interface nat44 in GigabitEthernet2/0/0 out GigabitEthernet3/0/0
vpp# nat44 add interface address GigabitEthernet3/0/0

NAT44-ED插件使用的feature和node节点如下。

ip4-output:
  ip4-sv-reassembly-output-feature 
  nat44-ed-in2out-output 
  nat44-in2out-output-worker-handoff  
  nat-pre-in2out-output  

ip4-unicast:
  ip4-sv-reassembly-feature 
  nat44-ed-out2in
  nat44-ed-in2out
  nat44-out2in-worker-handoff   
  nat44-in2out-worker-handoff 
  nat44-handoff-classify 
  nat44-ed-classify
  nat-pre-out2in 
  nat-pre-in2out

nat44-ed处理节点流程如下。VPP配置了多个worker线程的话,需要nat44-in2out-worker-handoff和nat44-out2in-worker-handoff节点;否则,使用nat-pre-in2out和nat-pre-out2in节点。

         |-----------------------|
         | ip4-input-no-checksum |
         |-----------------------|
                    |
      |---------------------------|      GigabitEthernet3/0/0     |-----------------------------|
      | ip4-sv-reassembly-feature |-------------------------------| nat44-out2in-worker-handoff | 
      |                           |                               |     /  nat-pre-out2in       |
      |---------------------------|                               |-----------------------------|
GigabitEthernet2/0/0 |                                                          |
      |-----------------------------|                                   |-----------------|
      | nat44-in2out-worker-handoff |                                   | nat44-ed-out2in |
      |     /  nat-pre-in2out       |                                   |                 |
      |-----------------------------|                                   |-----------------|
                   |                                                            |
          |-------------------|  nonexist |----------------------------|        |
          |  nat44-ed-in2out  |-----------|  nat44-ed-in2out-slowpath  |        |
          |-------------------|           |----------------------------|        |
existing session   |                                   |                        |
                   |                                   |                        |
           |----------------|                          |                        |
           |   ip4-lookup   |--------------------------|------------------------|
           |----------------|

一. 报文重组功能

IP报文分片之后,仅在第一个分片中可见4层的端口号信息,对于后续分片由于没有端口号,将不能进行NAT操作,所以需要IP报文重组功能。在ARC集合ip4-unicast中,nat-pre-in2out和nat-pre-out2in都是位于ip4-sv-reassembly-feature之后,报文先经过重组处理。

/* Registers the "nat-pre-in2out" node as a feature on the "ip4-unicast" arc.
 * The runs_after list orders it behind "acl-plugin-in-ip4-fa" and
 * "ip4-sv-reassembly-feature", so buffers pass through reassembly handling
 * before they reach this node. */
VNET_FEATURE_INIT (nat_pre_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                   "ip4-sv-reassembly-feature"),
};
/* Registers the "nat-pre-out2in" node as a feature on the "ip4-unicast" arc.
 * The runs_after list orders it behind "acl-plugin-in-ip4-fa",
 * "ip4-dhcp-client-detect" and "ip4-sv-reassembly-feature". */
VNET_FEATURE_INIT (nat_pre_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect",
                   "ip4-sv-reassembly-feature"),
};

节点nat-pre-in2out(nat_pre_in2out_node)的下一个节点索引默认为NAT_NEXT_IN2OUT_ED_FAST_PATH,对应于节点nat44-ed-in2out。

/* Node function for nat_pre_in2out_node: forwards the frame to the shared
 * helper nat_pre_node_fn_inline, passing NAT_NEXT_IN2OUT_ED_FAST_PATH as the
 * default next index (the helper's def_next argument), and returns the
 * helper's result unchanged. */
VLIB_NODE_FN (nat_pre_in2out_node)
  (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return nat_pre_node_fn_inline (vm, node, frame,
                 NAT_NEXT_IN2OUT_ED_FAST_PATH);
}

在函数nat_pre_node_fn_inline中,主要的工作就是通过vnet_feature_next获取ARC集合中的下一个节点索引,并记录在报文缓存的元数据vnet_buffer2(b0)->nat.arc_next中,以便在NAT44模块处理完成之后,继续ARC集合中的处理流程。通常情况下,下一个feature为ip4-lookup。

static inline uword
nat_pre_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
            vlib_frame_t *frame, u32 def_next)
{
  while (n_left_from > 0) {
      vlib_buffer_t *b0;
      b0 = *b;
      b++;

      next0 = def_next;
      vnet_feature_next (&arc_next0, b0);
      vnet_buffer2 (b0)->nat.arc_next = arc_next0;

      n_left_from--;
      next[0] = next0;
      next++;
  }

二. NAT44模块使能

对应命令nat44 plugin enable。如果VPP配置的worker线程数量大于1,需要创建frame_queue队列,在worker线程之间传递报文,保证同一个会话流在同一个worker上处理,避免使用锁。内网接口和外网接口分别注册fq_in2out_index和fq_out2in_index为索引的队列,对应node节点nat44-ed-in2out和nat44-ed-out2in。

int nat44_plugin_enable (nat44_config_t c)
{
  snat_main_t *sm = &snat_main;

  if (sm->num_workers > 1) { 
      vlib_main_t *vm = vlib_get_main ();
      vlib_node_t *node;
      
      if (sm->fq_in2out_index == ~0) { 
        node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
        sm->fq_in2out_index = vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
      } 
      if (sm->fq_out2in_index == ~0) { 
        node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
        sm->fq_out2in_index = vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
      } 

三. NAT44接口

设置接口的inside/outside属性。首先,判断如果接口已经在nat44的接口中,并且接口的inside/outside属性没有改变,结束处理。

int nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
{
  snat_main_t *sm = &snat_main;
  nat_fib_t *outside_fib;
  snat_interface_t *i;

  i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
  if (i){
      if ((nat44_ed_is_interface_inside (i) && is_inside) ||
          (nat44_ed_is_interface_outside (i) && !is_inside))
        return 0;

否则,这里是要修改接口的inside/outside属性,分两种情况。第一,worker线程数量大于1,接口从inside修改为outside,需要删除nat44-in2out-worker-handoff;否则,删除nat44-out2in-worker-handoff。

第二,worker线程小于等于1,接口从inside修改为outside,需要删除nat-pre-in2out,否则,删除nat-pre-out2in。

最后,开启ip4-sv-reassembly-feature,如果已经开启,增加引用计数。并且,删除记录下来的接口旧的feature,使能新的feature:worker线程数量大于1时为nat44-handoff-classify,否则为nat44-ed-classify。以上提到的这些feature都位于ARC集合ip4-unicast。

      if (sm->num_workers > 1) {
        del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" :
                      "nat44-out2in-worker-handoff";
        feature_name = "nat44-handoff-classify";
      } else {
        del_feature_name = !is_inside ? "nat-pre-in2out" : "nat-pre-out2in";

        feature_name = "nat44-ed-classify";
      }
      rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
      if (rv)
        return rv;
      vnet_feature_enable_disable ("ip4-unicast", del_feature_name, sw_if_index, 0, 0, 0);
      vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0, 0);
    }

以下处理nat44模块中从未添加过此接口的情况。在worker线程数量大于1时,对于inside接口,使能nat44-in2out-worker-handoff;对于outside接口,使能nat44-out2in-worker-handoff。否则(worker线程数量小于等于1),对于inside接口,使能nat-pre-in2out;对于outside接口,使能nat-pre-out2in。

使能报文重组feature:ip4-sv-reassembly-feature。将接口添加到nat44模块的接口池中(sm->interfaces)。

  else {
    if (sm->num_workers > 1)
      feature_name = is_inside ? "nat44-in2out-worker-handoff" : "nat44-out2in-worker-handoff";
    else
      feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";

      nat_validate_interface_counters (sm, sw_if_index);
      rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
      if (rv) return rv;
      vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0, 0);
      pool_get (sm->interfaces, i);
      i->sw_if_index = sw_if_index;
      i->flags = 0;
    }

如下命令显示nat44模块的接口池内容:

vpp# show nat44 interfaces
NAT44 interfaces:
 GigabitEthernet2/0/0 in
 GigabitEthernet3/0/0 out

四. nat44添加地址

分配snat_address_resolve_t地址结构,记录接口索引等。

int 
nat44_ed_add_interface_address (u32 sw_if_index, u8 twice_nat)
{   
  snat_main_t *sm = &snat_main;
  ip4_main_t *ip4_main = sm->ip4_main;
  ip4_address_t *first_int_addr;
  snat_address_resolve_t *ap;

  vec_add2 (sm->addr_to_resolve, ap, 1);
  ap->sw_if_index = sw_if_index;
  ap->is_twice_nat = twice_nat;
  ap->is_resolved = 0;

获取接口的首个IP地址,添加到nat44的地址池中。添加成功之后,设置is_resolved为真。

  first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
  if (first_int_addr) {
    rv = nat44_ed_add_address (first_int_addr, ~0, twice_nat);
    if (0 != rv) {
      nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat);
      return rv;
    }
    ap->is_resolved = 1;
  }

这里不涉及两次NAT的情况,twice_nat为false。添加的地址保存在nat44模块的addresses地址向量中,先检测其中是否已有此地址,避免重复添加。

int
nat44_ed_add_address (ip4_address_t *addr, u32 vrf_id, u8 twice_nat)
{
  snat_main_t *sm = &snat_main;
  snat_address_t *ap, *addresses;

  addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;

  // check if address already exists
  vec_foreach (ap, addresses) {
      if (ap->addr.as_u32 == addr->as_u32) {
          nat_log_err ("address exist");
          return VNET_API_ERROR_VALUE_EXIST;
      }
  }

在addresses向量中分配新成员,填入新地址。

  if (twice_nat)
      vec_add2 (sm->twice_nat_addresses, ap, 1);
  else
      vec_add2 (sm->addresses, ap, 1);

  ap->addr_len = ~0;
  ap->fib_index = ~0;
  ap->addr = *addr;

五. 节点流程

测试VPP配置了2个worker线程。如下,thread2在inside接口GigabitEthernet2/0/0接收到ICMP请求报文,不是IP分片,节点nat44-in2out-worker-handoff通过处理,将报文handoff到thread1。

------------------- Start of thread 2 vpp_wk_1 -------------------
Packet 2
00:06:05:665438: dpdk-input
  GigabitEthernet2/0/0 rx queue 1
  ICMP: 30.1.1.2 -> 192.168.12.254
  ICMP echo_request checksum 0x4bde id 1
00:06:05:665440: ethernet-input
  IP4: 50:7b:9d:c7:03:73 -> 00:60:e0:65:b5:c7
00:06:05:665440: ip4-input-no-checksum
  ICMP: 30.1.1.2 -> 192.168.12.254
  ICMP echo_request checksum 0x4bde id 1
00:06:05:665441: ip4-sv-reassembly-feature
  [not-fragmented]
00:06:05:665441: nat44-in2out-worker-handoff
    fragment id 0x3b72
  NAT44_IN2OUT_WORKER_HANDOFF : next-worker 1 trace index 1

以上通过frame-queue:fq_in2out_index,将报文handoff到了thread1的nat44-ed-in2out节点。对于第一个报文,session并不存在,需要进入慢速节点处理:nat44-ed-in2out-slowpath。

------------------- Start of thread 1 vpp_wk_0 -------------------
Packet 2
00:06:05:665444: handoff_trace
  HANDED-OFF: from thread 2 trace index 1
00:06:05:665444: nat44-ed-in2out
  NAT44_IN2OUT_ED_FAST_PATH: sw_if_index 5, next index 3
  search key local 30.1.1.2:1 remote 192.168.12.254:1 proto ICMP fib 0 thread-index 0 session-index 0
00:06:05:665447: nat44-ed-in2out-slowpath
  NAT44_IN2OUT_ED_SLOW_PATH: sw_if_index 5, next index 10, session 4, translation result 'success' via i2of
  i2of match: saddr 30.1.1.2 sport 1 daddr 192.168.12.254 dport 1 proto ICMP fib_idx 0 rewrite: saddr 192.168.12.12 daddr 192.168.12.254 icmp-id 28931 txfib 0
  o2if match: saddr 192.168.12.254 sport 28931 daddr 192.168.12.12 dport 28931 proto ICMP fib_idx 0 rewrite: daddr 30.1.1.2 icmp-id 1 txfib 0
00:06:05:665453: ip4-lookup
  fib 0 dpo-idx 3 flow hash: 0x00000000
  ICMP: 192.168.12.12 -> 192.168.12.254

另外,对于由外到内的报文,即由GigabitEthernet3/0/0接口接收的报文。节点nat44-out2in-worker-handoff匹配到session,并且session位于thread1,即当前的thread,不需要handoff处理;也不需要进入慢速路径节点nat44-ed-out2in-slowpath。

------------------- Start of thread 1 vpp_wk_0 -------------------
Packet 14
00:06:10:263962: ip4-input-no-checksum
  ICMP: 192.168.12.254 -> 192.168.12.12
  ICMP echo_reply checksum 0xe2da id 28931
00:06:10:263962: ip4-sv-reassembly-feature
  [not-fragmented]
00:06:10:263963: nat44-out2in-worker-handoff
  NAT44_OUT2IN_WORKER_HANDOFF : next-worker 1 trace index 13
00:06:10:263965: nat44-ed-out2in
  NAT44_OUT2IN_ED_FAST_PATH: sw_if_index 6, next index 10, session 4, translation result 'success' via o2if
  i2of match: saddr 30.1.1.2 sport 1 daddr 192.168.12.254 dport 1 proto ICMP fib_idx 0 rewrite: saddr 192.168.12.12 daddr 192.168.12.254 icmp-id 28931 txfib 0
  o2if match: saddr 192.168.12.254 sport 28931 daddr 192.168.12.12 dport 28931 proto ICMP fib_idx 0 rewrite: daddr 30.1.1.2 icmp-id 1 txfib 0
  search key local 192.168.12.254:28931 remote 192.168.12.12:28931 proto ICMP fib 0 thread-index 0 session-index 0
  no reason for slow path
00:06:10:263966: ip4-lookup
  fib 0 dpo-idx 4 flow hash: 0x00000000
  ICMP: 192.168.12.254 -> 30.1.1.2
  ICMP echo_reply checksum 0x53dd id 1
00:06:10:263967: ip4-rewrite
  tx_sw_if_index 5 dpo-idx 4 : ipv4 via 30.1.1.2 GigabitEthernet2/0/0: mtu:9000 next:3 flags:[] 507b9dc703730060e065b5c70800 flow hash: 0x00000000
  00000000: 507b9dc703730060e065b5c708004500003cb15400003f01ddc3c0a80cfe1e01
  00000020: 0102000053dd0001017e6162636465666768696a6b6c6d6e6f707172
00:06:10:263968: GigabitEthernet2/0/0-output

对于由GigabitEthernet2/0/0接口接收的第二个icmp请求报文,由于session已经存在,也不再需要进入慢速路径节点。由于报文被网卡送到了thread2线程,这里显示为handoff之后的处理。

------------------- Start of thread 1 vpp_wk_0 -------------------
Packet 13
00:06:10:263324: handoff_trace
  HANDED-OFF: from thread 2 trace index 5
00:06:10:263324: nat44-ed-in2out
  NAT44_IN2OUT_ED_FAST_PATH: sw_if_index 5, next index 10, session 4, translation result 'success' via i2of
  i2of match: saddr 30.1.1.2 sport 1 daddr 192.168.12.254 dport 1 proto ICMP fib_idx 0 rewrite: saddr 192.168.12.12 daddr 192.168.12.254 icmp-id 28931 txfib 0
  o2if match: saddr 192.168.12.254 sport 28931 daddr 192.168.12.12 dport 28931 proto ICMP fib_idx 0 rewrite: daddr 30.1.1.2 icmp-id 1 txfib 0
  lookup skipped - cached session index used
00:06:10:263327: ip4-lookup
  fib 0 dpo-idx 6 flow hash: 0x00000000
  ICMP: 192.168.12.12 -> 192.168.12.254
  ICMP echo_request checksum 0xdada id 28931