diff --git a/fast-classifier/Makefile b/fast-classifier/Makefile deleted file mode 100644 index 58dd06e01..000000000 --- a/fast-classifier/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -obj-$(CONFIG_FAST_CLASSIFIER) += fast-classifier.o - -ifeq ($(SFE_SUPPORT_IPV6),) -SFE_SUPPORT_IPV6=y -endif -ccflags-$(SFE_SUPPORT_IPV6) += -DSFE_SUPPORT_IPV6 - -ccflags-y += -I$(obj)/../shortcut-fe - -obj ?= . diff --git a/fast-classifier/fast-classifier.c b/fast-classifier/fast-classifier.c deleted file mode 100644 index d79404cba..000000000 --- a/fast-classifier/fast-classifier.c +++ /dev/null @@ -1,1892 +0,0 @@ -/* - * fast-classifier.c - * Shortcut forwarding engine connection manager. - * fast-classifier - * - * Copyright (c) 2013-2018 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include "fast-classifier.h" - -typedef enum fast_classifier_exception { - FAST_CL_EXCEPTION_PACKET_BROADCAST, - FAST_CL_EXCEPTION_PACKET_MULTICAST, - FAST_CL_EXCEPTION_NO_IIF, - FAST_CL_EXCEPTION_NO_CT, - FAST_CL_EXCEPTION_CT_NO_TRACK, - FAST_CL_EXCEPTION_CT_NO_CONFIRM, - FAST_CL_EXCEPTION_CT_IS_ALG, - FAST_CL_EXCEPTION_IS_IPV4_MCAST, - FAST_CL_EXCEPTION_IS_IPV6_MCAST, - FAST_CL_EXCEPTION_TCP_NOT_ASSURED, - FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED, - FAST_CL_EXCEPTION_UNKNOW_PROTOCOL, - FAST_CL_EXCEPTION_NO_SRC_DEV, - FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV, - FAST_CL_EXCEPTION_NO_DEST_DEV, - FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV, - FAST_CL_EXCEPTION_NO_BRIDGE, - FAST_CL_EXCEPTION_LOCAL_OUT, - FAST_CL_EXCEPTION_WAIT_FOR_ACCELERATION, - FAST_CL_EXCEPTION_UPDATE_PROTOCOL_FAIL, - FAST_CL_EXCEPTION_CT_DESTROY_MISS, - FAST_CL_EXCEPTION_MAX -} fast_classifier_exception_t; - -static char *fast_classifier_exception_events_string[FAST_CL_EXCEPTION_MAX] = { - "PACKET_BROADCAST", - "PACKET_MULTICAST", - "NO_IIF", - "NO_CT", - "CT_NO_TRACK", - "CT_NO_CONFIRM", - "CT_IS_ALG", - "IS_IPV4_MCAST", - "IS_IPV6_MCAST", - "TCP_NOT_ASSURED", - "TCP_NOT_ESTABLISHED", - "UNKNOW_PROTOCOL", - "NO_SRC_DEV", - "NO_SRC_XLATE_DEV", - "NO_DEST_DEV", - "NO_DEST_XLATE_DEV", - "NO_BRIDGE", - "LOCAL_OUT", - "WAIT_FOR_ACCELERATION", - "UPDATE_PROTOCOL_FAIL", - "CT_DESTROY_MISS", -}; - -/* - * Per-module structure. - */ -struct fast_classifier { - spinlock_t lock; /* Lock for SMP correctness */ - - /* - * Control state. - */ - struct kobject *sys_fast_classifier; /* sysfs linkage */ - - /* - * Callback notifiers. 
- */ - struct notifier_block dev_notifier; /* Device notifier */ - struct notifier_block inet_notifier; /* IPv4 notifier */ - struct notifier_block inet6_notifier; /* IPv6 notifier */ - u32 exceptions[FAST_CL_EXCEPTION_MAX]; -}; - -static struct fast_classifier __sc; - -static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = { - [FAST_CLASSIFIER_A_TUPLE] = { - .type = NLA_UNSPEC, - .len = sizeof(struct fast_classifier_tuple) - }, -}; - -static struct genl_multicast_group fast_classifier_genl_mcgrp[] = { - { - .name = FAST_CLASSIFIER_GENL_MCGRP, - }, -}; - -static struct genl_family fast_classifier_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = FAST_CLASSIFIER_GENL_HDRSIZE, - .name = FAST_CLASSIFIER_GENL_NAME, - .version = FAST_CLASSIFIER_GENL_VERSION, - .maxattr = FAST_CLASSIFIER_A_MAX, -}; - -static int fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info); -static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb, struct netlink_callback *cb); - -static struct genl_ops fast_classifier_gnl_ops[] = { - { - .cmd = FAST_CLASSIFIER_C_OFFLOAD, - .flags = 0, - .policy = fast_classifier_genl_policy, - .doit = fast_classifier_offload_genl_msg, - .dumpit = NULL, - }, - { - .cmd = FAST_CLASSIFIER_C_OFFLOADED, - .flags = 0, - .policy = fast_classifier_genl_policy, - .doit = NULL, - .dumpit = fast_classifier_nl_genl_msg_DUMP, - }, - { - .cmd = FAST_CLASSIFIER_C_DONE, - .flags = 0, - .policy = fast_classifier_genl_policy, - .doit = NULL, - .dumpit = fast_classifier_nl_genl_msg_DUMP, - }, -}; - -static atomic_t offload_msgs = ATOMIC_INIT(0); -static atomic_t offload_no_match_msgs = ATOMIC_INIT(0); -static atomic_t offloaded_msgs = ATOMIC_INIT(0); -static atomic_t done_msgs = ATOMIC_INIT(0); - -static atomic_t offloaded_fail_msgs = ATOMIC_INIT(0); -static atomic_t done_fail_msgs = ATOMIC_INIT(0); - -/* - * Accelerate incoming packets destined for bridge device - * If a incoming packet is ultimatly destined for - * a 
bridge device we will first see the packet coming - * from the phyiscal device, we can skip straight to - * processing the packet like it came from the bridge - * for some more performance gains - * - * This only works when the hook is above the bridge. We - * only implement ingress for now, because for egress we - * want to have the bridge devices qdiscs be used. - */ -static bool skip_to_bridge_ingress; - -/* - * fast_classifier_incr_exceptions() - * increase an exception counter. - */ -static inline void fast_classifier_incr_exceptions(fast_classifier_exception_t except) -{ - struct fast_classifier *sc = &__sc; - - spin_lock_bh(&sc->lock); - sc->exceptions[except]++; - spin_unlock_bh(&sc->lock); -} - -/* - * fast_classifier_recv() - * Handle packet receives. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. - */ -int fast_classifier_recv(struct sk_buff *skb) -{ - struct net_device *dev; - struct net_device *master_dev = NULL; - int ret = 0; - - /* - * We know that for the vast majority of packets we need the transport - * layer header so we may as well start to fetch it now! - */ - prefetch(skb->data + 32); - barrier(); - - dev = skb->dev; - - /* - * Process packet like it arrived on the bridge device - */ - if (skip_to_bridge_ingress && - (dev->priv_flags & IFF_BRIDGE_PORT)) { - master_dev = sfe_dev_get_master(dev); - if (!master_dev) { - DEBUG_WARN("master dev is NULL %s\n", dev->name); - goto rx_exit; - } - dev = master_dev; - } - - /* - * We're only interested in IPv4 and IPv6 packets. - */ - if (likely(htons(ETH_P_IP) == skb->protocol)) { - struct in_device *in_dev; - - /* - * Does our input device support IP processing? - */ - in_dev = (struct in_device *)dev->ip_ptr; - if (unlikely(!in_dev)) { - DEBUG_TRACE("no IP processing for device: %s\n", dev->name); - goto rx_exit; - } - - /* - * Does it have an IP address? If it doesn't then we can't do anything - * interesting here! 
- */ - if (unlikely(!in_dev->ifa_list)) { - DEBUG_TRACE("no IP address for device: %s\n", dev->name); - goto rx_exit; - } - - ret = sfe_ipv4_recv(dev, skb); - - } else if (likely(htons(ETH_P_IPV6) == skb->protocol)) { - struct inet6_dev *in_dev; - - /* - * Does our input device support IPv6 processing? - */ - in_dev = (struct inet6_dev *)dev->ip6_ptr; - if (unlikely(!in_dev)) { - DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name); - goto rx_exit; - } - - /* - * Does it have an IPv6 address? If it doesn't then we can't do anything - * interesting here! - */ - if (unlikely(list_empty(&in_dev->addr_list))) { - DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name); - goto rx_exit; - } - - ret = sfe_ipv6_recv(dev, skb); - - } else { - DEBUG_TRACE("not IP packet\n"); - } - -rx_exit: - if (master_dev) { - dev_put(master_dev); - } - - return ret; -} - -/* - * fast_classifier_find_dev_and_mac_addr() - * Find the device and MAC address for a given IPv4 address. - * - * Returns true if we find the device and MAC address, otherwise false. - * - * We look up the rtable entry for the address and, from its neighbour - * structure, obtain the hardware address. This means this function also - * works if the neighbours are routers too. - */ -static bool fast_classifier_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, bool is_v4) -{ - struct neighbour *neigh; - struct rtable *rt; - struct rt6_info *rt6; - struct dst_entry *dst; - struct net_device *mac_dev; - - /* - * Look up the rtable entry for the IP address then get the hardware - * address from its neighbour structure. This means this works when the - * neighbours are routers too. 
- */ - if (likely(is_v4)) { - rt = ip_route_output(&init_net, addr->ip, 0, 0, 0); - if (unlikely(IS_ERR(rt))) { - goto ret_fail; - } - - dst = (struct dst_entry *)rt; - } else { - rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0); - if (!rt6) { - goto ret_fail; - } - - dst = (struct dst_entry *)rt6; - } - - rcu_read_lock(); - neigh = sfe_dst_get_neighbour(dst, addr); - if (unlikely(!neigh)) { - rcu_read_unlock(); - dst_release(dst); - goto ret_fail; - } - - if (unlikely(!(neigh->nud_state & NUD_VALID))) { - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - goto ret_fail; - } - - mac_dev = neigh->dev; - if (!mac_dev) { - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - goto ret_fail; - } - - memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len); - - dev_hold(mac_dev); - *dev = mac_dev; - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - - return true; - -ret_fail: - if (is_v4) { - DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", addr); - - } else { - DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr); - } - - return false; -} - -static DEFINE_SPINLOCK(sfe_connections_lock); - -struct sfe_connection { - struct hlist_node hl; - struct sfe_connection_create *sic; - struct nf_conn *ct; - int hits; - int offload_permit; - int offloaded; - bool is_v4; - unsigned char smac[ETH_ALEN]; - unsigned char dmac[ETH_ALEN]; -}; - -static int sfe_connections_size; - -#define FC_CONN_HASH_ORDER 13 -static DEFINE_HASHTABLE(fc_conn_ht, FC_CONN_HASH_ORDER); - -static u32 fc_conn_hash(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr, - unsigned short sport, unsigned short dport, bool is_v4) -{ - u32 idx, cnt = ((is_v4 ? 
sizeof(saddr->ip) : sizeof(saddr->ip6))/sizeof(u32)); - u32 hash = 0; - - for (idx = 0; idx < cnt; idx++) { - hash ^= ((u32 *)saddr)[idx] ^ ((u32 *)daddr)[idx]; - } - - return hash ^ (sport | (dport << 16)); -} - -/* - * fast_classifier_update_protocol() - * Update sfe_ipv4_create struct with new protocol information before we offload - */ -static int fast_classifier_update_protocol(struct sfe_connection_create *p_sic, struct nf_conn *ct) -{ - switch (p_sic->protocol) { - case IPPROTO_TCP: - p_sic->src_td_window_scale = ct->proto.tcp.seen[0].td_scale; - p_sic->src_td_max_window = ct->proto.tcp.seen[0].td_maxwin; - p_sic->src_td_end = ct->proto.tcp.seen[0].td_end; - p_sic->src_td_max_end = ct->proto.tcp.seen[0].td_maxend; - p_sic->dest_td_window_scale = ct->proto.tcp.seen[1].td_scale; - p_sic->dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin; - p_sic->dest_td_end = ct->proto.tcp.seen[1].td_end; - p_sic->dest_td_max_end = ct->proto.tcp.seen[1].td_maxend; - - if (nf_ct_tcp_no_window_check - || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL) - || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) { - p_sic->flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK; - } - - /* - * If the connection is shutting down do not manage it. - * state can not be SYN_SENT, SYN_RECV because connection is assured - * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE. 
- */ - spin_lock(&ct->lock); - if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) { - spin_unlock(&ct->lock); - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED); - DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n", - ct->proto.tcp.state, &p_sic->src_ip, ntohs(p_sic->src_port), - &p_sic->dest_ip, ntohs(p_sic->dest_port)); - return 0; - } - spin_unlock(&ct->lock); - break; - - case IPPROTO_UDP: - break; - - default: - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UNKNOW_PROTOCOL); - DEBUG_TRACE("unhandled protocol %d\n", p_sic->protocol); - return 0; - } - - return 1; -} - -/* fast_classifier_send_genl_msg() - * Function to send a generic netlink message - */ -static void fast_classifier_send_genl_msg(int msg, struct fast_classifier_tuple *fc_msg) -{ - struct sk_buff *skb; - int rc; - int buf_len; - int total_len; - void *msg_head; - - /* - * Calculate our packet payload size. - * Start with our family header. - */ - buf_len = fast_classifier_gnl_family.hdrsize; - - /* - * Add the nla_total_size of each attribute we're going to nla_put(). - */ - buf_len += nla_total_size(sizeof(*fc_msg)); - - /* - * Lastly we need to add space for the NL message header since - * genlmsg_new only accounts for the GENL header and not the - * outer NL header. To do this, we use a NL helper function which - * calculates the total size of a netlink message given a payload size. - * Note this value does not include the GENL header, but that's - * added automatically by genlmsg_new. 
- */ - total_len = nlmsg_total_size(buf_len); - skb = genlmsg_new(total_len, GFP_ATOMIC); - if (!skb) - return; - - msg_head = genlmsg_put(skb, 0, 0, &fast_classifier_gnl_family, 0, msg); - if (!msg_head) { - nlmsg_free(skb); - return; - } - - rc = nla_put(skb, FAST_CLASSIFIER_A_TUPLE, sizeof(struct fast_classifier_tuple), fc_msg); - if (rc != 0) { - genlmsg_cancel(skb, msg_head); - nlmsg_free(skb); - return; - } - -#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 19 , 0)) - rc = genlmsg_end(skb, msg_head); - if (rc < 0) { - genlmsg_cancel(skb, msg_head); - nlmsg_free(skb); - return; - } -#else - genlmsg_end(skb, msg_head); - -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) - rc = genlmsg_multicast(&fast_classifier_gnl_family, skb, 0, 0, GFP_ATOMIC); -#else - rc = genlmsg_multicast(skb, 0, fast_classifier_genl_mcgrp[0].id, GFP_ATOMIC); -#endif - switch (msg) { - case FAST_CLASSIFIER_C_OFFLOADED: - if (rc == 0) { - atomic_inc(&offloaded_msgs); - } else { - atomic_inc(&offloaded_fail_msgs); - } - break; - case FAST_CLASSIFIER_C_DONE: - if (rc == 0) { - atomic_inc(&done_msgs); - } else { - atomic_inc(&done_fail_msgs); - } - break; - default: - DEBUG_ERROR("fast-classifer: Unknown message type sent!\n"); - break; - } - - DEBUG_TRACE("Notify NL message %d ", msg); - if (fc_msg->ethertype == AF_INET) { - DEBUG_TRACE("sip=%pI4 dip=%pI4 ", &fc_msg->src_saddr, &fc_msg->dst_saddr); - } else { - DEBUG_TRACE("sip=%pI6 dip=%pI6 ", &fc_msg->src_saddr, &fc_msg->dst_saddr); - } - DEBUG_TRACE("protocol=%d sport=%d dport=%d smac=%pM dmac=%pM\n", - fc_msg->proto, fc_msg->sport, fc_msg->dport, fc_msg->smac, fc_msg->dmac); -} - -/* - * fast_classifier_find_conn() - * find a connection object in the hash table - * @pre the sfe_connection_lock must be held before calling this function - */ -static struct sfe_connection * -fast_classifier_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr, - unsigned short sport, unsigned short dport, - unsigned char proto, bool is_v4) -{ - 
struct sfe_connection_create *p_sic; - struct sfe_connection *conn; - u32 key; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) - struct hlist_node *node; -#endif - - key = fc_conn_hash(saddr, daddr, sport, dport, is_v4); - - sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { - if (conn->is_v4 != is_v4) { - continue; - } - - p_sic = conn->sic; - - if (p_sic->protocol == proto && - p_sic->src_port == sport && - p_sic->dest_port == dport && - sfe_addr_equal(&p_sic->src_ip, saddr, is_v4) && - sfe_addr_equal(&p_sic->dest_ip, daddr, is_v4)) { - return conn; - } - } - - DEBUG_TRACE("connection not found\n"); - return NULL; -} - -/* - * fast_classifier_sb_find_conn() - * find a connection object in the hash table according to information of packet - * if not found, reverse the tuple and try again. - * @pre the sfe_connection_lock must be held before calling this function - */ -static struct sfe_connection * -fast_classifier_sb_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr, - unsigned short sport, unsigned short dport, - unsigned char proto, bool is_v4) -{ - struct sfe_connection_create *p_sic; - struct sfe_connection *conn; - u32 key; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) - struct hlist_node *node; -#endif - - key = fc_conn_hash(saddr, daddr, sport, dport, is_v4); - - sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { - if (conn->is_v4 != is_v4) { - continue; - } - - p_sic = conn->sic; - - if (p_sic->protocol == proto && - p_sic->src_port == sport && - p_sic->dest_port_xlate == dport && - sfe_addr_equal(&p_sic->src_ip, saddr, is_v4) && - sfe_addr_equal(&p_sic->dest_ip_xlate, daddr, is_v4)) { - return conn; - } - } - - /* - * Reverse the tuple and try again - */ - key = fc_conn_hash(daddr, saddr, dport, sport, is_v4); - - sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { - if (conn->is_v4 != is_v4) { - continue; - } - - p_sic = conn->sic; - - if (p_sic->protocol == proto && - p_sic->src_port == dport && - 
p_sic->dest_port_xlate == sport && - sfe_addr_equal(&p_sic->src_ip, daddr, is_v4) && - sfe_addr_equal(&p_sic->dest_ip_xlate, saddr, is_v4)) { - return conn; - } - } - - DEBUG_TRACE("connection not found\n"); - return NULL; -} - -/* - * fast_classifier_add_conn() - * add a connection object in the hash table if no duplicate - * @conn connection to add - * @return conn if successful, NULL if duplicate - */ -static struct sfe_connection * -fast_classifier_add_conn(struct sfe_connection *conn) -{ - struct sfe_connection_create *sic = conn->sic; - u32 key; - - spin_lock_bh(&sfe_connections_lock); - if (fast_classifier_find_conn(&sic->src_ip, &sic->dest_ip, sic->src_port, - sic->dest_port, sic->protocol, conn->is_v4)) { - spin_unlock_bh(&sfe_connections_lock); - return NULL; - } - - key = fc_conn_hash(&sic->src_ip, &sic->dest_ip, - sic->src_port, sic->dest_port, conn->is_v4); - - hash_add(fc_conn_ht, &conn->hl, key); - sfe_connections_size++; - spin_unlock_bh(&sfe_connections_lock); - - DEBUG_TRACE(" -> adding item to sfe_connections, new size: %d\n", sfe_connections_size); - - if (conn->is_v4) { - DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", - key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port); - } else { - DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", - key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port); - } - - return conn; -} - -/* - * fast_classifier_offload_genl_msg() - * Called from user space to offload a connection - */ -static int -fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info) -{ - struct nlattr *na; - struct fast_classifier_tuple *fc_msg; - struct sfe_connection *conn; - - na = info->attrs[FAST_CLASSIFIER_A_TUPLE]; - fc_msg = nla_data(na); - - if (fc_msg->ethertype == AF_INET) { - DEBUG_TRACE("want to offload: %d-%d, %pI4, %pI4, %d, %d SMAC=%pM 
DMAC=%pM\n", - fc_msg->ethertype, - fc_msg->proto, - &fc_msg->src_saddr, - &fc_msg->dst_saddr, - fc_msg->sport, - fc_msg->dport, - fc_msg->smac, - fc_msg->dmac); - } else { - DEBUG_TRACE("want to offload: %d-%d, %pI6, %pI6, %d, %d SMAC=%pM DMAC=%pM\n", - fc_msg->ethertype, - fc_msg->proto, - &fc_msg->src_saddr, - &fc_msg->dst_saddr, - fc_msg->sport, - fc_msg->dport, - fc_msg->smac, - fc_msg->dmac); - } - - spin_lock_bh(&sfe_connections_lock); - conn = fast_classifier_sb_find_conn((sfe_ip_addr_t *)&fc_msg->src_saddr, - (sfe_ip_addr_t *)&fc_msg->dst_saddr, - fc_msg->sport, - fc_msg->dport, - fc_msg->proto, - (fc_msg->ethertype == AF_INET)); - if (!conn) { - spin_unlock_bh(&sfe_connections_lock); - DEBUG_TRACE("REQUEST OFFLOAD NO MATCH\n"); - atomic_inc(&offload_no_match_msgs); - return 0; - } - - conn->offload_permit = 1; - spin_unlock_bh(&sfe_connections_lock); - atomic_inc(&offload_msgs); - - DEBUG_TRACE("INFO: calling sfe rule creation!\n"); - return 0; -} - -/* - * fast_classifier_nl_genl_msg_DUMP() - * ignore fast_classifier_messages OFFLOADED and DONE - */ -static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return 0; -} - -/* auto offload connection once we have this many packets*/ -static int offload_at_pkts = 128; - -/* - * fast_classifier_post_routing() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4) -{ - int ret; - struct sfe_connection_create sic; - struct sfe_connection_create *p_sic; - struct net_device *in; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - struct net_device *dev; - struct net_device *src_dev; - struct net_device *dest_dev; - struct net_device *src_dev_tmp; - struct net_device *dest_dev_tmp; - struct net_device *src_br_dev = NULL; - struct net_device *dest_br_dev = NULL; - struct nf_conntrack_tuple orig_tuple; - struct 
nf_conntrack_tuple reply_tuple; - struct sfe_connection *conn; - - /* - * Don't process broadcast or multicast packets. - */ - if (unlikely(skb->pkt_type == PACKET_BROADCAST)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_PACKET_BROADCAST); - DEBUG_TRACE("broadcast, ignoring\n"); - return NF_ACCEPT; - } - if (unlikely(skb->pkt_type == PACKET_MULTICAST)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_PACKET_MULTICAST); - DEBUG_TRACE("multicast, ignoring\n"); - return NF_ACCEPT; - } - - /* - * Don't process packets that are not being forwarded. - */ - in = dev_get_by_index(&init_net, skb->skb_iif); - if (!in) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_IIF); - DEBUG_TRACE("packet not forwarding\n"); - return NF_ACCEPT; - } - - dev_put(in); - - /* - * Don't process packets that aren't being tracked by conntrack. - */ - ct = nf_ct_get(skb, &ctinfo); - if (unlikely(!ct)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_CT); - DEBUG_TRACE("no conntrack connection, ignoring\n"); - return NF_ACCEPT; - } - - /* - * Don't process untracked connections. - */ - if (unlikely(nf_ct_is_untracked(ct))) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_NO_TRACK); - DEBUG_TRACE("untracked connection\n"); - return NF_ACCEPT; - } - - /* - * Unconfirmed connection may be dropped by Linux at the final step, - * So we don't process unconfirmed connections. - */ - if (!nf_ct_is_confirmed(ct)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_NO_CONFIRM); - DEBUG_TRACE("unconfirmed connection\n"); - return NF_ACCEPT; - } - - /* - * Don't process connections that require support from a 'helper' (typically a NAT ALG). - */ - if (unlikely(nfct_help(ct))) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_IS_ALG); - DEBUG_TRACE("connection has helper\n"); - return NF_ACCEPT; - } - - memset(&sic, 0, sizeof(sic)); - - /* - * Look up the details of our connection in conntrack. 
- * - * Note that the data we get from conntrack is for the "ORIGINAL" direction - * but our packet may actually be in the "REPLY" direction. - */ - orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - sic.protocol = (s32)orig_tuple.dst.protonum; - - sic.flags = 0; - - /* - * Get addressing information, non-NAT first - */ - if (likely(is_v4)) { - u32 dscp; - - sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip; - sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; - - if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_IS_IPV4_MCAST); - DEBUG_TRACE("multicast address\n"); - return NF_ACCEPT; - } - - /* - * NAT'ed addresses - note these are as seen from the 'reply' direction - * When NAT does not apply to this connection these will be identical to the above. - */ - sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip; - sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip; - - dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; - if (dscp) { - sic.dest_dscp = dscp; - sic.src_dscp = sic.dest_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - } else { - u32 dscp; - - sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); - sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); - - if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) || - ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_IS_IPV6_MCAST); - DEBUG_TRACE("multicast address\n"); - return NF_ACCEPT; - } - - /* - * NAT'ed addresses - note these are as seen from the 'reply' direction - * When NAT does not apply to this connection these will be identical to the above. 
- */ - sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6); - sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6); - - dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; - if (dscp) { - sic.dest_dscp = dscp; - sic.src_dscp = sic.dest_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - } - - switch (sic.protocol) { - case IPPROTO_TCP: - sic.src_port = orig_tuple.src.u.tcp.port; - sic.dest_port = orig_tuple.dst.u.tcp.port; - sic.src_port_xlate = reply_tuple.dst.u.tcp.port; - sic.dest_port_xlate = reply_tuple.src.u.tcp.port; - - /* - * Don't try to manage a non-established connection. - */ - if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ASSURED); - DEBUG_TRACE("non-established connection\n"); - return NF_ACCEPT; - } - - break; - - case IPPROTO_UDP: - sic.src_port = orig_tuple.src.u.udp.port; - sic.dest_port = orig_tuple.dst.u.udp.port; - sic.src_port_xlate = reply_tuple.dst.u.udp.port; - sic.dest_port_xlate = reply_tuple.src.u.udp.port; - break; - - default: - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UNKNOW_PROTOCOL); - DEBUG_TRACE("unhandled protocol %d\n", sic.protocol); - return NF_ACCEPT; - } - -#ifdef CONFIG_XFRM - sic.original_accel = 1; - sic.reply_accel = 1; -#endif - - /* - * Get QoS information - */ - if (skb->priority) { - sic.dest_priority = skb->priority; - sic.src_priority = sic.dest_priority; - sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY; - } - - if (is_v4) { - DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", - sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port); - } else { - DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", - sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port); - } - - /* - * If we already have this connection in our list, skip it - * XXX: this may 
need to be optimized - */ - spin_lock_bh(&sfe_connections_lock); - - conn = fast_classifier_find_conn(&sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port, sic.protocol, is_v4); - if (conn) { - conn->hits++; - - if (!conn->offloaded) { - if (conn->offload_permit || conn->hits >= offload_at_pkts) { - DEBUG_TRACE("OFFLOADING CONNECTION, TOO MANY HITS\n"); - - if (fast_classifier_update_protocol(conn->sic, conn->ct) == 0) { - spin_unlock_bh(&sfe_connections_lock); - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UPDATE_PROTOCOL_FAIL); - DEBUG_TRACE("UNKNOWN PROTOCOL OR CONNECTION CLOSING, SKIPPING\n"); - return NF_ACCEPT; - } - - DEBUG_TRACE("INFO: calling sfe rule creation!\n"); - spin_unlock_bh(&sfe_connections_lock); - - ret = is_v4 ? sfe_ipv4_create_rule(conn->sic) : sfe_ipv6_create_rule(conn->sic); - if ((ret == 0) || (ret == -EADDRINUSE)) { - struct fast_classifier_tuple fc_msg; - - if (is_v4) { - fc_msg.ethertype = AF_INET; - fc_msg.src_saddr.in = *((struct in_addr *)&sic.src_ip); - fc_msg.dst_saddr.in = *((struct in_addr *)&sic.dest_ip_xlate); - } else { - fc_msg.ethertype = AF_INET6; - fc_msg.src_saddr.in6 = *((struct in6_addr *)&sic.src_ip); - fc_msg.dst_saddr.in6 = *((struct in6_addr *)&sic.dest_ip_xlate); - } - - fc_msg.proto = sic.protocol; - fc_msg.sport = sic.src_port; - fc_msg.dport = sic.dest_port_xlate; - memcpy(fc_msg.smac, conn->smac, ETH_ALEN); - memcpy(fc_msg.dmac, conn->dmac, ETH_ALEN); - fast_classifier_send_genl_msg(FAST_CLASSIFIER_C_OFFLOADED, &fc_msg); - conn->offloaded = 1; - } - - return NF_ACCEPT; - } - } - - spin_unlock_bh(&sfe_connections_lock); - if (conn->offloaded) { - is_v4 ? 
sfe_ipv4_update_rule(conn->sic) : sfe_ipv6_update_rule(conn->sic); - } - - DEBUG_TRACE("FOUND, SKIPPING\n"); - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_WAIT_FOR_ACCELERATION); - return NF_ACCEPT; - } - - spin_unlock_bh(&sfe_connections_lock); - - /* - * Get the net device and MAC addresses that correspond to the various source and - * destination host addresses. - */ - if (!fast_classifier_find_dev_and_mac_addr(&sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_DEV); - return NF_ACCEPT; - } - src_dev = src_dev_tmp; - - if (!fast_classifier_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV); - goto done1; - } - dev_put(dev); - - if (!fast_classifier_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_DEV); - goto done1; - } - dev_put(dev); - - if (!fast_classifier_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV); - goto done1; - } - dest_dev = dest_dev_tmp; - - /* - * Our devices may actually be part of a bridge interface. If that's - * the case then find the bridge interface instead. 
- */ - if (src_dev->priv_flags & IFF_BRIDGE_PORT) { - src_br_dev = sfe_dev_get_master(src_dev); - if (!src_br_dev) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_BRIDGE); - DEBUG_TRACE("no bridge found for: %s\n", src_dev->name); - goto done2; - } - src_dev = src_br_dev; - } - - if (dest_dev->priv_flags & IFF_BRIDGE_PORT) { - dest_br_dev = sfe_dev_get_master(dest_dev); - if (!dest_br_dev) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_BRIDGE); - DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name); - goto done3; - } - dest_dev = dest_br_dev; - } - - sic.src_dev = src_dev; - sic.dest_dev = dest_dev; - - sic.src_mtu = src_dev->mtu; - sic.dest_mtu = dest_dev->mtu; - - if (skb->mark) { - DEBUG_TRACE("SKB MARK NON ZERO %x\n", skb->mark); - } - sic.mark = skb->mark; - - conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (!conn) { - printk(KERN_CRIT "ERROR: no memory for sfe\n"); - goto done4; - } - conn->hits = 0; - conn->offload_permit = 0; - conn->offloaded = 0; - conn->is_v4 = is_v4; - DEBUG_TRACE("Source MAC=%pM\n", sic.src_mac); - memcpy(conn->smac, sic.src_mac, ETH_ALEN); - memcpy(conn->dmac, sic.dest_mac_xlate, ETH_ALEN); - - p_sic = kmalloc(sizeof(*p_sic), GFP_ATOMIC); - if (!p_sic) { - printk(KERN_CRIT "ERROR: no memory for sfe\n"); - kfree(conn); - goto done4; - } - - memcpy(p_sic, &sic, sizeof(sic)); - conn->sic = p_sic; - conn->ct = ct; - - if (!fast_classifier_add_conn(conn)) { - kfree(conn->sic); - kfree(conn); - } - - /* - * If we had bridge ports then release them too. 
- */ -done4: - if (dest_br_dev) { - dev_put(dest_br_dev); - } -done3: - if (src_br_dev) { - dev_put(src_br_dev); - } -done2: - dev_put(dest_dev_tmp); -done1: - dev_put(src_dev_tmp); - - return NF_ACCEPT; -} - -/* - * fast_classifier_ipv4_post_routing_hook() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -fast_classifier_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) -{ - return fast_classifier_post_routing(skb, true); -} - -/* - * fast_classifier_ipv6_post_routing_hook() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -fast_classifier_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) -{ - return fast_classifier_post_routing(skb, false); -} - -/* - * fast_classifier_update_mark() - * updates the mark for a fast-classifier connection - */ -static void fast_classifier_update_mark(struct sfe_connection_mark *mark, bool is_v4) -{ - struct sfe_connection *conn; - - spin_lock_bh(&sfe_connections_lock); - - conn = fast_classifier_find_conn(&mark->src_ip, &mark->dest_ip, - mark->src_port, mark->dest_port, - mark->protocol, is_v4); - if (conn) { - conn->sic->mark = mark->mark; - } - - spin_unlock_bh(&sfe_connections_lock); -} - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -/* - * fast_classifier_conntrack_event() - * Callback event invoked when a conntrack connection's state changes. 
- */ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -static int fast_classifier_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) -#else -static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_event *item) -#endif -{ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS - struct nf_ct_event *item = ptr; -#endif - struct sfe_connection_destroy sid; - struct nf_conn *ct = item->ct; - struct nf_conntrack_tuple orig_tuple; - struct sfe_connection *conn; - struct fast_classifier_tuple fc_msg; - int offloaded = 0; - bool is_v4; - - /* - * If we don't have a conntrack entry then we're done. - */ - if (unlikely(!ct)) { - DEBUG_WARN("no ct in conntrack event callback\n"); - return NOTIFY_DONE; - } - - /* - * If this is an untracked connection then we can't have any state either. - */ - if (unlikely(nf_ct_is_untracked(ct))) { - DEBUG_TRACE("ignoring untracked conn\n"); - return NOTIFY_DONE; - } - - orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - sid.protocol = (s32)orig_tuple.dst.protonum; - - /* - * Extract information from the conntrack connection. We're only interested - * in nominal connection information (i.e. we're ignoring any NAT information). 
- */ - if (likely(nf_ct_l3num(ct) == AF_INET)) { - sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip; - sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; - is_v4 = true; - } else if (likely(nf_ct_l3num(ct) == AF_INET6)) { - sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); - sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); - is_v4 = false; - } else { - DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n"); - return NOTIFY_DONE; - } - - switch (sid.protocol) { - case IPPROTO_TCP: - sid.src_port = orig_tuple.src.u.tcp.port; - sid.dest_port = orig_tuple.dst.u.tcp.port; - break; - - case IPPROTO_UDP: - sid.src_port = orig_tuple.src.u.udp.port; - sid.dest_port = orig_tuple.dst.u.udp.port; - break; - - default: - DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol); - return NOTIFY_DONE; - } - - /* - * Check for an updated mark - */ - if ((events & (1 << IPCT_MARK)) && (ct->mark != 0)) { - struct sfe_connection_mark mark; - - mark.protocol = sid.protocol; - mark.src_ip = sid.src_ip; - mark.dest_ip = sid.dest_ip; - mark.src_port = sid.src_port; - mark.dest_port = sid.dest_port; - mark.mark = ct->mark; - - is_v4 ? 
sfe_ipv4_mark_rule(&mark) : sfe_ipv6_mark_rule(&mark); - fast_classifier_update_mark(&mark, is_v4); - } - - /* - * We're only interested in destroy events at this point - */ - if (unlikely(!(events & (1 << IPCT_DESTROY)))) { - DEBUG_TRACE("ignoring non-destroy event\n"); - return NOTIFY_DONE; - } - - if (is_v4) { - DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", - sid.protocol, &sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port); - } else { - DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", - sid.protocol, &sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port); - } - - spin_lock_bh(&sfe_connections_lock); - - conn = fast_classifier_find_conn(&sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port, sid.protocol, is_v4); - if (conn && conn->offloaded) { - if (is_v4) { - fc_msg.ethertype = AF_INET; - fc_msg.src_saddr.in = *((struct in_addr *)&conn->sic->src_ip); - fc_msg.dst_saddr.in = *((struct in_addr *)&conn->sic->dest_ip_xlate); - } else { - fc_msg.ethertype = AF_INET6; - fc_msg.src_saddr.in6 = *((struct in6_addr *)&conn->sic->src_ip); - fc_msg.dst_saddr.in6 = *((struct in6_addr *)&conn->sic->dest_ip_xlate); - } - - fc_msg.proto = conn->sic->protocol; - fc_msg.sport = conn->sic->src_port; - fc_msg.dport = conn->sic->dest_port_xlate; - memcpy(fc_msg.smac, conn->smac, ETH_ALEN); - memcpy(fc_msg.dmac, conn->dmac, ETH_ALEN); - offloaded = 1; - } - - if (conn) { - DEBUG_TRACE("Free connection\n"); - - hash_del(&conn->hl); - sfe_connections_size--; - kfree(conn->sic); - kfree(conn); - } else { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_DESTROY_MISS); - } - - spin_unlock_bh(&sfe_connections_lock); - - is_v4 ? 
sfe_ipv4_destroy_rule(&sid) : sfe_ipv6_destroy_rule(&sid); - - if (offloaded) { - fast_classifier_send_genl_msg(FAST_CLASSIFIER_C_DONE, &fc_msg); - } - - return NOTIFY_DONE; -} - -/* - * Netfilter conntrack event system to monitor connection tracking changes - */ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -static struct notifier_block fast_classifier_conntrack_notifier = { - .notifier_call = fast_classifier_conntrack_event, -}; -#else -static struct nf_ct_event_notifier fast_classifier_conntrack_notifier = { - .fcn = fast_classifier_conntrack_event, -}; -#endif -#endif - -/* - * Structure to establish a hook into the post routing netfilter point - this - * will pick up local outbound and packets going from one interface to another. - * - * Note: see include/linux/netfilter_ipv4.h for info related to priority levels. - * We want to examine packets after NAT translation and any ALG processing. - */ -static struct nf_hook_ops fast_classifier_ops_post_routing[] __read_mostly = { - SFE_IPV4_NF_POST_ROUTING_HOOK(__fast_classifier_ipv4_post_routing_hook), - SFE_IPV6_NF_POST_ROUTING_HOOK(__fast_classifier_ipv6_post_routing_hook), -}; - -/* - * fast_classifier_sync_rule() - * Synchronize a connection's state. 
- */ -static void fast_classifier_sync_rule(struct sfe_connection_sync *sis) -{ - struct nf_conntrack_tuple_hash *h; - struct nf_conntrack_tuple tuple; - struct nf_conn *ct; - SFE_NF_CONN_ACCT(acct); - - /* - * Create a tuple so as to be able to look up a connection - */ - memset(&tuple, 0, sizeof(tuple)); - tuple.src.u.all = (__be16)sis->src_port; - tuple.dst.dir = IP_CT_DIR_ORIGINAL; - tuple.dst.protonum = (u8)sis->protocol; - tuple.dst.u.all = (__be16)sis->dest_port; - - if (sis->is_v6) { - tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6); - tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6); - tuple.src.l3num = AF_INET6; - - DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n", - (int)tuple.dst.protonum, - &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all), - &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all)); - } else { - tuple.src.u3.ip = sis->src_ip.ip; - tuple.dst.u3.ip = sis->dest_ip.ip; - tuple.src.l3num = AF_INET; - - DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n", - (int)tuple.dst.protonum, - &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all), - &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all)); - } - - /* - * Update packet count for ingress on bridge device - */ - if (skip_to_bridge_ingress) { - struct rtnl_link_stats64 nlstats; - nlstats.tx_packets = 0; - nlstats.tx_bytes = 0; - - if (sis->src_dev && IFF_EBRIDGE && - (sis->src_new_packet_count || sis->src_new_byte_count)) { - nlstats.rx_packets = sis->src_new_packet_count; - nlstats.rx_bytes = sis->src_new_byte_count; - spin_lock_bh(&sfe_connections_lock); - br_dev_update_stats(sis->src_dev, &nlstats); - spin_unlock_bh(&sfe_connections_lock); - } - if (sis->dest_dev && IFF_EBRIDGE && - (sis->dest_new_packet_count || sis->dest_new_byte_count)) { - nlstats.rx_packets = sis->dest_new_packet_count; - nlstats.rx_bytes = sis->dest_new_byte_count; - spin_lock_bh(&sfe_connections_lock); - br_dev_update_stats(sis->dest_dev, &nlstats); - 
spin_unlock_bh(&sfe_connections_lock); - } - } - - /* - * Look up conntrack connection - */ - h = nf_conntrack_find_get(&init_net, SFE_NF_CT_DEFAULT_ZONE, &tuple); - if (unlikely(!h)) { - DEBUG_TRACE("no connection found\n"); - return; - } - - ct = nf_ct_tuplehash_to_ctrack(h); - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); - - /* - * Only update if this is not a fixed timeout - */ - if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { - spin_lock_bh(&ct->lock); - ct->timeout.expires += sis->delta_jiffies; - spin_unlock_bh(&ct->lock); - } - - acct = nf_conn_acct_find(ct); - if (acct) { - spin_lock_bh(&ct->lock); - atomic64_add(sis->src_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets); - atomic64_add(sis->src_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes); - atomic64_add(sis->dest_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets); - atomic64_add(sis->dest_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes); - spin_unlock_bh(&ct->lock); - } - - switch (sis->protocol) { - case IPPROTO_TCP: - spin_lock_bh(&ct->lock); - if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) { - ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window; - } - if ((s32)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) { - ct->proto.tcp.seen[0].td_end = sis->src_td_end; - } - if ((s32)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) { - ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end; - } - if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) { - ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window; - } - if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) { - ct->proto.tcp.seen[1].td_end = sis->dest_td_end; - } - if ((s32)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) { - ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end; - } - spin_unlock_bh(&ct->lock); - break; - } - - /* - * Release connection - */ - nf_ct_put(ct); -} - -/* - * 
fast_classifier_device_event() - */ -static int fast_classifier_device_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = SFE_DEV_EVENT_PTR(ptr); - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv4_destroy_all_rules_for_dev(dev); - sfe_ipv6_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * fast_classifier_inet_event() - */ -static int fast_classifier_inet_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv4_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * fast_classifier_inet6_event() - */ -static int fast_classifier_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev; - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv6_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * fast_classifier_get_offload_at_pkts() - */ -static ssize_t fast_classifier_get_offload_at_pkts(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", offload_at_pkts); -} - -/* - * fast_classifier_set_offload_at_pkts() - */ -static ssize_t fast_classifier_set_offload_at_pkts(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - long new; - int ret; - - ret = kstrtol(buf, 0, &new); - if (ret == -EINVAL || ((int)new != new)) - return -EINVAL; - - offload_at_pkts = new; - - return size; -} - -/* - * fast_classifier_get_debug_info() - */ -static ssize_t fast_classifier_get_debug_info(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - size_t len = 0; - struct sfe_connection *conn; - u32 i; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) - struct hlist_node *node; -#endif - - spin_lock_bh(&sfe_connections_lock); - len += scnprintf(buf, PAGE_SIZE 
- len, "size=%d offload=%d offload_no_match=%d" - " offloaded=%d done=%d offloaded_fail=%d done_fail=%d\n", - sfe_connections_size, - atomic_read(&offload_msgs), - atomic_read(&offload_no_match_msgs), - atomic_read(&offloaded_msgs), - atomic_read(&done_msgs), - atomic_read(&offloaded_fail_msgs), - atomic_read(&done_fail_msgs)); - sfe_hash_for_each(fc_conn_ht, i, node, conn, hl) { - len += scnprintf(buf + len, PAGE_SIZE - len, - (conn->is_v4 ? "o=%d, p=%d [%pM]:%pI4:%u %pI4:%u:[%pM] m=%08x h=%d\n" : "o=%d, p=%d [%pM]:%pI6:%u %pI6:%u:[%pM] m=%08x h=%d\n"), - conn->offloaded, - conn->sic->protocol, - conn->sic->src_mac, - &conn->sic->src_ip, - conn->sic->src_port, - &conn->sic->dest_ip, - conn->sic->dest_port, - conn->sic->dest_mac_xlate, - conn->sic->mark, - conn->hits); - } - spin_unlock_bh(&sfe_connections_lock); - - return len; -} - -/* - * fast_classifier_get_skip_bridge_ingress() - */ -static ssize_t fast_classifier_get_skip_bridge_ingress(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", skip_to_bridge_ingress); -} - -/* - * fast_classifier_set_skip_bridge_ingress() - */ -static ssize_t fast_classifier_set_skip_bridge_ingress(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - long new; - int ret; - - ret = kstrtol(buf, 0, &new); - if (ret == -EINVAL || ((int)new != new)) - return -EINVAL; - - skip_to_bridge_ingress = new ? 
1 : 0; - - return size; -} - -/* - * fast_classifier_get_exceptions - * dump exception counters - */ -static ssize_t fast_classifier_get_exceptions(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int idx, len; - struct fast_classifier *sc = &__sc; - - spin_lock_bh(&sc->lock); - for (len = 0, idx = 0; idx < FAST_CL_EXCEPTION_MAX; idx++) { - if (sc->exceptions[idx]) { - len += snprintf(buf + len, (ssize_t)(PAGE_SIZE - len), "%s = %d\n", fast_classifier_exception_events_string[idx], sc->exceptions[idx]); - } - } - spin_unlock_bh(&sc->lock); - - return len; -} - -/* - * sysfs attributes. - */ -static const struct device_attribute fast_classifier_offload_at_pkts_attr = - __ATTR(offload_at_pkts, S_IWUSR | S_IRUGO, fast_classifier_get_offload_at_pkts, fast_classifier_set_offload_at_pkts); -static const struct device_attribute fast_classifier_debug_info_attr = - __ATTR(debug_info, S_IRUGO, fast_classifier_get_debug_info, NULL); -static const struct device_attribute fast_classifier_skip_bridge_ingress = - __ATTR(skip_to_bridge_ingress, S_IWUSR | S_IRUGO, fast_classifier_get_skip_bridge_ingress, fast_classifier_set_skip_bridge_ingress); -static const struct device_attribute fast_classifier_exceptions_attr = - __ATTR(exceptions, S_IRUGO, fast_classifier_get_exceptions, NULL); - -/* - * fast_classifier_init() - */ -static int __init fast_classifier_init(void) -{ - struct fast_classifier *sc = &__sc; - int result = -1; - - printk(KERN_ALERT "fast-classifier: starting up\n"); - DEBUG_INFO("SFE CM init\n"); - - hash_init(fc_conn_ht); - - /* - * Create sys/fast_classifier - */ - sc->sys_fast_classifier = kobject_create_and_add("fast_classifier", NULL); - if (!sc->sys_fast_classifier) { - DEBUG_ERROR("failed to register fast_classifier\n"); - goto exit1; - } - - result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - if (result) { - DEBUG_ERROR("failed to register offload at pkgs: %d\n", result); - goto exit2; - } - 
- result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); - if (result) { - DEBUG_ERROR("failed to register debug dev: %d\n", result); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - goto exit2; - } - - result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); - if (result) { - DEBUG_ERROR("failed to register skip bridge on ingress: %d\n", result); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); - goto exit2; - } - - result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr); - if (result) { - DEBUG_ERROR("failed to register exceptions file: %d\n", result); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); - goto exit2; - } - - sc->dev_notifier.notifier_call = fast_classifier_device_event; - sc->dev_notifier.priority = 1; - register_netdevice_notifier(&sc->dev_notifier); - - sc->inet_notifier.notifier_call = fast_classifier_inet_event; - sc->inet_notifier.priority = 1; - register_inetaddr_notifier(&sc->inet_notifier); - - sc->inet6_notifier.notifier_call = fast_classifier_inet6_event; - sc->inet6_notifier.priority = 1; - register_inet6addr_notifier(&sc->inet6_notifier); - - /* - * Register our netfilter hooks. - */ - result = nf_register_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); - if (result < 0) { - DEBUG_ERROR("can't register nf post routing hook: %d\n", result); - goto exit3; - } - -#ifdef CONFIG_NF_CONNTRACK_EVENTS - /* - * Register a notifier hook to get fast notifications of expired connections. 
- */ - result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier); - if (result < 0) { - DEBUG_ERROR("can't register nf notifier hook: %d\n", result); - goto exit4; - } -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) - result = genl_register_family_with_ops_groups(&fast_classifier_gnl_family, - fast_classifier_gnl_ops, - fast_classifier_genl_mcgrp); - if (result) { - DEBUG_ERROR("failed to register genl ops: %d\n", result); - goto exit5; - } -#else - result = genl_register_family(&fast_classifier_gnl_family); - if (result) { - printk(KERN_CRIT "unable to register genl family\n"); - goto exit5; - } - - result = genl_register_ops(&fast_classifier_gnl_family, fast_classifier_gnl_ops); - if (result) { - printk(KERN_CRIT "unable to register ops\n"); - goto exit6; - } - - result = genl_register_mc_group(&fast_classifier_gnl_family, - fast_classifier_genl_mcgrp); - if (result) { - printk(KERN_CRIT "unable to register multicast group\n"); - goto exit6; - } -#endif - - printk(KERN_ALERT "fast-classifier: registered\n"); - - spin_lock_init(&sc->lock); - - /* - * Hook the receive path in the network stack. - */ - BUG_ON(athrs_fast_nat_recv); - RCU_INIT_POINTER(athrs_fast_nat_recv, fast_classifier_recv); - - /* - * Hook the shortcut sync callback. 
- */ - sfe_ipv4_register_sync_rule_callback(fast_classifier_sync_rule); - sfe_ipv6_register_sync_rule_callback(fast_classifier_sync_rule); - return 0; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)) -exit6: - genl_unregister_family(&fast_classifier_gnl_family); -#endif - -exit5: -#ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier); - -exit4: -#endif - nf_unregister_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); - -exit3: - unregister_inetaddr_notifier(&sc->inet_notifier); - unregister_inet6addr_notifier(&sc->inet6_notifier); - unregister_netdevice_notifier(&sc->dev_notifier); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr); - -exit2: - kobject_put(sc->sys_fast_classifier); - -exit1: - return result; -} - -/* - * fast_classifier_exit() - */ -static void __exit fast_classifier_exit(void) -{ - struct fast_classifier *sc = &__sc; - int result = -1; - - DEBUG_INFO("SFE CM exit\n"); - printk(KERN_ALERT "fast-classifier: shutting down\n"); - - /* - * Unregister our sync callback. - */ - sfe_ipv4_register_sync_rule_callback(NULL); - sfe_ipv6_register_sync_rule_callback(NULL); - - /* - * Unregister our receive callback. - */ - RCU_INIT_POINTER(athrs_fast_nat_recv, NULL); - - /* - * Wait for all callbacks to complete. - */ - rcu_barrier(); - - /* - * Destroy all connections. 
- */ - sfe_ipv4_destroy_all_rules_for_dev(NULL); - sfe_ipv6_destroy_all_rules_for_dev(NULL); - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)) - result = genl_unregister_ops(&fast_classifier_gnl_family, fast_classifier_gnl_ops); - if (result != 0) { - printk(KERN_CRIT "Unable to unreigster genl_ops\n"); - } -#endif - - result = genl_unregister_family(&fast_classifier_gnl_family); - if (result != 0) { - printk(KERN_CRIT "Unable to unreigster genl_family\n"); - } - -#ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier); - -#endif - nf_unregister_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); - - unregister_inet6addr_notifier(&sc->inet6_notifier); - unregister_inetaddr_notifier(&sc->inet_notifier); - unregister_netdevice_notifier(&sc->dev_notifier); - - kobject_put(sc->sys_fast_classifier); -} - -module_init(fast_classifier_init) -module_exit(fast_classifier_exit) - -MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/fast-classifier/fast-classifier.h b/fast-classifier/fast-classifier.h deleted file mode 100644 index 6b7a18cf6..000000000 --- a/fast-classifier/fast-classifier.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * User space header to send message to the fast classifier - * - * Copyright (c) 2013,2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include - -#define FAST_CLASSIFIER_GENL_VERSION (1) -#define FAST_CLASSIFIER_GENL_NAME "FC" -#define FAST_CLASSIFIER_GENL_MCGRP "FC_MCGRP" -#define FAST_CLASSIFIER_GENL_HDRSIZE (0) - -enum { - FAST_CLASSIFIER_A_UNSPEC, - FAST_CLASSIFIER_A_TUPLE, - __FAST_CLASSIFIER_A_MAX, -}; - -#define FAST_CLASSIFIER_A_MAX (__FAST_CLASSIFIER_A_MAX - 1) - -enum { - FAST_CLASSIFIER_C_UNSPEC, - FAST_CLASSIFIER_C_OFFLOAD, - FAST_CLASSIFIER_C_OFFLOADED, - FAST_CLASSIFIER_C_DONE, - __FAST_CLASSIFIER_C_MAX, -}; - -#define FAST_CLASSIFIER_C_MAX (__FAST_CLASSIFIER_C_MAX - 1) - -struct fast_classifier_tuple { - unsigned short ethertype; - unsigned char proto; - union { - struct in_addr in; - struct in6_addr in6; - } src_saddr; - union { - struct in_addr in; - struct in6_addr in6; - } dst_saddr; - unsigned short sport; - unsigned short dport; - unsigned char smac[ETH_ALEN]; - unsigned char dmac[ETH_ALEN]; -}; diff --git a/fast-classifier/nl_classifier_test.c b/fast-classifier/nl_classifier_test.c deleted file mode 100644 index 639417964..000000000 --- a/fast-classifier/nl_classifier_test.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright (c) 2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#define NL_CLASSIFIER_GENL_VERSION 1 -#define NL_CLASSIFIER_GENL_FAMILY "FC" -#define NL_CLASSIFIER_GENL_GROUP "FC_MCGRP" -#define NL_CLASSIFIER_GENL_HDRSIZE 0 - -enum NL_CLASSIFIER_CMD { - NL_CLASSIFIER_CMD_UNSPEC, - NL_CLASSIFIER_CMD_ACCEL, - NL_CLASSIFIER_CMD_ACCEL_OK, - NL_CLASSIFIER_CMD_CONNECTION_CLOSED, - NL_CLASSIFIER_CMD_MAX, -}; - -enum NL_CLASSIFIER_ATTR { - NL_CLASSIFIER_ATTR_UNSPEC, - NL_CLASSIFIER_ATTR_TUPLE, - NL_CLASSIFIER_ATTR_MAX, -}; - -union nl_classifier_tuple_ip { - struct in_addr in; - struct in6_addr in6; -}; - -struct nl_classifier_tuple { - unsigned short af; - unsigned char proto; - union nl_classifier_tuple_ip src_ip; - union nl_classifier_tuple_ip dst_ip; - unsigned short sport; - unsigned short dport; - unsigned char smac[6]; - unsigned char dmac[6]; -}; - -struct nl_classifier_instance { - struct nl_sock *sock; - int family_id; - int group_id; - int stop; -}; - -struct nl_classifier_instance nl_cls_inst; - -static struct nla_policy nl_classifier_genl_policy[(NL_CLASSIFIER_ATTR_MAX+1)] = { - [NL_CLASSIFIER_ATTR_TUPLE] = { .type = NLA_UNSPEC }, -}; - -void nl_classifier_dump_nl_tuple(struct nl_classifier_tuple *tuple) -{ - char ip_str[64]; - - printf("protocol = %s\n", (tuple->proto == IPPROTO_UDP) ? "udp" : ((tuple->proto == IPPROTO_TCP) ? 
"tcp" : "unknown")); - printf("source ip = %s\n", inet_ntop(tuple->af, &tuple->src_ip, ip_str, sizeof(ip_str))); - printf("destination ip = %s\n", inet_ntop(tuple->af, &tuple->dst_ip, ip_str, sizeof(ip_str))); - printf("source port = %d\n", ntohs(tuple->sport)); - printf("destination port = %d\n", ntohs(tuple->dport)); -} - -int nl_classifier_msg_recv(struct nl_msg *msg, void *arg) -{ - struct nlmsghdr *nlh = nlmsg_hdr(msg); - struct genlmsghdr *gnlh = nlmsg_data(nlh); - struct nlattr *attrs[(NL_CLASSIFIER_ATTR_MAX+1)]; - - genlmsg_parse(nlh, NL_CLASSIFIER_GENL_HDRSIZE, attrs, NL_CLASSIFIER_ATTR_MAX, nl_classifier_genl_policy); - - switch (gnlh->cmd) { - case NL_CLASSIFIER_CMD_ACCEL_OK: - printf("Acceleration successful:\n"); - nl_classifier_dump_nl_tuple(nla_data(attrs[NL_CLASSIFIER_ATTR_TUPLE])); - return NL_OK; - case NL_CLASSIFIER_CMD_CONNECTION_CLOSED: - printf("Connection is closed:\n"); - nl_classifier_dump_nl_tuple(nla_data(attrs[NL_CLASSIFIER_ATTR_TUPLE])); - return NL_OK; - default: - printf("nl classifier received unknow message %d\n", gnlh->cmd); - } - - return NL_SKIP; -} - -void nl_classifier_offload(struct nl_classifier_instance *inst, - unsigned char proto, unsigned long *src_saddr, - unsigned long *dst_saddr, unsigned short sport, - unsigned short dport, int af) -{ - struct nl_msg *msg; - int ret; - struct nl_classifier_tuple classifier_msg; - - memset(&classifier_msg, 0, sizeof(classifier_msg)); - classifier_msg.af = af; - classifier_msg.proto = proto; - memcpy(&classifier_msg.src_ip, src_saddr, (af == AF_INET ? 4 : 16)); - memcpy(&classifier_msg.dst_ip, dst_saddr, (af == AF_INET ? 
4 : 16)); - classifier_msg.sport = sport; - classifier_msg.dport = dport; - - msg = nlmsg_alloc(); - if (!msg) { - printf("Unable to allocate message\n"); - return; - } - - genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, inst->family_id, - NL_CLASSIFIER_GENL_HDRSIZE, NLM_F_REQUEST, - NL_CLASSIFIER_CMD_ACCEL, NL_CLASSIFIER_GENL_VERSION); - nla_put(msg, NL_CLASSIFIER_ATTR_TUPLE, sizeof(classifier_msg), &classifier_msg); - - ret = nl_send_auto(inst->sock, msg); - if (ret < 0) { - printf("send netlink message failed.\n"); - nlmsg_free(msg); - return; - } - - nlmsg_free(msg); - printf("nl classifier offload connection successful\n"); -} - -int nl_classifier_init(struct nl_classifier_instance *inst) -{ - int ret; - - inst->sock = nl_socket_alloc(); - if (!inst->sock) { - printf("Unable to allocation socket.\n"); - return -1; - } - genl_connect(inst->sock); - - inst->family_id = genl_ctrl_resolve(inst->sock, NL_CLASSIFIER_GENL_FAMILY); - if (inst->family_id < 0) { - printf("Unable to resolve family %s\n", NL_CLASSIFIER_GENL_FAMILY); - goto init_failed; - } - - inst->group_id = genl_ctrl_resolve_grp(inst->sock, NL_CLASSIFIER_GENL_FAMILY, NL_CLASSIFIER_GENL_GROUP); - if (inst->group_id < 0) { - printf("Unable to resolve mcast group %s\n", NL_CLASSIFIER_GENL_GROUP); - goto init_failed; - } - - ret = nl_socket_add_membership(inst->sock, inst->group_id); - if (ret < 0) { - printf("Unable to add membership\n"); - goto init_failed; - } - - nl_socket_disable_seq_check(inst->sock); - nl_socket_modify_cb(inst->sock, NL_CB_VALID, NL_CB_CUSTOM, nl_classifier_msg_recv, NULL); - - printf("nl classifier init successful\n"); - return 0; - -init_failed: - if (inst->sock) { - nl_close(inst->sock); - nl_socket_free(inst->sock); - inst->sock = NULL; - } - return -1; -} - -void nl_classifier_exit(struct nl_classifier_instance *inst) -{ - if (inst->sock) { - nl_close(inst->sock); - nl_socket_free(inst->sock); - inst->sock = NULL; - } - printf("nl classifier exit successful\n"); -} - -int 
nl_classifier_parse_arg(int argc, char *argv[], unsigned char *proto, unsigned long *src_saddr, - unsigned long *dst_saddr, unsigned short *sport, unsigned short *dport, int *af) -{ - int ret; - unsigned short port; - - if (argc < 7) { - printf("help: nl_classifier \n"); - return -1; - } - - if (0 == strncmp(argv[1], "v4", 2)) { - *af = AF_INET; - } else if (0 == strncmp(argv[1], "v6", 2)) { - *af = AF_INET6; - } else { - printf("Address family is not supported"); - return -1; - } - - if (0 == strncmp(argv[2], "udp", 3)) { - *proto = IPPROTO_UDP; - } else if (0 == strncmp(argv[2], "tcp", 3)) { - *proto = IPPROTO_TCP; - } else { - printf("Protocol is not supported"); - return -1; - } - - ret = inet_pton(*af, argv[3], src_saddr); - if (ret <= 0) { - printf("source ip has wrong format\n"); - return -1; - } - - ret = inet_pton(*af, argv[4], dst_saddr); - if (ret <= 0) { - printf("destination ip has wrong format\n"); - return -1; - } - - port = strtol(argv[5], NULL, 0); - *sport = htons(port); - port = strtol(argv[6], NULL, 0); - *dport = htons(port); - - printf("nl classifier parse arguments successful\n"); - return 0; -} - -int main(int argc, char *argv[]) -{ - struct nl_classifier_instance *inst = &nl_cls_inst; - unsigned char proto; - unsigned long src_addr[4]; - unsigned long dst_addr[4]; - unsigned short sport; - unsigned short dport; - int af; - int ret; - - ret = nl_classifier_parse_arg(argc, argv, &proto, src_addr, dst_addr, &sport, &dport, &af); - if (ret < 0) { - printf("Failed to parse arguments\n"); - return ret; - } - - ret = nl_classifier_init(inst); - if (ret < 0) { - printf("Unable to init generic netlink\n"); - return ret; - } - - nl_classifier_offload(inst, proto, src_addr, dst_addr, sport, dport, af); - - /* main loop to listen on message */ - while (!inst->stop) { - nl_recvmsgs_default(inst->sock); - } - - nl_classifier_exit(inst); - - return 0; -} diff --git a/fast-classifier/userspace_example.c b/fast-classifier/userspace_example.c deleted file 
mode 100644 index 4f4113d99..000000000 --- a/fast-classifier/userspace_example.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2013,2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include - -static struct nl_sock *sock; -static struct nl_sock *sock_event; -static int family; -static int grp_id; - -static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = { - [FAST_CLASSIFIER_A_TUPLE] = { .type = NLA_UNSPEC }, -}; - -void dump_fc_tuple(struct fast_classifier_tuple *fc_msg) -{ - char src_str[INET_ADDRSTRLEN]; - char dst_str[INET_ADDRSTRLEN]; - - printf("TUPLE: %d, %s, %s, %d, %d" - " SMAC=%02x:%02x:%02x:%02x:%02x:%02x", - " DMAC=%02x:%02x:%02x:%02x:%02x:%02x\n", - fc_msg->proto, - inet_ntop(AF_INET, - &fc_msg->src_saddr.in.s_addr, - src_str, - INET_ADDRSTRLEN), - inet_ntop(AF_INET, - &fc_msg->dst_saddr.in.s_addr, - dst_str, - INET_ADDRSTRLEN), - fc_msg->sport, fc_msg->dport, - fc_msg->smac[0], fc_msg->smac[1], fc_msg->smac[2], - fc_msg->smac[3], fc_msg->smac[4], fc_msg->smac[5], - fc_msg->dmac[0], fc_msg->dmac[1], fc_msg->dmac[2], - fc_msg->dmac[3], fc_msg->dmac[4], fc_msg->dmac[5]); -} - -static int parse_cb(struct nl_msg *msg, void *arg) -{ - struct 
nlmsghdr *nlh = nlmsg_hdr(msg); - struct genlmsghdr *gnlh = nlmsg_data(nlh); - struct nlattr *attrs[FAST_CLASSIFIER_A_MAX]; - - genlmsg_parse(nlh, 0, attrs, FAST_CLASSIFIER_A_MAX, fast_classifier_genl_policy); - - switch (gnlh->cmd) { - case FAST_CLASSIFIER_C_OFFLOADED: - printf("Got a offloaded message\n"); - dump_fc_tuple(nla_data(attrs[FAST_CLASSIFIER_A_TUPLE])); - return NL_OK; - case FAST_CLASSIFIER_C_DONE: - printf("Got a done message\n"); - dump_fc_tuple(nla_data(attrs[FAST_CLASSIFIER_A_TUPLE])); - return NL_OK; - } - - return NL_SKIP; -} - -int fast_classifier_init(void) -{ - int err; - - sock = nl_socket_alloc(); - if (!sock) { - printf("Unable to allocation socket.\n"); - return -1; - } - genl_connect(sock); - - sock_event = nl_socket_alloc(); - if (!sock_event) { - nl_close(sock); - nl_socket_free(sock); - printf("Unable to allocation socket.\n"); - return -1; - } - genl_connect(sock_event); - - family = genl_ctrl_resolve(sock, FAST_CLASSIFIER_GENL_NAME); - if (family < 0) { - nl_close(sock_event); - nl_close(sock); - nl_socket_free(sock); - nl_socket_free(sock_event); - printf("Unable to resolve family\n"); - return -1; - } - - grp_id = genl_ctrl_resolve_grp(sock, FAST_CLASSIFIER_GENL_NAME, - FAST_CLASSIFIER_GENL_MCGRP); - if (grp_id < 0) { - printf("Unable to resolve mcast group\n"); - return -1; - } - - err = nl_socket_add_membership(sock_event, grp_id); - if (err < 0) { - printf("Unable to add membership\n"); - return -1; - } - - nl_socket_disable_seq_check(sock_event); - nl_socket_modify_cb(sock_event, NL_CB_VALID, NL_CB_CUSTOM, parse_cb, NULL); - - return 0; -} - -void fast_classifier_close(void) -{ - nl_close(sock_event); - nl_close(sock); - nl_socket_free(sock_event); - nl_socket_free(sock); -} - -void fast_classifier_ipv4_offload(unsigned char proto, unsigned long src_saddr, - unsigned long dst_saddr, unsigned short sport, - unsigned short dport) -{ - struct nl_msg *msg; - int ret; -#ifdef DEBUG - char src_str[INET_ADDRSTRLEN]; - char 
dst_str[INET_ADDRSTRLEN]; -#endif - struct fast_classifier_tuple fc_msg; - -#ifdef DEBUG - printf("DEBUG: would offload: %d, %s, %s, %d, %d\n", proto, - inet_ntop(AF_INET, &src_saddr, src_str, INET_ADDRSTRLEN), - inet_ntop(AF_INET, &dst_saddr, dst_str, INET_ADDRSTRLEN), - sport, dport); -#endif - - fc_msg.proto = proto; - fc_msg.src_saddr.in.s_addr = src_saddr; - fc_msg.dst_saddr.in.s_addr = dst_saddr; - fc_msg.sport = sport; - fc_msg.dport = dport; - fc_msg.smac[0] = 'a'; - fc_msg.smac[1] = 'b'; - fc_msg.smac[2] = 'c'; - fc_msg.smac[3] = 'd'; - fc_msg.smac[4] = 'e'; - fc_msg.smac[5] = 'f'; - fc_msg.dmac[0] = 'f'; - fc_msg.dmac[1] = 'e'; - fc_msg.dmac[2] = 'd'; - fc_msg.dmac[3] = 'c'; - fc_msg.dmac[4] = 'b'; - fc_msg.dmac[5] = 'a'; - - if (fast_classifier_init() < 0) { - printf("Unable to init generic netlink\n"); - exit(1); - } - - msg = nlmsg_alloc(); - if (!msg) { - nl_socket_free(sock); - printf("Unable to allocate message\n"); - return; - } - - genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, family, - FAST_CLASSIFIER_GENL_HDRSIZE, NLM_F_REQUEST, - FAST_CLASSIFIER_C_OFFLOAD, FAST_CLASSIFIER_GENL_VERSION); - nla_put(msg, 1, sizeof(fc_msg), &fc_msg); - - ret = nl_send_auto_complete(sock, msg); - - nlmsg_free(msg); - if (ret < 0) { - printf("nlmsg_free failed"); - nl_close(sock); - nl_socket_free(sock); - return; - } - - ret = nl_wait_for_ack(sock); - if (ret < 0) { - printf("wait for ack failed"); - nl_close(sock); - nl_socket_free(sock); - return; - } -} - -void fast_classifier_listen_for_messages(void) -{ - printf("waiting for netlink events\n"); - - while (1) { - nl_recvmsgs_default(sock_event); - } -} - -int main(int argc, char *argv[]) -{ - if (fast_classifier_init() < 0) { - printf("Unable to init generic netlink\n"); - exit(1); - } - - fast_classifier_ipv4_offload('a', 0, 0, 0, 0); - - /* this never returns */ - fast_classifier_listen_for_messages(); - - fast_classifier_close(); - - return 0; -} diff --git a/luci-app-status/luasrc/controller/wan.lua 
b/luci-app-status/luasrc/controller/wan.lua index 667da64b8..b1b6eddae 100755 --- a/luci-app-status/luasrc/controller/wan.lua +++ b/luci-app-status/luasrc/controller/wan.lua @@ -213,9 +213,9 @@ function wizard_add() local channel = luci.http.formvalue("cbid.wifi.%s.channel" % wifi_intf) or "" local name = luci.http.formvalue("cbid.wifi.%s.name" % wifi_intf) or "" local key = luci.http.formvalue("cbid.wifi.%s.key" % wifi_intf) or "" - ucic:set("wireles",wifi_intf,"channel",channel) - ucic:set("wireles","default_" .. wifi_intf,"ssid",name) - ucic:set("wireles","default_" .. wifi_intf,"key",key) + ucic:set("wireless",wifi_intf,"channel",channel) + ucic:set("wireless","default_" .. wifi_intf,"ssid",name) + ucic:set("wireless","default_" .. wifi_intf,"key",key) end ucic:save("wireless") ucic:commit("wireless") diff --git a/shortcut-fe/.gitignore b/shortcut-fe/.gitignore deleted file mode 100644 index 958088547..000000000 --- a/shortcut-fe/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -# Ouptut files - -*.o -*.s - diff --git a/shortcut-fe/Kconfig b/shortcut-fe/Kconfig deleted file mode 100644 index 487f1e065..000000000 --- a/shortcut-fe/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# -# Shortcut forwarding engine -# - -config SHORTCUT_FE - tristate "Shortcut Forwarding Engine" - depends on NF_CONNTRACK - ---help--- - Shortcut is a fast in-kernel packet forwarding engine. - - To compile this code as a module, choose M here: the module will be - called shortcut-fe. - - If unsure, say N. diff --git a/shortcut-fe/Makefile b/shortcut-fe/Makefile deleted file mode 100644 index 3b1ceaa44..000000000 --- a/shortcut-fe/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -# -# Makefile for Shortcut FE. 
-# - -obj-m += shortcut-fe.o - -ifdef SFE_SUPPORT_IPV6 -obj-m += shortcut-fe-ipv6.o -endif - -obj-m += shortcut-fe-cm.o - -shortcut-fe-objs := \ - sfe_ipv4.o - -ifdef SFE_SUPPORT_IPV6 -shortcut-fe-ipv6-objs := \ - sfe_ipv6.o -endif - -shortcut-fe-cm-objs := \ - sfe_cm.o - diff --git a/shortcut-fe/README b/shortcut-fe/README deleted file mode 100644 index 1bf1cc255..000000000 --- a/shortcut-fe/README +++ /dev/null @@ -1,122 +0,0 @@ -Shortcut Forwarding Engine --------------------------- - -Welcome to "Shortcut" :-) - -Here's a quick FAQ: - - -Q) What is Shortcut? - -A) Shortcut is an in-Linux-kernel IP packet forwarding engine. It's designed -to offer very high speed IP packet forwarding based on IP connection tracking. -It's dramatically faster than the standard netfilter-based NAT forwarding path -but is designed to synchronise state back to netfilter/conntrack so that it -doesn't need to deal with all of the complexities of special cases. - - -Q) What versions of IP does it support? - -A) The current version only supports IPv4 but will be extended to support IPv6 in -the future. - - -Q) What transport protocols does it support? - -A) TCP and UDP. It also knows enough about ICMP to spot ICMP error messages -related to TCP and UDP and handle things accordingly. - - -Q) Is there a design spec for this software? - -A) Not at the moment. I'll write one when I get more time. The code is -intended to be a good tutorial though - it's very heavily commented. If you -find yourself reading something and not understanding it then I take that to -mean I've probably not done a sufficently good job of explaining what it's -doing in the comments. Let me know - I will try to fix it :-) - - -Q) Why was it written? - -A) It was written as a demonstration of what can be done to provide high -performance forwarding inside the kernel. 
There were two initial motivations: - -1) To provide a platform to enable research into how QoS analysis systems can -offload work and avoid huge Linux overheads. - -2) To provide a tool to investigate the behaviour of various processors, SoCs -and software sets so that we can characterize and design new network processor -SoCs. - - -Q) How much faster is it than the Linux kernel forwarding path? - -A) At the time of pushing this to github it's been tested on a QCA AP135. -This has a Scorpion (QCA Scopion, not the QMC one :-)) SoC, QCA9550. The -SoC's processor is a MIPS74K running at 720 MHz and with a DDR2 memory -subsystem that offers a peak of 600 MT/s (16-bit transfers). - -Running IPv4 NAT forwarding of UDP between the board's 2 GMAC ports and -using a SmartBits 200 as a traffic generator Linux is able to forward 70k PPS. -Once the SFE code is invoked this will increase to 350k PPS! - -There's also a slightly hacky mode which causes SFE to bypass the Linux -bridge layer, but this isn't really ready for use because it doesn't have -sufficient MAC address checks or integration of statistics back to the -Ethernet bridge, but that runs at 436k PPS. - - -Q) Are there any diagnostics? - -A) Yes, this is a research tool after all! There's a complex way to do this -that's more general purpose and a simple one - here's the simple one: - - mknod /dev/sfe c 253 0 - -The file /dev/sfe is an XML-ish output and provides details of all the -network connections currently being offloaded. It also reports the numbers -of packets that took various "exception" paths within the code. In addition -it provides a summary of the number of connections, attempts to accelerate -connections, cancel accelerations, etc. It also reports the numbers of -packets that were forwarded and not forwarded by the engine and has some -stats on the effectiveness of the hashing algorithm it uses. - - -Q) How does the code interact with Linux? 
- -A) There are four minor patches required to make this software run with -Linux. These are currently against a 3.3.8 or 3.4.0 kernel: - -* (net/core/dev.c) adds a hook to allow packets to be extracted out. - -* (net/netfilter/nf_conntrack_proto_tcp.c) exposes a state variable inside - netfilter that's necessary to enable TCP sequence and ACK checking within - the offload path. Note that this specific patch is against the QCA QSDK - patched version of 3.3.8 - there's a slightly braindead "performance" - patch in that kernel, courtesy of the OpenWrt community that makes the - Linux forwarding path slightly faster at the expense of losing - functionality :-( - -* (net/Kconfig) adds the shortcut-fe option. - -* (net/Makefile) adds the shortcut-fe build support. - -Once these are applied and the module is loaded then everything else -is automatic :-) The patches are in this git repo. - - -Q) Are any of the pieces reused from other projects? - -A) Yes! Some of the forwarding concepts are reused from the Ubicom Network -Accelerator that morphed into part of the Akronite NSS. This code has all -been substantially changed though to accomodate Linux's needs. - -There are also some pieces that I borrowed from the QCA "FastNAT" software -written by Xiaoping Fan . Xiaoping's code was the -first actual demonstration within QCA that this in-kernel concept could yield -signficant performance gains. - - -Enjoy! -Dave Hudson - diff --git a/shortcut-fe/fast-classifier/fast-classifier.c b/shortcut-fe/fast-classifier/fast-classifier.c deleted file mode 100644 index d79404cba..000000000 --- a/shortcut-fe/fast-classifier/fast-classifier.c +++ /dev/null @@ -1,1892 +0,0 @@ -/* - * fast-classifier.c - * Shortcut forwarding engine connection manager. - * fast-classifier - * - * Copyright (c) 2013-2018 The Linux Foundation. All rights reserved. 
- * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include "fast-classifier.h" - -typedef enum fast_classifier_exception { - FAST_CL_EXCEPTION_PACKET_BROADCAST, - FAST_CL_EXCEPTION_PACKET_MULTICAST, - FAST_CL_EXCEPTION_NO_IIF, - FAST_CL_EXCEPTION_NO_CT, - FAST_CL_EXCEPTION_CT_NO_TRACK, - FAST_CL_EXCEPTION_CT_NO_CONFIRM, - FAST_CL_EXCEPTION_CT_IS_ALG, - FAST_CL_EXCEPTION_IS_IPV4_MCAST, - FAST_CL_EXCEPTION_IS_IPV6_MCAST, - FAST_CL_EXCEPTION_TCP_NOT_ASSURED, - FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED, - FAST_CL_EXCEPTION_UNKNOW_PROTOCOL, - FAST_CL_EXCEPTION_NO_SRC_DEV, - FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV, - FAST_CL_EXCEPTION_NO_DEST_DEV, - FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV, - FAST_CL_EXCEPTION_NO_BRIDGE, - FAST_CL_EXCEPTION_LOCAL_OUT, - FAST_CL_EXCEPTION_WAIT_FOR_ACCELERATION, - FAST_CL_EXCEPTION_UPDATE_PROTOCOL_FAIL, - FAST_CL_EXCEPTION_CT_DESTROY_MISS, - FAST_CL_EXCEPTION_MAX -} fast_classifier_exception_t; - -static char *fast_classifier_exception_events_string[FAST_CL_EXCEPTION_MAX] = { - "PACKET_BROADCAST", - "PACKET_MULTICAST", - "NO_IIF", - "NO_CT", - 
"CT_NO_TRACK", - "CT_NO_CONFIRM", - "CT_IS_ALG", - "IS_IPV4_MCAST", - "IS_IPV6_MCAST", - "TCP_NOT_ASSURED", - "TCP_NOT_ESTABLISHED", - "UNKNOW_PROTOCOL", - "NO_SRC_DEV", - "NO_SRC_XLATE_DEV", - "NO_DEST_DEV", - "NO_DEST_XLATE_DEV", - "NO_BRIDGE", - "LOCAL_OUT", - "WAIT_FOR_ACCELERATION", - "UPDATE_PROTOCOL_FAIL", - "CT_DESTROY_MISS", -}; - -/* - * Per-module structure. - */ -struct fast_classifier { - spinlock_t lock; /* Lock for SMP correctness */ - - /* - * Control state. - */ - struct kobject *sys_fast_classifier; /* sysfs linkage */ - - /* - * Callback notifiers. - */ - struct notifier_block dev_notifier; /* Device notifier */ - struct notifier_block inet_notifier; /* IPv4 notifier */ - struct notifier_block inet6_notifier; /* IPv6 notifier */ - u32 exceptions[FAST_CL_EXCEPTION_MAX]; -}; - -static struct fast_classifier __sc; - -static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = { - [FAST_CLASSIFIER_A_TUPLE] = { - .type = NLA_UNSPEC, - .len = sizeof(struct fast_classifier_tuple) - }, -}; - -static struct genl_multicast_group fast_classifier_genl_mcgrp[] = { - { - .name = FAST_CLASSIFIER_GENL_MCGRP, - }, -}; - -static struct genl_family fast_classifier_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = FAST_CLASSIFIER_GENL_HDRSIZE, - .name = FAST_CLASSIFIER_GENL_NAME, - .version = FAST_CLASSIFIER_GENL_VERSION, - .maxattr = FAST_CLASSIFIER_A_MAX, -}; - -static int fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info); -static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb, struct netlink_callback *cb); - -static struct genl_ops fast_classifier_gnl_ops[] = { - { - .cmd = FAST_CLASSIFIER_C_OFFLOAD, - .flags = 0, - .policy = fast_classifier_genl_policy, - .doit = fast_classifier_offload_genl_msg, - .dumpit = NULL, - }, - { - .cmd = FAST_CLASSIFIER_C_OFFLOADED, - .flags = 0, - .policy = fast_classifier_genl_policy, - .doit = NULL, - .dumpit = fast_classifier_nl_genl_msg_DUMP, - }, - { - .cmd = 
FAST_CLASSIFIER_C_DONE, - .flags = 0, - .policy = fast_classifier_genl_policy, - .doit = NULL, - .dumpit = fast_classifier_nl_genl_msg_DUMP, - }, -}; - -static atomic_t offload_msgs = ATOMIC_INIT(0); -static atomic_t offload_no_match_msgs = ATOMIC_INIT(0); -static atomic_t offloaded_msgs = ATOMIC_INIT(0); -static atomic_t done_msgs = ATOMIC_INIT(0); - -static atomic_t offloaded_fail_msgs = ATOMIC_INIT(0); -static atomic_t done_fail_msgs = ATOMIC_INIT(0); - -/* - * Accelerate incoming packets destined for bridge device - * If a incoming packet is ultimatly destined for - * a bridge device we will first see the packet coming - * from the phyiscal device, we can skip straight to - * processing the packet like it came from the bridge - * for some more performance gains - * - * This only works when the hook is above the bridge. We - * only implement ingress for now, because for egress we - * want to have the bridge devices qdiscs be used. - */ -static bool skip_to_bridge_ingress; - -/* - * fast_classifier_incr_exceptions() - * increase an exception counter. - */ -static inline void fast_classifier_incr_exceptions(fast_classifier_exception_t except) -{ - struct fast_classifier *sc = &__sc; - - spin_lock_bh(&sc->lock); - sc->exceptions[except]++; - spin_unlock_bh(&sc->lock); -} - -/* - * fast_classifier_recv() - * Handle packet receives. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. - */ -int fast_classifier_recv(struct sk_buff *skb) -{ - struct net_device *dev; - struct net_device *master_dev = NULL; - int ret = 0; - - /* - * We know that for the vast majority of packets we need the transport - * layer header so we may as well start to fetch it now! 
- */ - prefetch(skb->data + 32); - barrier(); - - dev = skb->dev; - - /* - * Process packet like it arrived on the bridge device - */ - if (skip_to_bridge_ingress && - (dev->priv_flags & IFF_BRIDGE_PORT)) { - master_dev = sfe_dev_get_master(dev); - if (!master_dev) { - DEBUG_WARN("master dev is NULL %s\n", dev->name); - goto rx_exit; - } - dev = master_dev; - } - - /* - * We're only interested in IPv4 and IPv6 packets. - */ - if (likely(htons(ETH_P_IP) == skb->protocol)) { - struct in_device *in_dev; - - /* - * Does our input device support IP processing? - */ - in_dev = (struct in_device *)dev->ip_ptr; - if (unlikely(!in_dev)) { - DEBUG_TRACE("no IP processing for device: %s\n", dev->name); - goto rx_exit; - } - - /* - * Does it have an IP address? If it doesn't then we can't do anything - * interesting here! - */ - if (unlikely(!in_dev->ifa_list)) { - DEBUG_TRACE("no IP address for device: %s\n", dev->name); - goto rx_exit; - } - - ret = sfe_ipv4_recv(dev, skb); - - } else if (likely(htons(ETH_P_IPV6) == skb->protocol)) { - struct inet6_dev *in_dev; - - /* - * Does our input device support IPv6 processing? - */ - in_dev = (struct inet6_dev *)dev->ip6_ptr; - if (unlikely(!in_dev)) { - DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name); - goto rx_exit; - } - - /* - * Does it have an IPv6 address? If it doesn't then we can't do anything - * interesting here! - */ - if (unlikely(list_empty(&in_dev->addr_list))) { - DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name); - goto rx_exit; - } - - ret = sfe_ipv6_recv(dev, skb); - - } else { - DEBUG_TRACE("not IP packet\n"); - } - -rx_exit: - if (master_dev) { - dev_put(master_dev); - } - - return ret; -} - -/* - * fast_classifier_find_dev_and_mac_addr() - * Find the device and MAC address for a given IPv4 address. - * - * Returns true if we find the device and MAC address, otherwise false. 
- * - * We look up the rtable entry for the address and, from its neighbour - * structure, obtain the hardware address. This means this function also - * works if the neighbours are routers too. - */ -static bool fast_classifier_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, bool is_v4) -{ - struct neighbour *neigh; - struct rtable *rt; - struct rt6_info *rt6; - struct dst_entry *dst; - struct net_device *mac_dev; - - /* - * Look up the rtable entry for the IP address then get the hardware - * address from its neighbour structure. This means this works when the - * neighbours are routers too. - */ - if (likely(is_v4)) { - rt = ip_route_output(&init_net, addr->ip, 0, 0, 0); - if (unlikely(IS_ERR(rt))) { - goto ret_fail; - } - - dst = (struct dst_entry *)rt; - } else { - rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0); - if (!rt6) { - goto ret_fail; - } - - dst = (struct dst_entry *)rt6; - } - - rcu_read_lock(); - neigh = sfe_dst_get_neighbour(dst, addr); - if (unlikely(!neigh)) { - rcu_read_unlock(); - dst_release(dst); - goto ret_fail; - } - - if (unlikely(!(neigh->nud_state & NUD_VALID))) { - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - goto ret_fail; - } - - mac_dev = neigh->dev; - if (!mac_dev) { - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - goto ret_fail; - } - - memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len); - - dev_hold(mac_dev); - *dev = mac_dev; - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - - return true; - -ret_fail: - if (is_v4) { - DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", addr); - - } else { - DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr); - } - - return false; -} - -static DEFINE_SPINLOCK(sfe_connections_lock); - -struct sfe_connection { - struct hlist_node hl; - struct sfe_connection_create *sic; - struct nf_conn *ct; - int hits; - int offload_permit; - int offloaded; - bool is_v4; - unsigned char 
smac[ETH_ALEN]; - unsigned char dmac[ETH_ALEN]; -}; - -static int sfe_connections_size; - -#define FC_CONN_HASH_ORDER 13 -static DEFINE_HASHTABLE(fc_conn_ht, FC_CONN_HASH_ORDER); - -static u32 fc_conn_hash(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr, - unsigned short sport, unsigned short dport, bool is_v4) -{ - u32 idx, cnt = ((is_v4 ? sizeof(saddr->ip) : sizeof(saddr->ip6))/sizeof(u32)); - u32 hash = 0; - - for (idx = 0; idx < cnt; idx++) { - hash ^= ((u32 *)saddr)[idx] ^ ((u32 *)daddr)[idx]; - } - - return hash ^ (sport | (dport << 16)); -} - -/* - * fast_classifier_update_protocol() - * Update sfe_ipv4_create struct with new protocol information before we offload - */ -static int fast_classifier_update_protocol(struct sfe_connection_create *p_sic, struct nf_conn *ct) -{ - switch (p_sic->protocol) { - case IPPROTO_TCP: - p_sic->src_td_window_scale = ct->proto.tcp.seen[0].td_scale; - p_sic->src_td_max_window = ct->proto.tcp.seen[0].td_maxwin; - p_sic->src_td_end = ct->proto.tcp.seen[0].td_end; - p_sic->src_td_max_end = ct->proto.tcp.seen[0].td_maxend; - p_sic->dest_td_window_scale = ct->proto.tcp.seen[1].td_scale; - p_sic->dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin; - p_sic->dest_td_end = ct->proto.tcp.seen[1].td_end; - p_sic->dest_td_max_end = ct->proto.tcp.seen[1].td_maxend; - - if (nf_ct_tcp_no_window_check - || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL) - || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) { - p_sic->flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK; - } - - /* - * If the connection is shutting down do not manage it. - * state can not be SYN_SENT, SYN_RECV because connection is assured - * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE. 
- */ - spin_lock(&ct->lock); - if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) { - spin_unlock(&ct->lock); - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED); - DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n", - ct->proto.tcp.state, &p_sic->src_ip, ntohs(p_sic->src_port), - &p_sic->dest_ip, ntohs(p_sic->dest_port)); - return 0; - } - spin_unlock(&ct->lock); - break; - - case IPPROTO_UDP: - break; - - default: - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UNKNOW_PROTOCOL); - DEBUG_TRACE("unhandled protocol %d\n", p_sic->protocol); - return 0; - } - - return 1; -} - -/* fast_classifier_send_genl_msg() - * Function to send a generic netlink message - */ -static void fast_classifier_send_genl_msg(int msg, struct fast_classifier_tuple *fc_msg) -{ - struct sk_buff *skb; - int rc; - int buf_len; - int total_len; - void *msg_head; - - /* - * Calculate our packet payload size. - * Start with our family header. - */ - buf_len = fast_classifier_gnl_family.hdrsize; - - /* - * Add the nla_total_size of each attribute we're going to nla_put(). - */ - buf_len += nla_total_size(sizeof(*fc_msg)); - - /* - * Lastly we need to add space for the NL message header since - * genlmsg_new only accounts for the GENL header and not the - * outer NL header. To do this, we use a NL helper function which - * calculates the total size of a netlink message given a payload size. - * Note this value does not include the GENL header, but that's - * added automatically by genlmsg_new. 
- */ - total_len = nlmsg_total_size(buf_len); - skb = genlmsg_new(total_len, GFP_ATOMIC); - if (!skb) - return; - - msg_head = genlmsg_put(skb, 0, 0, &fast_classifier_gnl_family, 0, msg); - if (!msg_head) { - nlmsg_free(skb); - return; - } - - rc = nla_put(skb, FAST_CLASSIFIER_A_TUPLE, sizeof(struct fast_classifier_tuple), fc_msg); - if (rc != 0) { - genlmsg_cancel(skb, msg_head); - nlmsg_free(skb); - return; - } - -#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 19 , 0)) - rc = genlmsg_end(skb, msg_head); - if (rc < 0) { - genlmsg_cancel(skb, msg_head); - nlmsg_free(skb); - return; - } -#else - genlmsg_end(skb, msg_head); - -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) - rc = genlmsg_multicast(&fast_classifier_gnl_family, skb, 0, 0, GFP_ATOMIC); -#else - rc = genlmsg_multicast(skb, 0, fast_classifier_genl_mcgrp[0].id, GFP_ATOMIC); -#endif - switch (msg) { - case FAST_CLASSIFIER_C_OFFLOADED: - if (rc == 0) { - atomic_inc(&offloaded_msgs); - } else { - atomic_inc(&offloaded_fail_msgs); - } - break; - case FAST_CLASSIFIER_C_DONE: - if (rc == 0) { - atomic_inc(&done_msgs); - } else { - atomic_inc(&done_fail_msgs); - } - break; - default: - DEBUG_ERROR("fast-classifer: Unknown message type sent!\n"); - break; - } - - DEBUG_TRACE("Notify NL message %d ", msg); - if (fc_msg->ethertype == AF_INET) { - DEBUG_TRACE("sip=%pI4 dip=%pI4 ", &fc_msg->src_saddr, &fc_msg->dst_saddr); - } else { - DEBUG_TRACE("sip=%pI6 dip=%pI6 ", &fc_msg->src_saddr, &fc_msg->dst_saddr); - } - DEBUG_TRACE("protocol=%d sport=%d dport=%d smac=%pM dmac=%pM\n", - fc_msg->proto, fc_msg->sport, fc_msg->dport, fc_msg->smac, fc_msg->dmac); -} - -/* - * fast_classifier_find_conn() - * find a connection object in the hash table - * @pre the sfe_connection_lock must be held before calling this function - */ -static struct sfe_connection * -fast_classifier_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr, - unsigned short sport, unsigned short dport, - unsigned char proto, bool is_v4) -{ - 
struct sfe_connection_create *p_sic; - struct sfe_connection *conn; - u32 key; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) - struct hlist_node *node; -#endif - - key = fc_conn_hash(saddr, daddr, sport, dport, is_v4); - - sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { - if (conn->is_v4 != is_v4) { - continue; - } - - p_sic = conn->sic; - - if (p_sic->protocol == proto && - p_sic->src_port == sport && - p_sic->dest_port == dport && - sfe_addr_equal(&p_sic->src_ip, saddr, is_v4) && - sfe_addr_equal(&p_sic->dest_ip, daddr, is_v4)) { - return conn; - } - } - - DEBUG_TRACE("connection not found\n"); - return NULL; -} - -/* - * fast_classifier_sb_find_conn() - * find a connection object in the hash table according to information of packet - * if not found, reverse the tuple and try again. - * @pre the sfe_connection_lock must be held before calling this function - */ -static struct sfe_connection * -fast_classifier_sb_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr, - unsigned short sport, unsigned short dport, - unsigned char proto, bool is_v4) -{ - struct sfe_connection_create *p_sic; - struct sfe_connection *conn; - u32 key; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) - struct hlist_node *node; -#endif - - key = fc_conn_hash(saddr, daddr, sport, dport, is_v4); - - sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { - if (conn->is_v4 != is_v4) { - continue; - } - - p_sic = conn->sic; - - if (p_sic->protocol == proto && - p_sic->src_port == sport && - p_sic->dest_port_xlate == dport && - sfe_addr_equal(&p_sic->src_ip, saddr, is_v4) && - sfe_addr_equal(&p_sic->dest_ip_xlate, daddr, is_v4)) { - return conn; - } - } - - /* - * Reverse the tuple and try again - */ - key = fc_conn_hash(daddr, saddr, dport, sport, is_v4); - - sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { - if (conn->is_v4 != is_v4) { - continue; - } - - p_sic = conn->sic; - - if (p_sic->protocol == proto && - p_sic->src_port == dport && - 
p_sic->dest_port_xlate == sport && - sfe_addr_equal(&p_sic->src_ip, daddr, is_v4) && - sfe_addr_equal(&p_sic->dest_ip_xlate, saddr, is_v4)) { - return conn; - } - } - - DEBUG_TRACE("connection not found\n"); - return NULL; -} - -/* - * fast_classifier_add_conn() - * add a connection object in the hash table if no duplicate - * @conn connection to add - * @return conn if successful, NULL if duplicate - */ -static struct sfe_connection * -fast_classifier_add_conn(struct sfe_connection *conn) -{ - struct sfe_connection_create *sic = conn->sic; - u32 key; - - spin_lock_bh(&sfe_connections_lock); - if (fast_classifier_find_conn(&sic->src_ip, &sic->dest_ip, sic->src_port, - sic->dest_port, sic->protocol, conn->is_v4)) { - spin_unlock_bh(&sfe_connections_lock); - return NULL; - } - - key = fc_conn_hash(&sic->src_ip, &sic->dest_ip, - sic->src_port, sic->dest_port, conn->is_v4); - - hash_add(fc_conn_ht, &conn->hl, key); - sfe_connections_size++; - spin_unlock_bh(&sfe_connections_lock); - - DEBUG_TRACE(" -> adding item to sfe_connections, new size: %d\n", sfe_connections_size); - - if (conn->is_v4) { - DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", - key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port); - } else { - DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", - key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port); - } - - return conn; -} - -/* - * fast_classifier_offload_genl_msg() - * Called from user space to offload a connection - */ -static int -fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info) -{ - struct nlattr *na; - struct fast_classifier_tuple *fc_msg; - struct sfe_connection *conn; - - na = info->attrs[FAST_CLASSIFIER_A_TUPLE]; - fc_msg = nla_data(na); - - if (fc_msg->ethertype == AF_INET) { - DEBUG_TRACE("want to offload: %d-%d, %pI4, %pI4, %d, %d SMAC=%pM 
DMAC=%pM\n", - fc_msg->ethertype, - fc_msg->proto, - &fc_msg->src_saddr, - &fc_msg->dst_saddr, - fc_msg->sport, - fc_msg->dport, - fc_msg->smac, - fc_msg->dmac); - } else { - DEBUG_TRACE("want to offload: %d-%d, %pI6, %pI6, %d, %d SMAC=%pM DMAC=%pM\n", - fc_msg->ethertype, - fc_msg->proto, - &fc_msg->src_saddr, - &fc_msg->dst_saddr, - fc_msg->sport, - fc_msg->dport, - fc_msg->smac, - fc_msg->dmac); - } - - spin_lock_bh(&sfe_connections_lock); - conn = fast_classifier_sb_find_conn((sfe_ip_addr_t *)&fc_msg->src_saddr, - (sfe_ip_addr_t *)&fc_msg->dst_saddr, - fc_msg->sport, - fc_msg->dport, - fc_msg->proto, - (fc_msg->ethertype == AF_INET)); - if (!conn) { - spin_unlock_bh(&sfe_connections_lock); - DEBUG_TRACE("REQUEST OFFLOAD NO MATCH\n"); - atomic_inc(&offload_no_match_msgs); - return 0; - } - - conn->offload_permit = 1; - spin_unlock_bh(&sfe_connections_lock); - atomic_inc(&offload_msgs); - - DEBUG_TRACE("INFO: calling sfe rule creation!\n"); - return 0; -} - -/* - * fast_classifier_nl_genl_msg_DUMP() - * ignore fast_classifier_messages OFFLOADED and DONE - */ -static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return 0; -} - -/* auto offload connection once we have this many packets*/ -static int offload_at_pkts = 128; - -/* - * fast_classifier_post_routing() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4) -{ - int ret; - struct sfe_connection_create sic; - struct sfe_connection_create *p_sic; - struct net_device *in; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - struct net_device *dev; - struct net_device *src_dev; - struct net_device *dest_dev; - struct net_device *src_dev_tmp; - struct net_device *dest_dev_tmp; - struct net_device *src_br_dev = NULL; - struct net_device *dest_br_dev = NULL; - struct nf_conntrack_tuple orig_tuple; - struct 
nf_conntrack_tuple reply_tuple; - struct sfe_connection *conn; - - /* - * Don't process broadcast or multicast packets. - */ - if (unlikely(skb->pkt_type == PACKET_BROADCAST)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_PACKET_BROADCAST); - DEBUG_TRACE("broadcast, ignoring\n"); - return NF_ACCEPT; - } - if (unlikely(skb->pkt_type == PACKET_MULTICAST)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_PACKET_MULTICAST); - DEBUG_TRACE("multicast, ignoring\n"); - return NF_ACCEPT; - } - - /* - * Don't process packets that are not being forwarded. - */ - in = dev_get_by_index(&init_net, skb->skb_iif); - if (!in) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_IIF); - DEBUG_TRACE("packet not forwarding\n"); - return NF_ACCEPT; - } - - dev_put(in); - - /* - * Don't process packets that aren't being tracked by conntrack. - */ - ct = nf_ct_get(skb, &ctinfo); - if (unlikely(!ct)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_CT); - DEBUG_TRACE("no conntrack connection, ignoring\n"); - return NF_ACCEPT; - } - - /* - * Don't process untracked connections. - */ - if (unlikely(nf_ct_is_untracked(ct))) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_NO_TRACK); - DEBUG_TRACE("untracked connection\n"); - return NF_ACCEPT; - } - - /* - * Unconfirmed connection may be dropped by Linux at the final step, - * So we don't process unconfirmed connections. - */ - if (!nf_ct_is_confirmed(ct)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_NO_CONFIRM); - DEBUG_TRACE("unconfirmed connection\n"); - return NF_ACCEPT; - } - - /* - * Don't process connections that require support from a 'helper' (typically a NAT ALG). - */ - if (unlikely(nfct_help(ct))) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_IS_ALG); - DEBUG_TRACE("connection has helper\n"); - return NF_ACCEPT; - } - - memset(&sic, 0, sizeof(sic)); - - /* - * Look up the details of our connection in conntrack. 
- * - * Note that the data we get from conntrack is for the "ORIGINAL" direction - * but our packet may actually be in the "REPLY" direction. - */ - orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - sic.protocol = (s32)orig_tuple.dst.protonum; - - sic.flags = 0; - - /* - * Get addressing information, non-NAT first - */ - if (likely(is_v4)) { - u32 dscp; - - sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip; - sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; - - if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_IS_IPV4_MCAST); - DEBUG_TRACE("multicast address\n"); - return NF_ACCEPT; - } - - /* - * NAT'ed addresses - note these are as seen from the 'reply' direction - * When NAT does not apply to this connection these will be identical to the above. - */ - sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip; - sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip; - - dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; - if (dscp) { - sic.dest_dscp = dscp; - sic.src_dscp = sic.dest_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - } else { - u32 dscp; - - sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); - sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); - - if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) || - ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_IS_IPV6_MCAST); - DEBUG_TRACE("multicast address\n"); - return NF_ACCEPT; - } - - /* - * NAT'ed addresses - note these are as seen from the 'reply' direction - * When NAT does not apply to this connection these will be identical to the above. 
- */ - sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6); - sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6); - - dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; - if (dscp) { - sic.dest_dscp = dscp; - sic.src_dscp = sic.dest_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - } - - switch (sic.protocol) { - case IPPROTO_TCP: - sic.src_port = orig_tuple.src.u.tcp.port; - sic.dest_port = orig_tuple.dst.u.tcp.port; - sic.src_port_xlate = reply_tuple.dst.u.tcp.port; - sic.dest_port_xlate = reply_tuple.src.u.tcp.port; - - /* - * Don't try to manage a non-established connection. - */ - if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ASSURED); - DEBUG_TRACE("non-established connection\n"); - return NF_ACCEPT; - } - - break; - - case IPPROTO_UDP: - sic.src_port = orig_tuple.src.u.udp.port; - sic.dest_port = orig_tuple.dst.u.udp.port; - sic.src_port_xlate = reply_tuple.dst.u.udp.port; - sic.dest_port_xlate = reply_tuple.src.u.udp.port; - break; - - default: - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UNKNOW_PROTOCOL); - DEBUG_TRACE("unhandled protocol %d\n", sic.protocol); - return NF_ACCEPT; - } - -#ifdef CONFIG_XFRM - sic.original_accel = 1; - sic.reply_accel = 1; -#endif - - /* - * Get QoS information - */ - if (skb->priority) { - sic.dest_priority = skb->priority; - sic.src_priority = sic.dest_priority; - sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY; - } - - if (is_v4) { - DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", - sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port); - } else { - DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", - sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port); - } - - /* - * If we already have this connection in our list, skip it - * XXX: this may 
need to be optimized - */ - spin_lock_bh(&sfe_connections_lock); - - conn = fast_classifier_find_conn(&sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port, sic.protocol, is_v4); - if (conn) { - conn->hits++; - - if (!conn->offloaded) { - if (conn->offload_permit || conn->hits >= offload_at_pkts) { - DEBUG_TRACE("OFFLOADING CONNECTION, TOO MANY HITS\n"); - - if (fast_classifier_update_protocol(conn->sic, conn->ct) == 0) { - spin_unlock_bh(&sfe_connections_lock); - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UPDATE_PROTOCOL_FAIL); - DEBUG_TRACE("UNKNOWN PROTOCOL OR CONNECTION CLOSING, SKIPPING\n"); - return NF_ACCEPT; - } - - DEBUG_TRACE("INFO: calling sfe rule creation!\n"); - spin_unlock_bh(&sfe_connections_lock); - - ret = is_v4 ? sfe_ipv4_create_rule(conn->sic) : sfe_ipv6_create_rule(conn->sic); - if ((ret == 0) || (ret == -EADDRINUSE)) { - struct fast_classifier_tuple fc_msg; - - if (is_v4) { - fc_msg.ethertype = AF_INET; - fc_msg.src_saddr.in = *((struct in_addr *)&sic.src_ip); - fc_msg.dst_saddr.in = *((struct in_addr *)&sic.dest_ip_xlate); - } else { - fc_msg.ethertype = AF_INET6; - fc_msg.src_saddr.in6 = *((struct in6_addr *)&sic.src_ip); - fc_msg.dst_saddr.in6 = *((struct in6_addr *)&sic.dest_ip_xlate); - } - - fc_msg.proto = sic.protocol; - fc_msg.sport = sic.src_port; - fc_msg.dport = sic.dest_port_xlate; - memcpy(fc_msg.smac, conn->smac, ETH_ALEN); - memcpy(fc_msg.dmac, conn->dmac, ETH_ALEN); - fast_classifier_send_genl_msg(FAST_CLASSIFIER_C_OFFLOADED, &fc_msg); - conn->offloaded = 1; - } - - return NF_ACCEPT; - } - } - - spin_unlock_bh(&sfe_connections_lock); - if (conn->offloaded) { - is_v4 ? 
sfe_ipv4_update_rule(conn->sic) : sfe_ipv6_update_rule(conn->sic); - } - - DEBUG_TRACE("FOUND, SKIPPING\n"); - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_WAIT_FOR_ACCELERATION); - return NF_ACCEPT; - } - - spin_unlock_bh(&sfe_connections_lock); - - /* - * Get the net device and MAC addresses that correspond to the various source and - * destination host addresses. - */ - if (!fast_classifier_find_dev_and_mac_addr(&sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_DEV); - return NF_ACCEPT; - } - src_dev = src_dev_tmp; - - if (!fast_classifier_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV); - goto done1; - } - dev_put(dev); - - if (!fast_classifier_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_DEV); - goto done1; - } - dev_put(dev); - - if (!fast_classifier_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV); - goto done1; - } - dest_dev = dest_dev_tmp; - - /* - * Our devices may actually be part of a bridge interface. If that's - * the case then find the bridge interface instead. 
- */ - if (src_dev->priv_flags & IFF_BRIDGE_PORT) { - src_br_dev = sfe_dev_get_master(src_dev); - if (!src_br_dev) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_BRIDGE); - DEBUG_TRACE("no bridge found for: %s\n", src_dev->name); - goto done2; - } - src_dev = src_br_dev; - } - - if (dest_dev->priv_flags & IFF_BRIDGE_PORT) { - dest_br_dev = sfe_dev_get_master(dest_dev); - if (!dest_br_dev) { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_BRIDGE); - DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name); - goto done3; - } - dest_dev = dest_br_dev; - } - - sic.src_dev = src_dev; - sic.dest_dev = dest_dev; - - sic.src_mtu = src_dev->mtu; - sic.dest_mtu = dest_dev->mtu; - - if (skb->mark) { - DEBUG_TRACE("SKB MARK NON ZERO %x\n", skb->mark); - } - sic.mark = skb->mark; - - conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (!conn) { - printk(KERN_CRIT "ERROR: no memory for sfe\n"); - goto done4; - } - conn->hits = 0; - conn->offload_permit = 0; - conn->offloaded = 0; - conn->is_v4 = is_v4; - DEBUG_TRACE("Source MAC=%pM\n", sic.src_mac); - memcpy(conn->smac, sic.src_mac, ETH_ALEN); - memcpy(conn->dmac, sic.dest_mac_xlate, ETH_ALEN); - - p_sic = kmalloc(sizeof(*p_sic), GFP_ATOMIC); - if (!p_sic) { - printk(KERN_CRIT "ERROR: no memory for sfe\n"); - kfree(conn); - goto done4; - } - - memcpy(p_sic, &sic, sizeof(sic)); - conn->sic = p_sic; - conn->ct = ct; - - if (!fast_classifier_add_conn(conn)) { - kfree(conn->sic); - kfree(conn); - } - - /* - * If we had bridge ports then release them too. 
- */ -done4: - if (dest_br_dev) { - dev_put(dest_br_dev); - } -done3: - if (src_br_dev) { - dev_put(src_br_dev); - } -done2: - dev_put(dest_dev_tmp); -done1: - dev_put(src_dev_tmp); - - return NF_ACCEPT; -} - -/* - * fast_classifier_ipv4_post_routing_hook() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -fast_classifier_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) -{ - return fast_classifier_post_routing(skb, true); -} - -/* - * fast_classifier_ipv6_post_routing_hook() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -fast_classifier_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) -{ - return fast_classifier_post_routing(skb, false); -} - -/* - * fast_classifier_update_mark() - * updates the mark for a fast-classifier connection - */ -static void fast_classifier_update_mark(struct sfe_connection_mark *mark, bool is_v4) -{ - struct sfe_connection *conn; - - spin_lock_bh(&sfe_connections_lock); - - conn = fast_classifier_find_conn(&mark->src_ip, &mark->dest_ip, - mark->src_port, mark->dest_port, - mark->protocol, is_v4); - if (conn) { - conn->sic->mark = mark->mark; - } - - spin_unlock_bh(&sfe_connections_lock); -} - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -/* - * fast_classifier_conntrack_event() - * Callback event invoked when a conntrack connection's state changes. 
- */ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -static int fast_classifier_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) -#else -static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_event *item) -#endif -{ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS - struct nf_ct_event *item = ptr; -#endif - struct sfe_connection_destroy sid; - struct nf_conn *ct = item->ct; - struct nf_conntrack_tuple orig_tuple; - struct sfe_connection *conn; - struct fast_classifier_tuple fc_msg; - int offloaded = 0; - bool is_v4; - - /* - * If we don't have a conntrack entry then we're done. - */ - if (unlikely(!ct)) { - DEBUG_WARN("no ct in conntrack event callback\n"); - return NOTIFY_DONE; - } - - /* - * If this is an untracked connection then we can't have any state either. - */ - if (unlikely(nf_ct_is_untracked(ct))) { - DEBUG_TRACE("ignoring untracked conn\n"); - return NOTIFY_DONE; - } - - orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - sid.protocol = (s32)orig_tuple.dst.protonum; - - /* - * Extract information from the conntrack connection. We're only interested - * in nominal connection information (i.e. we're ignoring any NAT information). 
- */ - if (likely(nf_ct_l3num(ct) == AF_INET)) { - sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip; - sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; - is_v4 = true; - } else if (likely(nf_ct_l3num(ct) == AF_INET6)) { - sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); - sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); - is_v4 = false; - } else { - DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n"); - return NOTIFY_DONE; - } - - switch (sid.protocol) { - case IPPROTO_TCP: - sid.src_port = orig_tuple.src.u.tcp.port; - sid.dest_port = orig_tuple.dst.u.tcp.port; - break; - - case IPPROTO_UDP: - sid.src_port = orig_tuple.src.u.udp.port; - sid.dest_port = orig_tuple.dst.u.udp.port; - break; - - default: - DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol); - return NOTIFY_DONE; - } - - /* - * Check for an updated mark - */ - if ((events & (1 << IPCT_MARK)) && (ct->mark != 0)) { - struct sfe_connection_mark mark; - - mark.protocol = sid.protocol; - mark.src_ip = sid.src_ip; - mark.dest_ip = sid.dest_ip; - mark.src_port = sid.src_port; - mark.dest_port = sid.dest_port; - mark.mark = ct->mark; - - is_v4 ? 
sfe_ipv4_mark_rule(&mark) : sfe_ipv6_mark_rule(&mark); - fast_classifier_update_mark(&mark, is_v4); - } - - /* - * We're only interested in destroy events at this point - */ - if (unlikely(!(events & (1 << IPCT_DESTROY)))) { - DEBUG_TRACE("ignoring non-destroy event\n"); - return NOTIFY_DONE; - } - - if (is_v4) { - DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", - sid.protocol, &sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port); - } else { - DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", - sid.protocol, &sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port); - } - - spin_lock_bh(&sfe_connections_lock); - - conn = fast_classifier_find_conn(&sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port, sid.protocol, is_v4); - if (conn && conn->offloaded) { - if (is_v4) { - fc_msg.ethertype = AF_INET; - fc_msg.src_saddr.in = *((struct in_addr *)&conn->sic->src_ip); - fc_msg.dst_saddr.in = *((struct in_addr *)&conn->sic->dest_ip_xlate); - } else { - fc_msg.ethertype = AF_INET6; - fc_msg.src_saddr.in6 = *((struct in6_addr *)&conn->sic->src_ip); - fc_msg.dst_saddr.in6 = *((struct in6_addr *)&conn->sic->dest_ip_xlate); - } - - fc_msg.proto = conn->sic->protocol; - fc_msg.sport = conn->sic->src_port; - fc_msg.dport = conn->sic->dest_port_xlate; - memcpy(fc_msg.smac, conn->smac, ETH_ALEN); - memcpy(fc_msg.dmac, conn->dmac, ETH_ALEN); - offloaded = 1; - } - - if (conn) { - DEBUG_TRACE("Free connection\n"); - - hash_del(&conn->hl); - sfe_connections_size--; - kfree(conn->sic); - kfree(conn); - } else { - fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_DESTROY_MISS); - } - - spin_unlock_bh(&sfe_connections_lock); - - is_v4 ? 
sfe_ipv4_destroy_rule(&sid) : sfe_ipv6_destroy_rule(&sid); - - if (offloaded) { - fast_classifier_send_genl_msg(FAST_CLASSIFIER_C_DONE, &fc_msg); - } - - return NOTIFY_DONE; -} - -/* - * Netfilter conntrack event system to monitor connection tracking changes - */ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -static struct notifier_block fast_classifier_conntrack_notifier = { - .notifier_call = fast_classifier_conntrack_event, -}; -#else -static struct nf_ct_event_notifier fast_classifier_conntrack_notifier = { - .fcn = fast_classifier_conntrack_event, -}; -#endif -#endif - -/* - * Structure to establish a hook into the post routing netfilter point - this - * will pick up local outbound and packets going from one interface to another. - * - * Note: see include/linux/netfilter_ipv4.h for info related to priority levels. - * We want to examine packets after NAT translation and any ALG processing. - */ -static struct nf_hook_ops fast_classifier_ops_post_routing[] __read_mostly = { - SFE_IPV4_NF_POST_ROUTING_HOOK(__fast_classifier_ipv4_post_routing_hook), - SFE_IPV6_NF_POST_ROUTING_HOOK(__fast_classifier_ipv6_post_routing_hook), -}; - -/* - * fast_classifier_sync_rule() - * Synchronize a connection's state. 
- */ -static void fast_classifier_sync_rule(struct sfe_connection_sync *sis) -{ - struct nf_conntrack_tuple_hash *h; - struct nf_conntrack_tuple tuple; - struct nf_conn *ct; - SFE_NF_CONN_ACCT(acct); - - /* - * Create a tuple so as to be able to look up a connection - */ - memset(&tuple, 0, sizeof(tuple)); - tuple.src.u.all = (__be16)sis->src_port; - tuple.dst.dir = IP_CT_DIR_ORIGINAL; - tuple.dst.protonum = (u8)sis->protocol; - tuple.dst.u.all = (__be16)sis->dest_port; - - if (sis->is_v6) { - tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6); - tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6); - tuple.src.l3num = AF_INET6; - - DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n", - (int)tuple.dst.protonum, - &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all), - &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all)); - } else { - tuple.src.u3.ip = sis->src_ip.ip; - tuple.dst.u3.ip = sis->dest_ip.ip; - tuple.src.l3num = AF_INET; - - DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n", - (int)tuple.dst.protonum, - &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all), - &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all)); - } - - /* - * Update packet count for ingress on bridge device - */ - if (skip_to_bridge_ingress) { - struct rtnl_link_stats64 nlstats; - nlstats.tx_packets = 0; - nlstats.tx_bytes = 0; - - if (sis->src_dev && IFF_EBRIDGE && - (sis->src_new_packet_count || sis->src_new_byte_count)) { - nlstats.rx_packets = sis->src_new_packet_count; - nlstats.rx_bytes = sis->src_new_byte_count; - spin_lock_bh(&sfe_connections_lock); - br_dev_update_stats(sis->src_dev, &nlstats); - spin_unlock_bh(&sfe_connections_lock); - } - if (sis->dest_dev && IFF_EBRIDGE && - (sis->dest_new_packet_count || sis->dest_new_byte_count)) { - nlstats.rx_packets = sis->dest_new_packet_count; - nlstats.rx_bytes = sis->dest_new_byte_count; - spin_lock_bh(&sfe_connections_lock); - br_dev_update_stats(sis->dest_dev, &nlstats); - 
spin_unlock_bh(&sfe_connections_lock); - } - } - - /* - * Look up conntrack connection - */ - h = nf_conntrack_find_get(&init_net, SFE_NF_CT_DEFAULT_ZONE, &tuple); - if (unlikely(!h)) { - DEBUG_TRACE("no connection found\n"); - return; - } - - ct = nf_ct_tuplehash_to_ctrack(h); - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); - - /* - * Only update if this is not a fixed timeout - */ - if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { - spin_lock_bh(&ct->lock); - ct->timeout.expires += sis->delta_jiffies; - spin_unlock_bh(&ct->lock); - } - - acct = nf_conn_acct_find(ct); - if (acct) { - spin_lock_bh(&ct->lock); - atomic64_add(sis->src_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets); - atomic64_add(sis->src_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes); - atomic64_add(sis->dest_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets); - atomic64_add(sis->dest_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes); - spin_unlock_bh(&ct->lock); - } - - switch (sis->protocol) { - case IPPROTO_TCP: - spin_lock_bh(&ct->lock); - if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) { - ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window; - } - if ((s32)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) { - ct->proto.tcp.seen[0].td_end = sis->src_td_end; - } - if ((s32)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) { - ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end; - } - if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) { - ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window; - } - if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) { - ct->proto.tcp.seen[1].td_end = sis->dest_td_end; - } - if ((s32)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) { - ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end; - } - spin_unlock_bh(&ct->lock); - break; - } - - /* - * Release connection - */ - nf_ct_put(ct); -} - -/* - * 
fast_classifier_device_event() - */ -static int fast_classifier_device_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = SFE_DEV_EVENT_PTR(ptr); - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv4_destroy_all_rules_for_dev(dev); - sfe_ipv6_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * fast_classifier_inet_event() - */ -static int fast_classifier_inet_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv4_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * fast_classifier_inet6_event() - */ -static int fast_classifier_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev; - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv6_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * fast_classifier_get_offload_at_pkts() - */ -static ssize_t fast_classifier_get_offload_at_pkts(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", offload_at_pkts); -} - -/* - * fast_classifier_set_offload_at_pkts() - */ -static ssize_t fast_classifier_set_offload_at_pkts(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - long new; - int ret; - - ret = kstrtol(buf, 0, &new); - if (ret == -EINVAL || ((int)new != new)) - return -EINVAL; - - offload_at_pkts = new; - - return size; -} - -/* - * fast_classifier_get_debug_info() - */ -static ssize_t fast_classifier_get_debug_info(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - size_t len = 0; - struct sfe_connection *conn; - u32 i; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) - struct hlist_node *node; -#endif - - spin_lock_bh(&sfe_connections_lock); - len += scnprintf(buf, PAGE_SIZE 
- len, "size=%d offload=%d offload_no_match=%d" - " offloaded=%d done=%d offloaded_fail=%d done_fail=%d\n", - sfe_connections_size, - atomic_read(&offload_msgs), - atomic_read(&offload_no_match_msgs), - atomic_read(&offloaded_msgs), - atomic_read(&done_msgs), - atomic_read(&offloaded_fail_msgs), - atomic_read(&done_fail_msgs)); - sfe_hash_for_each(fc_conn_ht, i, node, conn, hl) { - len += scnprintf(buf + len, PAGE_SIZE - len, - (conn->is_v4 ? "o=%d, p=%d [%pM]:%pI4:%u %pI4:%u:[%pM] m=%08x h=%d\n" : "o=%d, p=%d [%pM]:%pI6:%u %pI6:%u:[%pM] m=%08x h=%d\n"), - conn->offloaded, - conn->sic->protocol, - conn->sic->src_mac, - &conn->sic->src_ip, - conn->sic->src_port, - &conn->sic->dest_ip, - conn->sic->dest_port, - conn->sic->dest_mac_xlate, - conn->sic->mark, - conn->hits); - } - spin_unlock_bh(&sfe_connections_lock); - - return len; -} - -/* - * fast_classifier_get_skip_bridge_ingress() - */ -static ssize_t fast_classifier_get_skip_bridge_ingress(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", skip_to_bridge_ingress); -} - -/* - * fast_classifier_set_skip_bridge_ingress() - */ -static ssize_t fast_classifier_set_skip_bridge_ingress(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - long new; - int ret; - - ret = kstrtol(buf, 0, &new); - if (ret == -EINVAL || ((int)new != new)) - return -EINVAL; - - skip_to_bridge_ingress = new ? 
1 : 0; - - return size; -} - -/* - * fast_classifier_get_exceptions - * dump exception counters - */ -static ssize_t fast_classifier_get_exceptions(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int idx, len; - struct fast_classifier *sc = &__sc; - - spin_lock_bh(&sc->lock); - for (len = 0, idx = 0; idx < FAST_CL_EXCEPTION_MAX; idx++) { - if (sc->exceptions[idx]) { - len += snprintf(buf + len, (ssize_t)(PAGE_SIZE - len), "%s = %d\n", fast_classifier_exception_events_string[idx], sc->exceptions[idx]); - } - } - spin_unlock_bh(&sc->lock); - - return len; -} - -/* - * sysfs attributes. - */ -static const struct device_attribute fast_classifier_offload_at_pkts_attr = - __ATTR(offload_at_pkts, S_IWUSR | S_IRUGO, fast_classifier_get_offload_at_pkts, fast_classifier_set_offload_at_pkts); -static const struct device_attribute fast_classifier_debug_info_attr = - __ATTR(debug_info, S_IRUGO, fast_classifier_get_debug_info, NULL); -static const struct device_attribute fast_classifier_skip_bridge_ingress = - __ATTR(skip_to_bridge_ingress, S_IWUSR | S_IRUGO, fast_classifier_get_skip_bridge_ingress, fast_classifier_set_skip_bridge_ingress); -static const struct device_attribute fast_classifier_exceptions_attr = - __ATTR(exceptions, S_IRUGO, fast_classifier_get_exceptions, NULL); - -/* - * fast_classifier_init() - */ -static int __init fast_classifier_init(void) -{ - struct fast_classifier *sc = &__sc; - int result = -1; - - printk(KERN_ALERT "fast-classifier: starting up\n"); - DEBUG_INFO("SFE CM init\n"); - - hash_init(fc_conn_ht); - - /* - * Create sys/fast_classifier - */ - sc->sys_fast_classifier = kobject_create_and_add("fast_classifier", NULL); - if (!sc->sys_fast_classifier) { - DEBUG_ERROR("failed to register fast_classifier\n"); - goto exit1; - } - - result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - if (result) { - DEBUG_ERROR("failed to register offload at pkgs: %d\n", result); - goto exit2; - } - 
- result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); - if (result) { - DEBUG_ERROR("failed to register debug dev: %d\n", result); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - goto exit2; - } - - result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); - if (result) { - DEBUG_ERROR("failed to register skip bridge on ingress: %d\n", result); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); - goto exit2; - } - - result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr); - if (result) { - DEBUG_ERROR("failed to register exceptions file: %d\n", result); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); - goto exit2; - } - - sc->dev_notifier.notifier_call = fast_classifier_device_event; - sc->dev_notifier.priority = 1; - register_netdevice_notifier(&sc->dev_notifier); - - sc->inet_notifier.notifier_call = fast_classifier_inet_event; - sc->inet_notifier.priority = 1; - register_inetaddr_notifier(&sc->inet_notifier); - - sc->inet6_notifier.notifier_call = fast_classifier_inet6_event; - sc->inet6_notifier.priority = 1; - register_inet6addr_notifier(&sc->inet6_notifier); - - /* - * Register our netfilter hooks. - */ - result = nf_register_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); - if (result < 0) { - DEBUG_ERROR("can't register nf post routing hook: %d\n", result); - goto exit3; - } - -#ifdef CONFIG_NF_CONNTRACK_EVENTS - /* - * Register a notifier hook to get fast notifications of expired connections. 
- */ - result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier); - if (result < 0) { - DEBUG_ERROR("can't register nf notifier hook: %d\n", result); - goto exit4; - } -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) - result = genl_register_family_with_ops_groups(&fast_classifier_gnl_family, - fast_classifier_gnl_ops, - fast_classifier_genl_mcgrp); - if (result) { - DEBUG_ERROR("failed to register genl ops: %d\n", result); - goto exit5; - } -#else - result = genl_register_family(&fast_classifier_gnl_family); - if (result) { - printk(KERN_CRIT "unable to register genl family\n"); - goto exit5; - } - - result = genl_register_ops(&fast_classifier_gnl_family, fast_classifier_gnl_ops); - if (result) { - printk(KERN_CRIT "unable to register ops\n"); - goto exit6; - } - - result = genl_register_mc_group(&fast_classifier_gnl_family, - fast_classifier_genl_mcgrp); - if (result) { - printk(KERN_CRIT "unable to register multicast group\n"); - goto exit6; - } -#endif - - printk(KERN_ALERT "fast-classifier: registered\n"); - - spin_lock_init(&sc->lock); - - /* - * Hook the receive path in the network stack. - */ - BUG_ON(athrs_fast_nat_recv); - RCU_INIT_POINTER(athrs_fast_nat_recv, fast_classifier_recv); - - /* - * Hook the shortcut sync callback. 
- */ - sfe_ipv4_register_sync_rule_callback(fast_classifier_sync_rule); - sfe_ipv6_register_sync_rule_callback(fast_classifier_sync_rule); - return 0; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)) -exit6: - genl_unregister_family(&fast_classifier_gnl_family); -#endif - -exit5: -#ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier); - -exit4: -#endif - nf_unregister_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); - -exit3: - unregister_inetaddr_notifier(&sc->inet_notifier); - unregister_inet6addr_notifier(&sc->inet6_notifier); - unregister_netdevice_notifier(&sc->dev_notifier); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); - sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr); - -exit2: - kobject_put(sc->sys_fast_classifier); - -exit1: - return result; -} - -/* - * fast_classifier_exit() - */ -static void __exit fast_classifier_exit(void) -{ - struct fast_classifier *sc = &__sc; - int result = -1; - - DEBUG_INFO("SFE CM exit\n"); - printk(KERN_ALERT "fast-classifier: shutting down\n"); - - /* - * Unregister our sync callback. - */ - sfe_ipv4_register_sync_rule_callback(NULL); - sfe_ipv6_register_sync_rule_callback(NULL); - - /* - * Unregister our receive callback. - */ - RCU_INIT_POINTER(athrs_fast_nat_recv, NULL); - - /* - * Wait for all callbacks to complete. - */ - rcu_barrier(); - - /* - * Destroy all connections. 
- */ - sfe_ipv4_destroy_all_rules_for_dev(NULL); - sfe_ipv6_destroy_all_rules_for_dev(NULL); - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)) - result = genl_unregister_ops(&fast_classifier_gnl_family, fast_classifier_gnl_ops); - if (result != 0) { - printk(KERN_CRIT "Unable to unreigster genl_ops\n"); - } -#endif - - result = genl_unregister_family(&fast_classifier_gnl_family); - if (result != 0) { - printk(KERN_CRIT "Unable to unreigster genl_family\n"); - } - -#ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier); - -#endif - nf_unregister_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); - - unregister_inet6addr_notifier(&sc->inet6_notifier); - unregister_inetaddr_notifier(&sc->inet_notifier); - unregister_netdevice_notifier(&sc->dev_notifier); - - kobject_put(sc->sys_fast_classifier); -} - -module_init(fast_classifier_init) -module_exit(fast_classifier_exit) - -MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/shortcut-fe/fast-classifier/fast-classifier.h b/shortcut-fe/fast-classifier/fast-classifier.h deleted file mode 100644 index 6b7a18cf6..000000000 --- a/shortcut-fe/fast-classifier/fast-classifier.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * User space header to send message to the fast classifier - * - * Copyright (c) 2013,2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include - -#define FAST_CLASSIFIER_GENL_VERSION (1) -#define FAST_CLASSIFIER_GENL_NAME "FC" -#define FAST_CLASSIFIER_GENL_MCGRP "FC_MCGRP" -#define FAST_CLASSIFIER_GENL_HDRSIZE (0) - -enum { - FAST_CLASSIFIER_A_UNSPEC, - FAST_CLASSIFIER_A_TUPLE, - __FAST_CLASSIFIER_A_MAX, -}; - -#define FAST_CLASSIFIER_A_MAX (__FAST_CLASSIFIER_A_MAX - 1) - -enum { - FAST_CLASSIFIER_C_UNSPEC, - FAST_CLASSIFIER_C_OFFLOAD, - FAST_CLASSIFIER_C_OFFLOADED, - FAST_CLASSIFIER_C_DONE, - __FAST_CLASSIFIER_C_MAX, -}; - -#define FAST_CLASSIFIER_C_MAX (__FAST_CLASSIFIER_C_MAX - 1) - -struct fast_classifier_tuple { - unsigned short ethertype; - unsigned char proto; - union { - struct in_addr in; - struct in6_addr in6; - } src_saddr; - union { - struct in_addr in; - struct in6_addr in6; - } dst_saddr; - unsigned short sport; - unsigned short dport; - unsigned char smac[ETH_ALEN]; - unsigned char dmac[ETH_ALEN]; -}; diff --git a/shortcut-fe/fast-classifier/nl_classifier_test.c b/shortcut-fe/fast-classifier/nl_classifier_test.c deleted file mode 100644 index 639417964..000000000 --- a/shortcut-fe/fast-classifier/nl_classifier_test.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright (c) 2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#define NL_CLASSIFIER_GENL_VERSION 1 -#define NL_CLASSIFIER_GENL_FAMILY "FC" -#define NL_CLASSIFIER_GENL_GROUP "FC_MCGRP" -#define NL_CLASSIFIER_GENL_HDRSIZE 0 - -enum NL_CLASSIFIER_CMD { - NL_CLASSIFIER_CMD_UNSPEC, - NL_CLASSIFIER_CMD_ACCEL, - NL_CLASSIFIER_CMD_ACCEL_OK, - NL_CLASSIFIER_CMD_CONNECTION_CLOSED, - NL_CLASSIFIER_CMD_MAX, -}; - -enum NL_CLASSIFIER_ATTR { - NL_CLASSIFIER_ATTR_UNSPEC, - NL_CLASSIFIER_ATTR_TUPLE, - NL_CLASSIFIER_ATTR_MAX, -}; - -union nl_classifier_tuple_ip { - struct in_addr in; - struct in6_addr in6; -}; - -struct nl_classifier_tuple { - unsigned short af; - unsigned char proto; - union nl_classifier_tuple_ip src_ip; - union nl_classifier_tuple_ip dst_ip; - unsigned short sport; - unsigned short dport; - unsigned char smac[6]; - unsigned char dmac[6]; -}; - -struct nl_classifier_instance { - struct nl_sock *sock; - int family_id; - int group_id; - int stop; -}; - -struct nl_classifier_instance nl_cls_inst; - -static struct nla_policy nl_classifier_genl_policy[(NL_CLASSIFIER_ATTR_MAX+1)] = { - [NL_CLASSIFIER_ATTR_TUPLE] = { .type = NLA_UNSPEC }, -}; - -void nl_classifier_dump_nl_tuple(struct nl_classifier_tuple *tuple) -{ - char ip_str[64]; - - printf("protocol = %s\n", (tuple->proto == IPPROTO_UDP) ? "udp" : ((tuple->proto == IPPROTO_TCP) ? 
"tcp" : "unknown")); - printf("source ip = %s\n", inet_ntop(tuple->af, &tuple->src_ip, ip_str, sizeof(ip_str))); - printf("destination ip = %s\n", inet_ntop(tuple->af, &tuple->dst_ip, ip_str, sizeof(ip_str))); - printf("source port = %d\n", ntohs(tuple->sport)); - printf("destination port = %d\n", ntohs(tuple->dport)); -} - -int nl_classifier_msg_recv(struct nl_msg *msg, void *arg) -{ - struct nlmsghdr *nlh = nlmsg_hdr(msg); - struct genlmsghdr *gnlh = nlmsg_data(nlh); - struct nlattr *attrs[(NL_CLASSIFIER_ATTR_MAX+1)]; - - genlmsg_parse(nlh, NL_CLASSIFIER_GENL_HDRSIZE, attrs, NL_CLASSIFIER_ATTR_MAX, nl_classifier_genl_policy); - - switch (gnlh->cmd) { - case NL_CLASSIFIER_CMD_ACCEL_OK: - printf("Acceleration successful:\n"); - nl_classifier_dump_nl_tuple(nla_data(attrs[NL_CLASSIFIER_ATTR_TUPLE])); - return NL_OK; - case NL_CLASSIFIER_CMD_CONNECTION_CLOSED: - printf("Connection is closed:\n"); - nl_classifier_dump_nl_tuple(nla_data(attrs[NL_CLASSIFIER_ATTR_TUPLE])); - return NL_OK; - default: - printf("nl classifier received unknow message %d\n", gnlh->cmd); - } - - return NL_SKIP; -} - -void nl_classifier_offload(struct nl_classifier_instance *inst, - unsigned char proto, unsigned long *src_saddr, - unsigned long *dst_saddr, unsigned short sport, - unsigned short dport, int af) -{ - struct nl_msg *msg; - int ret; - struct nl_classifier_tuple classifier_msg; - - memset(&classifier_msg, 0, sizeof(classifier_msg)); - classifier_msg.af = af; - classifier_msg.proto = proto; - memcpy(&classifier_msg.src_ip, src_saddr, (af == AF_INET ? 4 : 16)); - memcpy(&classifier_msg.dst_ip, dst_saddr, (af == AF_INET ? 
4 : 16)); - classifier_msg.sport = sport; - classifier_msg.dport = dport; - - msg = nlmsg_alloc(); - if (!msg) { - printf("Unable to allocate message\n"); - return; - } - - genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, inst->family_id, - NL_CLASSIFIER_GENL_HDRSIZE, NLM_F_REQUEST, - NL_CLASSIFIER_CMD_ACCEL, NL_CLASSIFIER_GENL_VERSION); - nla_put(msg, NL_CLASSIFIER_ATTR_TUPLE, sizeof(classifier_msg), &classifier_msg); - - ret = nl_send_auto(inst->sock, msg); - if (ret < 0) { - printf("send netlink message failed.\n"); - nlmsg_free(msg); - return; - } - - nlmsg_free(msg); - printf("nl classifier offload connection successful\n"); -} - -int nl_classifier_init(struct nl_classifier_instance *inst) -{ - int ret; - - inst->sock = nl_socket_alloc(); - if (!inst->sock) { - printf("Unable to allocation socket.\n"); - return -1; - } - genl_connect(inst->sock); - - inst->family_id = genl_ctrl_resolve(inst->sock, NL_CLASSIFIER_GENL_FAMILY); - if (inst->family_id < 0) { - printf("Unable to resolve family %s\n", NL_CLASSIFIER_GENL_FAMILY); - goto init_failed; - } - - inst->group_id = genl_ctrl_resolve_grp(inst->sock, NL_CLASSIFIER_GENL_FAMILY, NL_CLASSIFIER_GENL_GROUP); - if (inst->group_id < 0) { - printf("Unable to resolve mcast group %s\n", NL_CLASSIFIER_GENL_GROUP); - goto init_failed; - } - - ret = nl_socket_add_membership(inst->sock, inst->group_id); - if (ret < 0) { - printf("Unable to add membership\n"); - goto init_failed; - } - - nl_socket_disable_seq_check(inst->sock); - nl_socket_modify_cb(inst->sock, NL_CB_VALID, NL_CB_CUSTOM, nl_classifier_msg_recv, NULL); - - printf("nl classifier init successful\n"); - return 0; - -init_failed: - if (inst->sock) { - nl_close(inst->sock); - nl_socket_free(inst->sock); - inst->sock = NULL; - } - return -1; -} - -void nl_classifier_exit(struct nl_classifier_instance *inst) -{ - if (inst->sock) { - nl_close(inst->sock); - nl_socket_free(inst->sock); - inst->sock = NULL; - } - printf("nl classifier exit successful\n"); -} - -int 
nl_classifier_parse_arg(int argc, char *argv[], unsigned char *proto, unsigned long *src_saddr, - unsigned long *dst_saddr, unsigned short *sport, unsigned short *dport, int *af) -{ - int ret; - unsigned short port; - - if (argc < 7) { - printf("help: nl_classifier \n"); - return -1; - } - - if (0 == strncmp(argv[1], "v4", 2)) { - *af = AF_INET; - } else if (0 == strncmp(argv[1], "v6", 2)) { - *af = AF_INET6; - } else { - printf("Address family is not supported"); - return -1; - } - - if (0 == strncmp(argv[2], "udp", 3)) { - *proto = IPPROTO_UDP; - } else if (0 == strncmp(argv[2], "tcp", 3)) { - *proto = IPPROTO_TCP; - } else { - printf("Protocol is not supported"); - return -1; - } - - ret = inet_pton(*af, argv[3], src_saddr); - if (ret <= 0) { - printf("source ip has wrong format\n"); - return -1; - } - - ret = inet_pton(*af, argv[4], dst_saddr); - if (ret <= 0) { - printf("destination ip has wrong format\n"); - return -1; - } - - port = strtol(argv[5], NULL, 0); - *sport = htons(port); - port = strtol(argv[6], NULL, 0); - *dport = htons(port); - - printf("nl classifier parse arguments successful\n"); - return 0; -} - -int main(int argc, char *argv[]) -{ - struct nl_classifier_instance *inst = &nl_cls_inst; - unsigned char proto; - unsigned long src_addr[4]; - unsigned long dst_addr[4]; - unsigned short sport; - unsigned short dport; - int af; - int ret; - - ret = nl_classifier_parse_arg(argc, argv, &proto, src_addr, dst_addr, &sport, &dport, &af); - if (ret < 0) { - printf("Failed to parse arguments\n"); - return ret; - } - - ret = nl_classifier_init(inst); - if (ret < 0) { - printf("Unable to init generic netlink\n"); - return ret; - } - - nl_classifier_offload(inst, proto, src_addr, dst_addr, sport, dport, af); - - /* main loop to listen on message */ - while (!inst->stop) { - nl_recvmsgs_default(inst->sock); - } - - nl_classifier_exit(inst); - - return 0; -} diff --git a/shortcut-fe/fast-classifier/userspace_example.c 
b/shortcut-fe/fast-classifier/userspace_example.c deleted file mode 100644 index 4f4113d99..000000000 --- a/shortcut-fe/fast-classifier/userspace_example.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2013,2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include - -static struct nl_sock *sock; -static struct nl_sock *sock_event; -static int family; -static int grp_id; - -static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = { - [FAST_CLASSIFIER_A_TUPLE] = { .type = NLA_UNSPEC }, -}; - -void dump_fc_tuple(struct fast_classifier_tuple *fc_msg) -{ - char src_str[INET_ADDRSTRLEN]; - char dst_str[INET_ADDRSTRLEN]; - - printf("TUPLE: %d, %s, %s, %d, %d" - " SMAC=%02x:%02x:%02x:%02x:%02x:%02x", - " DMAC=%02x:%02x:%02x:%02x:%02x:%02x\n", - fc_msg->proto, - inet_ntop(AF_INET, - &fc_msg->src_saddr.in.s_addr, - src_str, - INET_ADDRSTRLEN), - inet_ntop(AF_INET, - &fc_msg->dst_saddr.in.s_addr, - dst_str, - INET_ADDRSTRLEN), - fc_msg->sport, fc_msg->dport, - fc_msg->smac[0], fc_msg->smac[1], fc_msg->smac[2], - fc_msg->smac[3], fc_msg->smac[4], fc_msg->smac[5], - fc_msg->dmac[0], fc_msg->dmac[1], fc_msg->dmac[2], - fc_msg->dmac[3], fc_msg->dmac[4], 
fc_msg->dmac[5]); -} - -static int parse_cb(struct nl_msg *msg, void *arg) -{ - struct nlmsghdr *nlh = nlmsg_hdr(msg); - struct genlmsghdr *gnlh = nlmsg_data(nlh); - struct nlattr *attrs[FAST_CLASSIFIER_A_MAX]; - - genlmsg_parse(nlh, 0, attrs, FAST_CLASSIFIER_A_MAX, fast_classifier_genl_policy); - - switch (gnlh->cmd) { - case FAST_CLASSIFIER_C_OFFLOADED: - printf("Got a offloaded message\n"); - dump_fc_tuple(nla_data(attrs[FAST_CLASSIFIER_A_TUPLE])); - return NL_OK; - case FAST_CLASSIFIER_C_DONE: - printf("Got a done message\n"); - dump_fc_tuple(nla_data(attrs[FAST_CLASSIFIER_A_TUPLE])); - return NL_OK; - } - - return NL_SKIP; -} - -int fast_classifier_init(void) -{ - int err; - - sock = nl_socket_alloc(); - if (!sock) { - printf("Unable to allocation socket.\n"); - return -1; - } - genl_connect(sock); - - sock_event = nl_socket_alloc(); - if (!sock_event) { - nl_close(sock); - nl_socket_free(sock); - printf("Unable to allocation socket.\n"); - return -1; - } - genl_connect(sock_event); - - family = genl_ctrl_resolve(sock, FAST_CLASSIFIER_GENL_NAME); - if (family < 0) { - nl_close(sock_event); - nl_close(sock); - nl_socket_free(sock); - nl_socket_free(sock_event); - printf("Unable to resolve family\n"); - return -1; - } - - grp_id = genl_ctrl_resolve_grp(sock, FAST_CLASSIFIER_GENL_NAME, - FAST_CLASSIFIER_GENL_MCGRP); - if (grp_id < 0) { - printf("Unable to resolve mcast group\n"); - return -1; - } - - err = nl_socket_add_membership(sock_event, grp_id); - if (err < 0) { - printf("Unable to add membership\n"); - return -1; - } - - nl_socket_disable_seq_check(sock_event); - nl_socket_modify_cb(sock_event, NL_CB_VALID, NL_CB_CUSTOM, parse_cb, NULL); - - return 0; -} - -void fast_classifier_close(void) -{ - nl_close(sock_event); - nl_close(sock); - nl_socket_free(sock_event); - nl_socket_free(sock); -} - -void fast_classifier_ipv4_offload(unsigned char proto, unsigned long src_saddr, - unsigned long dst_saddr, unsigned short sport, - unsigned short dport) -{ - struct 
nl_msg *msg; - int ret; -#ifdef DEBUG - char src_str[INET_ADDRSTRLEN]; - char dst_str[INET_ADDRSTRLEN]; -#endif - struct fast_classifier_tuple fc_msg; - -#ifdef DEBUG - printf("DEBUG: would offload: %d, %s, %s, %d, %d\n", proto, - inet_ntop(AF_INET, &src_saddr, src_str, INET_ADDRSTRLEN), - inet_ntop(AF_INET, &dst_saddr, dst_str, INET_ADDRSTRLEN), - sport, dport); -#endif - - fc_msg.proto = proto; - fc_msg.src_saddr.in.s_addr = src_saddr; - fc_msg.dst_saddr.in.s_addr = dst_saddr; - fc_msg.sport = sport; - fc_msg.dport = dport; - fc_msg.smac[0] = 'a'; - fc_msg.smac[1] = 'b'; - fc_msg.smac[2] = 'c'; - fc_msg.smac[3] = 'd'; - fc_msg.smac[4] = 'e'; - fc_msg.smac[5] = 'f'; - fc_msg.dmac[0] = 'f'; - fc_msg.dmac[1] = 'e'; - fc_msg.dmac[2] = 'd'; - fc_msg.dmac[3] = 'c'; - fc_msg.dmac[4] = 'b'; - fc_msg.dmac[5] = 'a'; - - if (fast_classifier_init() < 0) { - printf("Unable to init generic netlink\n"); - exit(1); - } - - msg = nlmsg_alloc(); - if (!msg) { - nl_socket_free(sock); - printf("Unable to allocate message\n"); - return; - } - - genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, family, - FAST_CLASSIFIER_GENL_HDRSIZE, NLM_F_REQUEST, - FAST_CLASSIFIER_C_OFFLOAD, FAST_CLASSIFIER_GENL_VERSION); - nla_put(msg, 1, sizeof(fc_msg), &fc_msg); - - ret = nl_send_auto_complete(sock, msg); - - nlmsg_free(msg); - if (ret < 0) { - printf("nlmsg_free failed"); - nl_close(sock); - nl_socket_free(sock); - return; - } - - ret = nl_wait_for_ack(sock); - if (ret < 0) { - printf("wait for ack failed"); - nl_close(sock); - nl_socket_free(sock); - return; - } -} - -void fast_classifier_listen_for_messages(void) -{ - printf("waiting for netlink events\n"); - - while (1) { - nl_recvmsgs_default(sock_event); - } -} - -int main(int argc, char *argv[]) -{ - if (fast_classifier_init() < 0) { - printf("Unable to init generic netlink\n"); - exit(1); - } - - fast_classifier_ipv4_offload('a', 0, 0, 0, 0); - - /* this never returns */ - fast_classifier_listen_for_messages(); - - fast_classifier_close(); - - 
return 0; -} diff --git a/shortcut-fe/patches/Kconfig.patch b/shortcut-fe/patches/Kconfig.patch deleted file mode 100644 index 7df0abe83..000000000 --- a/shortcut-fe/patches/Kconfig.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/net/Kconfig b/net/Kconfig -index 976cb63..4a7b0af 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -236,6 +236,7 @@ source "net/dcb/Kconfig" - source "net/dns_resolver/Kconfig" - source "net/batman-adv/Kconfig" - source "net/openvswitch/Kconfig" -+source "net/shortcut-fe/Kconfig" - - config RPS - boolean "RPS" diff --git a/shortcut-fe/patches/Makefile.patch b/shortcut-fe/patches/Makefile.patch deleted file mode 100644 index 1ccc2b0ec..000000000 --- a/shortcut-fe/patches/Makefile.patch +++ /dev/null @@ -1,10 +0,0 @@ -diff --git a/net/Makefile b/net/Makefile -index 6865dab..a8f0091 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -71,3 +71,5 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ - obj-$(CONFIG_NFC) += nfc/ - obj-$(CONFIG_OPENVSWITCH) += openvswitch/ - obj-$(CONFIG_NET_ACTIVITY_STATS) += activity_stats.o -+obj-$(CONFIG_SHORTCUT_FE) += shortcut-fe/ -+ diff --git a/shortcut-fe/patches/dev.c.patch b/shortcut-fe/patches/dev.c.patch deleted file mode 100644 index 55fc03a74..000000000 --- a/shortcut-fe/patches/dev.c.patch +++ /dev/null @@ -1,43 +0,0 @@ -diff --git a/net/core/dev.c b/net/core/dev.c -index d23742f..1f0415f 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -3168,6 +3168,9 @@ void netdev_rx_handler_unregister(struct net_device *dev) - } - EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); - -+int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly; -+EXPORT_SYMBOL_GPL(athrs_fast_nat_recv); -+ - static int __netif_receive_skb(struct sk_buff *skb) - { - struct packet_type *ptype, *pt_prev; -@@ -3177,6 +3180,7 @@ static int __netif_receive_skb(struct sk_buff *skb) - bool deliver_exact = false; - int ret = NET_RX_DROP; - __be16 type; -+ int (*fast_recv)(struct sk_buff *skb); - - 
net_timestamp_check(!netdev_tstamp_prequeue, skb); - -@@ -3194,10 +3198,18 @@ static int __netif_receive_skb(struct sk_buff *skb) - skb_reset_transport_header(skb); - skb_reset_mac_len(skb); - -- pt_prev = NULL; -- - rcu_read_lock(); - -+ fast_recv = rcu_dereference(athrs_fast_nat_recv); -+ if (fast_recv) { -+ if (fast_recv(skb)) { -+ rcu_read_unlock(); -+ return NET_RX_SUCCESS; -+ } -+ } -+ -+ pt_prev = NULL; -+ - another_round: - - __this_cpu_inc(softnet_data.processed); diff --git a/shortcut-fe/patches/nf_conntrack_proto_tcp.c.patch b/shortcut-fe/patches/nf_conntrack_proto_tcp.c.patch deleted file mode 100644 index 2cd2313a2..000000000 --- a/shortcut-fe/patches/nf_conntrack_proto_tcp.c.patch +++ /dev/null @@ -1,25 +0,0 @@ ---- /home/dhudson/mips-orig/nf_conntrack_proto_tcp.c 2013-05-07 21:32:57.153896922 +0100 -+++ nf_conntrack_proto_tcp.c 2013-06-13 16:37:40.137102438 +0100 -@@ -27,18 +27,20 @@ - #include - #include - #include - #include - - /* Do not check the TCP window for incoming packets */ --static int nf_ct_tcp_no_window_check __read_mostly = 1; -+int nf_ct_tcp_no_window_check __read_mostly = 0; -+EXPORT_SYMBOL_GPL(nf_ct_tcp_no_window_check); - - /* "Be conservative in what you do, - be liberal in what you accept from others." - If it's non-zero, we mark only out of window RST segments as INVALID. */ --static int nf_ct_tcp_be_liberal __read_mostly = 0; -+int nf_ct_tcp_be_liberal __read_mostly = 0; -+EXPORT_SYMBOL_GPL(nf_ct_tcp_be_liberal); - - /* If it is set to zero, we disable picking up already established - connections. */ - static int nf_ct_tcp_loose __read_mostly = 1; - - /* Max number of the retransmitted packets without receiving an (acceptable) diff --git a/shortcut-fe/sfe.h b/shortcut-fe/sfe.h deleted file mode 100644 index 279e7b3dc..000000000 --- a/shortcut-fe/sfe.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * sfe.h - * Shortcut forwarding engine. - * - * Copyright (c) 2013-2017 The Linux Foundation. All rights reserved. 
- * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - - -/* - * The following are debug macros used throughout the SFE. - * - * The DEBUG_LEVEL enables the followings based on its value, - * when dynamic debug option is disabled. - * - * 0 = OFF - * 1 = ASSERTS / ERRORS - * 2 = 1 + WARN - * 3 = 2 + INFO - * 4 = 3 + TRACE - */ -#define DEBUG_LEVEL 2 - -#if (DEBUG_LEVEL < 1) -#define DEBUG_ASSERT(s, ...) -#define DEBUG_ERROR(s, ...) -#else -#define DEBUG_ASSERT(c, s, ...) if (!(c)) { pr_emerg("ASSERT: %s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__); BUG(); } -#define DEBUG_ERROR(s, ...) pr_err("%s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif - -#if defined(CONFIG_DYNAMIC_DEBUG) -/* - * Compile messages for dynamic enable/disable - */ -#define DEBUG_WARN(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#define DEBUG_INFO(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#define DEBUG_TRACE(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#else - -/* - * Statically compile messages at different levels - */ -#if (DEBUG_LEVEL < 2) -#define DEBUG_WARN(s, ...) -#else -#define DEBUG_WARN(s, ...) pr_warn("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif - -#if (DEBUG_LEVEL < 3) -#define DEBUG_INFO(s, ...) 
-#else -#define DEBUG_INFO(s, ...) pr_notice("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif - -#if (DEBUG_LEVEL < 4) -#define DEBUG_TRACE(s, ...) -#else -#define DEBUG_TRACE(s, ...) pr_info("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif -#endif - -#ifdef CONFIG_NF_FLOW_COOKIE -typedef int (*flow_cookie_set_func_t)(u32 protocol, __be32 src_ip, __be16 src_port, - __be32 dst_ip, __be16 dst_port, u16 flow_cookie); -/* - * sfe_register_flow_cookie_cb - * register a function in SFE to let SFE use this function to configure flow cookie for a flow - * - * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE - * can use this function to configure flow cookie for a flow. - * return: 0, success; !=0, fail - */ -int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb); - -/* - * sfe_unregister_flow_cookie_cb - * unregister function which is used to configure flow cookie for a flow - * - * return: 0, success; !=0, fail - */ -int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb); - -typedef int (*sfe_ipv6_flow_cookie_set_func_t)(u32 protocol, __be32 src_ip[4], __be16 src_port, - __be32 dst_ip[4], __be16 dst_port, u16 flow_cookie); - -/* - * sfe_ipv6_register_flow_cookie_cb - * register a function in SFE to let SFE use this function to configure flow cookie for a flow - * - * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE - * can use this function to configure flow cookie for a flow. 
- * return: 0, success; !=0, fail - */ -int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb); - -/* - * sfe_ipv6_unregister_flow_cookie_cb - * unregister function which is used to configure flow cookie for a flow - * - * return: 0, success; !=0, fail - */ -int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb); - -#endif /*CONFIG_NF_FLOW_COOKIE*/ diff --git a/shortcut-fe/sfe_backport.h b/shortcut-fe/sfe_backport.h deleted file mode 100644 index d2d60c73c..000000000 --- a/shortcut-fe/sfe_backport.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * sfe_backport.h - * Shortcut forwarding engine compatible header file. - * - * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#include - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) -#include -#else -enum udp_conntrack { - UDP_CT_UNREPLIED, - UDP_CT_REPLIED, - UDP_CT_MAX -}; - -static inline unsigned int * -nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, - struct nf_conntrack_l4proto *l4proto) -{ -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - struct nf_conn_timeout *timeout_ext; - unsigned int *timeouts; - - timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) - timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); - else - timeouts = l4proto->get_timeouts(net); - - return timeouts; -#else - return l4proto->get_timeouts(net); -#endif /*CONFIG_NF_CONNTRACK_TIMEOUT*/ -} -#endif /*KERNEL_VERSION(3, 7, 0)*/ -#endif /*KERNEL_VERSION(3, 4, 0)*/ - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) -#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ -static unsigned int FN_NAME(void *priv, \ - struct sk_buff *SKB, \ - const struct nf_hook_state *state) -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) -#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ -static unsigned int FN_NAME(const struct nf_hook_ops *OPS, \ - struct sk_buff *SKB, \ - const struct net_device *UNUSED, \ - const struct net_device *OUT, \ - int (*OKFN)(struct sk_buff *)) -#else -#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ -static unsigned int FN_NAME(unsigned int HOOKNUM, \ - struct sk_buff *SKB, \ - const struct net_device *UNUSED, \ - const struct net_device *OUT, \ - int (*OKFN)(struct sk_buff *)) -#endif - -#define sfe_cm_ipv4_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ - sfe_define_post_routing_hook(__sfe_cm_ipv4_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) -#define sfe_cm_ipv6_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ - sfe_define_post_routing_hook(__sfe_cm_ipv6_post_routing_hook, HOOKNUM, OPS, 
SKB, UNUSED, OUT, OKFN) -#define fast_classifier_ipv4_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ - sfe_define_post_routing_hook(__fast_classifier_ipv4_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) -#define fast_classifier_ipv6_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ - sfe_define_post_routing_hook(__fast_classifier_ipv6_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) -#define SFE_IPV4_NF_POST_ROUTING_HOOK(fn) \ - { \ - .hook = fn, \ - .pf = NFPROTO_IPV4, \ - .hooknum = NF_INET_POST_ROUTING, \ - .priority = NF_IP_PRI_NAT_SRC + 1, \ - } -#else -#define SFE_IPV4_NF_POST_ROUTING_HOOK(fn) \ - { \ - .hook = fn, \ - .owner = THIS_MODULE, \ - .pf = NFPROTO_IPV4, \ - .hooknum = NF_INET_POST_ROUTING, \ - .priority = NF_IP_PRI_NAT_SRC + 1, \ - } -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) -#define SFE_IPV6_NF_POST_ROUTING_HOOK(fn) \ - { \ - .hook = fn, \ - .pf = NFPROTO_IPV6, \ - .hooknum = NF_INET_POST_ROUTING, \ - .priority = NF_IP_PRI_NAT_SRC + 1, \ - } -#else -#define SFE_IPV6_NF_POST_ROUTING_HOOK(fn) \ - { \ - .hook = fn, \ - .owner = THIS_MODULE, \ - .pf = NFPROTO_IPV6, \ - .hooknum = NF_INET_POST_ROUTING, \ - .priority = NF_IP6_PRI_NAT_SRC + 1, \ - } -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)) -#define SFE_NF_CT_DEFAULT_ZONE (&nf_ct_zone_dflt) -#else -#define SFE_NF_CT_DEFAULT_ZONE NF_CT_DEFAULT_ZONE -#endif - -/* - * sfe_dev_get_master - * get master of bridge port, and hold it - */ -static inline struct net_device *sfe_dev_get_master(struct net_device *dev) -{ - struct net_device *master; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) - rcu_read_lock(); - master = netdev_master_upper_dev_get_rcu(dev); - if (master) - dev_hold(master); - - rcu_read_unlock(); -#else - master = dev->master; - if (master) - dev_hold(master); -#endif - return master; -} - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) -#define 
SFE_DEV_EVENT_PTR(PTR) netdev_notifier_info_to_dev(PTR) -#else -#define SFE_DEV_EVENT_PTR(PTR) (struct net_device *)(PTR) -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) -#define SFE_NF_CONN_ACCT(NM) struct nf_conn_acct *NM -#else -#define SFE_NF_CONN_ACCT(NM) struct nf_conn_counter *NM -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) -#define SFE_ACCT_COUNTER(NM) ((NM)->counter) -#else -#define SFE_ACCT_COUNTER(NM) (NM) -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) -#define sfe_hash_for_each_possible(name, obj, node, member, key) \ - hash_for_each_possible(name, obj, member, key) -#else -#define sfe_hash_for_each_possible(name, obj, node, member, key) \ - hash_for_each_possible(name, obj, node, member, key) -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) -#define sfe_hash_for_each(name, bkt, node, obj, member) \ - hash_for_each(name, bkt, obj, member) -#else -#define sfe_hash_for_each(name, bkt, node, obj, member) \ - hash_for_each(name, bkt, node, obj, member) -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) -#define sfe_dst_get_neighbour(dst, daddr) dst_neigh_lookup(dst, addr) -#else -static inline struct neighbour * -sfe_dst_get_neighbour(struct dst_entry *dst, void *daddr) -{ - struct neighbour *neigh = dst_get_neighbour_noref(dst); - - if (neigh) - neigh_hold(neigh); - - return neigh; -} -#endif diff --git a/shortcut-fe/sfe_cm.c b/shortcut-fe/sfe_cm.c deleted file mode 100644 index bd1bb88aa..000000000 --- a/shortcut-fe/sfe_cm.c +++ /dev/null @@ -1,1154 +0,0 @@ -/* - * sfe-cm.c - * Shortcut forwarding engine connection manager. - * - * Copyright (c) 2013-2018, 2020 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. 
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sfe.h" -#include "sfe_cm.h" -#include "sfe_backport.h" - -typedef enum sfe_cm_exception { - SFE_CM_EXCEPTION_PACKET_BROADCAST, - SFE_CM_EXCEPTION_PACKET_MULTICAST, - SFE_CM_EXCEPTION_NO_IIF, - SFE_CM_EXCEPTION_NO_CT, - SFE_CM_EXCEPTION_CT_NO_TRACK, - SFE_CM_EXCEPTION_CT_NO_CONFIRM, - SFE_CM_EXCEPTION_CT_IS_ALG, - SFE_CM_EXCEPTION_IS_IPV4_MCAST, - SFE_CM_EXCEPTION_IS_IPV6_MCAST, - SFE_CM_EXCEPTION_TCP_NOT_ASSURED, - SFE_CM_EXCEPTION_TCP_NOT_ESTABLISHED, - SFE_CM_EXCEPTION_UNKNOW_PROTOCOL, - SFE_CM_EXCEPTION_NO_SRC_DEV, - SFE_CM_EXCEPTION_NO_SRC_XLATE_DEV, - SFE_CM_EXCEPTION_NO_DEST_DEV, - SFE_CM_EXCEPTION_NO_DEST_XLATE_DEV, - SFE_CM_EXCEPTION_NO_BRIDGE, - SFE_CM_EXCEPTION_LOCAL_OUT, - SFE_CM_EXCEPTION_MAX -} sfe_cm_exception_t; - -static char *sfe_cm_exception_events_string[SFE_CM_EXCEPTION_MAX] = { - "PACKET_BROADCAST", - "PACKET_MULTICAST", - "NO_IIF", - "NO_CT", - "CT_NO_TRACK", - "CT_NO_CONFIRM", - "CT_IS_ALG", - "IS_IPV4_MCAST", - "IS_IPV6_MCAST", - "TCP_NOT_ASSURED", - "TCP_NOT_ESTABLISHED", - "UNKNOW_PROTOCOL", - "NO_SRC_DEV", - "NO_SRC_XLATE_DEV", - "NO_DEST_DEV", - "NO_DEST_XLATE_DEV", - "NO_BRIDGE", - "LOCAL_OUT" -}; - -/* - * Per-module structure. - */ -struct sfe_cm { - spinlock_t lock; /* Lock for SMP correctness */ - - /* - * Control state. 
- */ - struct kobject *sys_sfe_cm; /* sysfs linkage */ - - /* - * Callback notifiers. - */ - struct notifier_block dev_notifier; /* Device notifier */ - struct notifier_block inet_notifier; /* IPv4 notifier */ - struct notifier_block inet6_notifier; /* IPv6 notifier */ - u32 exceptions[SFE_CM_EXCEPTION_MAX]; -}; - -static struct sfe_cm __sc; - -/* - * sfe_cm_incr_exceptions() - * increase an exception counter. - */ -static inline void sfe_cm_incr_exceptions(sfe_cm_exception_t except) -{ - struct sfe_cm *sc = &__sc; - - spin_lock_bh(&sc->lock); - sc->exceptions[except]++; - spin_unlock_bh(&sc->lock); -} - -/* - * sfe_cm_recv() - * Handle packet receives. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. - */ -int sfe_cm_recv(struct sk_buff *skb) -{ - struct net_device *dev; - - /* - * We know that for the vast majority of packets we need the transport - * layer header so we may as well start to fetch it now! - */ - prefetch(skb->data + 32); - barrier(); - - dev = skb->dev; - - /* - * We're only interested in IPv4 and IPv6 packets. - */ - if (likely(htons(ETH_P_IP) == skb->protocol)) { - struct in_device *in_dev; - - /* - * Does our input device support IP processing? - */ - in_dev = (struct in_device *)dev->ip_ptr; - if (unlikely(!in_dev)) { - DEBUG_TRACE("no IP processing for device: %s\n", dev->name); - return 0; - } - - /* - * Does it have an IP address? If it doesn't then we can't do anything - * interesting here! - */ - if (unlikely(!in_dev->ifa_list)) { - DEBUG_TRACE("no IP address for device: %s\n", dev->name); - return 0; - } - - return sfe_ipv4_recv(dev, skb); - } - - if (likely(htons(ETH_P_IPV6) == skb->protocol)) { - struct inet6_dev *in_dev; - - /* - * Does our input device support IPv6 processing? - */ - in_dev = (struct inet6_dev *)dev->ip6_ptr; - if (unlikely(!in_dev)) { - DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name); - return 0; - } - - /* - * Does it have an IPv6 address? 
If it doesn't then we can't do anything - * interesting here! - */ - if (unlikely(list_empty(&in_dev->addr_list))) { - DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name); - return 0; - } - - return sfe_ipv6_recv(dev, skb); - } - - DEBUG_TRACE("not IP packet\n"); - return 0; -} - -/* - * sfe_cm_find_dev_and_mac_addr() - * Find the device and MAC address for a given IPv4/IPv6 address. - * - * Returns true if we find the device and MAC address, otherwise false. - * - * We look up the rtable entry for the address and, from its neighbour - * structure, obtain the hardware address. This means this function also - * works if the neighbours are routers too. - */ -static bool sfe_cm_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, int is_v4) -{ - struct neighbour *neigh; - struct rtable *rt; - struct rt6_info *rt6; - struct dst_entry *dst; - struct net_device *mac_dev; - - /* - * Look up the rtable entry for the IP address then get the hardware - * address from its neighbour structure. This means this work when the - * neighbours are routers too. 
- */ - if (likely(is_v4)) { - rt = ip_route_output(&init_net, addr->ip, 0, 0, 0); - if (unlikely(IS_ERR(rt))) { - goto ret_fail; - } - - dst = (struct dst_entry *)rt; - } else { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)) - rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0); -#else - rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, NULL, 0); -#endif - if (!rt6) { - goto ret_fail; - } - - dst = (struct dst_entry *)rt6; - } - - rcu_read_lock(); - neigh = sfe_dst_get_neighbour(dst, addr); - if (unlikely(!neigh)) { - rcu_read_unlock(); - dst_release(dst); - goto ret_fail; - } - - if (unlikely(!(neigh->nud_state & NUD_VALID))) { - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - goto ret_fail; - } - - mac_dev = neigh->dev; - if (!mac_dev) { - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - goto ret_fail; - } - - memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len); - - dev_hold(mac_dev); - *dev = mac_dev; - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - - return true; - -ret_fail: - if (is_v4) { - DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", &addr->ip); - - } else { - DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr->ip6); - } - - return false; -} - -/* - * sfe_cm_post_routing() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4) -{ - struct sfe_connection_create sic; - struct net_device *in; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - struct net_device *dev; - struct net_device *src_dev; - struct net_device *dest_dev; - struct net_device *src_dev_tmp; - struct net_device *dest_dev_tmp; - struct net_device *src_br_dev = NULL; - struct net_device *dest_br_dev = NULL; - struct nf_conntrack_tuple orig_tuple; - struct nf_conntrack_tuple reply_tuple; - SFE_NF_CONN_ACCT(acct); - - /* - * Don't process broadcast or 
multicast packets. - */ - if (unlikely(skb->pkt_type == PACKET_BROADCAST)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_PACKET_BROADCAST); - DEBUG_TRACE("broadcast, ignoring\n"); - return NF_ACCEPT; - } - if (unlikely(skb->pkt_type == PACKET_MULTICAST)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_PACKET_MULTICAST); - DEBUG_TRACE("multicast, ignoring\n"); - return NF_ACCEPT; - } - -#ifdef CONFIG_XFRM - /* - * Packet to xfrm for encapsulation, we can't process it - */ - if (unlikely(skb_dst(skb)->xfrm)) { - DEBUG_TRACE("packet to xfrm, ignoring\n"); - return NF_ACCEPT; - } -#endif - - /* - * Don't process locally generated packets. - */ - if (skb->sk) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_LOCAL_OUT); - DEBUG_TRACE("skip local out packet\n"); - return NF_ACCEPT; - } - - /* - * Don't process packets that are not being forwarded. - */ - in = dev_get_by_index(&init_net, skb->skb_iif); - if (!in) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_IIF); - DEBUG_TRACE("packet not forwarding\n"); - return NF_ACCEPT; - } - - dev_put(in); - - /* - * Don't process packets that aren't being tracked by conntrack. - */ - ct = nf_ct_get(skb, &ctinfo); - if (unlikely(!ct)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_CT); - DEBUG_TRACE("no conntrack connection, ignoring\n"); - return NF_ACCEPT; - } - - /* - * Don't process untracked connections. - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) - if (unlikely(nf_ct_is_untracked(ct))) { -#else - if (unlikely(ctinfo == IP_CT_UNTRACKED)) { -#endif - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_NO_TRACK); - DEBUG_TRACE("untracked connection\n"); - return NF_ACCEPT; - } - - /* - * Unconfirmed connection may be dropped by Linux at the final step, - * So we don't process unconfirmed connections. 
- */ - if (!nf_ct_is_confirmed(ct)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_NO_CONFIRM); - DEBUG_TRACE("unconfirmed connection\n"); - return NF_ACCEPT; - } - - /* - * Don't process connections that require support from a 'helper' (typically a NAT ALG). - */ - if (unlikely(nfct_help(ct))) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_IS_ALG); - DEBUG_TRACE("connection has helper\n"); - return NF_ACCEPT; - } - - /* - * Check if the acceleration of a flow could be rejected quickly. - */ - acct = nf_conn_acct_find(ct); - if (acct) { - long long packets = atomic64_read(&SFE_ACCT_COUNTER(acct)[CTINFO2DIR(ctinfo)].packets); - if ((packets > 0xff) && (packets & 0xff)) { - /* - * Connection hits slow path at least 256 times, so it must be not able to accelerate. - * But we also give it a chance to walk through ECM every 256 packets - */ - return NF_ACCEPT; - } - } - - /* - * Look up the details of our connection in conntrack. - * - * Note that the data we get from conntrack is for the "ORIGINAL" direction - * but our packet may actually be in the "REPLY" direction. - */ - orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - sic.protocol = (s32)orig_tuple.dst.protonum; - - sic.flags = 0; - - /* - * Get addressing information, non-NAT first - */ - if (likely(is_v4)) { - u32 dscp; - - sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip; - sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; - - if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_IS_IPV4_MCAST); - DEBUG_TRACE("multicast address\n"); - return NF_ACCEPT; - } - - /* - * NAT'ed addresses - note these are as seen from the 'reply' direction - * When NAT does not apply to this connection these will be identical to the above. 
- */ - sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip; - sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip; - - dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; - if (dscp) { - sic.dest_dscp = dscp; - sic.src_dscp = sic.dest_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - } else { - u32 dscp; - - sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); - sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); - - if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) || - ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_IS_IPV6_MCAST); - DEBUG_TRACE("multicast address\n"); - return NF_ACCEPT; - } - - /* - * NAT'ed addresses - note these are as seen from the 'reply' direction - * When NAT does not apply to this connection these will be identical to the above. - */ - sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6); - sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6); - - dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; - if (dscp) { - sic.dest_dscp = dscp; - sic.src_dscp = sic.dest_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - } - - switch (sic.protocol) { - case IPPROTO_TCP: - sic.src_port = orig_tuple.src.u.tcp.port; - sic.dest_port = orig_tuple.dst.u.tcp.port; - sic.src_port_xlate = reply_tuple.dst.u.tcp.port; - sic.dest_port_xlate = reply_tuple.src.u.tcp.port; - sic.src_td_window_scale = ct->proto.tcp.seen[0].td_scale; - sic.src_td_max_window = ct->proto.tcp.seen[0].td_maxwin; - sic.src_td_end = ct->proto.tcp.seen[0].td_end; - sic.src_td_max_end = ct->proto.tcp.seen[0].td_maxend; - sic.dest_td_window_scale = ct->proto.tcp.seen[1].td_scale; - sic.dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin; - sic.dest_td_end = ct->proto.tcp.seen[1].td_end; - sic.dest_td_max_end = ct->proto.tcp.seen[1].td_maxend; - - if (nf_ct_tcp_no_window_check - || (ct->proto.tcp.seen[0].flags & 
IP_CT_TCP_FLAG_BE_LIBERAL) - || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) { - sic.flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK; - } - - /* - * Don't try to manage a non-established connection. - */ - if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_TCP_NOT_ASSURED); - DEBUG_TRACE("non-established connection\n"); - return NF_ACCEPT; - } - - /* - * If the connection is shutting down do not manage it. - * state can not be SYN_SENT, SYN_RECV because connection is assured - * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE. - */ - spin_lock_bh(&ct->lock); - if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) { - spin_unlock_bh(&ct->lock); - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_TCP_NOT_ESTABLISHED); - DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n", - ct->proto.tcp.state, &sic.src_ip, ntohs(sic.src_port), - &sic.dest_ip, ntohs(sic.dest_port)); - return NF_ACCEPT; - } - spin_unlock_bh(&ct->lock); - break; - - case IPPROTO_UDP: - sic.src_port = orig_tuple.src.u.udp.port; - sic.dest_port = orig_tuple.dst.u.udp.port; - sic.src_port_xlate = reply_tuple.dst.u.udp.port; - sic.dest_port_xlate = reply_tuple.src.u.udp.port; - break; - - default: - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_UNKNOW_PROTOCOL); - DEBUG_TRACE("unhandled protocol %d\n", sic.protocol); - return NF_ACCEPT; - } - -#ifdef CONFIG_XFRM - sic.original_accel = 1; - sic.reply_accel = 1; - - /* - * For packets de-capsulated from xfrm, we still can accelerate it - * on the direction we just received the packet. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)) - if (unlikely(skb->sp)) { -#else - if (unlikely(secpath_exists(skb))) { -#endif - if (sic.protocol == IPPROTO_TCP && - !(sic.flags & SFE_CREATE_FLAG_NO_SEQ_CHECK)) { - return NF_ACCEPT; - } - - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { - sic.reply_accel = 0; - } else { - sic.original_accel = 0; - } - } -#endif - - /* - * Get QoS information - */ - if (skb->priority) { - sic.dest_priority = skb->priority; - sic.src_priority = sic.dest_priority; - sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY; - } - - /* - * Get the net device and MAC addresses that correspond to the various source and - * destination host addresses. - */ - if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_SRC_DEV); - return NF_ACCEPT; - } - src_dev = src_dev_tmp; - - if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_SRC_XLATE_DEV); - goto done1; - } - dev_put(dev); - - if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_DEST_DEV); - goto done1; - } - dev_put(dev); - - if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_DEST_XLATE_DEV); - goto done1; - } - dest_dev = dest_dev_tmp; - - /* - * Our devices may actually be part of a bridge interface. If that's - * the case then find the bridge interface instead. 
- */ - if (src_dev->priv_flags & IFF_BRIDGE_PORT) { - src_br_dev = sfe_dev_get_master(src_dev); - if (!src_br_dev) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_BRIDGE); - DEBUG_TRACE("no bridge found for: %s\n", src_dev->name); - goto done2; - } - src_dev = src_br_dev; - } - - if (dest_dev->priv_flags & IFF_BRIDGE_PORT) { - dest_br_dev = sfe_dev_get_master(dest_dev); - if (!dest_br_dev) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_BRIDGE); - DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name); - goto done3; - } - dest_dev = dest_br_dev; - } - - sic.src_dev = src_dev; - sic.dest_dev = dest_dev; - - sic.src_mtu = src_dev->mtu; - sic.dest_mtu = dest_dev->mtu; - - if (likely(is_v4)) { - sfe_ipv4_create_rule(&sic); - } else { - sfe_ipv6_create_rule(&sic); - } - - /* - * If we had bridge ports then release them too. - */ - if (dest_br_dev) { - dev_put(dest_br_dev); - } -done3: - if (src_br_dev) { - dev_put(src_br_dev); - } -done2: - dev_put(dest_dev_tmp); -done1: - dev_put(src_dev_tmp); - - return NF_ACCEPT; -} - -/* - * sfe_cm_ipv4_post_routing_hook() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -sfe_cm_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) -{ - return sfe_cm_post_routing(skb, true); -} - -/* - * sfe_cm_ipv6_post_routing_hook() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -sfe_cm_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) -{ - return sfe_cm_post_routing(skb, false); -} - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -/* - * sfe_cm_conntrack_event() - * Callback event invoked when a conntrack connection's state changes. 
- */ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -static int sfe_cm_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) -#else -static int sfe_cm_conntrack_event(unsigned int events, struct nf_ct_event *item) -#endif -{ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS - struct nf_ct_event *item = ptr; -#endif - struct sfe_connection_destroy sid; - struct nf_conn *ct = item->ct; - struct nf_conntrack_tuple orig_tuple; - - /* - * If we don't have a conntrack entry then we're done. - */ - if (unlikely(!ct)) { - DEBUG_WARN("no ct in conntrack event callback\n"); - return NOTIFY_DONE; - } - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) - if (unlikely(nf_ct_is_untracked(ct))) { - DEBUG_TRACE("ignoring untracked conn\n"); - return NOTIFY_DONE; - } -#endif - - /* - * We're only interested in destroy events. - */ - if (unlikely(!(events & (1 << IPCT_DESTROY)))) { - DEBUG_TRACE("ignoring non-destroy event\n"); - return NOTIFY_DONE; - } - - orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - sid.protocol = (s32)orig_tuple.dst.protonum; - - /* - * Extract information from the conntrack connection. We're only interested - * in nominal connection information (i.e. we're ignoring any NAT information). 
- */ - switch (sid.protocol) { - case IPPROTO_TCP: - sid.src_port = orig_tuple.src.u.tcp.port; - sid.dest_port = orig_tuple.dst.u.tcp.port; - break; - - case IPPROTO_UDP: - sid.src_port = orig_tuple.src.u.udp.port; - sid.dest_port = orig_tuple.dst.u.udp.port; - break; - - default: - DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol); - return NOTIFY_DONE; - } - - if (likely(nf_ct_l3num(ct) == AF_INET)) { - sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip; - sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; - - sfe_ipv4_destroy_rule(&sid); - } else if (likely(nf_ct_l3num(ct) == AF_INET6)) { - sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); - sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); - - sfe_ipv6_destroy_rule(&sid); - } else { - DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n"); - } - - return NOTIFY_DONE; -} - -/* - * Netfilter conntrack event system to monitor connection tracking changes - */ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -static struct notifier_block sfe_cm_conntrack_notifier = { - .notifier_call = sfe_cm_conntrack_event, -}; -#else -static struct nf_ct_event_notifier sfe_cm_conntrack_notifier = { - .fcn = sfe_cm_conntrack_event, -}; -#endif -#endif - -/* - * Structure to establish a hook into the post routing netfilter point - this - * will pick up local outbound and packets going from one interface to another. - * - * Note: see include/linux/netfilter_ipv4.h for info related to priority levels. - * We want to examine packets after NAT translation and any ALG processing. - */ -static struct nf_hook_ops sfe_cm_ops_post_routing[] __read_mostly = { - SFE_IPV4_NF_POST_ROUTING_HOOK(__sfe_cm_ipv4_post_routing_hook), -#ifdef SFE_SUPPORT_IPV6 - SFE_IPV6_NF_POST_ROUTING_HOOK(__sfe_cm_ipv6_post_routing_hook), -#endif -}; - -/* - * sfe_cm_sync_rule() - * Synchronize a connection's state. 
- */ -static void sfe_cm_sync_rule(struct sfe_connection_sync *sis) -{ - struct nf_conntrack_tuple_hash *h; - struct nf_conntrack_tuple tuple; - struct nf_conn *ct; - SFE_NF_CONN_ACCT(acct); - - /* - * Create a tuple so as to be able to look up a connection - */ - memset(&tuple, 0, sizeof(tuple)); - tuple.src.u.all = (__be16)sis->src_port; - tuple.dst.dir = IP_CT_DIR_ORIGINAL; - tuple.dst.protonum = (u8)sis->protocol; - tuple.dst.u.all = (__be16)sis->dest_port; - - if (sis->is_v6) { - tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6); - tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6); - tuple.src.l3num = AF_INET6; - - DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n", - (int)tuple.dst.protonum, - &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all), - &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all)); - } else { - tuple.src.u3.ip = sis->src_ip.ip; - tuple.dst.u3.ip = sis->dest_ip.ip; - tuple.src.l3num = AF_INET; - - DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n", - (int)tuple.dst.protonum, - &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all), - &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all)); - } - - /* - * Look up conntrack connection - */ - h = nf_conntrack_find_get(&init_net, SFE_NF_CT_DEFAULT_ZONE, &tuple); - if (unlikely(!h)) { - DEBUG_TRACE("no connection found\n"); - return; - } - - ct = nf_ct_tuplehash_to_ctrack(h); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)) - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); -#endif - /* - * Only update if this is not a fixed timeout - */ - if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { - spin_lock_bh(&ct->lock); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)) - ct->timeout.expires += sis->delta_jiffies; -#else - ct->timeout += sis->delta_jiffies; -#endif - spin_unlock_bh(&ct->lock); - } - - acct = nf_conn_acct_find(ct); - if (acct) { - spin_lock_bh(&ct->lock); - atomic64_add(sis->src_new_packet_count, 
&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets); - atomic64_add(sis->src_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes); - atomic64_add(sis->dest_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets); - atomic64_add(sis->dest_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes); - spin_unlock_bh(&ct->lock); - } - - switch (sis->protocol) { - case IPPROTO_TCP: - spin_lock_bh(&ct->lock); - if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) { - ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window; - } - if ((s32)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) { - ct->proto.tcp.seen[0].td_end = sis->src_td_end; - } - if ((s32)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) { - ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end; - } - if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) { - ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window; - } - if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) { - ct->proto.tcp.seen[1].td_end = sis->dest_td_end; - } - if ((s32)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) { - ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end; - } - spin_unlock_bh(&ct->lock); - break; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) - case IPPROTO_UDP: - /* - * In Linux connection track, UDP flow has two timeout values: - * /proc/sys/net/netfilter/nf_conntrack_udp_timeout: - * this is for uni-direction UDP flow, normally its value is 60 seconds - * /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream: - * this is for bi-direction UDP flow, normally its value is 180 seconds - * - * Linux will update timer of UDP flow to stream timeout once it seen packets - * in reply direction. But if flow is accelerated by NSS or SFE, Linux won't - * see any packets. So we have to do the same thing in our stats sync message. 
- */ - if (!test_bit(IPS_ASSURED_BIT, &ct->status) && acct) { - u_int64_t reply_pkts = atomic64_read(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets); - - if (reply_pkts != 0) { - unsigned int *timeouts; - struct nf_conntrack_l4proto *l4proto __maybe_unused; - set_bit(IPS_SEEN_REPLY_BIT, &ct->status); - set_bit(IPS_ASSURED_BIT, &ct->status); - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0)) - l4proto = __nf_ct_l4proto_find((sis->is_v6 ? AF_INET6 : AF_INET), IPPROTO_UDP); - timeouts = nf_ct_timeout_lookup(&init_net, ct, l4proto); - spin_lock_bh(&ct->lock); - ct->timeout.expires = jiffies + timeouts[UDP_CT_REPLIED]; - spin_unlock_bh(&ct->lock); -#else - timeouts = nf_ct_timeout_lookup(ct); - if (!timeouts) { - timeouts = udp_get_timeouts(nf_ct_net(ct)); - } - - spin_lock_bh(&ct->lock); - ct->timeout = jiffies + timeouts[UDP_CT_REPLIED]; - spin_unlock_bh(&ct->lock); -#endif - } - } - break; -#endif /*KERNEL_VERSION(3, 4, 0)*/ - } - - /* - * Release connection - */ - nf_ct_put(ct); -} - -/* - * sfe_cm_device_event() - */ -int sfe_cm_device_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = SFE_DEV_EVENT_PTR(ptr); - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv4_destroy_all_rules_for_dev(dev); - sfe_ipv6_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * sfe_cm_inet_event() - */ -static int sfe_cm_inet_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv4_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * sfe_cm_inet6_event() - */ -static int sfe_cm_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev; - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv6_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * sfe_cm_get_exceptions - * 
dump exception counters - */ -static ssize_t sfe_cm_get_exceptions(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int idx, len; - struct sfe_cm *sc = &__sc; - - spin_lock_bh(&sc->lock); - for (len = 0, idx = 0; idx < SFE_CM_EXCEPTION_MAX; idx++) { - if (sc->exceptions[idx]) { - len += snprintf(buf + len, (ssize_t)(PAGE_SIZE - len), "%s = %d\n", sfe_cm_exception_events_string[idx], sc->exceptions[idx]); - } - } - spin_unlock_bh(&sc->lock); - - return len; -} - -/* - * sysfs attributes. - */ -static const struct device_attribute sfe_cm_exceptions_attr = - __ATTR(exceptions, S_IRUGO, sfe_cm_get_exceptions, NULL); - -/* - * sfe_cm_init() - */ -static int __init sfe_cm_init(void) -{ - struct sfe_cm *sc = &__sc; - int result = -1; - - DEBUG_INFO("SFE CM init\n"); - - /* - * Create sys/sfe_cm - */ - sc->sys_sfe_cm = kobject_create_and_add("sfe_cm", NULL); - if (!sc->sys_sfe_cm) { - DEBUG_ERROR("failed to register sfe_cm\n"); - goto exit1; - } - - /* - * Create sys/sfe_cm/exceptions - */ - result = sysfs_create_file(sc->sys_sfe_cm, &sfe_cm_exceptions_attr.attr); - if (result) { - DEBUG_ERROR("failed to register exceptions file: %d\n", result); - goto exit2; - } - - sc->dev_notifier.notifier_call = sfe_cm_device_event; - sc->dev_notifier.priority = 1; - register_netdevice_notifier(&sc->dev_notifier); - - sc->inet_notifier.notifier_call = sfe_cm_inet_event; - sc->inet_notifier.priority = 1; - register_inetaddr_notifier(&sc->inet_notifier); - - sc->inet6_notifier.notifier_call = sfe_cm_inet6_event; - sc->inet6_notifier.priority = 1; - register_inet6addr_notifier(&sc->inet6_notifier); - /* - * Register our netfilter hooks. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) - result = nf_register_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#else - result = nf_register_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#endif - if (result < 0) { - DEBUG_ERROR("can't register nf post routing hook: %d\n", result); - goto exit3; - } - - /* - * Register a notifier hook to get fast notifications of expired connections. - * Note: In CONFIG_NF_CONNTRACK_CHAIN_EVENTS enabled case, nf_conntrack_register_notifier() - * function always returns 0. - */ -#ifdef CONFIG_NF_CONNTRACK_EVENTS -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS - (void)nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier); -#else - result = nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier); - if (result < 0) { - DEBUG_ERROR("can't register nf notifier hook: %d\n", result); - goto exit4; - } -#endif -#endif - - spin_lock_init(&sc->lock); - - /* - * Hook the receive path in the network stack. - */ - BUG_ON(athrs_fast_nat_recv); - RCU_INIT_POINTER(athrs_fast_nat_recv, sfe_cm_recv); - - /* - * Hook the shortcut sync callback. 
- */ - sfe_ipv4_register_sync_rule_callback(sfe_cm_sync_rule); - sfe_ipv6_register_sync_rule_callback(sfe_cm_sync_rule); - return 0; - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -exit4: -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) - nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#else - nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#endif - -#endif -#endif -exit3: - unregister_inet6addr_notifier(&sc->inet6_notifier); - unregister_inetaddr_notifier(&sc->inet_notifier); - unregister_netdevice_notifier(&sc->dev_notifier); -exit2: - kobject_put(sc->sys_sfe_cm); - -exit1: - return result; -} - -/* - * sfe_cm_exit() - */ -static void __exit sfe_cm_exit(void) -{ - struct sfe_cm *sc = &__sc; - - DEBUG_INFO("SFE CM exit\n"); - - /* - * Unregister our sync callback. - */ - sfe_ipv4_register_sync_rule_callback(NULL); - sfe_ipv6_register_sync_rule_callback(NULL); - - /* - * Unregister our receive callback. - */ - RCU_INIT_POINTER(athrs_fast_nat_recv, NULL); - - /* - * Wait for all callbacks to complete. - */ - rcu_barrier(); - - /* - * Destroy all connections. 
- */ - sfe_ipv4_destroy_all_rules_for_dev(NULL); - sfe_ipv6_destroy_all_rules_for_dev(NULL); - -#ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_conntrack_unregister_notifier(&init_net, &sfe_cm_conntrack_notifier); - -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) - nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#else - nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#endif - unregister_inet6addr_notifier(&sc->inet6_notifier); - unregister_inetaddr_notifier(&sc->inet_notifier); - unregister_netdevice_notifier(&sc->dev_notifier); - - kobject_put(sc->sys_sfe_cm); -} - -module_init(sfe_cm_init) -module_exit(sfe_cm_exit) - -MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/shortcut-fe/sfe_cm.h b/shortcut-fe/sfe_cm.h deleted file mode 100644 index 23cbde859..000000000 --- a/shortcut-fe/sfe_cm.h +++ /dev/null @@ -1,259 +0,0 @@ -/* - * sfe_cm.h - * Shortcut forwarding engine. - * - * Copyright (c) 2013-2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * connection flags. 
- */ -#define SFE_CREATE_FLAG_NO_SEQ_CHECK BIT(0) - /* Indicates that we should not check sequence numbers */ -#define SFE_CREATE_FLAG_REMARK_PRIORITY BIT(1) - /* Indicates that we should remark priority of skb */ -#define SFE_CREATE_FLAG_REMARK_DSCP BIT(2) - /* Indicates that we should remark DSCP of packet */ - -/* - * IPv6 address structure - */ -struct sfe_ipv6_addr { - __be32 addr[4]; -}; - -typedef union { - __be32 ip; - struct sfe_ipv6_addr ip6[1]; -} sfe_ip_addr_t; - -/* - * connection creation structure. - */ -struct sfe_connection_create { - int protocol; - struct net_device *src_dev; - struct net_device *dest_dev; - u32 flags; - u32 src_mtu; - u32 dest_mtu; - sfe_ip_addr_t src_ip; - sfe_ip_addr_t src_ip_xlate; - sfe_ip_addr_t dest_ip; - sfe_ip_addr_t dest_ip_xlate; - __be16 src_port; - __be16 src_port_xlate; - __be16 dest_port; - __be16 dest_port_xlate; - u8 src_mac[ETH_ALEN]; - u8 src_mac_xlate[ETH_ALEN]; - u8 dest_mac[ETH_ALEN]; - u8 dest_mac_xlate[ETH_ALEN]; - u8 src_td_window_scale; - u32 src_td_max_window; - u32 src_td_end; - u32 src_td_max_end; - u8 dest_td_window_scale; - u32 dest_td_max_window; - u32 dest_td_end; - u32 dest_td_max_end; - u32 mark; -#ifdef CONFIG_XFRM - u32 original_accel; - u32 reply_accel; -#endif - u32 src_priority; - u32 dest_priority; - u32 src_dscp; - u32 dest_dscp; -}; - -/* - * connection destruction structure. - */ -struct sfe_connection_destroy { - int protocol; - sfe_ip_addr_t src_ip; - sfe_ip_addr_t dest_ip; - __be16 src_port; - __be16 dest_port; -}; - -typedef enum sfe_sync_reason { - SFE_SYNC_REASON_STATS, /* Sync is to synchronize stats */ - SFE_SYNC_REASON_FLUSH, /* Sync is to flush a entry */ - SFE_SYNC_REASON_DESTROY /* Sync is to destroy a entry(requested by connection manager) */ -} sfe_sync_reason_t; - -/* - * Structure used to sync connection stats/state back within the system. - * - * NOTE: The addresses here are NON-NAT addresses, i.e. the true endpoint addressing. 
- * 'src' is the creator of the connection. - */ -struct sfe_connection_sync { - struct net_device *src_dev; - struct net_device *dest_dev; - int is_v6; /* Is it for ipv6? */ - int protocol; /* IP protocol number (IPPROTO_...) */ - sfe_ip_addr_t src_ip; /* Non-NAT source address, i.e. the creator of the connection */ - sfe_ip_addr_t src_ip_xlate; /* NATed source address */ - __be16 src_port; /* Non-NAT source port */ - __be16 src_port_xlate; /* NATed source port */ - sfe_ip_addr_t dest_ip; /* Non-NAT destination address, i.e. to whom the connection was created */ - sfe_ip_addr_t dest_ip_xlate; /* NATed destination address */ - __be16 dest_port; /* Non-NAT destination port */ - __be16 dest_port_xlate; /* NATed destination port */ - u32 src_td_max_window; - u32 src_td_end; - u32 src_td_max_end; - u64 src_packet_count; - u64 src_byte_count; - u32 src_new_packet_count; - u32 src_new_byte_count; - u32 dest_td_max_window; - u32 dest_td_end; - u32 dest_td_max_end; - u64 dest_packet_count; - u64 dest_byte_count; - u32 dest_new_packet_count; - u32 dest_new_byte_count; - u32 reason; /* reason for stats sync message, i.e. destroy, flush, period sync */ - u64 delta_jiffies; /* Time to be added to the current timeout to keep the connection alive */ -}; - -/* - * connection mark structure - */ -struct sfe_connection_mark { - int protocol; - sfe_ip_addr_t src_ip; - sfe_ip_addr_t dest_ip; - __be16 src_port; - __be16 dest_port; - u32 mark; -}; - -/* - * Expose the hook for the receive processing. - */ -extern int (*athrs_fast_nat_recv)(struct sk_buff *skb); - -/* - * Expose what should be a static flag in the TCP connection tracker. - */ -extern int nf_ct_tcp_no_window_check; - -/* - * This callback will be called in a timer - * at 100 times per second to sync stats back to - * Linux connection track. - * - * A RCU lock is taken to prevent this callback - * from unregistering. 
- */ -typedef void (*sfe_sync_rule_callback_t)(struct sfe_connection_sync *); - -/* - * IPv4 APIs used by connection manager - */ -int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb); -int sfe_ipv4_create_rule(struct sfe_connection_create *sic); -void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid); -void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev); -void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t callback); -void sfe_ipv4_update_rule(struct sfe_connection_create *sic); -void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark); - -#ifdef SFE_SUPPORT_IPV6 -/* - * IPv6 APIs used by connection manager - */ -int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb); -int sfe_ipv6_create_rule(struct sfe_connection_create *sic); -void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid); -void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev); -void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback); -void sfe_ipv6_update_rule(struct sfe_connection_create *sic); -void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark); -#else -static inline int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb) -{ - return 0; -} - -static inline int sfe_ipv6_create_rule(struct sfe_connection_create *sic) -{ - return 0; -} - -static inline void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid) -{ - return; -} - -static inline void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev) -{ - return; -} - -static inline void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback) -{ - return; -} - -static inline void sfe_ipv6_update_rule(struct sfe_connection_create *sic) -{ - return; -} - -static inline void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark) -{ - return; -} -#endif - -/* - * sfe_ipv6_addr_equal() - * compare ipv6 address - * - * return: 1, equal; 0, no equal - */ -static inline int sfe_ipv6_addr_equal(struct sfe_ipv6_addr *a, - struct 
sfe_ipv6_addr *b) -{ - return a->addr[0] == b->addr[0] && - a->addr[1] == b->addr[1] && - a->addr[2] == b->addr[2] && - a->addr[3] == b->addr[3]; -} - -/* - * sfe_ipv4_addr_equal() - * compare ipv4 address - * - * return: 1, equal; 0, no equal - */ -#define sfe_ipv4_addr_equal(a, b) ((u32)(a) == (u32)(b)) - -/* - * sfe_addr_equal() - * compare ipv4 or ipv6 address - * - * return: 1, equal; 0, no equal - */ -static inline int sfe_addr_equal(sfe_ip_addr_t *a, - sfe_ip_addr_t *b, int is_v4) -{ - return is_v4 ? sfe_ipv4_addr_equal(a->ip, b->ip) : sfe_ipv6_addr_equal(a->ip6, b->ip6); -} diff --git a/shortcut-fe/sfe_ipv4.c b/shortcut-fe/sfe_ipv4.c deleted file mode 100644 index 9f7ebd1c9..000000000 --- a/shortcut-fe/sfe_ipv4.c +++ /dev/null @@ -1,3610 +0,0 @@ -/* - * sfe_ipv4.c - * Shortcut forwarding engine - IPv4 edition. - * - * Copyright (c) 2013-2016, 2019-2020 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "sfe.h" -#include "sfe_cm.h" - -/* - * By default Linux IP header and transport layer header structures are - * unpacked, assuming that such headers should be 32-bit aligned. 
- * Unfortunately some wireless adaptors can't cope with this requirement and - * some CPUs can't handle misaligned accesses. For those platforms we - * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed. - * When we do this the compiler will generate slightly worse code than for the - * aligned case (on most platforms) but will be much quicker than fixing - * things up in an unaligned trap handler. - */ -#define SFE_IPV4_UNALIGNED_IP_HEADER 1 -#if SFE_IPV4_UNALIGNED_IP_HEADER -#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed)) -#else -#define SFE_IPV4_UNALIGNED_STRUCT -#endif - -/* - * An Ethernet header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV4_UNALIGNED_STRUCT) - */ -struct sfe_ipv4_eth_hdr { - __be16 h_dest[ETH_ALEN / 2]; - __be16 h_source[ETH_ALEN / 2]; - __be16 h_proto; -} SFE_IPV4_UNALIGNED_STRUCT; - -#define SFE_IPV4_DSCP_MASK 0x3 -#define SFE_IPV4_DSCP_SHIFT 2 - -/* - * An IPv4 header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV4_UNALIGNED_STRUCT) - */ -struct sfe_ipv4_ip_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 ihl:4, - version:4; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u8 version:4, - ihl:4; -#else -#error "Please fix " -#endif - __u8 tos; - __be16 tot_len; - __be16 id; - __be16 frag_off; - __u8 ttl; - __u8 protocol; - __sum16 check; - __be32 saddr; - __be32 daddr; - - /* - * The options start here. 
- */ -} SFE_IPV4_UNALIGNED_STRUCT; - -/* - * A UDP header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV4_UNALIGNED_STRUCT) - */ -struct sfe_ipv4_udp_hdr { - __be16 source; - __be16 dest; - __be16 len; - __sum16 check; -} SFE_IPV4_UNALIGNED_STRUCT; - -/* - * A TCP header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV4_UNALIGNED_STRUCT) - */ -struct sfe_ipv4_tcp_hdr { - __be16 source; - __be16 dest; - __be32 seq; - __be32 ack_seq; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 res1:4, - doff:4, - fin:1, - syn:1, - rst:1, - psh:1, - ack:1, - urg:1, - ece:1, - cwr:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 doff:4, - res1:4, - cwr:1, - ece:1, - urg:1, - ack:1, - psh:1, - rst:1, - syn:1, - fin:1; -#else -#error "Adjust your defines" -#endif - __be16 window; - __sum16 check; - __be16 urg_ptr; -} SFE_IPV4_UNALIGNED_STRUCT; - -/* - * Specifies the lower bound on ACK numbers carried in the TCP header - */ -#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520 - -/* - * IPv4 TCP connection match additional data. - */ -struct sfe_ipv4_tcp_connection_match { - u8 win_scale; /* Window scale */ - u32 max_win; /* Maximum window size seen */ - u32 end; /* Sequence number of the next byte to send (seq + segment length) */ - u32 max_end; /* Sequence number of the last byte to ack */ -}; - -/* - * Bit flags for IPv4 connection matching entry. 
- */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0) - /* Perform source translation */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1) - /* Perform destination translation */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2) - /* Ignore TCP sequence numbers */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3) - /* Fast Ethernet header write */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4) - /* Fast Ethernet header write */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5) - /* remark priority of SKB */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6) - /* remark DSCP of packet */ - -/* - * IPv4 connection matching structure. - */ -struct sfe_ipv4_connection_match { - /* - * References to other objects. - */ - struct sfe_ipv4_connection_match *next; - struct sfe_ipv4_connection_match *prev; - struct sfe_ipv4_connection *connection; - struct sfe_ipv4_connection_match *counter_match; - /* Matches the flow in the opposite direction as the one in *connection */ - struct sfe_ipv4_connection_match *active_next; - struct sfe_ipv4_connection_match *active_prev; - bool active; /* Flag to indicate if we're on the active list */ - - /* - * Characteristics that identify flows that match this rule. - */ - struct net_device *match_dev; /* Network device */ - u8 match_protocol; /* Protocol */ - __be32 match_src_ip; /* Source IP address */ - __be32 match_dest_ip; /* Destination IP address */ - __be16 match_src_port; /* Source port/connection ident */ - __be16 match_dest_port; /* Destination port/connection ident */ - - /* - * Control the operations of the match. - */ - u32 flags; /* Bit flags */ -#ifdef CONFIG_NF_FLOW_COOKIE - u32 flow_cookie; /* used flow cookie, for debug */ -#endif -#ifdef CONFIG_XFRM - u32 flow_accel; /* The flow accelerated or not */ -#endif - - /* - * Connection state that we track once we match. 
- */ - union { /* Protocol-specific state */ - struct sfe_ipv4_tcp_connection_match tcp; - } protocol_state; - /* - * Stats recorded in a sync period. These stats will be added to - * rx_packet_count64/rx_byte_count64 after a sync period. - */ - u32 rx_packet_count; - u32 rx_byte_count; - - /* - * Packet translation information. - */ - __be32 xlate_src_ip; /* Address after source translation */ - __be16 xlate_src_port; /* Port/connection ident after source translation */ - u16 xlate_src_csum_adjustment; - /* Transport layer checksum adjustment after source translation */ - u16 xlate_src_partial_csum_adjustment; - /* Transport layer pseudo header checksum adjustment after source translation */ - - __be32 xlate_dest_ip; /* Address after destination translation */ - __be16 xlate_dest_port; /* Port/connection ident after destination translation */ - u16 xlate_dest_csum_adjustment; - /* Transport layer checksum adjustment after destination translation */ - u16 xlate_dest_partial_csum_adjustment; - /* Transport layer pseudo header checksum adjustment after destination translation */ - - /* - * QoS information - */ - u32 priority; - u32 dscp; - - /* - * Packet transmit information. - */ - struct net_device *xmit_dev; /* Network device on which to transmit */ - unsigned short int xmit_dev_mtu; - /* Interface MTU */ - u16 xmit_dest_mac[ETH_ALEN / 2]; - /* Destination MAC address to use when forwarding */ - u16 xmit_src_mac[ETH_ALEN / 2]; - /* Source MAC address to use when forwarding */ - - /* - * Summary stats. - */ - u64 rx_packet_count64; - u64 rx_byte_count64; -}; - -/* - * Per-connection data structure. 
- */ -struct sfe_ipv4_connection { - struct sfe_ipv4_connection *next; - /* Pointer to the next entry in a hash chain */ - struct sfe_ipv4_connection *prev; - /* Pointer to the previous entry in a hash chain */ - int protocol; /* IP protocol number */ - __be32 src_ip; /* Src IP addr pre-translation */ - __be32 src_ip_xlate; /* Src IP addr post-translation */ - __be32 dest_ip; /* Dest IP addr pre-translation */ - __be32 dest_ip_xlate; /* Dest IP addr post-translation */ - __be16 src_port; /* Src port pre-translation */ - __be16 src_port_xlate; /* Src port post-translation */ - __be16 dest_port; /* Dest port pre-translation */ - __be16 dest_port_xlate; /* Dest port post-translation */ - struct sfe_ipv4_connection_match *original_match; - /* Original direction matching structure */ - struct net_device *original_dev; - /* Original direction source device */ - struct sfe_ipv4_connection_match *reply_match; - /* Reply direction matching structure */ - struct net_device *reply_dev; /* Reply direction source device */ - u64 last_sync_jiffies; /* Jiffies count for the last sync */ - struct sfe_ipv4_connection *all_connections_next; - /* Pointer to the next entry in the list of all connections */ - struct sfe_ipv4_connection *all_connections_prev; - /* Pointer to the previous entry in the list of all connections */ - u32 mark; /* mark for outgoing packet */ - u32 debug_read_seq; /* sequence number for debug dump */ -}; - -/* - * IPv4 connections and hash table size information. 
- */ -#define SFE_IPV4_CONNECTION_HASH_SHIFT 12 -#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT) -#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1) - -#ifdef CONFIG_NF_FLOW_COOKIE -#define SFE_FLOW_COOKIE_SIZE 2048 -#define SFE_FLOW_COOKIE_MASK 0x7ff - -struct sfe_flow_cookie_entry { - struct sfe_ipv4_connection_match *match; - unsigned long last_clean_time; -}; -#endif - -enum sfe_ipv4_exception_events { - SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION, - SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT, - SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL, - SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION, - SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS, - SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS, - SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT, - SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL, - SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION, - SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS, - SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE, - SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS, - SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK, - SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS, - SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE, - SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE, - SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL, - SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION, - SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION, - 
SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH, - SFE_IPV4_EXCEPTION_EVENT_NON_V4, - SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT, - SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL, - SFE_IPV4_EXCEPTION_EVENT_LAST -}; - -static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = { - "UDP_HEADER_INCOMPLETE", - "UDP_NO_CONNECTION", - "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT", - "UDP_SMALL_TTL", - "UDP_NEEDS_FRAGMENTATION", - "TCP_HEADER_INCOMPLETE", - "TCP_NO_CONNECTION_SLOW_FLAGS", - "TCP_NO_CONNECTION_FAST_FLAGS", - "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT", - "TCP_SMALL_TTL", - "TCP_NEEDS_FRAGMENTATION", - "TCP_FLAGS", - "TCP_SEQ_EXCEEDS_RIGHT_EDGE", - "TCP_SMALL_DATA_OFFS", - "TCP_BAD_SACK", - "TCP_BIG_DATA_OFFS", - "TCP_SEQ_BEFORE_LEFT_EDGE", - "TCP_ACK_EXCEEDS_RIGHT_EDGE", - "TCP_ACK_BEFORE_LEFT_EDGE", - "ICMP_HEADER_INCOMPLETE", - "ICMP_UNHANDLED_TYPE", - "ICMP_IPV4_HEADER_INCOMPLETE", - "ICMP_IPV4_NON_V4", - "ICMP_IPV4_IP_OPTIONS_INCOMPLETE", - "ICMP_IPV4_UDP_HEADER_INCOMPLETE", - "ICMP_IPV4_TCP_HEADER_INCOMPLETE", - "ICMP_IPV4_UNHANDLED_PROTOCOL", - "ICMP_NO_CONNECTION", - "ICMP_FLUSHED_CONNECTION", - "HEADER_INCOMPLETE", - "BAD_TOTAL_LENGTH", - "NON_V4", - "NON_INITIAL_FRAGMENT", - "DATAGRAM_INCOMPLETE", - "IP_OPTIONS_INCOMPLETE", - "UNHANDLED_PROTOCOL" -}; - -/* - * Per-module structure. 
- */ -struct sfe_ipv4 { - spinlock_t lock; /* Lock for SMP correctness */ - struct sfe_ipv4_connection_match *active_head; - /* Head of the list of recently active connections */ - struct sfe_ipv4_connection_match *active_tail; - /* Tail of the list of recently active connections */ - struct sfe_ipv4_connection *all_connections_head; - /* Head of the list of all connections */ - struct sfe_ipv4_connection *all_connections_tail; - /* Tail of the list of all connections */ - unsigned int num_connections; /* Number of connections */ - struct timer_list timer; /* Timer used for periodic sync ops */ - sfe_sync_rule_callback_t __rcu sync_rule_callback; - /* Callback function registered by a connection manager for stats syncing */ - struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE]; - /* Connection hash table */ - struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE]; - /* Connection match hash table */ -#ifdef CONFIG_NF_FLOW_COOKIE - struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE]; - /* flow cookie table*/ - flow_cookie_set_func_t flow_cookie_set_func; - /* function used to configure flow cookie in hardware*/ - int flow_cookie_enable; - /* Enable/disable flow cookie at runtime */ -#endif - - /* - * Stats recorded in a sync period. These stats will be added to - * connection_xxx64 after a sync period. 
- */ - u32 connection_create_requests; - /* Number of IPv4 connection create requests */ - u32 connection_create_collisions; - /* Number of IPv4 connection create requests that collided with existing hash table entries */ - u32 connection_destroy_requests; - /* Number of IPv4 connection destroy requests */ - u32 connection_destroy_misses; - /* Number of IPv4 connection destroy requests that missed our hash table */ - u32 connection_match_hash_hits; - /* Number of IPv4 connection match hash hits */ - u32 connection_match_hash_reorders; - /* Number of IPv4 connection match hash reorders */ - u32 connection_flushes; /* Number of IPv4 connection flushes */ - u32 packets_forwarded; /* Number of IPv4 packets forwarded */ - u32 packets_not_forwarded; /* Number of IPv4 packets not forwarded */ - u32 exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST]; - - /* - * Summary statistics. - */ - u64 connection_create_requests64; - /* Number of IPv4 connection create requests */ - u64 connection_create_collisions64; - /* Number of IPv4 connection create requests that collided with existing hash table entries */ - u64 connection_destroy_requests64; - /* Number of IPv4 connection destroy requests */ - u64 connection_destroy_misses64; - /* Number of IPv4 connection destroy requests that missed our hash table */ - u64 connection_match_hash_hits64; - /* Number of IPv4 connection match hash hits */ - u64 connection_match_hash_reorders64; - /* Number of IPv4 connection match hash reorders */ - u64 connection_flushes64; /* Number of IPv4 connection flushes */ - u64 packets_forwarded64; /* Number of IPv4 packets forwarded */ - u64 packets_not_forwarded64; - /* Number of IPv4 packets not forwarded */ - u64 exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST]; - - /* - * Control state. 
- */ - struct kobject *sys_sfe_ipv4; /* sysfs linkage */ - int debug_dev; /* Major number of the debug char device */ - u32 debug_read_seq; /* sequence number for debug dump */ -}; - -/* - * Enumeration of the XML output. - */ -enum sfe_ipv4_debug_xml_states { - SFE_IPV4_DEBUG_XML_STATE_START, - SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START, - SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION, - SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END, - SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START, - SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION, - SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END, - SFE_IPV4_DEBUG_XML_STATE_STATS, - SFE_IPV4_DEBUG_XML_STATE_END, - SFE_IPV4_DEBUG_XML_STATE_DONE -}; - -/* - * XML write state. - */ -struct sfe_ipv4_debug_xml_write_state { - enum sfe_ipv4_debug_xml_states state; - /* XML output file state machine state */ - int iter_exception; /* Next exception iterator */ -}; - -typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws); - -static struct sfe_ipv4 __si; - -/* - * sfe_ipv4_gen_ip_csum() - * Generate the IP checksum for an IPv4 header. - * - * Note that this function assumes that we have only 20 bytes of IP header. - */ -static inline u16 sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph) -{ - u32 sum; - u16 *i = (u16 *)iph; - - iph->check = 0; - - /* - * Generate the sum. - */ - sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9]; - - /* - * Fold it to ones-complement form. - */ - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - - return (u16)sum ^ 0xffff; -} - -/* - * sfe_ipv4_get_connection_match_hash() - * Generate the hash used in connection match lookups. 
- */ -static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol, - __be32 src_ip, __be16 src_port, - __be32 dest_ip, __be16 dest_port) -{ - size_t dev_addr = (size_t)dev; - u32 hash = ((u32)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port); - return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK; -} - -/* - * sfe_ipv4_find_sfe_ipv4_connection_match() - * Get the IPv4 flow match info that corresponds to a particular 5-tuple. - * - * On entry we must be holding the lock that protects the hash table. - */ -static struct sfe_ipv4_connection_match * -sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol, - __be32 src_ip, __be16 src_port, - __be32 dest_ip, __be16 dest_port) -{ - struct sfe_ipv4_connection_match *cm; - struct sfe_ipv4_connection_match *head; - unsigned int conn_match_idx; - - conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port); - cm = si->conn_match_hash[conn_match_idx]; - - /* - * If we don't have anything in this chain then bail. - */ - if (unlikely(!cm)) { - return NULL; - } - - /* - * Hopefully the first entry is the one we want. - */ - if ((cm->match_src_port == src_port) - && (cm->match_dest_port == dest_port) - && (cm->match_src_ip == src_ip) - && (cm->match_dest_ip == dest_ip) - && (cm->match_protocol == protocol) - && (cm->match_dev == dev)) { - si->connection_match_hash_hits++; - return cm; - } - - /* - * Unfortunately we didn't find it at head, so we search it in chain and - * move matching entry to the top of the hash chain. We presume that this - * will be reused again very quickly. 
- */ - head = cm; - do { - cm = cm->next; - } while (cm && (cm->match_src_port != src_port - || cm->match_dest_port != dest_port - || cm->match_src_ip != src_ip - || cm->match_dest_ip != dest_ip - || cm->match_protocol != protocol - || cm->match_dev != dev)); - - /* - * Not found then we're done. - */ - if (unlikely(!cm)) { - return NULL; - } - - /* - * We found a match so move it. - */ - if (cm->next) { - cm->next->prev = cm->prev; - } - cm->prev->next = cm->next; - cm->prev = NULL; - cm->next = head; - head->prev = cm; - si->conn_match_hash[conn_match_idx] = cm; - si->connection_match_hash_reorders++; - - return cm; -} - -/* - * sfe_ipv4_connection_match_update_summary_stats() - * Update the summary stats for a connection match entry. - */ -static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm) -{ - cm->rx_packet_count64 += cm->rx_packet_count; - cm->rx_packet_count = 0; - cm->rx_byte_count64 += cm->rx_byte_count; - cm->rx_byte_count = 0; -} - -/* - * sfe_ipv4_connection_match_compute_translations() - * Compute port and address translations for a connection match entry. - */ -static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm) -{ - /* - * Before we insert the entry look to see if this is tagged as doing address - * translations. If it is then work out the adjustment that we need to apply - * to the transport checksum. - */ - if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { - /* - * Precompute an incremental checksum adjustment so we can - * edit packets in this stream very quickly. The algorithm is from RFC1624. 
- */ - u16 src_ip_hi = cm->match_src_ip >> 16; - u16 src_ip_lo = cm->match_src_ip & 0xffff; - u32 xlate_src_ip = ~cm->xlate_src_ip; - u16 xlate_src_ip_hi = xlate_src_ip >> 16; - u16 xlate_src_ip_lo = xlate_src_ip & 0xffff; - u16 xlate_src_port = ~cm->xlate_src_port; - u32 adj; - - /* - * When we compute this fold it down to a 16-bit offset - * as that way we can avoid having to do a double - * folding of the twos-complement result because the - * addition of 2 16-bit values cannot cause a double - * wrap-around! - */ - adj = src_ip_hi + src_ip_lo + cm->match_src_port - + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port; - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_src_csum_adjustment = (u16)adj; - - } - - if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { - /* - * Precompute an incremental checksum adjustment so we can - * edit packets in this stream very quickly. The algorithm is from RFC1624. - */ - u16 dest_ip_hi = cm->match_dest_ip >> 16; - u16 dest_ip_lo = cm->match_dest_ip & 0xffff; - u32 xlate_dest_ip = ~cm->xlate_dest_ip; - u16 xlate_dest_ip_hi = xlate_dest_ip >> 16; - u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff; - u16 xlate_dest_port = ~cm->xlate_dest_port; - u32 adj; - - /* - * When we compute this fold it down to a 16-bit offset - * as that way we can avoid having to do a double - * folding of the twos-complement result because the - * addition of 2 16-bit values cannot cause a double - * wrap-around! 
- */ - adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port - + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port; - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_dest_csum_adjustment = (u16)adj; - } - - if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { - u32 adj = ~cm->match_src_ip + cm->xlate_src_ip; - if (adj < cm->xlate_src_ip) { - adj++; - } - - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_src_partial_csum_adjustment = (u16)adj; - } - - if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { - u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip; - if (adj < cm->xlate_dest_ip) { - adj++; - } - - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_dest_partial_csum_adjustment = (u16)adj; - } - -} - -/* - * sfe_ipv4_update_summary_stats() - * Update the summary stats. - */ -static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si) -{ - int i; - - si->connection_create_requests64 += si->connection_create_requests; - si->connection_create_requests = 0; - si->connection_create_collisions64 += si->connection_create_collisions; - si->connection_create_collisions = 0; - si->connection_destroy_requests64 += si->connection_destroy_requests; - si->connection_destroy_requests = 0; - si->connection_destroy_misses64 += si->connection_destroy_misses; - si->connection_destroy_misses = 0; - si->connection_match_hash_hits64 += si->connection_match_hash_hits; - si->connection_match_hash_hits = 0; - si->connection_match_hash_reorders64 += si->connection_match_hash_reorders; - si->connection_match_hash_reorders = 0; - si->connection_flushes64 += si->connection_flushes; - si->connection_flushes = 0; - si->packets_forwarded64 += si->packets_forwarded; - si->packets_forwarded = 0; - si->packets_not_forwarded64 += si->packets_not_forwarded; - si->packets_not_forwarded = 0; - - for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) { - 
si->exception_events64[i] += si->exception_events[i]; - si->exception_events[i] = 0; - } -} - -/* - * sfe_ipv4_insert_sfe_ipv4_connection_match() - * Insert a connection match into the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, - struct sfe_ipv4_connection_match *cm) -{ - struct sfe_ipv4_connection_match **hash_head; - struct sfe_ipv4_connection_match *prev_head; - unsigned int conn_match_idx - = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol, - cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port); - - hash_head = &si->conn_match_hash[conn_match_idx]; - prev_head = *hash_head; - cm->prev = NULL; - if (prev_head) { - prev_head->prev = cm; - } - - cm->next = prev_head; - *hash_head = cm; - -#ifdef CONFIG_NF_FLOW_COOKIE - if (!si->flow_cookie_enable) - return; - - /* - * Configure hardware to put a flow cookie in packet of this flow, - * then we can accelerate the lookup process when we received this packet. - */ - for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { - struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; - - if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) { - flow_cookie_set_func_t func; - - rcu_read_lock(); - func = rcu_dereference(si->flow_cookie_set_func); - if (func) { - if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) { - entry->match = cm; - cm->flow_cookie = conn_match_idx; - } - } - rcu_read_unlock(); - - break; - } - } -#endif -} - -/* - * sfe_ipv4_remove_sfe_ipv4_connection_match() - * Remove a connection match object from the hash. - * - * On entry we must be holding the lock that protects the hash table. 
- */ -static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm) -{ -#ifdef CONFIG_NF_FLOW_COOKIE - if (si->flow_cookie_enable) { - /* - * Tell hardware that we no longer need a flow cookie in packet of this flow - */ - unsigned int conn_match_idx; - - for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { - struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; - - if (cm == entry->match) { - flow_cookie_set_func_t func; - - rcu_read_lock(); - func = rcu_dereference(si->flow_cookie_set_func); - if (func) { - func(cm->match_protocol, cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port, 0); - } - rcu_read_unlock(); - - cm->flow_cookie = 0; - entry->match = NULL; - entry->last_clean_time = jiffies; - break; - } - } - } -#endif - - /* - * Unlink the connection match entry from the hash. - */ - if (cm->prev) { - cm->prev->next = cm->next; - } else { - unsigned int conn_match_idx - = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol, - cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port); - si->conn_match_hash[conn_match_idx] = cm->next; - } - - if (cm->next) { - cm->next->prev = cm->prev; - } - - /* - * If the connection match entry is in the active list remove it. - */ - if (cm->active) { - if (likely(cm->active_prev)) { - cm->active_prev->active_next = cm->active_next; - } else { - si->active_head = cm->active_next; - } - - if (likely(cm->active_next)) { - cm->active_next->active_prev = cm->active_prev; - } else { - si->active_tail = cm->active_prev; - } - } -} - -/* - * sfe_ipv4_get_connection_hash() - * Generate the hash used in connection lookups. 
- */ -static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port, - __be32 dest_ip, __be16 dest_port) -{ - u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port); - return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK; -} - -/* - * sfe_ipv4_find_sfe_ipv4_connection() - * Get the IPv4 connection info that corresponds to a particular 5-tuple. - * - * On entry we must be holding the lock that protects the hash table. - */ -static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, u32 protocol, - __be32 src_ip, __be16 src_port, - __be32 dest_ip, __be16 dest_port) -{ - struct sfe_ipv4_connection *c; - unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port); - c = si->conn_hash[conn_idx]; - - /* - * If we don't have anything in this chain then bale. - */ - if (unlikely(!c)) { - return NULL; - } - - /* - * Hopefully the first entry is the one we want. - */ - if ((c->src_port == src_port) - && (c->dest_port == dest_port) - && (c->src_ip == src_ip) - && (c->dest_ip == dest_ip) - && (c->protocol == protocol)) { - return c; - } - - /* - * Unfortunately we didn't find it at head, so we search it in chain. 
- */ - do { - c = c->next; - } while (c && (c->src_port != src_port - || c->dest_port != dest_port - || c->src_ip != src_ip - || c->dest_ip != dest_ip - || c->protocol != protocol)); - - /* - * Will need connection entry for next create/destroy metadata, - * So no need to re-order entry for these requests - */ - return c; -} - -/* - * sfe_ipv4_mark_rule() - * Updates the mark for a current offloaded connection - * - * Will take hash lock upon entry - */ -void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark) -{ - struct sfe_ipv4 *si = &__si; - struct sfe_ipv4_connection *c; - - spin_lock_bh(&si->lock); - c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol, - mark->src_ip.ip, mark->src_port, - mark->dest_ip.ip, mark->dest_port); - if (c) { - WARN_ON((0 != c->mark) && (0 == mark->mark)); - c->mark = mark->mark; - } - spin_unlock_bh(&si->lock); - - if (c) { - DEBUG_TRACE("Matching connection found for mark, " - "setting from %08x to %08x\n", - c->mark, mark->mark); - } -} - -/* - * sfe_ipv4_insert_sfe_ipv4_connection() - * Insert a connection into the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) -{ - struct sfe_ipv4_connection **hash_head; - struct sfe_ipv4_connection *prev_head; - unsigned int conn_idx; - - /* - * Insert entry into the connection hash. - */ - conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, - c->dest_ip, c->dest_port); - hash_head = &si->conn_hash[conn_idx]; - prev_head = *hash_head; - c->prev = NULL; - if (prev_head) { - prev_head->prev = c; - } - - c->next = prev_head; - *hash_head = c; - - /* - * Insert entry into the "all connections" list. 
- */ - if (si->all_connections_tail) { - c->all_connections_prev = si->all_connections_tail; - si->all_connections_tail->all_connections_next = c; - } else { - c->all_connections_prev = NULL; - si->all_connections_head = c; - } - - si->all_connections_tail = c; - c->all_connections_next = NULL; - si->num_connections++; - - /* - * Insert the connection match objects too. - */ - sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match); - sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match); -} - -/* - * sfe_ipv4_remove_sfe_ipv4_connection() - * Remove a sfe_ipv4_connection object from the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) -{ - /* - * Remove the connection match objects. - */ - sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match); - sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match); - - /* - * Unlink the connection. - */ - if (c->prev) { - c->prev->next = c->next; - } else { - unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, - c->dest_ip, c->dest_port); - si->conn_hash[conn_idx] = c->next; - } - - if (c->next) { - c->next->prev = c->prev; - } - - /* - * Unlink connection from all_connections list - */ - if (c->all_connections_prev) { - c->all_connections_prev->all_connections_next = c->all_connections_next; - } else { - si->all_connections_head = c->all_connections_next; - } - - if (c->all_connections_next) { - c->all_connections_next->all_connections_prev = c->all_connections_prev; - } else { - si->all_connections_tail = c->all_connections_prev; - } - - si->num_connections--; -} - -/* - * sfe_ipv4_sync_sfe_ipv4_connection() - * Sync a connection. - * - * On entry to this function we expect that the lock for the connection is either - * already held or isn't required. 
- */ -static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c, - struct sfe_connection_sync *sis, sfe_sync_reason_t reason, - u64 now_jiffies) -{ - struct sfe_ipv4_connection_match *original_cm; - struct sfe_ipv4_connection_match *reply_cm; - - /* - * Fill in the update message. - */ - sis->is_v6 = 0; - sis->protocol = c->protocol; - sis->src_ip.ip = c->src_ip; - sis->src_ip_xlate.ip = c->src_ip_xlate; - sis->dest_ip.ip = c->dest_ip; - sis->dest_ip_xlate.ip = c->dest_ip_xlate; - sis->src_port = c->src_port; - sis->src_port_xlate = c->src_port_xlate; - sis->dest_port = c->dest_port; - sis->dest_port_xlate = c->dest_port_xlate; - - original_cm = c->original_match; - reply_cm = c->reply_match; - sis->src_td_max_window = original_cm->protocol_state.tcp.max_win; - sis->src_td_end = original_cm->protocol_state.tcp.end; - sis->src_td_max_end = original_cm->protocol_state.tcp.max_end; - sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win; - sis->dest_td_end = reply_cm->protocol_state.tcp.end; - sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end; - - sis->src_new_packet_count = original_cm->rx_packet_count; - sis->src_new_byte_count = original_cm->rx_byte_count; - sis->dest_new_packet_count = reply_cm->rx_packet_count; - sis->dest_new_byte_count = reply_cm->rx_byte_count; - - sfe_ipv4_connection_match_update_summary_stats(original_cm); - sfe_ipv4_connection_match_update_summary_stats(reply_cm); - - sis->src_dev = original_cm->match_dev; - sis->src_packet_count = original_cm->rx_packet_count64; - sis->src_byte_count = original_cm->rx_byte_count64; - - sis->dest_dev = reply_cm->match_dev; - sis->dest_packet_count = reply_cm->rx_packet_count64; - sis->dest_byte_count = reply_cm->rx_byte_count64; - - sis->reason = reason; - - /* - * Get the time increment since our last sync. 
- */ - sis->delta_jiffies = now_jiffies - c->last_sync_jiffies; - c->last_sync_jiffies = now_jiffies; -} - -/* - * sfe_ipv4_flush_sfe_ipv4_connection() - * Flush a connection and free all associated resources. - * - * We need to be called with bottom halves disabled locally as we need to acquire - * the connection hash lock and release it again. In general we're actually called - * from within a BH and so we're fine, but we're also called when connections are - * torn down. - */ -static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, - struct sfe_ipv4_connection *c, - sfe_sync_reason_t reason) -{ - struct sfe_connection_sync sis; - u64 now_jiffies; - sfe_sync_rule_callback_t sync_rule_callback; - - rcu_read_lock(); - spin_lock_bh(&si->lock); - si->connection_flushes++; - sync_rule_callback = rcu_dereference(si->sync_rule_callback); - spin_unlock_bh(&si->lock); - - if (sync_rule_callback) { - /* - * Generate a sync message and then sync. - */ - now_jiffies = get_jiffies_64(); - sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies); - sync_rule_callback(&sis); - } - - rcu_read_unlock(); - - /* - * Release our hold of the source and dest devices and free the memory - * for our connection objects. - */ - dev_put(c->original_dev); - dev_put(c->reply_dev); - kfree(c->original_match); - kfree(c->reply_match); - kfree(c); -} - -/* - * sfe_ipv4_recv_udp() - * Handle UDP packet receives and forwarding. - */ -static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find) -{ - struct sfe_ipv4_udp_hdr *udph; - __be32 src_ip; - __be32 dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv4_connection_match *cm; - u8 ttl; - struct net_device *xmit_dev; - - /* - * Is our packet too short to contain a valid UDP header? 
- */ - if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for UDP header\n"); - return 0; - } - - /* - * Read the IP address and port information. Read the IP header data first - * because we've almost certainly got that in the cache. We may not yet have - * the UDP header cached though so allow more time for any prefetching. - */ - src_ip = iph->saddr; - dest_ip = iph->daddr; - - udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl); - src_port = udph->source; - dest_port = udph->dest; - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. - */ -#ifdef CONFIG_NF_FLOW_COOKIE - cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match; - if (unlikely(!cm)) { - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port); - } -#else - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port); -#endif - if (unlikely(!cm)) { - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found\n"); - return 0; - } - - /* - * If our packet has beern marked as "flush on find" we can't actually - * forward it in the fast path, but now that we've found an associated - * connection we can flush that out before we process the packet. 
- */ - if (unlikely(flush_on_find)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("flush on find\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - -#ifdef CONFIG_XFRM - /* - * We can't accelerate the flow on this direction, just let it go - * through the slow path. - */ - if (unlikely(!cm->flow_accel)) { - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - return 0; - } -#endif - - /* - * Does our TTL allow forwarding? - */ - ttl = iph->ttl; - if (unlikely(ttl < 2)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ttl too low\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * If our packet is larger than the MTU of the transmit interface then - * we can't forward it easily. - */ - if (unlikely(len > cm->xmit_dev_mtu)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("larger than mtu\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * From this point on we're good to modify the packet. - */ - - /* - * Check if skb was cloned. If it was, unshare it. Because - * the data area is going to be written in this path and we don't want to - * change the cloned skb's data section. 
- */ - if (unlikely(skb_cloned(skb))) { - DEBUG_TRACE("%px: skb is a cloned skb\n", skb); - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - DEBUG_WARN("Failed to unshare the cloned skb\n"); - return 0; - } - - /* - * Update the iph and udph pointers with the unshared skb's data area. - */ - iph = (struct sfe_ipv4_ip_hdr *)skb->data; - udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl); - } - - /* - * Update DSCP - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { - iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp; - } - - /* - * Decrement our TTL. - */ - iph->ttl = ttl - 1; - - /* - * Do we have to perform translations of the source address/port? - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) { - u16 udp_csum; - - iph->saddr = cm->xlate_src_ip; - udph->source = cm->xlate_src_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - udp_csum = udph->check; - if (likely(udp_csum)) { - u32 sum; - - if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { - sum = udp_csum + cm->xlate_src_partial_csum_adjustment; - } else { - sum = udp_csum + cm->xlate_src_csum_adjustment; - } - - sum = (sum & 0xffff) + (sum >> 16); - udph->check = (u16)sum; - } - } - - /* - * Do we have to perform translations of the destination address/port? - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) { - u16 udp_csum; - - iph->daddr = cm->xlate_dest_ip; - udph->dest = cm->xlate_dest_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - udp_csum = udph->check; - if (likely(udp_csum)) { - u32 sum; - - if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { - sum = udp_csum + cm->xlate_dest_partial_csum_adjustment; - } else { - sum = udp_csum + cm->xlate_dest_csum_adjustment; - } - - sum = (sum & 0xffff) + (sum >> 16); - udph->check = (u16)sum; - } - } - - /* - * Replace the IP checksum. 
- */ - iph->check = sfe_ipv4_gen_ip_csum(iph); - - /* - * Update traffic stats. - */ - cm->rx_packet_count++; - cm->rx_byte_count += len; - - /* - * If we're not already on the active list then insert ourselves at the tail - * of the current list. - */ - if (unlikely(!cm->active)) { - cm->active = true; - cm->active_prev = si->active_tail; - if (likely(si->active_tail)) { - si->active_tail->active_next = cm; - } else { - si->active_head = cm; - } - si->active_tail = cm; - } - - xmit_dev = cm->xmit_dev; - skb->dev = xmit_dev; - - /* - * Check to see if we need to write a header. - */ - if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { - if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { - dev_hard_header(skb, xmit_dev, ETH_P_IP, - cm->xmit_dest_mac, cm->xmit_src_mac, len); - } else { - /* - * For the simple case we write this really fast. - */ - struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN); - eth->h_proto = htons(ETH_P_IP); - eth->h_dest[0] = cm->xmit_dest_mac[0]; - eth->h_dest[1] = cm->xmit_dest_mac[1]; - eth->h_dest[2] = cm->xmit_dest_mac[2]; - eth->h_source[0] = cm->xmit_src_mac[0]; - eth->h_source[1] = cm->xmit_src_mac[1]; - eth->h_source[2] = cm->xmit_src_mac[2]; - } - } - - /* - * Update priority of skb. - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { - skb->priority = cm->priority; - } - - /* - * Mark outgoing packet. - */ - skb->mark = cm->connection->mark; - if (skb->mark) { - DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); - } - - si->packets_forwarded++; - spin_unlock_bh(&si->lock); - - /* - * We're going to check for GSO flags when we transmit the packet so - * start fetching the necessary cache line now. - */ - prefetch(skb_shinfo(skb)); - - /* - * Mark that this packet has been fast forwarded. - */ - skb->fast_forwarded = 1; - - /* - * Send the packet on its way. 
- */ - dev_queue_xmit(skb); - - return 1; -} - -/* - * sfe_ipv4_process_tcp_option_sack() - * Parse TCP SACK option and update ack according - */ -static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const u32 data_offs, - u32 *ack) -{ - u32 length = sizeof(struct sfe_ipv4_tcp_hdr); - u8 *ptr = (u8 *)th + length; - - /* - * Ignore processing if TCP packet has only TIMESTAMP option. - */ - if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1) - && likely(ptr[0] == TCPOPT_NOP) - && likely(ptr[1] == TCPOPT_NOP) - && likely(ptr[2] == TCPOPT_TIMESTAMP) - && likely(ptr[3] == TCPOLEN_TIMESTAMP)) { - return true; - } - - /* - * TCP options. Parse SACK option. - */ - while (length < data_offs) { - u8 size; - u8 kind; - - ptr = (u8 *)th + length; - kind = *ptr; - - /* - * NOP, for padding - * Not in the switch because to fast escape and to not calculate size - */ - if (kind == TCPOPT_NOP) { - length++; - continue; - } - - if (kind == TCPOPT_SACK) { - u32 sack = 0; - u8 re = 1 + 1; - - size = *(ptr + 1); - if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK)) - || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK)) - || (size > (data_offs - length))) { - return false; - } - - re += 4; - while (re < size) { - u32 sack_re; - u8 *sptr = ptr + re; - sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3]; - if (sack_re > sack) { - sack = sack_re; - } - re += TCPOLEN_SACK_PERBLOCK; - } - if (sack > *ack) { - *ack = sack; - } - length += size; - continue; - } - if (kind == TCPOPT_EOL) { - return true; - } - size = *(ptr + 1); - if (size < 2) { - return false; - } - length += size; - } - - return true; -} - -/* - * sfe_ipv4_recv_tcp() - * Handle TCP packet receives and forwarding. 
- */ -static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find) -{ - struct sfe_ipv4_tcp_hdr *tcph; - __be32 src_ip; - __be32 dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv4_connection_match *cm; - struct sfe_ipv4_connection_match *counter_cm; - u8 ttl; - u32 flags; - struct net_device *xmit_dev; - - /* - * Is our packet too short to contain a valid UDP header? - */ - if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for TCP header\n"); - return 0; - } - - /* - * Read the IP address and port information. Read the IP header data first - * because we've almost certainly got that in the cache. We may not yet have - * the TCP header cached though so allow more time for any prefetching. - */ - src_ip = iph->saddr; - dest_ip = iph->daddr; - - tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl); - src_port = tcph->source; - dest_port = tcph->dest; - flags = tcp_flag_word(tcph); - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. - */ -#ifdef CONFIG_NF_FLOW_COOKIE - cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match; - if (unlikely(!cm)) { - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port); - } -#else - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port); -#endif - if (unlikely(!cm)) { - /* - * We didn't get a connection but as TCP is connection-oriented that - * may be because this is a non-fast connection (not running established). - * For diagnostic purposes we differentiate this here. 
- */ - if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) { - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found - fast flags\n"); - return 0; - } - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found - slow flags: 0x%x\n", - flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); - return 0; - } - - /* - * If our packet has beern marked as "flush on find" we can't actually - * forward it in the fast path, but now that we've found an associated - * connection we can flush that out before we process the packet. - */ - if (unlikely(flush_on_find)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("flush on find\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - -#ifdef CONFIG_XFRM - /* - * We can't accelerate the flow on this direction, just let it go - * through the slow path. - */ - if (unlikely(!cm->flow_accel)) { - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - return 0; - } -#endif - /* - * Does our TTL allow forwarding? 
- */ - ttl = iph->ttl; - if (unlikely(ttl < 2)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ttl too low\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * If our packet is larger than the MTU of the transmit interface then - * we can't forward it easily. - */ - if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("larger than mtu\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN - * set is not a fast path packet. - */ - if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP flags: 0x%x are not fast\n", - flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - counter_cm = cm->counter_match; - - /* - * Are we doing sequence number checking? - */ - if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) { - u32 seq; - u32 ack; - u32 sack; - u32 data_offs; - u32 end; - u32 left_edge; - u32 scaled_win; - u32 max_end; - - /* - * Is our sequence fully past the right hand edge of the window? 
- */ - seq = ntohl(tcph->seq); - if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("seq: %u exceeds right edge: %u\n", - seq, cm->protocol_state.tcp.max_end + 1); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Check that our TCP data offset isn't too short. - */ - data_offs = tcph->doff << 2; - if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Update ACK according to any SACK option. - */ - ack = ntohl(tcph->ack_seq); - sack = ack; - if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP option SACK size is wrong\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Check that our TCP data offset isn't past the end of the packet. 
- */ - data_offs += sizeof(struct sfe_ipv4_ip_hdr); - if (unlikely(len < data_offs)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n", - data_offs, len); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - end = seq + len - data_offs; - - /* - * Is our sequence fully before the left hand edge of the window? - */ - if (unlikely((s32)(end - (cm->protocol_state.tcp.end - - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("seq: %u before left edge: %u\n", - end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Are we acking data that is to the right of what has been sent? - */ - if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ack: %u exceeds right edge: %u\n", - sack, counter_cm->protocol_state.tcp.end + 1); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Is our ack too far before the left hand edge of the window? 
- */ - left_edge = counter_cm->protocol_state.tcp.end - - cm->protocol_state.tcp.max_win - - SFE_IPV4_TCP_MAX_ACK_WINDOW - - 1; - if (unlikely((s32)(sack - left_edge) < 0)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Have we just seen the largest window size yet for this connection? If yes - * then we need to record the new value. - */ - scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale; - scaled_win += (sack - ack); - if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) { - cm->protocol_state.tcp.max_win = scaled_win; - } - - /* - * If our sequence and/or ack numbers have advanced then record the new state. - */ - if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) { - cm->protocol_state.tcp.end = end; - } - - max_end = sack + scaled_win; - if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) { - counter_cm->protocol_state.tcp.max_end = max_end; - } - } - - /* - * From this point on we're good to modify the packet. - */ - - /* - * Check if skb was cloned. If it was, unshare it. Because - * the data area is going to be written in this path and we don't want to - * change the cloned skb's data section. - */ - if (unlikely(skb_cloned(skb))) { - DEBUG_TRACE("%px: skb is a cloned skb\n", skb); - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - DEBUG_WARN("Failed to unshare the cloned skb\n"); - return 0; - } - - /* - * Update the iph and tcph pointers with the unshared skb's data area. 
- */ - iph = (struct sfe_ipv4_ip_hdr *)skb->data; - tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl); - } - - /* - * Update DSCP - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { - iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp; - } - - /* - * Decrement our TTL. - */ - iph->ttl = ttl - 1; - - /* - * Do we have to perform translations of the source address/port? - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) { - u16 tcp_csum; - u32 sum; - - iph->saddr = cm->xlate_src_ip; - tcph->source = cm->xlate_src_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - tcp_csum = tcph->check; - if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { - sum = tcp_csum + cm->xlate_src_partial_csum_adjustment; - } else { - sum = tcp_csum + cm->xlate_src_csum_adjustment; - } - - sum = (sum & 0xffff) + (sum >> 16); - tcph->check = (u16)sum; - } - - /* - * Do we have to perform translations of the destination address/port? - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) { - u16 tcp_csum; - u32 sum; - - iph->daddr = cm->xlate_dest_ip; - tcph->dest = cm->xlate_dest_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - tcp_csum = tcph->check; - if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { - sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment; - } else { - sum = tcp_csum + cm->xlate_dest_csum_adjustment; - } - - sum = (sum & 0xffff) + (sum >> 16); - tcph->check = (u16)sum; - } - - /* - * Replace the IP checksum. - */ - iph->check = sfe_ipv4_gen_ip_csum(iph); - - /* - * Update traffic stats. - */ - cm->rx_packet_count++; - cm->rx_byte_count += len; - - /* - * If we're not already on the active list then insert ourselves at the tail - * of the current list. 
- */ - if (unlikely(!cm->active)) { - cm->active = true; - cm->active_prev = si->active_tail; - if (likely(si->active_tail)) { - si->active_tail->active_next = cm; - } else { - si->active_head = cm; - } - si->active_tail = cm; - } - - xmit_dev = cm->xmit_dev; - skb->dev = xmit_dev; - - /* - * Check to see if we need to write a header. - */ - if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { - if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { - dev_hard_header(skb, xmit_dev, ETH_P_IP, - cm->xmit_dest_mac, cm->xmit_src_mac, len); - } else { - /* - * For the simple case we write this really fast. - */ - struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN); - eth->h_proto = htons(ETH_P_IP); - eth->h_dest[0] = cm->xmit_dest_mac[0]; - eth->h_dest[1] = cm->xmit_dest_mac[1]; - eth->h_dest[2] = cm->xmit_dest_mac[2]; - eth->h_source[0] = cm->xmit_src_mac[0]; - eth->h_source[1] = cm->xmit_src_mac[1]; - eth->h_source[2] = cm->xmit_src_mac[2]; - } - } - - /* - * Update priority of skb. - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { - skb->priority = cm->priority; - } - - /* - * Mark outgoing packet - */ - skb->mark = cm->connection->mark; - if (skb->mark) { - DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); - } - - si->packets_forwarded++; - spin_unlock_bh(&si->lock); - - /* - * We're going to check for GSO flags when we transmit the packet so - * start fetching the necessary cache line now. - */ - prefetch(skb_shinfo(skb)); - - /* - * Mark that this packet has been fast forwarded. - */ - skb->fast_forwarded = 1; - - /* - * Send the packet on its way. - */ - dev_queue_xmit(skb); - - return 1; -} - -/* - * sfe_ipv4_recv_icmp() - * Handle ICMP packet receives. - * - * ICMP packets aren't handled as a "fast path" and always have us process them - * through the default Linux stack. 
What we do need to do is look for any errors - * about connections we are handling in the fast path. If we find any such - * connections then we want to flush their state so that the ICMP error path - * within Linux has all of the correct state should it need it. - */ -static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl) -{ - struct icmphdr *icmph; - struct sfe_ipv4_ip_hdr *icmp_iph; - unsigned int icmp_ihl_words; - unsigned int icmp_ihl; - u32 *icmp_trans_h; - struct sfe_ipv4_udp_hdr *icmp_udph; - struct sfe_ipv4_tcp_hdr *icmp_tcph; - __be32 src_ip; - __be32 dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv4_connection_match *cm; - struct sfe_ipv4_connection *c; - u32 pull_len = sizeof(struct icmphdr) + ihl; - - /* - * Is our packet too short to contain a valid ICMP header? - */ - len -= ihl; - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for ICMP header\n"); - return 0; - } - - /* - * We only handle "destination unreachable" and "time exceeded" messages. - */ - icmph = (struct icmphdr *)(skb->data + ihl); - if ((icmph->type != ICMP_DEST_UNREACH) - && (icmph->type != ICMP_TIME_EXCEEDED)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type); - return 0; - } - - /* - * Do we have the full embedded IP header? 
- */ - len -= sizeof(struct icmphdr); - pull_len += sizeof(struct sfe_ipv4_ip_hdr); - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Embedded IP header not complete\n"); - return 0; - } - - /* - * Is our embedded IP version wrong? - */ - icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1); - if (unlikely(icmp_iph->version != 4)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("IP version: %u\n", icmp_iph->version); - return 0; - } - - /* - * Do we have the full embedded IP header, including any options? - */ - icmp_ihl_words = icmp_iph->ihl; - icmp_ihl = icmp_ihl_words << 2; - pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr); - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Embedded header not large enough for IP options\n"); - return 0; - } - - len -= icmp_ihl; - icmp_trans_h = ((u32 *)icmp_iph) + icmp_ihl_words; - - /* - * Handle the embedded transport layer header. - */ - switch (icmp_iph->protocol) { - case IPPROTO_UDP: - /* - * We should have 8 bytes of UDP header - that's enough to identify - * the connection. 
- */ - pull_len += 8; - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Incomplete embedded UDP header\n"); - return 0; - } - - icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h; - src_port = icmp_udph->source; - dest_port = icmp_udph->dest; - break; - - case IPPROTO_TCP: - /* - * We should have 8 bytes of TCP header - that's enough to identify - * the connection. - */ - pull_len += 8; - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Incomplete embedded TCP header\n"); - return 0; - } - - icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h; - src_port = icmp_tcph->source; - dest_port = icmp_tcph->dest; - break; - - default: - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol); - return 0; - } - - src_ip = icmp_iph->saddr; - dest_ip = icmp_iph->daddr; - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. Note that we reverse the source and destination - * here because our embedded message contains a packet that was sent in the - * opposite direction to the one in which we just received it. It will have - * been sent on the interface from which we received it though so that's still - * ok to use. 
- */ - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port); - if (unlikely(!cm)) { - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found\n"); - return 0; - } - - /* - * We found a connection so now remove it from the connection list and flush - * its state. - */ - c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; -} - -/* - * sfe_ipv4_recv() - * Handle packet receives and forwaring. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. - */ -int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb) -{ - struct sfe_ipv4 *si = &__si; - unsigned int len; - unsigned int tot_len; - unsigned int frag_off; - unsigned int ihl; - bool flush_on_find; - bool ip_options; - struct sfe_ipv4_ip_hdr *iph; - u32 protocol; - - /* - * Check that we have space for an IP header here. - */ - len = skb->len; - if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("len: %u is too short\n", len); - return 0; - } - - /* - * Check that our "total length" is large enough for an IP header. - */ - iph = (struct sfe_ipv4_ip_hdr *)skb->data; - tot_len = ntohs(iph->tot_len); - if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("tot_len: %u is too short\n", tot_len); - return 0; - } - - /* - * Is our IP version wrong? 
- */ - if (unlikely(iph->version != 4)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("IP version: %u\n", iph->version); - return 0; - } - - /* - * Does our datagram fit inside the skb? - */ - if (unlikely(tot_len > len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len); - return 0; - } - - /* - * Do we have a non-initial fragment? - */ - frag_off = ntohs(iph->frag_off); - if (unlikely(frag_off & IP_OFFSET)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("non-initial fragment\n"); - return 0; - } - - /* - * If we have a (first) fragment then mark it to cause any connection to flush. - */ - flush_on_find = unlikely(frag_off & IP_MF) ? true : false; - - /* - * Do we have any IP options? That's definite a slow path! If we do have IP - * options we need to recheck our header size. - */ - ihl = iph->ihl << 2; - ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? 
true : false; - if (unlikely(ip_options)) { - if (unlikely(len < ihl)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl); - return 0; - } - - flush_on_find = true; - } - - protocol = iph->protocol; - if (IPPROTO_UDP == protocol) { - return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find); - } - - if (IPPROTO_TCP == protocol) { - return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find); - } - - if (IPPROTO_ICMP == protocol) { - return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl); - } - - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol); - return 0; -} - -static void -sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c, - struct sfe_connection_create *sic) -{ - struct sfe_ipv4_connection_match *orig_cm; - struct sfe_ipv4_connection_match *repl_cm; - struct sfe_ipv4_tcp_connection_match *orig_tcp; - struct sfe_ipv4_tcp_connection_match *repl_tcp; - - orig_cm = c->original_match; - repl_cm = c->reply_match; - orig_tcp = &orig_cm->protocol_state.tcp; - repl_tcp = &repl_cm->protocol_state.tcp; - - /* update orig */ - if (orig_tcp->max_win < sic->src_td_max_window) { - orig_tcp->max_win = sic->src_td_max_window; - } - if ((s32)(orig_tcp->end - sic->src_td_end) < 0) { - orig_tcp->end = sic->src_td_end; - } - if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) { - orig_tcp->max_end = sic->src_td_max_end; - } - - /* update reply */ - if (repl_tcp->max_win < sic->dest_td_max_window) { - repl_tcp->max_win = sic->dest_td_max_window; - } - if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) { - repl_tcp->end = sic->dest_td_end; - } - if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) { - 
repl_tcp->max_end = sic->dest_td_max_end; - } - - /* update match flags */ - orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { - orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - } -} - -static void -sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c, - struct sfe_connection_create *sic) -{ - switch (sic->protocol) { - case IPPROTO_TCP: - sfe_ipv4_update_tcp_state(c, sic); - break; - } -} - -void sfe_ipv4_update_rule(struct sfe_connection_create *sic) -{ - struct sfe_ipv4_connection *c; - struct sfe_ipv4 *si = &__si; - - spin_lock_bh(&si->lock); - - c = sfe_ipv4_find_sfe_ipv4_connection(si, - sic->protocol, - sic->src_ip.ip, - sic->src_port, - sic->dest_ip.ip, - sic->dest_port); - if (c != NULL) { - sfe_ipv4_update_protocol_state(c, sic); - } - - spin_unlock_bh(&si->lock); -} - -/* - * sfe_ipv4_create_rule() - * Create a forwarding rule. - */ -int sfe_ipv4_create_rule(struct sfe_connection_create *sic) -{ - struct sfe_ipv4 *si = &__si; - struct sfe_ipv4_connection *c; - struct sfe_ipv4_connection_match *original_cm; - struct sfe_ipv4_connection_match *reply_cm; - struct net_device *dest_dev; - struct net_device *src_dev; - - dest_dev = sic->dest_dev; - src_dev = sic->src_dev; - - if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) || - (src_dev->reg_state != NETREG_REGISTERED))) { - return -EINVAL; - } - - spin_lock_bh(&si->lock); - si->connection_create_requests++; - - /* - * Check to see if there is already a flow that matches the rule we're - * trying to create. If there is then we can't create a new one. 
- */ - c = sfe_ipv4_find_sfe_ipv4_connection(si, - sic->protocol, - sic->src_ip.ip, - sic->src_port, - sic->dest_ip.ip, - sic->dest_port); - if (c != NULL) { - si->connection_create_collisions++; - - /* - * If we already have the flow then it's likely that this - * request to create the connection rule contains more - * up-to-date information. Check and update accordingly. - */ - sfe_ipv4_update_protocol_state(c, sic); - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n" - " s: %s:%pxM:%pI4:%u, d: %s:%pxM:%pI4:%u\n", - sic->mark, sic->protocol, - sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port), - sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port)); - return -EADDRINUSE; - } - - /* - * Allocate the various connection tracking objects. - */ - c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC); - if (unlikely(!c)) { - spin_unlock_bh(&si->lock); - return -ENOMEM; - } - - original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC); - if (unlikely(!original_cm)) { - spin_unlock_bh(&si->lock); - kfree(c); - return -ENOMEM; - } - - reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC); - if (unlikely(!reply_cm)) { - spin_unlock_bh(&si->lock); - kfree(original_cm); - kfree(c); - return -ENOMEM; - } - - /* - * Fill in the "original" direction connection matching object. - * Note that the transmit MAC address is "dest_mac_xlate" because - * we always know both ends of a connection by their translated - * addresses and not their public addresses. 
- */ - original_cm->match_dev = src_dev; - original_cm->match_protocol = sic->protocol; - original_cm->match_src_ip = sic->src_ip.ip; - original_cm->match_src_port = sic->src_port; - original_cm->match_dest_ip = sic->dest_ip.ip; - original_cm->match_dest_port = sic->dest_port; - original_cm->xlate_src_ip = sic->src_ip_xlate.ip; - original_cm->xlate_src_port = sic->src_port_xlate; - original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip; - original_cm->xlate_dest_port = sic->dest_port_xlate; - original_cm->rx_packet_count = 0; - original_cm->rx_packet_count64 = 0; - original_cm->rx_byte_count = 0; - original_cm->rx_byte_count64 = 0; - original_cm->xmit_dev = dest_dev; - original_cm->xmit_dev_mtu = sic->dest_mtu; - memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN); - memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN); - original_cm->connection = c; - original_cm->counter_match = reply_cm; - original_cm->flags = 0; - if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { - original_cm->priority = sic->src_priority; - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; - } - if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { - original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT; - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; - } -#ifdef CONFIG_NF_FLOW_COOKIE - original_cm->flow_cookie = 0; -#endif -#ifdef CONFIG_XFRM - original_cm->flow_accel = sic->original_accel; -#endif - original_cm->active_next = NULL; - original_cm->active_prev = NULL; - original_cm->active = false; - - /* - * For PPP links we don't write an L2 header. For everything else we do. - */ - if (!(dest_dev->flags & IFF_POINTOPOINT)) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; - - /* - * If our dev writes Ethernet headers then we can write a really fast - * version. 
- */ - if (dest_dev->header_ops) { - if (dest_dev->header_ops->create == eth_header) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; - } - } - } - - /* - * Fill in the "reply" direction connection matching object. - */ - reply_cm->match_dev = dest_dev; - reply_cm->match_protocol = sic->protocol; - reply_cm->match_src_ip = sic->dest_ip_xlate.ip; - reply_cm->match_src_port = sic->dest_port_xlate; - reply_cm->match_dest_ip = sic->src_ip_xlate.ip; - reply_cm->match_dest_port = sic->src_port_xlate; - reply_cm->xlate_src_ip = sic->dest_ip.ip; - reply_cm->xlate_src_port = sic->dest_port; - reply_cm->xlate_dest_ip = sic->src_ip.ip; - reply_cm->xlate_dest_port = sic->src_port; - reply_cm->rx_packet_count = 0; - reply_cm->rx_packet_count64 = 0; - reply_cm->rx_byte_count = 0; - reply_cm->rx_byte_count64 = 0; - reply_cm->xmit_dev = src_dev; - reply_cm->xmit_dev_mtu = sic->src_mtu; - memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN); - memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN); - reply_cm->connection = c; - reply_cm->counter_match = original_cm; - reply_cm->flags = 0; - if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { - reply_cm->priority = sic->dest_priority; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; - } - if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { - reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; - } -#ifdef CONFIG_NF_FLOW_COOKIE - reply_cm->flow_cookie = 0; -#endif -#ifdef CONFIG_XFRM - reply_cm->flow_accel = sic->reply_accel; -#endif - reply_cm->active_next = NULL; - reply_cm->active_prev = NULL; - reply_cm->active = false; - - /* - * For PPP links we don't write an L2 header. For everything else we do. - */ - if (!(src_dev->flags & IFF_POINTOPOINT)) { - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; - - /* - * If our dev writes Ethernet headers then we can write a really fast - * version. 
- */ - if (src_dev->header_ops) { - if (src_dev->header_ops->create == eth_header) { - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; - } - } - } - - - if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; - } - - if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; - } - - c->protocol = sic->protocol; - c->src_ip = sic->src_ip.ip; - c->src_ip_xlate = sic->src_ip_xlate.ip; - c->src_port = sic->src_port; - c->src_port_xlate = sic->src_port_xlate; - c->original_dev = src_dev; - c->original_match = original_cm; - c->dest_ip = sic->dest_ip.ip; - c->dest_ip_xlate = sic->dest_ip_xlate.ip; - c->dest_port = sic->dest_port; - c->dest_port_xlate = sic->dest_port_xlate; - c->reply_dev = dest_dev; - c->reply_match = reply_cm; - c->mark = sic->mark; - c->debug_read_seq = 0; - c->last_sync_jiffies = get_jiffies_64(); - - /* - * Take hold of our source and dest devices for the duration of the connection. - */ - dev_hold(c->original_dev); - dev_hold(c->reply_dev); - - /* - * Initialize the protocol-specific information that we track. - */ - switch (sic->protocol) { - case IPPROTO_TCP: - original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale; - original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1; - original_cm->protocol_state.tcp.end = sic->src_td_end; - original_cm->protocol_state.tcp.max_end = sic->src_td_max_end; - reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale; - reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? 
sic->dest_td_max_window : 1; - reply_cm->protocol_state.tcp.end = sic->dest_td_end; - reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end; - if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - } - break; - } - - sfe_ipv4_connection_match_compute_translations(original_cm); - sfe_ipv4_connection_match_compute_translations(reply_cm); - sfe_ipv4_insert_sfe_ipv4_connection(si, c); - - spin_unlock_bh(&si->lock); - - /* - * We have everything we need! - */ - DEBUG_INFO("new connection - mark: %08x, p: %d\n" - " s: %s:%pxM(%pxM):%pI4(%pI4):%u(%u)\n" - " d: %s:%pxM(%pxM):%pI4(%pI4):%u(%u)\n", - sic->mark, sic->protocol, - sic->src_dev->name, sic->src_mac, sic->src_mac_xlate, - &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate), - dest_dev->name, sic->dest_mac, sic->dest_mac_xlate, - &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate)); - - return 0; -} - -/* - * sfe_ipv4_destroy_rule() - * Destroy a forwarding rule. - */ -void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid) -{ - struct sfe_ipv4 *si = &__si; - struct sfe_ipv4_connection *c; - - spin_lock_bh(&si->lock); - si->connection_destroy_requests++; - - /* - * Check to see if we have a flow that matches the rule we're trying - * to destroy. If there isn't then we can't destroy it. - */ - c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port, - sid->dest_ip.ip, sid->dest_port); - if (!c) { - si->connection_destroy_misses++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n", - sid->protocol, &sid->src_ip, ntohs(sid->src_port), - &sid->dest_ip, ntohs(sid->dest_port)); - return; - } - - /* - * Remove our connection details from the hash tables. 
- */ - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - spin_unlock_bh(&si->lock); - - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY); - - DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n", - sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port), - &sid->dest_ip.ip, ntohs(sid->dest_port)); -} - -/* - * sfe_ipv4_register_sync_rule_callback() - * Register a callback for rule synchronization. - */ -void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback) -{ - struct sfe_ipv4 *si = &__si; - - spin_lock_bh(&si->lock); - rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback); - spin_unlock_bh(&si->lock); -} - -/* - * sfe_ipv4_get_debug_dev() - */ -static ssize_t sfe_ipv4_get_debug_dev(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct sfe_ipv4 *si = &__si; - ssize_t count; - int num; - - spin_lock_bh(&si->lock); - num = si->debug_dev; - spin_unlock_bh(&si->lock); - - count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num); - return count; -} - -/* - * sysfs attributes. - */ -static const struct device_attribute sfe_ipv4_debug_dev_attr = - __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL); - -/* - * sfe_ipv4_destroy_all_rules_for_dev() - * Destroy all connections that match a particular device. - * - * If we pass dev as NULL then this destroys all connections. - */ -void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev) -{ - struct sfe_ipv4 *si = &__si; - struct sfe_ipv4_connection *c; - -another_round: - spin_lock_bh(&si->lock); - - for (c = si->all_connections_head; c; c = c->all_connections_next) { - /* - * Does this connection relate to the device we are destroying? 
- */ - if (!dev - || (dev == c->original_dev) - || (dev == c->reply_dev)) { - break; - } - } - - if (c) { - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - } - - spin_unlock_bh(&si->lock); - - if (c) { - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY); - goto another_round; - } -} - -/* - * sfe_ipv4_periodic_sync() - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) -static void sfe_ipv4_periodic_sync(unsigned long arg) -#else -static void sfe_ipv4_periodic_sync(struct timer_list *tl) -#endif -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) - struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg; -#else - struct sfe_ipv4 *si = from_timer(si, tl, timer); -#endif - u64 now_jiffies; - int quota; - sfe_sync_rule_callback_t sync_rule_callback; - - now_jiffies = get_jiffies_64(); - - rcu_read_lock(); - sync_rule_callback = rcu_dereference(si->sync_rule_callback); - if (!sync_rule_callback) { - rcu_read_unlock(); - goto done; - } - - spin_lock_bh(&si->lock); - sfe_ipv4_update_summary_stats(si); - - /* - * Get an estimate of the number of connections to parse in this sync. - */ - quota = (si->num_connections + 63) / 64; - - /* - * Walk the "active" list and sync the connection state. - */ - while (quota--) { - struct sfe_ipv4_connection_match *cm; - struct sfe_ipv4_connection_match *counter_cm; - struct sfe_ipv4_connection *c; - struct sfe_connection_sync sis; - - cm = si->active_head; - if (!cm) { - break; - } - - /* - * There's a possibility that our counter match is in the active list too. - * If it is then remove it. - */ - counter_cm = cm->counter_match; - if (counter_cm->active) { - counter_cm->active = false; - - /* - * We must have a connection preceding this counter match - * because that's the one that got us to this point, so we don't have - * to worry about removing the head of the list. 
- */ - counter_cm->active_prev->active_next = counter_cm->active_next; - - if (likely(counter_cm->active_next)) { - counter_cm->active_next->active_prev = counter_cm->active_prev; - } else { - si->active_tail = counter_cm->active_prev; - } - - counter_cm->active_next = NULL; - counter_cm->active_prev = NULL; - } - - /* - * Now remove the head of the active scan list. - */ - cm->active = false; - si->active_head = cm->active_next; - if (likely(cm->active_next)) { - cm->active_next->active_prev = NULL; - } else { - si->active_tail = NULL; - } - cm->active_next = NULL; - - /* - * Sync the connection state. - */ - c = cm->connection; - sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies); - - /* - * We don't want to be holding the lock when we sync! - */ - spin_unlock_bh(&si->lock); - sync_rule_callback(&sis); - spin_lock_bh(&si->lock); - } - - spin_unlock_bh(&si->lock); - rcu_read_unlock(); - -done: - mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); -} - -#define CHAR_DEV_MSG_SIZE 768 - -/* - * sfe_ipv4_debug_dev_read_start() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - si->debug_read_seq++; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_connections_start() - * Generate part of the XML output. 
- */ -static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_connections_connection() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - struct sfe_ipv4_connection *c; - struct sfe_ipv4_connection_match *original_cm; - struct sfe_ipv4_connection_match *reply_cm; - int bytes_read; - int protocol; - struct net_device *src_dev; - __be32 src_ip; - __be32 src_ip_xlate; - __be16 src_port; - __be16 src_port_xlate; - u64 src_rx_packets; - u64 src_rx_bytes; - struct net_device *dest_dev; - __be32 dest_ip; - __be32 dest_ip_xlate; - __be16 dest_port; - __be16 dest_port_xlate; - u64 dest_rx_packets; - u64 dest_rx_bytes; - u64 last_sync_jiffies; - u32 mark, src_priority, dest_priority, src_dscp, dest_dscp; -#ifdef CONFIG_NF_FLOW_COOKIE - int src_flow_cookie, dst_flow_cookie; -#endif - - spin_lock_bh(&si->lock); - - for (c = si->all_connections_head; c; c = c->all_connections_next) { - if (c->debug_read_seq < si->debug_read_seq) { - c->debug_read_seq = si->debug_read_seq; - break; - } - } - - /* - * If there were no connections then move to the next state. 
- */ - if (!c) { - spin_unlock_bh(&si->lock); - ws->state++; - return true; - } - - original_cm = c->original_match; - reply_cm = c->reply_match; - - protocol = c->protocol; - src_dev = c->original_dev; - src_ip = c->src_ip; - src_ip_xlate = c->src_ip_xlate; - src_port = c->src_port; - src_port_xlate = c->src_port_xlate; - src_priority = original_cm->priority; - src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT; - - sfe_ipv4_connection_match_update_summary_stats(original_cm); - sfe_ipv4_connection_match_update_summary_stats(reply_cm); - - src_rx_packets = original_cm->rx_packet_count64; - src_rx_bytes = original_cm->rx_byte_count64; - dest_dev = c->reply_dev; - dest_ip = c->dest_ip; - dest_ip_xlate = c->dest_ip_xlate; - dest_port = c->dest_port; - dest_port_xlate = c->dest_port_xlate; - dest_priority = reply_cm->priority; - dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT; - dest_rx_packets = reply_cm->rx_packet_count64; - dest_rx_bytes = reply_cm->rx_byte_count64; - last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies; - mark = c->mark; -#ifdef CONFIG_NF_FLOW_COOKIE - src_flow_cookie = original_cm->flow_cookie; - dst_flow_cookie = reply_cm->flow_cookie; -#endif - spin_unlock_bh(&si->lock); - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t\n", - protocol, - src_dev->name, - &src_ip, &src_ip_xlate, - ntohs(src_port), ntohs(src_port_xlate), - src_priority, src_dscp, - src_rx_packets, src_rx_bytes, - dest_dev->name, - &dest_ip, &dest_ip_xlate, - ntohs(dest_port), ntohs(dest_port_xlate), - dest_priority, dest_dscp, - dest_rx_packets, dest_rx_bytes, -#ifdef CONFIG_NF_FLOW_COOKIE - src_flow_cookie, dst_flow_cookie, -#endif - last_sync_jiffies, mark); - - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - return true; -} - -/* - * sfe_ipv4_debug_dev_read_connections_end() - * Generate part of the XML output. 
- */ -static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_exceptions_start() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_exceptions_exception() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - u64 ct; - - spin_lock_bh(&si->lock); - ct = si->exception_events64[ws->iter_exception]; - spin_unlock_bh(&si->lock); - - if (ct) { - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, - "\t\t\n", - sfe_ipv4_exception_events_string[ws->iter_exception], - ct); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - } - - ws->iter_exception++; - if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) { - ws->iter_exception = 0; - ws->state++; - } - - return true; -} - -/* - * sfe_ipv4_debug_dev_read_exceptions_end() - * Generate part of the XML output. 
- */ -static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_stats() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - unsigned int num_connections; - u64 packets_forwarded; - u64 packets_not_forwarded; - u64 connection_create_requests; - u64 connection_create_collisions; - u64 connection_destroy_requests; - u64 connection_destroy_misses; - u64 connection_flushes; - u64 connection_match_hash_hits; - u64 connection_match_hash_reorders; - - spin_lock_bh(&si->lock); - sfe_ipv4_update_summary_stats(si); - - num_connections = si->num_connections; - packets_forwarded = si->packets_forwarded64; - packets_not_forwarded = si->packets_not_forwarded64; - connection_create_requests = si->connection_create_requests64; - connection_create_collisions = si->connection_create_collisions64; - connection_destroy_requests = si->connection_destroy_requests64; - connection_destroy_misses = si->connection_destroy_misses64; - connection_flushes = si->connection_flushes64; - connection_match_hash_hits = si->connection_match_hash_hits64; - connection_match_hash_reorders = si->connection_match_hash_reorders64; - spin_unlock_bh(&si->lock); - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n", - num_connections, - packets_forwarded, - packets_not_forwarded, - connection_create_requests, - connection_create_collisions, - connection_destroy_requests, - connection_destroy_misses, - connection_flushes, - 
connection_match_hash_hits, - connection_match_hash_reorders); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_end() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * Array of write functions that write various XML elements that correspond to - * our XML output state machine. - */ -static sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = { - sfe_ipv4_debug_dev_read_start, - sfe_ipv4_debug_dev_read_connections_start, - sfe_ipv4_debug_dev_read_connections_connection, - sfe_ipv4_debug_dev_read_connections_end, - sfe_ipv4_debug_dev_read_exceptions_start, - sfe_ipv4_debug_dev_read_exceptions_exception, - sfe_ipv4_debug_dev_read_exceptions_end, - sfe_ipv4_debug_dev_read_stats, - sfe_ipv4_debug_dev_read_end, -}; - -/* - * sfe_ipv4_debug_dev_read() - * Send info to userspace upon read request from user - */ -static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset) -{ - char msg[CHAR_DEV_MSG_SIZE]; - int total_read = 0; - struct sfe_ipv4_debug_xml_write_state *ws; - struct sfe_ipv4 *si = &__si; - - ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data; - while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) { - if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) { - continue; - } - } - - return total_read; -} - -/* - * 
sfe_ipv4_debug_dev_write() - * Write to char device resets some stats - */ -static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset) -{ - struct sfe_ipv4 *si = &__si; - - spin_lock_bh(&si->lock); - sfe_ipv4_update_summary_stats(si); - - si->packets_forwarded64 = 0; - si->packets_not_forwarded64 = 0; - si->connection_create_requests64 = 0; - si->connection_create_collisions64 = 0; - si->connection_destroy_requests64 = 0; - si->connection_destroy_misses64 = 0; - si->connection_flushes64 = 0; - si->connection_match_hash_hits64 = 0; - si->connection_match_hash_reorders64 = 0; - spin_unlock_bh(&si->lock); - - return length; -} - -/* - * sfe_ipv4_debug_dev_open() - */ -static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file) -{ - struct sfe_ipv4_debug_xml_write_state *ws; - - ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data; - if (!ws) { - ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL); - if (!ws) { - return -ENOMEM; - } - - ws->state = SFE_IPV4_DEBUG_XML_STATE_START; - file->private_data = ws; - } - - return 0; -} - -/* - * sfe_ipv4_debug_dev_release() - */ -static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file) -{ - struct sfe_ipv4_debug_xml_write_state *ws; - - ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data; - if (ws) { - /* - * We've finished with our output so free the write state. 
- */ - kfree(ws); - } - - return 0; -} - -/* - * File operations used in the debug char device - */ -static struct file_operations sfe_ipv4_debug_dev_fops = { - .read = sfe_ipv4_debug_dev_read, - .write = sfe_ipv4_debug_dev_write, - .open = sfe_ipv4_debug_dev_open, - .release = sfe_ipv4_debug_dev_release -}; - -#ifdef CONFIG_NF_FLOW_COOKIE -/* - * sfe_register_flow_cookie_cb - * register a function in SFE to let SFE use this function to configure flow cookie for a flow - * - * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE - * can use this function to configure flow cookie for a flow. - * return: 0, success; !=0, fail - */ -int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb) -{ - struct sfe_ipv4 *si = &__si; - - BUG_ON(!cb); - - if (si->flow_cookie_set_func) { - return -1; - } - - rcu_assign_pointer(si->flow_cookie_set_func, cb); - return 0; -} - -/* - * sfe_unregister_flow_cookie_cb - * unregister function which is used to configure flow cookie for a flow - * - * return: 0, success; !=0, fail - */ -int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb) -{ - struct sfe_ipv4 *si = &__si; - - RCU_INIT_POINTER(si->flow_cookie_set_func, NULL); - return 0; -} - -/* - * sfe_ipv4_get_flow_cookie() - */ -static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct sfe_ipv4 *si = &__si; - return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable); -} - -/* - * sfe_ipv4_set_flow_cookie() - */ -static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct sfe_ipv4 *si = &__si; - strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable); - - return size; -} - -/* - * sysfs attributes. 
- */ -static const struct device_attribute sfe_ipv4_flow_cookie_attr = - __ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie); -#endif /*CONFIG_NF_FLOW_COOKIE*/ - -/* - * sfe_ipv4_init() - */ -static int __init sfe_ipv4_init(void) -{ - struct sfe_ipv4 *si = &__si; - int result = -1; - - DEBUG_INFO("SFE IPv4 init\n"); - - /* - * Create sys/sfe_ipv4 - */ - si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL); - if (!si->sys_sfe_ipv4) { - DEBUG_ERROR("failed to register sfe_ipv4\n"); - goto exit1; - } - - /* - * Create files, one for each parameter supported by this module. - */ - result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr); - if (result) { - DEBUG_ERROR("failed to register debug dev file: %d\n", result); - goto exit2; - } - -#ifdef CONFIG_NF_FLOW_COOKIE - result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr); - if (result) { - DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result); - goto exit3; - } -#endif /* CONFIG_NF_FLOW_COOKIE */ - - /* - * Register our debug char device. - */ - result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops); - if (result < 0) { - DEBUG_ERROR("Failed to register chrdev: %d\n", result); - goto exit4; - } - - si->debug_dev = result; - - /* - * Create a timer to handle periodic statistics. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) - setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si); -#else - timer_setup(&si->timer, sfe_ipv4_periodic_sync, 0); -#endif - mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); - - spin_lock_init(&si->lock); - - return 0; - -exit4: -#ifdef CONFIG_NF_FLOW_COOKIE - sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr); - -exit3: -#endif /* CONFIG_NF_FLOW_COOKIE */ - sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr); - -exit2: - kobject_put(si->sys_sfe_ipv4); - -exit1: - return result; -} - -/* - * sfe_ipv4_exit() - */ -static void __exit sfe_ipv4_exit(void) -{ - struct sfe_ipv4 *si = &__si; - - DEBUG_INFO("SFE IPv4 exit\n"); - - /* - * Destroy all connections. - */ - sfe_ipv4_destroy_all_rules_for_dev(NULL); - - del_timer_sync(&si->timer); - - unregister_chrdev(si->debug_dev, "sfe_ipv4"); - -#ifdef CONFIG_NF_FLOW_COOKIE - sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr); -#endif /* CONFIG_NF_FLOW_COOKIE */ - sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr); - - kobject_put(si->sys_sfe_ipv4); - -} - -module_init(sfe_ipv4_init) -module_exit(sfe_ipv4_exit) - -EXPORT_SYMBOL(sfe_ipv4_recv); -EXPORT_SYMBOL(sfe_ipv4_create_rule); -EXPORT_SYMBOL(sfe_ipv4_destroy_rule); -EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev); -EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback); -EXPORT_SYMBOL(sfe_ipv4_mark_rule); -EXPORT_SYMBOL(sfe_ipv4_update_rule); -#ifdef CONFIG_NF_FLOW_COOKIE -EXPORT_SYMBOL(sfe_register_flow_cookie_cb); -EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb); -#endif - -MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/shortcut-fe/sfe_ipv6.c b/shortcut-fe/sfe_ipv6.c deleted file mode 100644 index a7cb811a9..000000000 --- a/shortcut-fe/sfe_ipv6.c +++ /dev/null @@ -1,3617 +0,0 @@ -/* - * sfe_ipv6.c - * Shortcut forwarding engine - IPv6 support. 
- * - * Copyright (c) 2015-2016, 2019-2020 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "sfe.h" -#include "sfe_cm.h" - -/* - * By default Linux IP header and transport layer header structures are - * unpacked, assuming that such headers should be 32-bit aligned. - * Unfortunately some wireless adaptors can't cope with this requirement and - * some CPUs can't handle misaligned accesses. For those platforms we - * define SFE_IPV6_UNALIGNED_IP_HEADER and mark the structures as packed. - * When we do this the compiler will generate slightly worse code than for the - * aligned case (on most platforms) but will be much quicker than fixing - * things up in an unaligned trap handler. 
- */ -#define SFE_IPV6_UNALIGNED_IP_HEADER 1 -#if SFE_IPV6_UNALIGNED_IP_HEADER -#define SFE_IPV6_UNALIGNED_STRUCT __attribute__((packed)) -#else -#define SFE_IPV6_UNALIGNED_STRUCT -#endif - -#define CHAR_DEV_MSG_SIZE 768 - -/* - * An Ethernet header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV6_UNALIGNED_STRUCT) - */ -struct sfe_ipv6_eth_hdr { - __be16 h_dest[ETH_ALEN / 2]; - __be16 h_source[ETH_ALEN / 2]; - __be16 h_proto; -} SFE_IPV6_UNALIGNED_STRUCT; - -#define SFE_IPV6_DSCP_MASK 0xf03f -#define SFE_IPV6_DSCP_SHIFT 2 - -/* - * An IPv6 header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV6_UNALIGNED_STRUCT) - */ -struct sfe_ipv6_ip_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 priority:4, - version:4; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u8 version:4, - priority:4; -#else -#error "Please fix " -#endif - __u8 flow_lbl[3]; - __be16 payload_len; - __u8 nexthdr; - __u8 hop_limit; - struct sfe_ipv6_addr saddr; - struct sfe_ipv6_addr daddr; - - /* - * The extension header start here. 
- */ -} SFE_IPV6_UNALIGNED_STRUCT; - -#define SFE_IPV6_EXT_HDR_HOP 0 -#define SFE_IPV6_EXT_HDR_ROUTING 43 -#define SFE_IPV6_EXT_HDR_FRAG 44 -#define SFE_IPV6_EXT_HDR_ESP 50 -#define SFE_IPV6_EXT_HDR_AH 51 -#define SFE_IPV6_EXT_HDR_NONE 59 -#define SFE_IPV6_EXT_HDR_DST 60 -#define SFE_IPV6_EXT_HDR_MH 135 - -/* - * fragmentation header - */ - -struct sfe_ipv6_frag_hdr { - __u8 nexthdr; - __u8 reserved; - __be16 frag_off; - __be32 identification; -}; - -#define SFE_IPV6_FRAG_OFFSET 0xfff8 - -/* - * generic IPv6 extension header - */ -struct sfe_ipv6_ext_hdr { - __u8 next_hdr; - __u8 hdr_len; - __u8 padding[6]; -} SFE_IPV6_UNALIGNED_STRUCT; - -/* - * A UDP header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV6_UNALIGNED_STRUCT) - */ -struct sfe_ipv6_udp_hdr { - __be16 source; - __be16 dest; - __be16 len; - __sum16 check; -} SFE_IPV6_UNALIGNED_STRUCT; - -/* - * A TCP header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV6_UNALIGNED_STRUCT) - */ -struct sfe_ipv6_tcp_hdr { - __be16 source; - __be16 dest; - __be32 seq; - __be32 ack_seq; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 res1:4, - doff:4, - fin:1, - syn:1, - rst:1, - psh:1, - ack:1, - urg:1, - ece:1, - cwr:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 doff:4, - res1:4, - cwr:1, - ece:1, - urg:1, - ack:1, - psh:1, - rst:1, - syn:1, - fin:1; -#else -#error "Adjust your defines" -#endif - __be16 window; - __sum16 check; - __be16 urg_ptr; -} SFE_IPV6_UNALIGNED_STRUCT; - -/* - * Specifies the lower bound on ACK numbers carried in the TCP header - */ -#define SFE_IPV6_TCP_MAX_ACK_WINDOW 65520 - -/* - * IPv6 TCP connection match additional data. 
- */ -struct sfe_ipv6_tcp_connection_match { - u8 win_scale; /* Window scale */ - u32 max_win; /* Maximum window size seen */ - u32 end; /* Sequence number of the next byte to send (seq + segment length) */ - u32 max_end; /* Sequence number of the last byte to ack */ -}; - -/* - * Bit flags for IPv6 connection matching entry. - */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0) - /* Perform source translation */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1) - /* Perform destination translation */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2) - /* Ignore TCP sequence numbers */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3) - /* Fast Ethernet header write */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4) - /* Fast Ethernet header write */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5) - /* remark priority of SKB */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6) - /* remark DSCP of packet */ - -/* - * IPv6 connection matching structure. - */ -struct sfe_ipv6_connection_match { - /* - * References to other objects. - */ - struct sfe_ipv6_connection_match *next; - struct sfe_ipv6_connection_match *prev; - struct sfe_ipv6_connection *connection; - struct sfe_ipv6_connection_match *counter_match; - /* Matches the flow in the opposite direction as the one in connection */ - struct sfe_ipv6_connection_match *active_next; - struct sfe_ipv6_connection_match *active_prev; - bool active; /* Flag to indicate if we're on the active list */ - - /* - * Characteristics that identify flows that match this rule. 
- */ - struct net_device *match_dev; /* Network device */ - u8 match_protocol; /* Protocol */ - struct sfe_ipv6_addr match_src_ip[1]; /* Source IP address */ - struct sfe_ipv6_addr match_dest_ip[1]; /* Destination IP address */ - __be16 match_src_port; /* Source port/connection ident */ - __be16 match_dest_port; /* Destination port/connection ident */ - - /* - * Control the operations of the match. - */ - u32 flags; /* Bit flags */ -#ifdef CONFIG_NF_FLOW_COOKIE - u32 flow_cookie; /* used flow cookie, for debug */ -#endif -#ifdef CONFIG_XFRM - u32 flow_accel; /* The flow accelerated or not */ -#endif - - /* - * Connection state that we track once we match. - */ - union { /* Protocol-specific state */ - struct sfe_ipv6_tcp_connection_match tcp; - } protocol_state; - /* - * Stats recorded in a sync period. These stats will be added to - * rx_packet_count64/rx_byte_count64 after a sync period. - */ - u32 rx_packet_count; - u32 rx_byte_count; - - /* - * Packet translation information. - */ - struct sfe_ipv6_addr xlate_src_ip[1]; /* Address after source translation */ - __be16 xlate_src_port; /* Port/connection ident after source translation */ - u16 xlate_src_csum_adjustment; - /* Transport layer checksum adjustment after source translation */ - struct sfe_ipv6_addr xlate_dest_ip[1]; /* Address after destination translation */ - __be16 xlate_dest_port; /* Port/connection ident after destination translation */ - u16 xlate_dest_csum_adjustment; - /* Transport layer checksum adjustment after destination translation */ - - /* - * QoS information - */ - u32 priority; - u32 dscp; - - /* - * Packet transmit information. - */ - struct net_device *xmit_dev; /* Network device on which to transmit */ - unsigned short int xmit_dev_mtu; - /* Interface MTU */ - u16 xmit_dest_mac[ETH_ALEN / 2]; - /* Destination MAC address to use when forwarding */ - u16 xmit_src_mac[ETH_ALEN / 2]; - /* Source MAC address to use when forwarding */ - - /* - * Summary stats. 
- */ - u64 rx_packet_count64; - u64 rx_byte_count64; -}; - -/* - * Per-connection data structure. - */ -struct sfe_ipv6_connection { - struct sfe_ipv6_connection *next; - /* Pointer to the next entry in a hash chain */ - struct sfe_ipv6_connection *prev; - /* Pointer to the previous entry in a hash chain */ - int protocol; /* IP protocol number */ - struct sfe_ipv6_addr src_ip[1]; /* Src IP addr pre-translation */ - struct sfe_ipv6_addr src_ip_xlate[1]; /* Src IP addr post-translation */ - struct sfe_ipv6_addr dest_ip[1]; /* Dest IP addr pre-translation */ - struct sfe_ipv6_addr dest_ip_xlate[1]; /* Dest IP addr post-translation */ - __be16 src_port; /* Src port pre-translation */ - __be16 src_port_xlate; /* Src port post-translation */ - __be16 dest_port; /* Dest port pre-translation */ - __be16 dest_port_xlate; /* Dest port post-translation */ - struct sfe_ipv6_connection_match *original_match; - /* Original direction matching structure */ - struct net_device *original_dev; - /* Original direction source device */ - struct sfe_ipv6_connection_match *reply_match; - /* Reply direction matching structure */ - struct net_device *reply_dev; /* Reply direction source device */ - u64 last_sync_jiffies; /* Jiffies count for the last sync */ - struct sfe_ipv6_connection *all_connections_next; - /* Pointer to the next entry in the list of all connections */ - struct sfe_ipv6_connection *all_connections_prev; - /* Pointer to the previous entry in the list of all connections */ - u32 mark; /* mark for outgoing packet */ - u32 debug_read_seq; /* sequence number for debug dump */ -}; - -/* - * IPv6 connections and hash table size information. 
- */ -#define SFE_IPV6_CONNECTION_HASH_SHIFT 12 -#define SFE_IPV6_CONNECTION_HASH_SIZE (1 << SFE_IPV6_CONNECTION_HASH_SHIFT) -#define SFE_IPV6_CONNECTION_HASH_MASK (SFE_IPV6_CONNECTION_HASH_SIZE - 1) - -#ifdef CONFIG_NF_FLOW_COOKIE -#define SFE_FLOW_COOKIE_SIZE 2048 -#define SFE_FLOW_COOKIE_MASK 0x7ff - -struct sfe_ipv6_flow_cookie_entry { - struct sfe_ipv6_connection_match *match; - unsigned long last_clean_time; -}; -#endif - -enum sfe_ipv6_exception_events { - SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION, - SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT, - SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL, - SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION, - SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS, - SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS, - SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT, - SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL, - SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION, - SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS, - SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE, - SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS, - SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK, - SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS, - SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE, - SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE, - SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_IP_OPTIONS_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UDP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_TCP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL, - SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION, - SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION, - 
SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_BAD_TOTAL_LENGTH, - SFE_IPV6_EXCEPTION_EVENT_NON_V6, - SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT, - SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL, - SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL, - SFE_IPV6_EXCEPTION_EVENT_LAST -}; - -static char *sfe_ipv6_exception_events_string[SFE_IPV6_EXCEPTION_EVENT_LAST] = { - "UDP_HEADER_INCOMPLETE", - "UDP_NO_CONNECTION", - "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT", - "UDP_SMALL_TTL", - "UDP_NEEDS_FRAGMENTATION", - "TCP_HEADER_INCOMPLETE", - "TCP_NO_CONNECTION_SLOW_FLAGS", - "TCP_NO_CONNECTION_FAST_FLAGS", - "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT", - "TCP_SMALL_TTL", - "TCP_NEEDS_FRAGMENTATION", - "TCP_FLAGS", - "TCP_SEQ_EXCEEDS_RIGHT_EDGE", - "TCP_SMALL_DATA_OFFS", - "TCP_BAD_SACK", - "TCP_BIG_DATA_OFFS", - "TCP_SEQ_BEFORE_LEFT_EDGE", - "TCP_ACK_EXCEEDS_RIGHT_EDGE", - "TCP_ACK_BEFORE_LEFT_EDGE", - "ICMP_HEADER_INCOMPLETE", - "ICMP_UNHANDLED_TYPE", - "ICMP_IPV6_HEADER_INCOMPLETE", - "ICMP_IPV6_NON_V6", - "ICMP_IPV6_IP_OPTIONS_INCOMPLETE", - "ICMP_IPV6_UDP_HEADER_INCOMPLETE", - "ICMP_IPV6_TCP_HEADER_INCOMPLETE", - "ICMP_IPV6_UNHANDLED_PROTOCOL", - "ICMP_NO_CONNECTION", - "ICMP_FLUSHED_CONNECTION", - "HEADER_INCOMPLETE", - "BAD_TOTAL_LENGTH", - "NON_V6", - "NON_INITIAL_FRAGMENT", - "DATAGRAM_INCOMPLETE", - "IP_OPTIONS_INCOMPLETE", - "UNHANDLED_PROTOCOL", - "FLOW_COOKIE_ADD_FAIL" -}; - -/* - * Per-module structure. 
 */
struct sfe_ipv6 {
	spinlock_t lock;		/* Lock for SMP correctness */
	struct sfe_ipv6_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv6_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv6_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv6_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;	/* Number of connections */
	struct timer_list timer;	/* Timer used for periodic sync ops */
	sfe_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv6_connection *conn_hash[SFE_IPV6_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv6_connection_match *conn_match_hash[SFE_IPV6_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_ipv6_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table */
	sfe_ipv6_flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware */
	int flow_cookie_enable;
					/* Enable/disable flow cookie at runtime */
#endif

	/*
	 * Stats recorded in a sync period. These stats will be added to
	 * connection_xxx64 after a sync period.
	 */
	u32 connection_create_requests;
					/* Number of IPv6 connection create requests */
	u32 connection_create_collisions;
					/* Number of IPv6 connection create requests that collided with existing hash table entries */
	u32 connection_destroy_requests;
					/* Number of IPv6 connection destroy requests */
	u32 connection_destroy_misses;
					/* Number of IPv6 connection destroy requests that missed our hash table */
	u32 connection_match_hash_hits;
					/* Number of IPv6 connection match hash hits */
	u32 connection_match_hash_reorders;
					/* Number of IPv6 connection match hash reorders */
	u32 connection_flushes;		/* Number of IPv6 connection flushes */
	u32 packets_forwarded;		/* Number of IPv6 packets forwarded */
	u32 packets_not_forwarded;	/* Number of IPv6 packets not forwarded */
	u32 exception_events[SFE_IPV6_EXCEPTION_EVENT_LAST];
					/* Per-event exception counters for this sync period */

	/*
	 * Summary statistics. 64-bit accumulators that the 32-bit
	 * per-period counters above are folded into at sync time.
	 */
	u64 connection_create_requests64;
					/* Number of IPv6 connection create requests */
	u64 connection_create_collisions64;
					/* Number of IPv6 connection create requests that collided with existing hash table entries */
	u64 connection_destroy_requests64;
					/* Number of IPv6 connection destroy requests */
	u64 connection_destroy_misses64;
					/* Number of IPv6 connection destroy requests that missed our hash table */
	u64 connection_match_hash_hits64;
					/* Number of IPv6 connection match hash hits */
	u64 connection_match_hash_reorders64;
					/* Number of IPv6 connection match hash reorders */
	u64 connection_flushes64;	/* Number of IPv6 connection flushes */
	u64 packets_forwarded64;	/* Number of IPv6 packets forwarded */
	u64 packets_not_forwarded64;
					/* Number of IPv6 packets not forwarded */
	u64 exception_events64[SFE_IPV6_EXCEPTION_EVENT_LAST];
					/* Lifetime per-event exception counters */

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv6;	/* sysfs linkage */
	int debug_dev;			/* Major number of the debug char device */
	u32 debug_read_seq;		/* sequence number for debug dump */
};

/*
 * Enumeration of the XML output.
 * States of the state machine that emits the debug-dump XML document.
 */
enum sfe_ipv6_debug_xml_states {
	SFE_IPV6_DEBUG_XML_STATE_START,
	SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV6_DEBUG_XML_STATE_STATS,
	SFE_IPV6_DEBUG_XML_STATE_END,
	SFE_IPV6_DEBUG_XML_STATE_DONE
};

/*
 * XML write state.
 * Per-open-file cursor for the debug char device dump.
 */
struct sfe_ipv6_debug_xml_write_state {
	enum sfe_ipv6_debug_xml_states state;
					/* XML output file state machine state */
	int iter_exception;		/* Next exception iterator */
};

/*
 * One write method per XML state machine state; returns true when the
 * state has finished emitting and the machine may advance.
 */
typedef bool (*sfe_ipv6_debug_xml_write_method_t)(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv6_debug_xml_write_state *ws);

/* Singleton instance holding all module state. */
static struct sfe_ipv6 __si6;

/*
 * sfe_ipv6_get_debug_dev()
 */
static ssize_t sfe_ipv6_get_debug_dev(struct device *dev, struct device_attribute *attr, char *buf);

/*
 * sysfs attributes.
- */ -static const struct device_attribute sfe_ipv6_debug_dev_attr = - __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv6_get_debug_dev, NULL); - -/* - * sfe_ipv6_is_ext_hdr() - * check if we recognize ipv6 extension header - */ -static inline bool sfe_ipv6_is_ext_hdr(u8 hdr) -{ - return (hdr == SFE_IPV6_EXT_HDR_HOP) || - (hdr == SFE_IPV6_EXT_HDR_ROUTING) || - (hdr == SFE_IPV6_EXT_HDR_FRAG) || - (hdr == SFE_IPV6_EXT_HDR_AH) || - (hdr == SFE_IPV6_EXT_HDR_DST) || - (hdr == SFE_IPV6_EXT_HDR_MH); -} - -/* - * sfe_ipv6_change_dsfield() - * change dscp field in IPv6 packet - */ -static inline void sfe_ipv6_change_dsfield(struct sfe_ipv6_ip_hdr *iph, u8 dscp) -{ - __be16 *p = (__be16 *)iph; - - *p = ((*p & htons(SFE_IPV6_DSCP_MASK)) | htons((u16)dscp << 4)); -} - -/* - * sfe_ipv6_get_connection_match_hash() - * Generate the hash used in connection match lookups. - */ -static inline unsigned int sfe_ipv6_get_connection_match_hash(struct net_device *dev, u8 protocol, - struct sfe_ipv6_addr *src_ip, __be16 src_port, - struct sfe_ipv6_addr *dest_ip, __be16 dest_port) -{ - u32 idx, hash = 0; - size_t dev_addr = (size_t)dev; - - for (idx = 0; idx < 4; idx++) { - hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx]; - } - hash = ((u32)dev_addr) ^ hash ^ protocol ^ ntohs(src_port ^ dest_port); - return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK; -} - -/* - * sfe_ipv6_find_connection_match() - * Get the IPv6 flow match info that corresponds to a particular 5-tuple. - * - * On entry we must be holding the lock that protects the hash table. 
- */ -static struct sfe_ipv6_connection_match * -sfe_ipv6_find_connection_match(struct sfe_ipv6 *si, struct net_device *dev, u8 protocol, - struct sfe_ipv6_addr *src_ip, __be16 src_port, - struct sfe_ipv6_addr *dest_ip, __be16 dest_port) -{ - struct sfe_ipv6_connection_match *cm; - struct sfe_ipv6_connection_match *head; - unsigned int conn_match_idx; - - conn_match_idx = sfe_ipv6_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port); - cm = si->conn_match_hash[conn_match_idx]; - - /* - * If we don't have anything in this chain then bail. - */ - if (unlikely(!cm)) { - return NULL; - } - - /* - * Hopefully the first entry is the one we want. - */ - if ((cm->match_src_port == src_port) - && (cm->match_dest_port == dest_port) - && (sfe_ipv6_addr_equal(cm->match_src_ip, src_ip)) - && (sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip)) - && (cm->match_protocol == protocol) - && (cm->match_dev == dev)) { - si->connection_match_hash_hits++; - return cm; - } - - /* - * Unfortunately we didn't find it at head, so we search it in chain and - * move matching entry to the top of the hash chain. We presume that this - * will be reused again very quickly. - */ - head = cm; - do { - cm = cm->next; - } while (cm && (cm->match_src_port != src_port - || cm->match_dest_port != dest_port - || !sfe_ipv6_addr_equal(cm->match_src_ip, src_ip) - || !sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip) - || cm->match_protocol != protocol - || cm->match_dev != dev)); - - /* - * Not found then we're done. - */ - if (unlikely(!cm)) { - return NULL; - } - - /* - * We found a match so move it. - */ - if (cm->next) { - cm->next->prev = cm->prev; - } - cm->prev->next = cm->next; - cm->prev = NULL; - cm->next = head; - head->prev = cm; - si->conn_match_hash[conn_match_idx] = cm; - si->connection_match_hash_reorders++; - - return cm; -} - -/* - * sfe_ipv6_connection_match_update_summary_stats() - * Update the summary stats for a connection match entry. 
- */ -static inline void sfe_ipv6_connection_match_update_summary_stats(struct sfe_ipv6_connection_match *cm) -{ - cm->rx_packet_count64 += cm->rx_packet_count; - cm->rx_packet_count = 0; - cm->rx_byte_count64 += cm->rx_byte_count; - cm->rx_byte_count = 0; -} - -/* - * sfe_ipv6_connection_match_compute_translations() - * Compute port and address translations for a connection match entry. - */ -static void sfe_ipv6_connection_match_compute_translations(struct sfe_ipv6_connection_match *cm) -{ - u32 diff[9]; - u32 *idx_32; - u16 *idx_16; - - /* - * Before we insert the entry look to see if this is tagged as doing address - * translations. If it is then work out the adjustment that we need to apply - * to the transport checksum. - */ - if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC) { - u32 adj = 0; - u32 carry = 0; - - /* - * Precompute an incremental checksum adjustment so we can - * edit packets in this stream very quickly. The algorithm is from RFC1624. - */ - idx_32 = diff; - *(idx_32++) = cm->match_src_ip->addr[0]; - *(idx_32++) = cm->match_src_ip->addr[1]; - *(idx_32++) = cm->match_src_ip->addr[2]; - *(idx_32++) = cm->match_src_ip->addr[3]; - - idx_16 = (u16 *)idx_32; - *(idx_16++) = cm->match_src_port; - *(idx_16++) = ~cm->xlate_src_port; - idx_32 = (u32 *)idx_16; - - *(idx_32++) = ~cm->xlate_src_ip->addr[0]; - *(idx_32++) = ~cm->xlate_src_ip->addr[1]; - *(idx_32++) = ~cm->xlate_src_ip->addr[2]; - *(idx_32++) = ~cm->xlate_src_ip->addr[3]; - - /* - * When we compute this fold it down to a 16-bit offset - * as that way we can avoid having to do a double - * folding of the twos-complement result because the - * addition of 2 16-bit values cannot cause a double - * wrap-around! 
- */ - for (idx_32 = diff; idx_32 < diff + 9; idx_32++) { - u32 w = *idx_32; - adj += carry; - adj += w; - carry = (w > adj); - } - adj += carry; - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_src_csum_adjustment = (u16)adj; - } - - if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST) { - u32 adj = 0; - u32 carry = 0; - - /* - * Precompute an incremental checksum adjustment so we can - * edit packets in this stream very quickly. The algorithm is from RFC1624. - */ - idx_32 = diff; - *(idx_32++) = cm->match_dest_ip->addr[0]; - *(idx_32++) = cm->match_dest_ip->addr[1]; - *(idx_32++) = cm->match_dest_ip->addr[2]; - *(idx_32++) = cm->match_dest_ip->addr[3]; - - idx_16 = (u16 *)idx_32; - *(idx_16++) = cm->match_dest_port; - *(idx_16++) = ~cm->xlate_dest_port; - idx_32 = (u32 *)idx_16; - - *(idx_32++) = ~cm->xlate_dest_ip->addr[0]; - *(idx_32++) = ~cm->xlate_dest_ip->addr[1]; - *(idx_32++) = ~cm->xlate_dest_ip->addr[2]; - *(idx_32++) = ~cm->xlate_dest_ip->addr[3]; - - /* - * When we compute this fold it down to a 16-bit offset - * as that way we can avoid having to do a double - * folding of the twos-complement result because the - * addition of 2 16-bit values cannot cause a double - * wrap-around! - */ - for (idx_32 = diff; idx_32 < diff + 9; idx_32++) { - u32 w = *idx_32; - adj += carry; - adj += w; - carry = (w > adj); - } - adj += carry; - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_dest_csum_adjustment = (u16)adj; - } -} - -/* - * sfe_ipv6_update_summary_stats() - * Update the summary stats. 
- */ -static void sfe_ipv6_update_summary_stats(struct sfe_ipv6 *si) -{ - int i; - - si->connection_create_requests64 += si->connection_create_requests; - si->connection_create_requests = 0; - si->connection_create_collisions64 += si->connection_create_collisions; - si->connection_create_collisions = 0; - si->connection_destroy_requests64 += si->connection_destroy_requests; - si->connection_destroy_requests = 0; - si->connection_destroy_misses64 += si->connection_destroy_misses; - si->connection_destroy_misses = 0; - si->connection_match_hash_hits64 += si->connection_match_hash_hits; - si->connection_match_hash_hits = 0; - si->connection_match_hash_reorders64 += si->connection_match_hash_reorders; - si->connection_match_hash_reorders = 0; - si->connection_flushes64 += si->connection_flushes; - si->connection_flushes = 0; - si->packets_forwarded64 += si->packets_forwarded; - si->packets_forwarded = 0; - si->packets_not_forwarded64 += si->packets_not_forwarded; - si->packets_not_forwarded = 0; - - for (i = 0; i < SFE_IPV6_EXCEPTION_EVENT_LAST; i++) { - si->exception_events64[i] += si->exception_events[i]; - si->exception_events[i] = 0; - } -} - -/* - * sfe_ipv6_insert_connection_match() - * Insert a connection match into the hash. - * - * On entry we must be holding the lock that protects the hash table. 
- */ -static inline void sfe_ipv6_insert_connection_match(struct sfe_ipv6 *si, - struct sfe_ipv6_connection_match *cm) -{ - struct sfe_ipv6_connection_match **hash_head; - struct sfe_ipv6_connection_match *prev_head; - unsigned int conn_match_idx - = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol, - cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port); - - hash_head = &si->conn_match_hash[conn_match_idx]; - prev_head = *hash_head; - cm->prev = NULL; - if (prev_head) { - prev_head->prev = cm; - } - - cm->next = prev_head; - *hash_head = cm; - -#ifdef CONFIG_NF_FLOW_COOKIE - if (!si->flow_cookie_enable || !(cm->flags & (SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC | SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST))) - return; - - /* - * Configure hardware to put a flow cookie in packet of this flow, - * then we can accelerate the lookup process when we received this packet. - */ - for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { - struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; - - if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) { - sfe_ipv6_flow_cookie_set_func_t func; - - rcu_read_lock(); - func = rcu_dereference(si->flow_cookie_set_func); - if (func) { - if (!func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port, - cm->match_dest_ip->addr, cm->match_dest_port, conn_match_idx)) { - entry->match = cm; - cm->flow_cookie = conn_match_idx; - } else { - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL]++; - } - } - rcu_read_unlock(); - - break; - } - } -#endif -} - -/* - * sfe_ipv6_remove_connection_match() - * Remove a connection match object from the hash. - * - * On entry we must be holding the lock that protects the hash table. 
- */ -static inline void sfe_ipv6_remove_connection_match(struct sfe_ipv6 *si, struct sfe_ipv6_connection_match *cm) -{ -#ifdef CONFIG_NF_FLOW_COOKIE - if (si->flow_cookie_enable) { - /* - * Tell hardware that we no longer need a flow cookie in packet of this flow - */ - unsigned int conn_match_idx; - - for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { - struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; - - if (cm == entry->match) { - sfe_ipv6_flow_cookie_set_func_t func; - - rcu_read_lock(); - func = rcu_dereference(si->flow_cookie_set_func); - if (func) { - func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port, - cm->match_dest_ip->addr, cm->match_dest_port, 0); - } - rcu_read_unlock(); - - cm->flow_cookie = 0; - entry->match = NULL; - entry->last_clean_time = jiffies; - break; - } - } - } -#endif - - /* - * Unlink the connection match entry from the hash. - */ - if (cm->prev) { - cm->prev->next = cm->next; - } else { - unsigned int conn_match_idx - = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol, - cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port); - si->conn_match_hash[conn_match_idx] = cm->next; - } - - if (cm->next) { - cm->next->prev = cm->prev; - } - - /* - * If the connection match entry is in the active list remove it. - */ - if (cm->active) { - if (likely(cm->active_prev)) { - cm->active_prev->active_next = cm->active_next; - } else { - si->active_head = cm->active_next; - } - - if (likely(cm->active_next)) { - cm->active_next->active_prev = cm->active_prev; - } else { - si->active_tail = cm->active_prev; - } - } -} - -/* - * sfe_ipv6_get_connection_hash() - * Generate the hash used in connection lookups. 
- */ -static inline unsigned int sfe_ipv6_get_connection_hash(u8 protocol, struct sfe_ipv6_addr *src_ip, __be16 src_port, - struct sfe_ipv6_addr *dest_ip, __be16 dest_port) -{ - u32 idx, hash = 0; - - for (idx = 0; idx < 4; idx++) { - hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx]; - } - hash = hash ^ protocol ^ ntohs(src_port ^ dest_port); - return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK; -} - -/* - * sfe_ipv6_find_connection() - * Get the IPv6 connection info that corresponds to a particular 5-tuple. - * - * On entry we must be holding the lock that protects the hash table. - */ -static inline struct sfe_ipv6_connection *sfe_ipv6_find_connection(struct sfe_ipv6 *si, u32 protocol, - struct sfe_ipv6_addr *src_ip, __be16 src_port, - struct sfe_ipv6_addr *dest_ip, __be16 dest_port) -{ - struct sfe_ipv6_connection *c; - unsigned int conn_idx = sfe_ipv6_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port); - c = si->conn_hash[conn_idx]; - - /* - * If we don't have anything in this chain then bale. - */ - if (unlikely(!c)) { - return NULL; - } - - /* - * Hopefully the first entry is the one we want. - */ - if ((c->src_port == src_port) - && (c->dest_port == dest_port) - && (sfe_ipv6_addr_equal(c->src_ip, src_ip)) - && (sfe_ipv6_addr_equal(c->dest_ip, dest_ip)) - && (c->protocol == protocol)) { - return c; - } - - /* - * Unfortunately we didn't find it at head, so we search it in chain. 
- */ - do { - c = c->next; - } while (c && (c->src_port != src_port - || c->dest_port != dest_port - || !sfe_ipv6_addr_equal(c->src_ip, src_ip) - || !sfe_ipv6_addr_equal(c->dest_ip, dest_ip) - || c->protocol != protocol)); - - /* - * Will need connection entry for next create/destroy metadata, - * So no need to re-order entry for these requests - */ - return c; -} - -/* - * sfe_ipv6_mark_rule() - * Updates the mark for a current offloaded connection - * - * Will take hash lock upon entry - */ -void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark) -{ - struct sfe_ipv6 *si = &__si6; - struct sfe_ipv6_connection *c; - - spin_lock_bh(&si->lock); - c = sfe_ipv6_find_connection(si, mark->protocol, - mark->src_ip.ip6, mark->src_port, - mark->dest_ip.ip6, mark->dest_port); - if (c) { - WARN_ON((0 != c->mark) && (0 == mark->mark)); - c->mark = mark->mark; - } - spin_unlock_bh(&si->lock); - - if (c) { - DEBUG_TRACE("Matching connection found for mark, " - "setting from %08x to %08x\n", - c->mark, mark->mark); - } -} - -/* - * sfe_ipv6_insert_connection() - * Insert a connection into the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static void sfe_ipv6_insert_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c) -{ - struct sfe_ipv6_connection **hash_head; - struct sfe_ipv6_connection *prev_head; - unsigned int conn_idx; - - /* - * Insert entry into the connection hash. - */ - conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port, - c->dest_ip, c->dest_port); - hash_head = &si->conn_hash[conn_idx]; - prev_head = *hash_head; - c->prev = NULL; - if (prev_head) { - prev_head->prev = c; - } - - c->next = prev_head; - *hash_head = c; - - /* - * Insert entry into the "all connections" list. 
- */ - if (si->all_connections_tail) { - c->all_connections_prev = si->all_connections_tail; - si->all_connections_tail->all_connections_next = c; - } else { - c->all_connections_prev = NULL; - si->all_connections_head = c; - } - - si->all_connections_tail = c; - c->all_connections_next = NULL; - si->num_connections++; - - /* - * Insert the connection match objects too. - */ - sfe_ipv6_insert_connection_match(si, c->original_match); - sfe_ipv6_insert_connection_match(si, c->reply_match); -} - -/* - * sfe_ipv6_remove_connection() - * Remove a sfe_ipv6_connection object from the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static void sfe_ipv6_remove_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c) -{ - /* - * Remove the connection match objects. - */ - sfe_ipv6_remove_connection_match(si, c->reply_match); - sfe_ipv6_remove_connection_match(si, c->original_match); - - /* - * Unlink the connection. - */ - if (c->prev) { - c->prev->next = c->next; - } else { - unsigned int conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port, - c->dest_ip, c->dest_port); - si->conn_hash[conn_idx] = c->next; - } - - if (c->next) { - c->next->prev = c->prev; - } - - /* - * Unlink connection from all_connections list - */ - if (c->all_connections_prev) { - c->all_connections_prev->all_connections_next = c->all_connections_next; - } else { - si->all_connections_head = c->all_connections_next; - } - - if (c->all_connections_next) { - c->all_connections_next->all_connections_prev = c->all_connections_prev; - } else { - si->all_connections_tail = c->all_connections_prev; - } - - si->num_connections--; -} - -/* - * sfe_ipv6_gen_sync_connection() - * Sync a connection. - * - * On entry to this function we expect that the lock for the connection is either - * already held or isn't required. 
- */ -static void sfe_ipv6_gen_sync_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c, - struct sfe_connection_sync *sis, sfe_sync_reason_t reason, - u64 now_jiffies) -{ - struct sfe_ipv6_connection_match *original_cm; - struct sfe_ipv6_connection_match *reply_cm; - - /* - * Fill in the update message. - */ - sis->is_v6 = 1; - sis->protocol = c->protocol; - sis->src_ip.ip6[0] = c->src_ip[0]; - sis->src_ip_xlate.ip6[0] = c->src_ip_xlate[0]; - sis->dest_ip.ip6[0] = c->dest_ip[0]; - sis->dest_ip_xlate.ip6[0] = c->dest_ip_xlate[0]; - sis->src_port = c->src_port; - sis->src_port_xlate = c->src_port_xlate; - sis->dest_port = c->dest_port; - sis->dest_port_xlate = c->dest_port_xlate; - - original_cm = c->original_match; - reply_cm = c->reply_match; - sis->src_td_max_window = original_cm->protocol_state.tcp.max_win; - sis->src_td_end = original_cm->protocol_state.tcp.end; - sis->src_td_max_end = original_cm->protocol_state.tcp.max_end; - sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win; - sis->dest_td_end = reply_cm->protocol_state.tcp.end; - sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end; - - sis->src_new_packet_count = original_cm->rx_packet_count; - sis->src_new_byte_count = original_cm->rx_byte_count; - sis->dest_new_packet_count = reply_cm->rx_packet_count; - sis->dest_new_byte_count = reply_cm->rx_byte_count; - - sfe_ipv6_connection_match_update_summary_stats(original_cm); - sfe_ipv6_connection_match_update_summary_stats(reply_cm); - - sis->src_dev = original_cm->match_dev; - sis->src_packet_count = original_cm->rx_packet_count64; - sis->src_byte_count = original_cm->rx_byte_count64; - - sis->dest_dev = reply_cm->match_dev; - sis->dest_packet_count = reply_cm->rx_packet_count64; - sis->dest_byte_count = reply_cm->rx_byte_count64; - - sis->reason = reason; - - /* - * Get the time increment since our last sync. 
- */ - sis->delta_jiffies = now_jiffies - c->last_sync_jiffies; - c->last_sync_jiffies = now_jiffies; -} - -/* - * sfe_ipv6_flush_connection() - * Flush a connection and free all associated resources. - * - * We need to be called with bottom halves disabled locally as we need to acquire - * the connection hash lock and release it again. In general we're actually called - * from within a BH and so we're fine, but we're also called when connections are - * torn down. - */ -static void sfe_ipv6_flush_connection(struct sfe_ipv6 *si, - struct sfe_ipv6_connection *c, - sfe_sync_reason_t reason) -{ - struct sfe_connection_sync sis; - u64 now_jiffies; - sfe_sync_rule_callback_t sync_rule_callback; - - rcu_read_lock(); - spin_lock_bh(&si->lock); - si->connection_flushes++; - sync_rule_callback = rcu_dereference(si->sync_rule_callback); - spin_unlock_bh(&si->lock); - - if (sync_rule_callback) { - /* - * Generate a sync message and then sync. - */ - now_jiffies = get_jiffies_64(); - sfe_ipv6_gen_sync_connection(si, c, &sis, reason, now_jiffies); - sync_rule_callback(&sis); - } - - rcu_read_unlock(); - - /* - * Release our hold of the source and dest devices and free the memory - * for our connection objects. - */ - dev_put(c->original_dev); - dev_put(c->reply_dev); - kfree(c->original_match); - kfree(c->reply_match); - kfree(c); -} - -/* - * sfe_ipv6_recv_udp() - * Handle UDP packet receives and forwarding. - */ -static int sfe_ipv6_recv_udp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find) -{ - struct sfe_ipv6_udp_hdr *udph; - struct sfe_ipv6_addr *src_ip; - struct sfe_ipv6_addr *dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv6_connection_match *cm; - struct net_device *xmit_dev; - - /* - * Is our packet too short to contain a valid UDP header? 
- */ - if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_udp_hdr) + ihl))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for UDP header\n"); - return 0; - } - - /* - * Read the IP address and port information. Read the IP header data first - * because we've almost certainly got that in the cache. We may not yet have - * the UDP header cached though so allow more time for any prefetching. - */ - src_ip = &iph->saddr; - dest_ip = &iph->daddr; - - udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl); - src_port = udph->source; - dest_port = udph->dest; - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. - */ -#ifdef CONFIG_NF_FLOW_COOKIE - cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match; - if (unlikely(!cm)) { - cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port); - } -#else - cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port); -#endif - if (unlikely(!cm)) { - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found\n"); - return 0; - } - - /* - * If our packet has beern marked as "flush on find" we can't actually - * forward it in the fast path, but now that we've found an associated - * connection we can flush that out before we process the packet. 
- */ - if (unlikely(flush_on_find)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("flush on find\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - -#ifdef CONFIG_XFRM - /* - * We can't accelerate the flow on this direction, just let it go - * through the slow path. - */ - if (unlikely(!cm->flow_accel)) { - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - return 0; - } -#endif - - /* - * Does our hop_limit allow forwarding? - */ - if (unlikely(iph->hop_limit < 2)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("hop_limit too low\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * If our packet is larger than the MTU of the transmit interface then - * we can't forward it easily. - */ - if (unlikely(len > cm->xmit_dev_mtu)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("larger than mtu\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * From this point on we're good to modify the packet. - */ - - /* - * Check if skb was cloned. If it was, unshare it. Because - * the data area is going to be written in this path and we don't want to - * change the cloned skb's data section. 
- */ - if (unlikely(skb_cloned(skb))) { - DEBUG_TRACE("%px: skb is a cloned skb\n", skb); - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - DEBUG_WARN("Failed to unshare the cloned skb\n"); - return 0; - } - - /* - * Update the iph and udph pointers with the unshared skb's data area. - */ - iph = (struct sfe_ipv6_ip_hdr *)skb->data; - udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl); - } - - /* - * Update DSCP - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { - sfe_ipv6_change_dsfield(iph, cm->dscp); - } - - /* - * Decrement our hop_limit. - */ - iph->hop_limit -= 1; - - /* - * Do we have to perform translations of the source address/port? - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) { - u16 udp_csum; - - iph->saddr = cm->xlate_src_ip[0]; - udph->source = cm->xlate_src_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - udp_csum = udph->check; - if (likely(udp_csum)) { - u32 sum = udp_csum + cm->xlate_src_csum_adjustment; - sum = (sum & 0xffff) + (sum >> 16); - udph->check = (u16)sum; - } - } - - /* - * Do we have to perform translations of the destination address/port? - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) { - u16 udp_csum; - - iph->daddr = cm->xlate_dest_ip[0]; - udph->dest = cm->xlate_dest_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - udp_csum = udph->check; - if (likely(udp_csum)) { - u32 sum = udp_csum + cm->xlate_dest_csum_adjustment; - sum = (sum & 0xffff) + (sum >> 16); - udph->check = (u16)sum; - } - } - - /* - * Update traffic stats. - */ - cm->rx_packet_count++; - cm->rx_byte_count += len; - - /* - * If we're not already on the active list then insert ourselves at the tail - * of the current list. 
- */ - if (unlikely(!cm->active)) { - cm->active = true; - cm->active_prev = si->active_tail; - if (likely(si->active_tail)) { - si->active_tail->active_next = cm; - } else { - si->active_head = cm; - } - si->active_tail = cm; - } - - xmit_dev = cm->xmit_dev; - skb->dev = xmit_dev; - - /* - * Check to see if we need to write a header. - */ - if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { - if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { - dev_hard_header(skb, xmit_dev, ETH_P_IPV6, - cm->xmit_dest_mac, cm->xmit_src_mac, len); - } else { - /* - * For the simple case we write this really fast. - */ - struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN); - eth->h_proto = htons(ETH_P_IPV6); - eth->h_dest[0] = cm->xmit_dest_mac[0]; - eth->h_dest[1] = cm->xmit_dest_mac[1]; - eth->h_dest[2] = cm->xmit_dest_mac[2]; - eth->h_source[0] = cm->xmit_src_mac[0]; - eth->h_source[1] = cm->xmit_src_mac[1]; - eth->h_source[2] = cm->xmit_src_mac[2]; - } - } - - /* - * Update priority of skb. - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { - skb->priority = cm->priority; - } - - /* - * Mark outgoing packet. - */ - skb->mark = cm->connection->mark; - if (skb->mark) { - DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); - } - - si->packets_forwarded++; - spin_unlock_bh(&si->lock); - - /* - * We're going to check for GSO flags when we transmit the packet so - * start fetching the necessary cache line now. - */ - prefetch(skb_shinfo(skb)); - - /* - * Mark that this packet has been fast forwarded. - */ - skb->fast_forwarded = 1; - - /* - * Send the packet on its way. 
- */ - dev_queue_xmit(skb); - - return 1; -} - -/* - * sfe_ipv6_process_tcp_option_sack() - * Parse TCP SACK option and update ack according - */ -static bool sfe_ipv6_process_tcp_option_sack(const struct sfe_ipv6_tcp_hdr *th, const u32 data_offs, - u32 *ack) -{ - u32 length = sizeof(struct sfe_ipv6_tcp_hdr); - u8 *ptr = (u8 *)th + length; - - /* - * Ignore processing if TCP packet has only TIMESTAMP option. - */ - if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1) - && likely(ptr[0] == TCPOPT_NOP) - && likely(ptr[1] == TCPOPT_NOP) - && likely(ptr[2] == TCPOPT_TIMESTAMP) - && likely(ptr[3] == TCPOLEN_TIMESTAMP)) { - return true; - } - - /* - * TCP options. Parse SACK option. - */ - while (length < data_offs) { - u8 size; - u8 kind; - - ptr = (u8 *)th + length; - kind = *ptr; - - /* - * NOP, for padding - * Not in the switch because to fast escape and to not calculate size - */ - if (kind == TCPOPT_NOP) { - length++; - continue; - } - - if (kind == TCPOPT_SACK) { - u32 sack = 0; - u8 re = 1 + 1; - - size = *(ptr + 1); - if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK)) - || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK)) - || (size > (data_offs - length))) { - return false; - } - - re += 4; - while (re < size) { - u32 sack_re; - u8 *sptr = ptr + re; - sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3]; - if (sack_re > sack) { - sack = sack_re; - } - re += TCPOLEN_SACK_PERBLOCK; - } - if (sack > *ack) { - *ack = sack; - } - length += size; - continue; - } - if (kind == TCPOPT_EOL) { - return true; - } - size = *(ptr + 1); - if (size < 2) { - return false; - } - length += size; - } - - return true; -} - -/* - * sfe_ipv6_recv_tcp() - * Handle TCP packet receives and forwarding. 
- */ -static int sfe_ipv6_recv_tcp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find) -{ - struct sfe_ipv6_tcp_hdr *tcph; - struct sfe_ipv6_addr *src_ip; - struct sfe_ipv6_addr *dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv6_connection_match *cm; - struct sfe_ipv6_connection_match *counter_cm; - u32 flags; - struct net_device *xmit_dev; - - /* - * Is our packet too short to contain a valid UDP header? - */ - if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_tcp_hdr) + ihl))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for TCP header\n"); - return 0; - } - - /* - * Read the IP address and port information. Read the IP header data first - * because we've almost certainly got that in the cache. We may not yet have - * the TCP header cached though so allow more time for any prefetching. - */ - src_ip = &iph->saddr; - dest_ip = &iph->daddr; - - tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl); - src_port = tcph->source; - dest_port = tcph->dest; - flags = tcp_flag_word(tcph); - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. - */ -#ifdef CONFIG_NF_FLOW_COOKIE - cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match; - if (unlikely(!cm)) { - cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port); - } -#else - cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port); -#endif - if (unlikely(!cm)) { - /* - * We didn't get a connection but as TCP is connection-oriented that - * may be because this is a non-fast connection (not running established). - * For diagnostic purposes we differentiate this here. 
- */ - if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) { - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found - fast flags\n"); - return 0; - } - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found - slow flags: 0x%x\n", - flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); - return 0; - } - - /* - * If our packet has beern marked as "flush on find" we can't actually - * forward it in the fast path, but now that we've found an associated - * connection we can flush that out before we process the packet. - */ - if (unlikely(flush_on_find)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("flush on find\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - -#ifdef CONFIG_XFRM - /* - * We can't accelerate the flow on this direction, just let it go - * through the slow path. - */ - if (unlikely(!cm->flow_accel)) { - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - return 0; - } -#endif - - /* - * Does our hop_limit allow forwarding? - */ - if (unlikely(iph->hop_limit < 2)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("hop_limit too low\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * If our packet is larger than the MTU of the transmit interface then - * we can't forward it easily. 
- */ - if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("larger than mtu\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN - * set is not a fast path packet. - */ - if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP flags: 0x%x are not fast\n", - flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - counter_cm = cm->counter_match; - - /* - * Are we doing sequence number checking? - */ - if (likely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) { - u32 seq; - u32 ack; - u32 sack; - u32 data_offs; - u32 end; - u32 left_edge; - u32 scaled_win; - u32 max_end; - - /* - * Is our sequence fully past the right hand edge of the window? - */ - seq = ntohl(tcph->seq); - if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("seq: %u exceeds right edge: %u\n", - seq, cm->protocol_state.tcp.max_end + 1); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Check that our TCP data offset isn't too short. 
- */ - data_offs = tcph->doff << 2; - if (unlikely(data_offs < sizeof(struct sfe_ipv6_tcp_hdr))) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Update ACK according to any SACK option. - */ - ack = ntohl(tcph->ack_seq); - sack = ack; - if (unlikely(!sfe_ipv6_process_tcp_option_sack(tcph, data_offs, &sack))) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP option SACK size is wrong\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Check that our TCP data offset isn't past the end of the packet. - */ - data_offs += sizeof(struct sfe_ipv6_ip_hdr); - if (unlikely(len < data_offs)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n", - data_offs, len); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - end = seq + len - data_offs; - - /* - * Is our sequence fully before the left hand edge of the window? 
- */ - if (unlikely((s32)(end - (cm->protocol_state.tcp.end - - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("seq: %u before left edge: %u\n", - end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Are we acking data that is to the right of what has been sent? - */ - if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ack: %u exceeds right edge: %u\n", - sack, counter_cm->protocol_state.tcp.end + 1); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Is our ack too far before the left hand edge of the window? - */ - left_edge = counter_cm->protocol_state.tcp.end - - cm->protocol_state.tcp.max_win - - SFE_IPV6_TCP_MAX_ACK_WINDOW - - 1; - if (unlikely((s32)(sack - left_edge) < 0)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Have we just seen the largest window size yet for this connection? If yes - * then we need to record the new value. 
- */ - scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale; - scaled_win += (sack - ack); - if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) { - cm->protocol_state.tcp.max_win = scaled_win; - } - - /* - * If our sequence and/or ack numbers have advanced then record the new state. - */ - if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) { - cm->protocol_state.tcp.end = end; - } - - max_end = sack + scaled_win; - if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) { - counter_cm->protocol_state.tcp.max_end = max_end; - } - } - - /* - * From this point on we're good to modify the packet. - */ - - /* - * Check if skb was cloned. If it was, unshare it. Because - * the data area is going to be written in this path and we don't want to - * change the cloned skb's data section. - */ - if (unlikely(skb_cloned(skb))) { - DEBUG_TRACE("%px: skb is a cloned skb\n", skb); - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - DEBUG_WARN("Failed to unshare the cloned skb\n"); - return 0; - } - - /* - * Update the iph and tcph pointers with the unshared skb's data area. - */ - iph = (struct sfe_ipv6_ip_hdr *)skb->data; - tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl); - } - - /* - * Update DSCP - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { - sfe_ipv6_change_dsfield(iph, cm->dscp); - } - - /* - * Decrement our hop_limit. - */ - iph->hop_limit -= 1; - - /* - * Do we have to perform translations of the source address/port? - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) { - u16 tcp_csum; - u32 sum; - - iph->saddr = cm->xlate_src_ip[0]; - tcph->source = cm->xlate_src_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. 
- */ - tcp_csum = tcph->check; - sum = tcp_csum + cm->xlate_src_csum_adjustment; - sum = (sum & 0xffff) + (sum >> 16); - tcph->check = (u16)sum; - } - - /* - * Do we have to perform translations of the destination address/port? - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) { - u16 tcp_csum; - u32 sum; - - iph->daddr = cm->xlate_dest_ip[0]; - tcph->dest = cm->xlate_dest_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - tcp_csum = tcph->check; - sum = tcp_csum + cm->xlate_dest_csum_adjustment; - sum = (sum & 0xffff) + (sum >> 16); - tcph->check = (u16)sum; - } - - /* - * Update traffic stats. - */ - cm->rx_packet_count++; - cm->rx_byte_count += len; - - /* - * If we're not already on the active list then insert ourselves at the tail - * of the current list. - */ - if (unlikely(!cm->active)) { - cm->active = true; - cm->active_prev = si->active_tail; - if (likely(si->active_tail)) { - si->active_tail->active_next = cm; - } else { - si->active_head = cm; - } - si->active_tail = cm; - } - - xmit_dev = cm->xmit_dev; - skb->dev = xmit_dev; - - /* - * Check to see if we need to write a header. - */ - if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { - if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { - dev_hard_header(skb, xmit_dev, ETH_P_IPV6, - cm->xmit_dest_mac, cm->xmit_src_mac, len); - } else { - /* - * For the simple case we write this really fast. - */ - struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN); - eth->h_proto = htons(ETH_P_IPV6); - eth->h_dest[0] = cm->xmit_dest_mac[0]; - eth->h_dest[1] = cm->xmit_dest_mac[1]; - eth->h_dest[2] = cm->xmit_dest_mac[2]; - eth->h_source[0] = cm->xmit_src_mac[0]; - eth->h_source[1] = cm->xmit_src_mac[1]; - eth->h_source[2] = cm->xmit_src_mac[2]; - } - } - - /* - * Update priority of skb. 
- */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { - skb->priority = cm->priority; - } - - /* - * Mark outgoing packet - */ - skb->mark = cm->connection->mark; - if (skb->mark) { - DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); - } - - si->packets_forwarded++; - spin_unlock_bh(&si->lock); - - /* - * We're going to check for GSO flags when we transmit the packet so - * start fetching the necessary cache line now. - */ - prefetch(skb_shinfo(skb)); - - /* - * Mark that this packet has been fast forwarded. - */ - skb->fast_forwarded = 1; - - /* - * Send the packet on its way. - */ - dev_queue_xmit(skb); - - return 1; -} - -/* - * sfe_ipv6_recv_icmp() - * Handle ICMP packet receives. - * - * ICMP packets aren't handled as a "fast path" and always have us process them - * through the default Linux stack. What we do need to do is look for any errors - * about connections we are handling in the fast path. If we find any such - * connections then we want to flush their state so that the ICMP error path - * within Linux has all of the correct state should it need it. - */ -static int sfe_ipv6_recv_icmp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl) -{ - struct icmp6hdr *icmph; - struct sfe_ipv6_ip_hdr *icmp_iph; - struct sfe_ipv6_udp_hdr *icmp_udph; - struct sfe_ipv6_tcp_hdr *icmp_tcph; - struct sfe_ipv6_addr *src_ip; - struct sfe_ipv6_addr *dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv6_connection_match *cm; - struct sfe_ipv6_connection *c; - u8 next_hdr; - - /* - * Is our packet too short to contain a valid ICMP header? 
- */ - len -= ihl; - if (!pskb_may_pull(skb, ihl + sizeof(struct icmp6hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for ICMP header\n"); - return 0; - } - - /* - * We only handle "destination unreachable" and "time exceeded" messages. - */ - icmph = (struct icmp6hdr *)(skb->data + ihl); - if ((icmph->icmp6_type != ICMPV6_DEST_UNREACH) - && (icmph->icmp6_type != ICMPV6_TIME_EXCEED)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->icmp6_type); - return 0; - } - - /* - * Do we have the full embedded IP header? - * We should have 8 bytes of next L4 header - that's enough to identify - * the connection. - */ - len -= sizeof(struct icmp6hdr); - ihl += sizeof(struct icmp6hdr); - if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ip_hdr) + sizeof(struct sfe_ipv6_ext_hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Embedded IP header not complete\n"); - return 0; - } - - /* - * Is our embedded IP version wrong? 
- */ - icmp_iph = (struct sfe_ipv6_ip_hdr *)(icmph + 1); - if (unlikely(icmp_iph->version != 6)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("IP version: %u\n", icmp_iph->version); - return 0; - } - - len -= sizeof(struct sfe_ipv6_ip_hdr); - ihl += sizeof(struct sfe_ipv6_ip_hdr); - next_hdr = icmp_iph->nexthdr; - while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) { - struct sfe_ipv6_ext_hdr *ext_hdr; - unsigned int ext_hdr_len; - - ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl); - if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) { - struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr; - unsigned int frag_off = ntohs(frag_hdr->frag_off); - - if (frag_off & SFE_IPV6_FRAG_OFFSET) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("non-initial fragment\n"); - return 0; - } - } - - ext_hdr_len = ext_hdr->hdr_len; - ext_hdr_len <<= 3; - ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr); - len -= ext_hdr_len; - ihl += ext_hdr_len; - /* - * We should have 8 bytes of next header - that's enough to identify - * the connection. - */ - if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("extension header %d not completed\n", next_hdr); - return 0; - } - - next_hdr = ext_hdr->next_hdr; - } - - /* - * Handle the embedded transport layer header. 
- */ - switch (next_hdr) { - case IPPROTO_UDP: - icmp_udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl); - src_port = icmp_udph->source; - dest_port = icmp_udph->dest; - break; - - case IPPROTO_TCP: - icmp_tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl); - src_port = icmp_tcph->source; - dest_port = icmp_tcph->dest; - break; - - default: - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", next_hdr); - return 0; - } - - src_ip = &icmp_iph->saddr; - dest_ip = &icmp_iph->daddr; - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. Note that we reverse the source and destination - * here because our embedded message contains a packet that was sent in the - * opposite direction to the one in which we just received it. It will have - * been sent on the interface from which we received it though so that's still - * ok to use. - */ - cm = sfe_ipv6_find_connection_match(si, dev, icmp_iph->nexthdr, dest_ip, dest_port, src_ip, src_port); - if (unlikely(!cm)) { - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found\n"); - return 0; - } - - /* - * We found a connection so now remove it from the connection list and flush - * its state. - */ - c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; -} - -/* - * sfe_ipv6_recv() - * Handle packet receives and forwaring. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. 
- */ -int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb) -{ - struct sfe_ipv6 *si = &__si6; - unsigned int len; - unsigned int payload_len; - unsigned int ihl = sizeof(struct sfe_ipv6_ip_hdr); - bool flush_on_find = false; - struct sfe_ipv6_ip_hdr *iph; - u8 next_hdr; - - /* - * Check that we have space for an IP header and an uplayer header here. - */ - len = skb->len; - if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("len: %u is too short\n", len); - return 0; - } - - /* - * Is our IP version wrong? - */ - iph = (struct sfe_ipv6_ip_hdr *)skb->data; - if (unlikely(iph->version != 6)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_V6]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("IP version: %u\n", iph->version); - return 0; - } - - /* - * Does our datagram fit inside the skb? 
- */ - payload_len = ntohs(iph->payload_len); - if (unlikely(payload_len > (len - ihl))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("payload_len: %u, exceeds len: %u\n", payload_len, (len - (unsigned int)sizeof(struct sfe_ipv6_ip_hdr))); - return 0; - } - - next_hdr = iph->nexthdr; - while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) { - struct sfe_ipv6_ext_hdr *ext_hdr; - unsigned int ext_hdr_len; - - ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl); - if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) { - struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr; - unsigned int frag_off = ntohs(frag_hdr->frag_off); - - if (frag_off & SFE_IPV6_FRAG_OFFSET) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("non-initial fragment\n"); - return 0; - } - } - - ext_hdr_len = ext_hdr->hdr_len; - ext_hdr_len <<= 3; - ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr); - ihl += ext_hdr_len; - if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("extension header %d not completed\n", next_hdr); - return 0; - } - - flush_on_find = true; - next_hdr = ext_hdr->next_hdr; - } - - if (IPPROTO_UDP == next_hdr) { - return sfe_ipv6_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find); - } - - if (IPPROTO_TCP == next_hdr) { - return sfe_ipv6_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find); - } - - if (IPPROTO_ICMPV6 == next_hdr) { - return sfe_ipv6_recv_icmp(si, skb, dev, len, iph, ihl); - } - - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++; - si->packets_not_forwarded++; - 
spin_unlock_bh(&si->lock); - - DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", next_hdr); - return 0; -} - -/* - * sfe_ipv6_update_tcp_state() - * update TCP window variables. - */ -static void -sfe_ipv6_update_tcp_state(struct sfe_ipv6_connection *c, - struct sfe_connection_create *sic) -{ - struct sfe_ipv6_connection_match *orig_cm; - struct sfe_ipv6_connection_match *repl_cm; - struct sfe_ipv6_tcp_connection_match *orig_tcp; - struct sfe_ipv6_tcp_connection_match *repl_tcp; - - orig_cm = c->original_match; - repl_cm = c->reply_match; - orig_tcp = &orig_cm->protocol_state.tcp; - repl_tcp = &repl_cm->protocol_state.tcp; - - /* update orig */ - if (orig_tcp->max_win < sic->src_td_max_window) { - orig_tcp->max_win = sic->src_td_max_window; - } - if ((s32)(orig_tcp->end - sic->src_td_end) < 0) { - orig_tcp->end = sic->src_td_end; - } - if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) { - orig_tcp->max_end = sic->src_td_max_end; - } - - /* update reply */ - if (repl_tcp->max_win < sic->dest_td_max_window) { - repl_tcp->max_win = sic->dest_td_max_window; - } - if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) { - repl_tcp->end = sic->dest_td_end; - } - if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) { - repl_tcp->max_end = sic->dest_td_max_end; - } - - /* update match flags */ - orig_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - repl_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { - orig_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - repl_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - } -} - -/* - * sfe_ipv6_update_protocol_state() - * update protocol specified state machine. 
- */ -static void -sfe_ipv6_update_protocol_state(struct sfe_ipv6_connection *c, - struct sfe_connection_create *sic) -{ - switch (sic->protocol) { - case IPPROTO_TCP: - sfe_ipv6_update_tcp_state(c, sic); - break; - } -} - -/* - * sfe_ipv6_update_rule() - * update forwarding rule after rule is created. - */ -void sfe_ipv6_update_rule(struct sfe_connection_create *sic) -{ - struct sfe_ipv6_connection *c; - struct sfe_ipv6 *si = &__si6; - - spin_lock_bh(&si->lock); - - c = sfe_ipv6_find_connection(si, - sic->protocol, - sic->src_ip.ip6, - sic->src_port, - sic->dest_ip.ip6, - sic->dest_port); - if (c != NULL) { - sfe_ipv6_update_protocol_state(c, sic); - } - - spin_unlock_bh(&si->lock); -} - -/* - * sfe_ipv6_create_rule() - * Create a forwarding rule. - */ -int sfe_ipv6_create_rule(struct sfe_connection_create *sic) -{ - struct sfe_ipv6 *si = &__si6; - struct sfe_ipv6_connection *c; - struct sfe_ipv6_connection_match *original_cm; - struct sfe_ipv6_connection_match *reply_cm; - struct net_device *dest_dev; - struct net_device *src_dev; - - dest_dev = sic->dest_dev; - src_dev = sic->src_dev; - - if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) || - (src_dev->reg_state != NETREG_REGISTERED))) { - return -EINVAL; - } - - spin_lock_bh(&si->lock); - si->connection_create_requests++; - - /* - * Check to see if there is already a flow that matches the rule we're - * trying to create. If there is then we can't create a new one. - */ - c = sfe_ipv6_find_connection(si, - sic->protocol, - sic->src_ip.ip6, - sic->src_port, - sic->dest_ip.ip6, - sic->dest_port); - if (c != NULL) { - si->connection_create_collisions++; - - /* - * If we already have the flow then it's likely that this - * request to create the connection rule contains more - * up-to-date information. Check and update accordingly. 
- */ - sfe_ipv6_update_protocol_state(c, sic); - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n" - " s: %s:%pxM:%pI6:%u, d: %s:%pxM:%pI6:%u\n", - sic->mark, sic->protocol, - sic->src_dev->name, sic->src_mac, sic->src_ip.ip6, ntohs(sic->src_port), - sic->dest_dev->name, sic->dest_mac, sic->dest_ip.ip6, ntohs(sic->dest_port)); - return -EADDRINUSE; - } - - /* - * Allocate the various connection tracking objects. - */ - c = (struct sfe_ipv6_connection *)kmalloc(sizeof(struct sfe_ipv6_connection), GFP_ATOMIC); - if (unlikely(!c)) { - spin_unlock_bh(&si->lock); - return -ENOMEM; - } - - original_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC); - if (unlikely(!original_cm)) { - spin_unlock_bh(&si->lock); - kfree(c); - return -ENOMEM; - } - - reply_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC); - if (unlikely(!reply_cm)) { - spin_unlock_bh(&si->lock); - kfree(original_cm); - kfree(c); - return -ENOMEM; - } - - /* - * Fill in the "original" direction connection matching object. - * Note that the transmit MAC address is "dest_mac_xlate" because - * we always know both ends of a connection by their translated - * addresses and not their public addresses. 
- */ - original_cm->match_dev = src_dev; - original_cm->match_protocol = sic->protocol; - original_cm->match_src_ip[0] = sic->src_ip.ip6[0]; - original_cm->match_src_port = sic->src_port; - original_cm->match_dest_ip[0] = sic->dest_ip.ip6[0]; - original_cm->match_dest_port = sic->dest_port; - original_cm->xlate_src_ip[0] = sic->src_ip_xlate.ip6[0]; - original_cm->xlate_src_port = sic->src_port_xlate; - original_cm->xlate_dest_ip[0] = sic->dest_ip_xlate.ip6[0]; - original_cm->xlate_dest_port = sic->dest_port_xlate; - original_cm->rx_packet_count = 0; - original_cm->rx_packet_count64 = 0; - original_cm->rx_byte_count = 0; - original_cm->rx_byte_count64 = 0; - original_cm->xmit_dev = dest_dev; - original_cm->xmit_dev_mtu = sic->dest_mtu; - memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN); - memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN); - original_cm->connection = c; - original_cm->counter_match = reply_cm; - original_cm->flags = 0; - if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { - original_cm->priority = sic->src_priority; - original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; - } - if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { - original_cm->dscp = sic->src_dscp << SFE_IPV6_DSCP_SHIFT; - original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK; - } -#ifdef CONFIG_NF_FLOW_COOKIE - original_cm->flow_cookie = 0; -#endif -#ifdef CONFIG_XFRM - original_cm->flow_accel = sic->original_accel; -#endif - original_cm->active_next = NULL; - original_cm->active_prev = NULL; - original_cm->active = false; - - /* - * For PPP links we don't write an L2 header. For everything else we do. - */ - if (!(dest_dev->flags & IFF_POINTOPOINT)) { - original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; - - /* - * If our dev writes Ethernet headers then we can write a really fast - * version. 
- */ - if (dest_dev->header_ops) { - if (dest_dev->header_ops->create == eth_header) { - original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; - } - } - } - - /* - * Fill in the "reply" direction connection matching object. - */ - reply_cm->match_dev = dest_dev; - reply_cm->match_protocol = sic->protocol; - reply_cm->match_src_ip[0] = sic->dest_ip_xlate.ip6[0]; - reply_cm->match_src_port = sic->dest_port_xlate; - reply_cm->match_dest_ip[0] = sic->src_ip_xlate.ip6[0]; - reply_cm->match_dest_port = sic->src_port_xlate; - reply_cm->xlate_src_ip[0] = sic->dest_ip.ip6[0]; - reply_cm->xlate_src_port = sic->dest_port; - reply_cm->xlate_dest_ip[0] = sic->src_ip.ip6[0]; - reply_cm->xlate_dest_port = sic->src_port; - reply_cm->rx_packet_count = 0; - reply_cm->rx_packet_count64 = 0; - reply_cm->rx_byte_count = 0; - reply_cm->rx_byte_count64 = 0; - reply_cm->xmit_dev = src_dev; - reply_cm->xmit_dev_mtu = sic->src_mtu; - memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN); - memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN); - reply_cm->connection = c; - reply_cm->counter_match = original_cm; - reply_cm->flags = 0; - if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { - reply_cm->priority = sic->dest_priority; - reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; - } - if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { - reply_cm->dscp = sic->dest_dscp << SFE_IPV6_DSCP_SHIFT; - reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK; - } -#ifdef CONFIG_NF_FLOW_COOKIE - reply_cm->flow_cookie = 0; -#endif -#ifdef CONFIG_XFRM - reply_cm->flow_accel = sic->reply_accel; -#endif - reply_cm->active_next = NULL; - reply_cm->active_prev = NULL; - reply_cm->active = false; - - /* - * For PPP links we don't write an L2 header. For everything else we do. 
- */ - if (!(src_dev->flags & IFF_POINTOPOINT)) { - reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; - - /* - * If our dev writes Ethernet headers then we can write a really fast - * version. - */ - if (src_dev->header_ops) { - if (src_dev->header_ops->create == eth_header) { - reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; - } - } - } - - - if (!sfe_ipv6_addr_equal(sic->dest_ip.ip6, sic->dest_ip_xlate.ip6) || sic->dest_port != sic->dest_port_xlate) { - original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST; - reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC; - } - - if (!sfe_ipv6_addr_equal(sic->src_ip.ip6, sic->src_ip_xlate.ip6) || sic->src_port != sic->src_port_xlate) { - original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC; - reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST; - } - - c->protocol = sic->protocol; - c->src_ip[0] = sic->src_ip.ip6[0]; - c->src_ip_xlate[0] = sic->src_ip_xlate.ip6[0]; - c->src_port = sic->src_port; - c->src_port_xlate = sic->src_port_xlate; - c->original_dev = src_dev; - c->original_match = original_cm; - c->dest_ip[0] = sic->dest_ip.ip6[0]; - c->dest_ip_xlate[0] = sic->dest_ip_xlate.ip6[0]; - c->dest_port = sic->dest_port; - c->dest_port_xlate = sic->dest_port_xlate; - c->reply_dev = dest_dev; - c->reply_match = reply_cm; - c->mark = sic->mark; - c->debug_read_seq = 0; - c->last_sync_jiffies = get_jiffies_64(); - - /* - * Take hold of our source and dest devices for the duration of the connection. - */ - dev_hold(c->original_dev); - dev_hold(c->reply_dev); - - /* - * Initialize the protocol-specific information that we track. - */ - switch (sic->protocol) { - case IPPROTO_TCP: - original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale; - original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? 
sic->src_td_max_window : 1; - original_cm->protocol_state.tcp.end = sic->src_td_end; - original_cm->protocol_state.tcp.max_end = sic->src_td_max_end; - reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale; - reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1; - reply_cm->protocol_state.tcp.end = sic->dest_td_end; - reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end; - if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { - original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - } - break; - } - - sfe_ipv6_connection_match_compute_translations(original_cm); - sfe_ipv6_connection_match_compute_translations(reply_cm); - sfe_ipv6_insert_connection(si, c); - - spin_unlock_bh(&si->lock); - - /* - * We have everything we need! - */ - DEBUG_INFO("new connection - mark: %08x, p: %d\n" - " s: %s:%pxM(%pxM):%pI6(%pI6):%u(%u)\n" - " d: %s:%pxM(%pxM):%pI6(%pI6):%u(%u)\n", - sic->mark, sic->protocol, - sic->src_dev->name, sic->src_mac, sic->src_mac_xlate, - sic->src_ip.ip6, sic->src_ip_xlate.ip6, ntohs(sic->src_port), ntohs(sic->src_port_xlate), - dest_dev->name, sic->dest_mac, sic->dest_mac_xlate, - sic->dest_ip.ip6, sic->dest_ip_xlate.ip6, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate)); - - return 0; -} - -/* - * sfe_ipv6_destroy_rule() - * Destroy a forwarding rule. - */ -void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid) -{ - struct sfe_ipv6 *si = &__si6; - struct sfe_ipv6_connection *c; - - spin_lock_bh(&si->lock); - si->connection_destroy_requests++; - - /* - * Check to see if we have a flow that matches the rule we're trying - * to destroy. If there isn't then we can't destroy it. 
- */ - c = sfe_ipv6_find_connection(si, sid->protocol, sid->src_ip.ip6, sid->src_port, - sid->dest_ip.ip6, sid->dest_port); - if (!c) { - si->connection_destroy_misses++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("connection does not exist - p: %d, s: %pI6:%u, d: %pI6:%u\n", - sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port), - sid->dest_ip.ip6, ntohs(sid->dest_port)); - return; - } - - /* - * Remove our connection details from the hash tables. - */ - sfe_ipv6_remove_connection(si, c); - spin_unlock_bh(&si->lock); - - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY); - - DEBUG_INFO("connection destroyed - p: %d, s: %pI6:%u, d: %pI6:%u\n", - sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port), - sid->dest_ip.ip6, ntohs(sid->dest_port)); -} - -/* - * sfe_ipv6_register_sync_rule_callback() - * Register a callback for rule synchronization. - */ -void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback) -{ - struct sfe_ipv6 *si = &__si6; - - spin_lock_bh(&si->lock); - rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback); - spin_unlock_bh(&si->lock); -} - -/* - * sfe_ipv6_get_debug_dev() - */ -static ssize_t sfe_ipv6_get_debug_dev(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct sfe_ipv6 *si = &__si6; - ssize_t count; - int num; - - spin_lock_bh(&si->lock); - num = si->debug_dev; - spin_unlock_bh(&si->lock); - - count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num); - return count; -} - -/* - * sfe_ipv6_destroy_all_rules_for_dev() - * Destroy all connections that match a particular device. - * - * If we pass dev as NULL then this destroys all connections. - */ -void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev) -{ - struct sfe_ipv6 *si = &__si6; - struct sfe_ipv6_connection *c; - -another_round: - spin_lock_bh(&si->lock); - - for (c = si->all_connections_head; c; c = c->all_connections_next) { - /* - * Does this connection relate to the device we are destroying? 
- */ - if (!dev - || (dev == c->original_dev) - || (dev == c->reply_dev)) { - break; - } - } - - if (c) { - sfe_ipv6_remove_connection(si, c); - } - - spin_unlock_bh(&si->lock); - - if (c) { - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY); - goto another_round; - } -} - -/* - * sfe_ipv6_periodic_sync() - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) -static void sfe_ipv6_periodic_sync(unsigned long arg) -#else -static void sfe_ipv6_periodic_sync(struct timer_list *tl) -#endif -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) - struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg; -#else - struct sfe_ipv6 *si = from_timer(si, tl, timer); -#endif - u64 now_jiffies; - int quota; - sfe_sync_rule_callback_t sync_rule_callback; - - now_jiffies = get_jiffies_64(); - - rcu_read_lock(); - sync_rule_callback = rcu_dereference(si->sync_rule_callback); - if (!sync_rule_callback) { - rcu_read_unlock(); - goto done; - } - - spin_lock_bh(&si->lock); - sfe_ipv6_update_summary_stats(si); - - /* - * Get an estimate of the number of connections to parse in this sync. - */ - quota = (si->num_connections + 63) / 64; - - /* - * Walk the "active" list and sync the connection state. - */ - while (quota--) { - struct sfe_ipv6_connection_match *cm; - struct sfe_ipv6_connection_match *counter_cm; - struct sfe_ipv6_connection *c; - struct sfe_connection_sync sis; - - cm = si->active_head; - if (!cm) { - break; - } - - /* - * There's a possibility that our counter match is in the active list too. - * If it is then remove it. - */ - counter_cm = cm->counter_match; - if (counter_cm->active) { - counter_cm->active = false; - - /* - * We must have a connection preceding this counter match - * because that's the one that got us to this point, so we don't have - * to worry about removing the head of the list. 
- */ - counter_cm->active_prev->active_next = counter_cm->active_next; - - if (likely(counter_cm->active_next)) { - counter_cm->active_next->active_prev = counter_cm->active_prev; - } else { - si->active_tail = counter_cm->active_prev; - } - - counter_cm->active_next = NULL; - counter_cm->active_prev = NULL; - } - - /* - * Now remove the head of the active scan list. - */ - cm->active = false; - si->active_head = cm->active_next; - if (likely(cm->active_next)) { - cm->active_next->active_prev = NULL; - } else { - si->active_tail = NULL; - } - cm->active_next = NULL; - - /* - * Sync the connection state. - */ - c = cm->connection; - sfe_ipv6_gen_sync_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies); - - /* - * We don't want to be holding the lock when we sync! - */ - spin_unlock_bh(&si->lock); - sync_rule_callback(&sis); - spin_lock_bh(&si->lock); - } - - spin_unlock_bh(&si->lock); - rcu_read_unlock(); - -done: - mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); -} - -/* - * sfe_ipv6_debug_dev_read_start() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - si->debug_read_seq++; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_connections_start() - * Generate part of the XML output. 
- */ -static bool sfe_ipv6_debug_dev_read_connections_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_connections_connection() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - struct sfe_ipv6_connection *c; - struct sfe_ipv6_connection_match *original_cm; - struct sfe_ipv6_connection_match *reply_cm; - int bytes_read; - int protocol; - struct net_device *src_dev; - struct sfe_ipv6_addr src_ip; - struct sfe_ipv6_addr src_ip_xlate; - __be16 src_port; - __be16 src_port_xlate; - u64 src_rx_packets; - u64 src_rx_bytes; - struct net_device *dest_dev; - struct sfe_ipv6_addr dest_ip; - struct sfe_ipv6_addr dest_ip_xlate; - __be16 dest_port; - __be16 dest_port_xlate; - u64 dest_rx_packets; - u64 dest_rx_bytes; - u64 last_sync_jiffies; - u32 mark, src_priority, dest_priority, src_dscp, dest_dscp; -#ifdef CONFIG_NF_FLOW_COOKIE - int src_flow_cookie, dst_flow_cookie; -#endif - - spin_lock_bh(&si->lock); - - for (c = si->all_connections_head; c; c = c->all_connections_next) { - if (c->debug_read_seq < si->debug_read_seq) { - c->debug_read_seq = si->debug_read_seq; - break; - } - } - - /* - * If there were no connections then move to the next state. 
- */ - if (!c) { - spin_unlock_bh(&si->lock); - ws->state++; - return true; - } - - original_cm = c->original_match; - reply_cm = c->reply_match; - - protocol = c->protocol; - src_dev = c->original_dev; - src_ip = c->src_ip[0]; - src_ip_xlate = c->src_ip_xlate[0]; - src_port = c->src_port; - src_port_xlate = c->src_port_xlate; - src_priority = original_cm->priority; - src_dscp = original_cm->dscp >> SFE_IPV6_DSCP_SHIFT; - - sfe_ipv6_connection_match_update_summary_stats(original_cm); - sfe_ipv6_connection_match_update_summary_stats(reply_cm); - - src_rx_packets = original_cm->rx_packet_count64; - src_rx_bytes = original_cm->rx_byte_count64; - dest_dev = c->reply_dev; - dest_ip = c->dest_ip[0]; - dest_ip_xlate = c->dest_ip_xlate[0]; - dest_port = c->dest_port; - dest_port_xlate = c->dest_port_xlate; - dest_priority = reply_cm->priority; - dest_dscp = reply_cm->dscp >> SFE_IPV6_DSCP_SHIFT; - dest_rx_packets = reply_cm->rx_packet_count64; - dest_rx_bytes = reply_cm->rx_byte_count64; - last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies; - mark = c->mark; -#ifdef CONFIG_NF_FLOW_COOKIE - src_flow_cookie = original_cm->flow_cookie; - dst_flow_cookie = reply_cm->flow_cookie; -#endif - spin_unlock_bh(&si->lock); - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t\n", - protocol, - src_dev->name, - &src_ip, &src_ip_xlate, - ntohs(src_port), ntohs(src_port_xlate), - src_priority, src_dscp, - src_rx_packets, src_rx_bytes, - dest_dev->name, - &dest_ip, &dest_ip_xlate, - ntohs(dest_port), ntohs(dest_port_xlate), - dest_priority, dest_dscp, - dest_rx_packets, dest_rx_bytes, -#ifdef CONFIG_NF_FLOW_COOKIE - src_flow_cookie, dst_flow_cookie, -#endif - last_sync_jiffies, mark); - - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - return true; -} - -/* - * sfe_ipv6_debug_dev_read_connections_end() - * Generate part of the XML output. 
- */ -static bool sfe_ipv6_debug_dev_read_connections_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_exceptions_start() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_exceptions_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_exceptions_exception() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_exceptions_exception(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - u64 ct; - - spin_lock_bh(&si->lock); - ct = si->exception_events64[ws->iter_exception]; - spin_unlock_bh(&si->lock); - - if (ct) { - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, - "\t\t\n", - sfe_ipv6_exception_events_string[ws->iter_exception], - ct); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - } - - ws->iter_exception++; - if (ws->iter_exception >= SFE_IPV6_EXCEPTION_EVENT_LAST) { - ws->iter_exception = 0; - ws->state++; - } - - return true; -} - -/* - * sfe_ipv6_debug_dev_read_exceptions_end() - * Generate part of the XML output. 
- */ -static bool sfe_ipv6_debug_dev_read_exceptions_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_stats() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_stats(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - unsigned int num_connections; - u64 packets_forwarded; - u64 packets_not_forwarded; - u64 connection_create_requests; - u64 connection_create_collisions; - u64 connection_destroy_requests; - u64 connection_destroy_misses; - u64 connection_flushes; - u64 connection_match_hash_hits; - u64 connection_match_hash_reorders; - - spin_lock_bh(&si->lock); - sfe_ipv6_update_summary_stats(si); - - num_connections = si->num_connections; - packets_forwarded = si->packets_forwarded64; - packets_not_forwarded = si->packets_not_forwarded64; - connection_create_requests = si->connection_create_requests64; - connection_create_collisions = si->connection_create_collisions64; - connection_destroy_requests = si->connection_destroy_requests64; - connection_destroy_misses = si->connection_destroy_misses64; - connection_flushes = si->connection_flushes64; - connection_match_hash_hits = si->connection_match_hash_hits64; - connection_match_hash_reorders = si->connection_match_hash_reorders64; - spin_unlock_bh(&si->lock); - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n", - num_connections, - packets_forwarded, - packets_not_forwarded, - connection_create_requests, - connection_create_collisions, - connection_destroy_requests, - connection_destroy_misses, - connection_flushes, - 
connection_match_hash_hits, - connection_match_hash_reorders); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_end() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * Array of write functions that write various XML elements that correspond to - * our XML output state machine. - */ -static sfe_ipv6_debug_xml_write_method_t sfe_ipv6_debug_xml_write_methods[SFE_IPV6_DEBUG_XML_STATE_DONE] = { - sfe_ipv6_debug_dev_read_start, - sfe_ipv6_debug_dev_read_connections_start, - sfe_ipv6_debug_dev_read_connections_connection, - sfe_ipv6_debug_dev_read_connections_end, - sfe_ipv6_debug_dev_read_exceptions_start, - sfe_ipv6_debug_dev_read_exceptions_exception, - sfe_ipv6_debug_dev_read_exceptions_end, - sfe_ipv6_debug_dev_read_stats, - sfe_ipv6_debug_dev_read_end, -}; - -/* - * sfe_ipv6_debug_dev_read() - * Send info to userspace upon read request from user - */ -static ssize_t sfe_ipv6_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset) -{ - char msg[CHAR_DEV_MSG_SIZE]; - int total_read = 0; - struct sfe_ipv6_debug_xml_write_state *ws; - struct sfe_ipv6 *si = &__si6; - - ws = (struct sfe_ipv6_debug_xml_write_state *)filp->private_data; - while ((ws->state != SFE_IPV6_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) { - if ((sfe_ipv6_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) { - continue; - } - } - - return total_read; -} - -/* - * 
sfe_ipv6_debug_dev_write() - * Write to char device resets some stats - */ -static ssize_t sfe_ipv6_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset) -{ - struct sfe_ipv6 *si = &__si6; - - spin_lock_bh(&si->lock); - sfe_ipv6_update_summary_stats(si); - - si->packets_forwarded64 = 0; - si->packets_not_forwarded64 = 0; - si->connection_create_requests64 = 0; - si->connection_create_collisions64 = 0; - si->connection_destroy_requests64 = 0; - si->connection_destroy_misses64 = 0; - si->connection_flushes64 = 0; - si->connection_match_hash_hits64 = 0; - si->connection_match_hash_reorders64 = 0; - spin_unlock_bh(&si->lock); - - return length; -} - -/* - * sfe_ipv6_debug_dev_open() - */ -static int sfe_ipv6_debug_dev_open(struct inode *inode, struct file *file) -{ - struct sfe_ipv6_debug_xml_write_state *ws; - - ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data; - if (ws) { - return 0; - } - - ws = kzalloc(sizeof(struct sfe_ipv6_debug_xml_write_state), GFP_KERNEL); - if (!ws) { - return -ENOMEM; - } - - ws->state = SFE_IPV6_DEBUG_XML_STATE_START; - file->private_data = ws; - - return 0; -} - -/* - * sfe_ipv6_debug_dev_release() - */ -static int sfe_ipv6_debug_dev_release(struct inode *inode, struct file *file) -{ - struct sfe_ipv6_debug_xml_write_state *ws; - - ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data; - if (ws) { - /* - * We've finished with our output so free the write state. 
- */ - kfree(ws); - } - - return 0; -} - -/* - * File operations used in the debug char device - */ -static struct file_operations sfe_ipv6_debug_dev_fops = { - .read = sfe_ipv6_debug_dev_read, - .write = sfe_ipv6_debug_dev_write, - .open = sfe_ipv6_debug_dev_open, - .release = sfe_ipv6_debug_dev_release -}; - -#ifdef CONFIG_NF_FLOW_COOKIE -/* - * sfe_ipv6_register_flow_cookie_cb - * register a function in SFE to let SFE use this function to configure flow cookie for a flow - * - * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE - * can use this function to configure flow cookie for a flow. - * return: 0, success; !=0, fail - */ -int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb) -{ - struct sfe_ipv6 *si = &__si6; - - BUG_ON(!cb); - - if (si->flow_cookie_set_func) { - return -1; - } - - rcu_assign_pointer(si->flow_cookie_set_func, cb); - return 0; -} - -/* - * sfe_ipv6_unregister_flow_cookie_cb - * unregister function which is used to configure flow cookie for a flow - * - * return: 0, success; !=0, fail - */ -int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb) -{ - struct sfe_ipv6 *si = &__si6; - - RCU_INIT_POINTER(si->flow_cookie_set_func, NULL); - return 0; -} - -/* - * sfe_ipv6_get_flow_cookie() - */ -static ssize_t sfe_ipv6_get_flow_cookie(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct sfe_ipv6 *si = &__si6; - return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable); -} - -/* - * sfe_ipv6_set_flow_cookie() - */ -static ssize_t sfe_ipv6_set_flow_cookie(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct sfe_ipv6 *si = &__si6; - strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable); - - return size; -} - -/* - * sysfs attributes. 
- */ -static const struct device_attribute sfe_ipv6_flow_cookie_attr = - __ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv6_get_flow_cookie, sfe_ipv6_set_flow_cookie); -#endif /*CONFIG_NF_FLOW_COOKIE*/ - -/* - * sfe_ipv6_init() - */ -static int __init sfe_ipv6_init(void) -{ - struct sfe_ipv6 *si = &__si6; - int result = -1; - - DEBUG_INFO("SFE IPv6 init\n"); - - /* - * Create sys/sfe_ipv6 - */ - si->sys_sfe_ipv6 = kobject_create_and_add("sfe_ipv6", NULL); - if (!si->sys_sfe_ipv6) { - DEBUG_ERROR("failed to register sfe_ipv6\n"); - goto exit1; - } - - /* - * Create files, one for each parameter supported by this module. - */ - result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr); - if (result) { - DEBUG_ERROR("failed to register debug dev file: %d\n", result); - goto exit2; - } - -#ifdef CONFIG_NF_FLOW_COOKIE - result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr); - if (result) { - DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result); - goto exit3; - } -#endif /* CONFIG_NF_FLOW_COOKIE */ - - /* - * Register our debug char device. - */ - result = register_chrdev(0, "sfe_ipv6", &sfe_ipv6_debug_dev_fops); - if (result < 0) { - DEBUG_ERROR("Failed to register chrdev: %d\n", result); - goto exit4; - } - - si->debug_dev = result; - - /* - * Create a timer to handle periodic statistics. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) - setup_timer(&si->timer, sfe_ipv6_periodic_sync, (unsigned long)si); -#else - timer_setup(&si->timer, sfe_ipv6_periodic_sync, 0); -#endif - mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); - - spin_lock_init(&si->lock); - - return 0; - -exit4: -#ifdef CONFIG_NF_FLOW_COOKIE - sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr); - -exit3: -#endif /* CONFIG_NF_FLOW_COOKIE */ - sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr); - -exit2: - kobject_put(si->sys_sfe_ipv6); - -exit1: - return result; -} - -/* - * sfe_ipv6_exit() - */ -static void __exit sfe_ipv6_exit(void) -{ - struct sfe_ipv6 *si = &__si6; - - DEBUG_INFO("SFE IPv6 exit\n"); - - /* - * Destroy all connections. - */ - sfe_ipv6_destroy_all_rules_for_dev(NULL); - - del_timer_sync(&si->timer); - - unregister_chrdev(si->debug_dev, "sfe_ipv6"); - -#ifdef CONFIG_NF_FLOW_COOKIE - sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr); -#endif /* CONFIG_NF_FLOW_COOKIE */ - sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr); - - kobject_put(si->sys_sfe_ipv6); -} - -module_init(sfe_ipv6_init) -module_exit(sfe_ipv6_exit) - -EXPORT_SYMBOL(sfe_ipv6_recv); -EXPORT_SYMBOL(sfe_ipv6_create_rule); -EXPORT_SYMBOL(sfe_ipv6_destroy_rule); -EXPORT_SYMBOL(sfe_ipv6_destroy_all_rules_for_dev); -EXPORT_SYMBOL(sfe_ipv6_register_sync_rule_callback); -EXPORT_SYMBOL(sfe_ipv6_mark_rule); -EXPORT_SYMBOL(sfe_ipv6_update_rule); -#ifdef CONFIG_NF_FLOW_COOKIE -EXPORT_SYMBOL(sfe_ipv6_register_flow_cookie_cb); -EXPORT_SYMBOL(sfe_ipv6_unregister_flow_cookie_cb); -#endif - -MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv6 support"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/shortcut-fe/shortcut-fe/Kconfig b/shortcut-fe/shortcut-fe/Kconfig deleted file mode 100644 index 487f1e065..000000000 --- a/shortcut-fe/shortcut-fe/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# -# Shortcut forwarding engine -# - -config 
SHORTCUT_FE - tristate "Shortcut Forwarding Engine" - depends on NF_CONNTRACK - ---help--- - Shortcut is a fast in-kernel packet forwarding engine. - - To compile this code as a module, choose M here: the module will be - called shortcut-fe. - - If unsure, say N. diff --git a/shortcut-fe/shortcut-fe/Makefile b/shortcut-fe/shortcut-fe/Makefile index dd53042e5..598e9d4a2 100644 --- a/shortcut-fe/shortcut-fe/Makefile +++ b/shortcut-fe/shortcut-fe/Makefile @@ -79,8 +79,12 @@ endef ifneq ($(CONFIG_PACKAGE_kmod-shortcut-fe)$(CONFIG_PACKAGE_kmod-shortcut-fe-cm),) define Build/InstallDev + $(INSTALL_DIR) $(1)/usr/include $(INSTALL_DIR) $(1)/usr/include/shortcut-fe $(CP) -rf $(PKG_BUILD_DIR)/sfe.h $(1)/usr/include/shortcut-fe + $(CP) -rf $(PKG_BUILD_DIR)/sfe.h $(1)/usr/include + $(CP) -rf $(PKG_BUILD_DIR)/sfe_cm.h $(1)/usr/include + $(CP) -rf $(PKG_BUILD_DIR)/sfe_backport.h $(1)/usr/include endef endif diff --git a/shortcut-fe/shortcut-fe/sfe.h b/shortcut-fe/shortcut-fe/sfe.h deleted file mode 100644 index 279e7b3dc..000000000 --- a/shortcut-fe/shortcut-fe/sfe.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * sfe.h - * Shortcut forwarding engine. - * - * Copyright (c) 2013-2017 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - - -/* - * The following are debug macros used throughout the SFE. - * - * The DEBUG_LEVEL enables the followings based on its value, - * when dynamic debug option is disabled. - * - * 0 = OFF - * 1 = ASSERTS / ERRORS - * 2 = 1 + WARN - * 3 = 2 + INFO - * 4 = 3 + TRACE - */ -#define DEBUG_LEVEL 2 - -#if (DEBUG_LEVEL < 1) -#define DEBUG_ASSERT(s, ...) -#define DEBUG_ERROR(s, ...) -#else -#define DEBUG_ASSERT(c, s, ...) if (!(c)) { pr_emerg("ASSERT: %s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__); BUG(); } -#define DEBUG_ERROR(s, ...) pr_err("%s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif - -#if defined(CONFIG_DYNAMIC_DEBUG) -/* - * Compile messages for dynamic enable/disable - */ -#define DEBUG_WARN(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#define DEBUG_INFO(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#define DEBUG_TRACE(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#else - -/* - * Statically compile messages at different levels - */ -#if (DEBUG_LEVEL < 2) -#define DEBUG_WARN(s, ...) -#else -#define DEBUG_WARN(s, ...) pr_warn("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif - -#if (DEBUG_LEVEL < 3) -#define DEBUG_INFO(s, ...) -#else -#define DEBUG_INFO(s, ...) pr_notice("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif - -#if (DEBUG_LEVEL < 4) -#define DEBUG_TRACE(s, ...) -#else -#define DEBUG_TRACE(s, ...) pr_info("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif -#endif - -#ifdef CONFIG_NF_FLOW_COOKIE -typedef int (*flow_cookie_set_func_t)(u32 protocol, __be32 src_ip, __be16 src_port, - __be32 dst_ip, __be16 dst_port, u16 flow_cookie); -/* - * sfe_register_flow_cookie_cb - * register a function in SFE to let SFE use this function to configure flow cookie for a flow - * - * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE - * can use this function to configure flow cookie for a flow. 
- * return: 0, success; !=0, fail - */ -int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb); - -/* - * sfe_unregister_flow_cookie_cb - * unregister function which is used to configure flow cookie for a flow - * - * return: 0, success; !=0, fail - */ -int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb); - -typedef int (*sfe_ipv6_flow_cookie_set_func_t)(u32 protocol, __be32 src_ip[4], __be16 src_port, - __be32 dst_ip[4], __be16 dst_port, u16 flow_cookie); - -/* - * sfe_ipv6_register_flow_cookie_cb - * register a function in SFE to let SFE use this function to configure flow cookie for a flow - * - * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE - * can use this function to configure flow cookie for a flow. - * return: 0, success; !=0, fail - */ -int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb); - -/* - * sfe_ipv6_unregister_flow_cookie_cb - * unregister function which is used to configure flow cookie for a flow - * - * return: 0, success; !=0, fail - */ -int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb); - -#endif /*CONFIG_NF_FLOW_COOKIE*/ diff --git a/shortcut-fe/shortcut-fe/sfe_backport.h b/shortcut-fe/shortcut-fe/sfe_backport.h deleted file mode 100644 index d2d60c73c..000000000 --- a/shortcut-fe/shortcut-fe/sfe_backport.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * sfe_backport.h - * Shortcut forwarding engine compatible header file. - * - * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) -#include -#else -enum udp_conntrack { - UDP_CT_UNREPLIED, - UDP_CT_REPLIED, - UDP_CT_MAX -}; - -static inline unsigned int * -nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, - struct nf_conntrack_l4proto *l4proto) -{ -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - struct nf_conn_timeout *timeout_ext; - unsigned int *timeouts; - - timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) - timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); - else - timeouts = l4proto->get_timeouts(net); - - return timeouts; -#else - return l4proto->get_timeouts(net); -#endif /*CONFIG_NF_CONNTRACK_TIMEOUT*/ -} -#endif /*KERNEL_VERSION(3, 7, 0)*/ -#endif /*KERNEL_VERSION(3, 4, 0)*/ - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) -#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ -static unsigned int FN_NAME(void *priv, \ - struct sk_buff *SKB, \ - const struct nf_hook_state *state) -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) -#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ -static unsigned int FN_NAME(const struct nf_hook_ops *OPS, \ - struct sk_buff *SKB, \ - const struct net_device *UNUSED, \ - const struct net_device *OUT, \ - int (*OKFN)(struct sk_buff *)) -#else -#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ -static unsigned int FN_NAME(unsigned int HOOKNUM, \ - struct sk_buff *SKB, \ - const struct net_device *UNUSED, \ - const struct net_device *OUT, \ - int (*OKFN)(struct sk_buff *)) -#endif - 
-#define sfe_cm_ipv4_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ - sfe_define_post_routing_hook(__sfe_cm_ipv4_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) -#define sfe_cm_ipv6_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ - sfe_define_post_routing_hook(__sfe_cm_ipv6_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) -#define fast_classifier_ipv4_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ - sfe_define_post_routing_hook(__fast_classifier_ipv4_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) -#define fast_classifier_ipv6_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ - sfe_define_post_routing_hook(__fast_classifier_ipv6_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) -#define SFE_IPV4_NF_POST_ROUTING_HOOK(fn) \ - { \ - .hook = fn, \ - .pf = NFPROTO_IPV4, \ - .hooknum = NF_INET_POST_ROUTING, \ - .priority = NF_IP_PRI_NAT_SRC + 1, \ - } -#else -#define SFE_IPV4_NF_POST_ROUTING_HOOK(fn) \ - { \ - .hook = fn, \ - .owner = THIS_MODULE, \ - .pf = NFPROTO_IPV4, \ - .hooknum = NF_INET_POST_ROUTING, \ - .priority = NF_IP_PRI_NAT_SRC + 1, \ - } -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) -#define SFE_IPV6_NF_POST_ROUTING_HOOK(fn) \ - { \ - .hook = fn, \ - .pf = NFPROTO_IPV6, \ - .hooknum = NF_INET_POST_ROUTING, \ - .priority = NF_IP_PRI_NAT_SRC + 1, \ - } -#else -#define SFE_IPV6_NF_POST_ROUTING_HOOK(fn) \ - { \ - .hook = fn, \ - .owner = THIS_MODULE, \ - .pf = NFPROTO_IPV6, \ - .hooknum = NF_INET_POST_ROUTING, \ - .priority = NF_IP6_PRI_NAT_SRC + 1, \ - } -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)) -#define SFE_NF_CT_DEFAULT_ZONE (&nf_ct_zone_dflt) -#else -#define SFE_NF_CT_DEFAULT_ZONE NF_CT_DEFAULT_ZONE -#endif - -/* - * sfe_dev_get_master - * get master of bridge port, and hold it - */ -static inline struct net_device *sfe_dev_get_master(struct net_device *dev) -{ - struct net_device *master; -#if 
(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) - rcu_read_lock(); - master = netdev_master_upper_dev_get_rcu(dev); - if (master) - dev_hold(master); - - rcu_read_unlock(); -#else - master = dev->master; - if (master) - dev_hold(master); -#endif - return master; -} - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) -#define SFE_DEV_EVENT_PTR(PTR) netdev_notifier_info_to_dev(PTR) -#else -#define SFE_DEV_EVENT_PTR(PTR) (struct net_device *)(PTR) -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) -#define SFE_NF_CONN_ACCT(NM) struct nf_conn_acct *NM -#else -#define SFE_NF_CONN_ACCT(NM) struct nf_conn_counter *NM -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) -#define SFE_ACCT_COUNTER(NM) ((NM)->counter) -#else -#define SFE_ACCT_COUNTER(NM) (NM) -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) -#define sfe_hash_for_each_possible(name, obj, node, member, key) \ - hash_for_each_possible(name, obj, member, key) -#else -#define sfe_hash_for_each_possible(name, obj, node, member, key) \ - hash_for_each_possible(name, obj, node, member, key) -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) -#define sfe_hash_for_each(name, bkt, node, obj, member) \ - hash_for_each(name, bkt, obj, member) -#else -#define sfe_hash_for_each(name, bkt, node, obj, member) \ - hash_for_each(name, bkt, node, obj, member) -#endif - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) -#define sfe_dst_get_neighbour(dst, daddr) dst_neigh_lookup(dst, addr) -#else -static inline struct neighbour * -sfe_dst_get_neighbour(struct dst_entry *dst, void *daddr) -{ - struct neighbour *neigh = dst_get_neighbour_noref(dst); - - if (neigh) - neigh_hold(neigh); - - return neigh; -} -#endif diff --git a/shortcut-fe/shortcut-fe/sfe_cm.c b/shortcut-fe/shortcut-fe/sfe_cm.c deleted file mode 100644 index bd1bb88aa..000000000 --- a/shortcut-fe/shortcut-fe/sfe_cm.c +++ /dev/null @@ -1,1154 +0,0 @@ -/* - * sfe-cm.c - * Shortcut forwarding engine connection 
manager. - * - * Copyright (c) 2013-2018, 2020 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sfe.h" -#include "sfe_cm.h" -#include "sfe_backport.h" - -typedef enum sfe_cm_exception { - SFE_CM_EXCEPTION_PACKET_BROADCAST, - SFE_CM_EXCEPTION_PACKET_MULTICAST, - SFE_CM_EXCEPTION_NO_IIF, - SFE_CM_EXCEPTION_NO_CT, - SFE_CM_EXCEPTION_CT_NO_TRACK, - SFE_CM_EXCEPTION_CT_NO_CONFIRM, - SFE_CM_EXCEPTION_CT_IS_ALG, - SFE_CM_EXCEPTION_IS_IPV4_MCAST, - SFE_CM_EXCEPTION_IS_IPV6_MCAST, - SFE_CM_EXCEPTION_TCP_NOT_ASSURED, - SFE_CM_EXCEPTION_TCP_NOT_ESTABLISHED, - SFE_CM_EXCEPTION_UNKNOW_PROTOCOL, - SFE_CM_EXCEPTION_NO_SRC_DEV, - SFE_CM_EXCEPTION_NO_SRC_XLATE_DEV, - SFE_CM_EXCEPTION_NO_DEST_DEV, - SFE_CM_EXCEPTION_NO_DEST_XLATE_DEV, - SFE_CM_EXCEPTION_NO_BRIDGE, - SFE_CM_EXCEPTION_LOCAL_OUT, - SFE_CM_EXCEPTION_MAX -} sfe_cm_exception_t; - -static char *sfe_cm_exception_events_string[SFE_CM_EXCEPTION_MAX] = { - "PACKET_BROADCAST", - "PACKET_MULTICAST", - "NO_IIF", - "NO_CT", - "CT_NO_TRACK", - "CT_NO_CONFIRM", - "CT_IS_ALG", - "IS_IPV4_MCAST", - "IS_IPV6_MCAST", - 
"TCP_NOT_ASSURED", - "TCP_NOT_ESTABLISHED", - "UNKNOW_PROTOCOL", - "NO_SRC_DEV", - "NO_SRC_XLATE_DEV", - "NO_DEST_DEV", - "NO_DEST_XLATE_DEV", - "NO_BRIDGE", - "LOCAL_OUT" -}; - -/* - * Per-module structure. - */ -struct sfe_cm { - spinlock_t lock; /* Lock for SMP correctness */ - - /* - * Control state. - */ - struct kobject *sys_sfe_cm; /* sysfs linkage */ - - /* - * Callback notifiers. - */ - struct notifier_block dev_notifier; /* Device notifier */ - struct notifier_block inet_notifier; /* IPv4 notifier */ - struct notifier_block inet6_notifier; /* IPv6 notifier */ - u32 exceptions[SFE_CM_EXCEPTION_MAX]; -}; - -static struct sfe_cm __sc; - -/* - * sfe_cm_incr_exceptions() - * increase an exception counter. - */ -static inline void sfe_cm_incr_exceptions(sfe_cm_exception_t except) -{ - struct sfe_cm *sc = &__sc; - - spin_lock_bh(&sc->lock); - sc->exceptions[except]++; - spin_unlock_bh(&sc->lock); -} - -/* - * sfe_cm_recv() - * Handle packet receives. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. - */ -int sfe_cm_recv(struct sk_buff *skb) -{ - struct net_device *dev; - - /* - * We know that for the vast majority of packets we need the transport - * layer header so we may as well start to fetch it now! - */ - prefetch(skb->data + 32); - barrier(); - - dev = skb->dev; - - /* - * We're only interested in IPv4 and IPv6 packets. - */ - if (likely(htons(ETH_P_IP) == skb->protocol)) { - struct in_device *in_dev; - - /* - * Does our input device support IP processing? - */ - in_dev = (struct in_device *)dev->ip_ptr; - if (unlikely(!in_dev)) { - DEBUG_TRACE("no IP processing for device: %s\n", dev->name); - return 0; - } - - /* - * Does it have an IP address? If it doesn't then we can't do anything - * interesting here! 
- */ - if (unlikely(!in_dev->ifa_list)) { - DEBUG_TRACE("no IP address for device: %s\n", dev->name); - return 0; - } - - return sfe_ipv4_recv(dev, skb); - } - - if (likely(htons(ETH_P_IPV6) == skb->protocol)) { - struct inet6_dev *in_dev; - - /* - * Does our input device support IPv6 processing? - */ - in_dev = (struct inet6_dev *)dev->ip6_ptr; - if (unlikely(!in_dev)) { - DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name); - return 0; - } - - /* - * Does it have an IPv6 address? If it doesn't then we can't do anything - * interesting here! - */ - if (unlikely(list_empty(&in_dev->addr_list))) { - DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name); - return 0; - } - - return sfe_ipv6_recv(dev, skb); - } - - DEBUG_TRACE("not IP packet\n"); - return 0; -} - -/* - * sfe_cm_find_dev_and_mac_addr() - * Find the device and MAC address for a given IPv4/IPv6 address. - * - * Returns true if we find the device and MAC address, otherwise false. - * - * We look up the rtable entry for the address and, from its neighbour - * structure, obtain the hardware address. This means this function also - * works if the neighbours are routers too. - */ -static bool sfe_cm_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, int is_v4) -{ - struct neighbour *neigh; - struct rtable *rt; - struct rt6_info *rt6; - struct dst_entry *dst; - struct net_device *mac_dev; - - /* - * Look up the rtable entry for the IP address then get the hardware - * address from its neighbour structure. This means this work when the - * neighbours are routers too. 
- */ - if (likely(is_v4)) { - rt = ip_route_output(&init_net, addr->ip, 0, 0, 0); - if (unlikely(IS_ERR(rt))) { - goto ret_fail; - } - - dst = (struct dst_entry *)rt; - } else { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)) - rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0); -#else - rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, NULL, 0); -#endif - if (!rt6) { - goto ret_fail; - } - - dst = (struct dst_entry *)rt6; - } - - rcu_read_lock(); - neigh = sfe_dst_get_neighbour(dst, addr); - if (unlikely(!neigh)) { - rcu_read_unlock(); - dst_release(dst); - goto ret_fail; - } - - if (unlikely(!(neigh->nud_state & NUD_VALID))) { - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - goto ret_fail; - } - - mac_dev = neigh->dev; - if (!mac_dev) { - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - goto ret_fail; - } - - memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len); - - dev_hold(mac_dev); - *dev = mac_dev; - rcu_read_unlock(); - neigh_release(neigh); - dst_release(dst); - - return true; - -ret_fail: - if (is_v4) { - DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", &addr->ip); - - } else { - DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr->ip6); - } - - return false; -} - -/* - * sfe_cm_post_routing() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4) -{ - struct sfe_connection_create sic; - struct net_device *in; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - struct net_device *dev; - struct net_device *src_dev; - struct net_device *dest_dev; - struct net_device *src_dev_tmp; - struct net_device *dest_dev_tmp; - struct net_device *src_br_dev = NULL; - struct net_device *dest_br_dev = NULL; - struct nf_conntrack_tuple orig_tuple; - struct nf_conntrack_tuple reply_tuple; - SFE_NF_CONN_ACCT(acct); - - /* - * Don't process broadcast or 
multicast packets. - */ - if (unlikely(skb->pkt_type == PACKET_BROADCAST)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_PACKET_BROADCAST); - DEBUG_TRACE("broadcast, ignoring\n"); - return NF_ACCEPT; - } - if (unlikely(skb->pkt_type == PACKET_MULTICAST)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_PACKET_MULTICAST); - DEBUG_TRACE("multicast, ignoring\n"); - return NF_ACCEPT; - } - -#ifdef CONFIG_XFRM - /* - * Packet to xfrm for encapsulation, we can't process it - */ - if (unlikely(skb_dst(skb)->xfrm)) { - DEBUG_TRACE("packet to xfrm, ignoring\n"); - return NF_ACCEPT; - } -#endif - - /* - * Don't process locally generated packets. - */ - if (skb->sk) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_LOCAL_OUT); - DEBUG_TRACE("skip local out packet\n"); - return NF_ACCEPT; - } - - /* - * Don't process packets that are not being forwarded. - */ - in = dev_get_by_index(&init_net, skb->skb_iif); - if (!in) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_IIF); - DEBUG_TRACE("packet not forwarding\n"); - return NF_ACCEPT; - } - - dev_put(in); - - /* - * Don't process packets that aren't being tracked by conntrack. - */ - ct = nf_ct_get(skb, &ctinfo); - if (unlikely(!ct)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_CT); - DEBUG_TRACE("no conntrack connection, ignoring\n"); - return NF_ACCEPT; - } - - /* - * Don't process untracked connections. - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) - if (unlikely(nf_ct_is_untracked(ct))) { -#else - if (unlikely(ctinfo == IP_CT_UNTRACKED)) { -#endif - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_NO_TRACK); - DEBUG_TRACE("untracked connection\n"); - return NF_ACCEPT; - } - - /* - * Unconfirmed connection may be dropped by Linux at the final step, - * So we don't process unconfirmed connections. 
- */ - if (!nf_ct_is_confirmed(ct)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_NO_CONFIRM); - DEBUG_TRACE("unconfirmed connection\n"); - return NF_ACCEPT; - } - - /* - * Don't process connections that require support from a 'helper' (typically a NAT ALG). - */ - if (unlikely(nfct_help(ct))) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_IS_ALG); - DEBUG_TRACE("connection has helper\n"); - return NF_ACCEPT; - } - - /* - * Check if the acceleration of a flow could be rejected quickly. - */ - acct = nf_conn_acct_find(ct); - if (acct) { - long long packets = atomic64_read(&SFE_ACCT_COUNTER(acct)[CTINFO2DIR(ctinfo)].packets); - if ((packets > 0xff) && (packets & 0xff)) { - /* - * Connection hits slow path at least 256 times, so it must be not able to accelerate. - * But we also give it a chance to walk through ECM every 256 packets - */ - return NF_ACCEPT; - } - } - - /* - * Look up the details of our connection in conntrack. - * - * Note that the data we get from conntrack is for the "ORIGINAL" direction - * but our packet may actually be in the "REPLY" direction. - */ - orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - sic.protocol = (s32)orig_tuple.dst.protonum; - - sic.flags = 0; - - /* - * Get addressing information, non-NAT first - */ - if (likely(is_v4)) { - u32 dscp; - - sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip; - sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; - - if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_IS_IPV4_MCAST); - DEBUG_TRACE("multicast address\n"); - return NF_ACCEPT; - } - - /* - * NAT'ed addresses - note these are as seen from the 'reply' direction - * When NAT does not apply to this connection these will be identical to the above. 
- */ - sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip; - sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip; - - dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; - if (dscp) { - sic.dest_dscp = dscp; - sic.src_dscp = sic.dest_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - } else { - u32 dscp; - - sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); - sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); - - if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) || - ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_IS_IPV6_MCAST); - DEBUG_TRACE("multicast address\n"); - return NF_ACCEPT; - } - - /* - * NAT'ed addresses - note these are as seen from the 'reply' direction - * When NAT does not apply to this connection these will be identical to the above. - */ - sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6); - sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6); - - dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; - if (dscp) { - sic.dest_dscp = dscp; - sic.src_dscp = sic.dest_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - } - - switch (sic.protocol) { - case IPPROTO_TCP: - sic.src_port = orig_tuple.src.u.tcp.port; - sic.dest_port = orig_tuple.dst.u.tcp.port; - sic.src_port_xlate = reply_tuple.dst.u.tcp.port; - sic.dest_port_xlate = reply_tuple.src.u.tcp.port; - sic.src_td_window_scale = ct->proto.tcp.seen[0].td_scale; - sic.src_td_max_window = ct->proto.tcp.seen[0].td_maxwin; - sic.src_td_end = ct->proto.tcp.seen[0].td_end; - sic.src_td_max_end = ct->proto.tcp.seen[0].td_maxend; - sic.dest_td_window_scale = ct->proto.tcp.seen[1].td_scale; - sic.dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin; - sic.dest_td_end = ct->proto.tcp.seen[1].td_end; - sic.dest_td_max_end = ct->proto.tcp.seen[1].td_maxend; - - if (nf_ct_tcp_no_window_check - || (ct->proto.tcp.seen[0].flags & 
IP_CT_TCP_FLAG_BE_LIBERAL) - || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) { - sic.flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK; - } - - /* - * Don't try to manage a non-established connection. - */ - if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_TCP_NOT_ASSURED); - DEBUG_TRACE("non-established connection\n"); - return NF_ACCEPT; - } - - /* - * If the connection is shutting down do not manage it. - * state can not be SYN_SENT, SYN_RECV because connection is assured - * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE. - */ - spin_lock_bh(&ct->lock); - if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) { - spin_unlock_bh(&ct->lock); - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_TCP_NOT_ESTABLISHED); - DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n", - ct->proto.tcp.state, &sic.src_ip, ntohs(sic.src_port), - &sic.dest_ip, ntohs(sic.dest_port)); - return NF_ACCEPT; - } - spin_unlock_bh(&ct->lock); - break; - - case IPPROTO_UDP: - sic.src_port = orig_tuple.src.u.udp.port; - sic.dest_port = orig_tuple.dst.u.udp.port; - sic.src_port_xlate = reply_tuple.dst.u.udp.port; - sic.dest_port_xlate = reply_tuple.src.u.udp.port; - break; - - default: - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_UNKNOW_PROTOCOL); - DEBUG_TRACE("unhandled protocol %d\n", sic.protocol); - return NF_ACCEPT; - } - -#ifdef CONFIG_XFRM - sic.original_accel = 1; - sic.reply_accel = 1; - - /* - * For packets de-capsulated from xfrm, we still can accelerate it - * on the direction we just received the packet. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)) - if (unlikely(skb->sp)) { -#else - if (unlikely(secpath_exists(skb))) { -#endif - if (sic.protocol == IPPROTO_TCP && - !(sic.flags & SFE_CREATE_FLAG_NO_SEQ_CHECK)) { - return NF_ACCEPT; - } - - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { - sic.reply_accel = 0; - } else { - sic.original_accel = 0; - } - } -#endif - - /* - * Get QoS information - */ - if (skb->priority) { - sic.dest_priority = skb->priority; - sic.src_priority = sic.dest_priority; - sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY; - } - - /* - * Get the net device and MAC addresses that correspond to the various source and - * destination host addresses. - */ - if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_SRC_DEV); - return NF_ACCEPT; - } - src_dev = src_dev_tmp; - - if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_SRC_XLATE_DEV); - goto done1; - } - dev_put(dev); - - if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_DEST_DEV); - goto done1; - } - dev_put(dev); - - if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_DEST_XLATE_DEV); - goto done1; - } - dest_dev = dest_dev_tmp; - - /* - * Our devices may actually be part of a bridge interface. If that's - * the case then find the bridge interface instead. 
- */ - if (src_dev->priv_flags & IFF_BRIDGE_PORT) { - src_br_dev = sfe_dev_get_master(src_dev); - if (!src_br_dev) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_BRIDGE); - DEBUG_TRACE("no bridge found for: %s\n", src_dev->name); - goto done2; - } - src_dev = src_br_dev; - } - - if (dest_dev->priv_flags & IFF_BRIDGE_PORT) { - dest_br_dev = sfe_dev_get_master(dest_dev); - if (!dest_br_dev) { - sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_BRIDGE); - DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name); - goto done3; - } - dest_dev = dest_br_dev; - } - - sic.src_dev = src_dev; - sic.dest_dev = dest_dev; - - sic.src_mtu = src_dev->mtu; - sic.dest_mtu = dest_dev->mtu; - - if (likely(is_v4)) { - sfe_ipv4_create_rule(&sic); - } else { - sfe_ipv6_create_rule(&sic); - } - - /* - * If we had bridge ports then release them too. - */ - if (dest_br_dev) { - dev_put(dest_br_dev); - } -done3: - if (src_br_dev) { - dev_put(src_br_dev); - } -done2: - dev_put(dest_dev_tmp); -done1: - dev_put(src_dev_tmp); - - return NF_ACCEPT; -} - -/* - * sfe_cm_ipv4_post_routing_hook() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -sfe_cm_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) -{ - return sfe_cm_post_routing(skb, true); -} - -/* - * sfe_cm_ipv6_post_routing_hook() - * Called for packets about to leave the box - either locally generated or forwarded from another interface - */ -sfe_cm_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) -{ - return sfe_cm_post_routing(skb, false); -} - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -/* - * sfe_cm_conntrack_event() - * Callback event invoked when a conntrack connection's state changes. 
- */ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -static int sfe_cm_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) -#else -static int sfe_cm_conntrack_event(unsigned int events, struct nf_ct_event *item) -#endif -{ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS - struct nf_ct_event *item = ptr; -#endif - struct sfe_connection_destroy sid; - struct nf_conn *ct = item->ct; - struct nf_conntrack_tuple orig_tuple; - - /* - * If we don't have a conntrack entry then we're done. - */ - if (unlikely(!ct)) { - DEBUG_WARN("no ct in conntrack event callback\n"); - return NOTIFY_DONE; - } - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) - if (unlikely(nf_ct_is_untracked(ct))) { - DEBUG_TRACE("ignoring untracked conn\n"); - return NOTIFY_DONE; - } -#endif - - /* - * We're only interested in destroy events. - */ - if (unlikely(!(events & (1 << IPCT_DESTROY)))) { - DEBUG_TRACE("ignoring non-destroy event\n"); - return NOTIFY_DONE; - } - - orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - sid.protocol = (s32)orig_tuple.dst.protonum; - - /* - * Extract information from the conntrack connection. We're only interested - * in nominal connection information (i.e. we're ignoring any NAT information). 
- */ - switch (sid.protocol) { - case IPPROTO_TCP: - sid.src_port = orig_tuple.src.u.tcp.port; - sid.dest_port = orig_tuple.dst.u.tcp.port; - break; - - case IPPROTO_UDP: - sid.src_port = orig_tuple.src.u.udp.port; - sid.dest_port = orig_tuple.dst.u.udp.port; - break; - - default: - DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol); - return NOTIFY_DONE; - } - - if (likely(nf_ct_l3num(ct) == AF_INET)) { - sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip; - sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; - - sfe_ipv4_destroy_rule(&sid); - } else if (likely(nf_ct_l3num(ct) == AF_INET6)) { - sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); - sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); - - sfe_ipv6_destroy_rule(&sid); - } else { - DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n"); - } - - return NOTIFY_DONE; -} - -/* - * Netfilter conntrack event system to monitor connection tracking changes - */ -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -static struct notifier_block sfe_cm_conntrack_notifier = { - .notifier_call = sfe_cm_conntrack_event, -}; -#else -static struct nf_ct_event_notifier sfe_cm_conntrack_notifier = { - .fcn = sfe_cm_conntrack_event, -}; -#endif -#endif - -/* - * Structure to establish a hook into the post routing netfilter point - this - * will pick up local outbound and packets going from one interface to another. - * - * Note: see include/linux/netfilter_ipv4.h for info related to priority levels. - * We want to examine packets after NAT translation and any ALG processing. - */ -static struct nf_hook_ops sfe_cm_ops_post_routing[] __read_mostly = { - SFE_IPV4_NF_POST_ROUTING_HOOK(__sfe_cm_ipv4_post_routing_hook), -#ifdef SFE_SUPPORT_IPV6 - SFE_IPV6_NF_POST_ROUTING_HOOK(__sfe_cm_ipv6_post_routing_hook), -#endif -}; - -/* - * sfe_cm_sync_rule() - * Synchronize a connection's state. 
- */ -static void sfe_cm_sync_rule(struct sfe_connection_sync *sis) -{ - struct nf_conntrack_tuple_hash *h; - struct nf_conntrack_tuple tuple; - struct nf_conn *ct; - SFE_NF_CONN_ACCT(acct); - - /* - * Create a tuple so as to be able to look up a connection - */ - memset(&tuple, 0, sizeof(tuple)); - tuple.src.u.all = (__be16)sis->src_port; - tuple.dst.dir = IP_CT_DIR_ORIGINAL; - tuple.dst.protonum = (u8)sis->protocol; - tuple.dst.u.all = (__be16)sis->dest_port; - - if (sis->is_v6) { - tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6); - tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6); - tuple.src.l3num = AF_INET6; - - DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n", - (int)tuple.dst.protonum, - &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all), - &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all)); - } else { - tuple.src.u3.ip = sis->src_ip.ip; - tuple.dst.u3.ip = sis->dest_ip.ip; - tuple.src.l3num = AF_INET; - - DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n", - (int)tuple.dst.protonum, - &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all), - &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all)); - } - - /* - * Look up conntrack connection - */ - h = nf_conntrack_find_get(&init_net, SFE_NF_CT_DEFAULT_ZONE, &tuple); - if (unlikely(!h)) { - DEBUG_TRACE("no connection found\n"); - return; - } - - ct = nf_ct_tuplehash_to_ctrack(h); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)) - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); -#endif - /* - * Only update if this is not a fixed timeout - */ - if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { - spin_lock_bh(&ct->lock); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)) - ct->timeout.expires += sis->delta_jiffies; -#else - ct->timeout += sis->delta_jiffies; -#endif - spin_unlock_bh(&ct->lock); - } - - acct = nf_conn_acct_find(ct); - if (acct) { - spin_lock_bh(&ct->lock); - atomic64_add(sis->src_new_packet_count, 
&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets); - atomic64_add(sis->src_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes); - atomic64_add(sis->dest_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets); - atomic64_add(sis->dest_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes); - spin_unlock_bh(&ct->lock); - } - - switch (sis->protocol) { - case IPPROTO_TCP: - spin_lock_bh(&ct->lock); - if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) { - ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window; - } - if ((s32)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) { - ct->proto.tcp.seen[0].td_end = sis->src_td_end; - } - if ((s32)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) { - ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end; - } - if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) { - ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window; - } - if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) { - ct->proto.tcp.seen[1].td_end = sis->dest_td_end; - } - if ((s32)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) { - ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end; - } - spin_unlock_bh(&ct->lock); - break; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) - case IPPROTO_UDP: - /* - * In Linux connection track, UDP flow has two timeout values: - * /proc/sys/net/netfilter/nf_conntrack_udp_timeout: - * this is for uni-direction UDP flow, normally its value is 60 seconds - * /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream: - * this is for bi-direction UDP flow, normally its value is 180 seconds - * - * Linux will update timer of UDP flow to stream timeout once it seen packets - * in reply direction. But if flow is accelerated by NSS or SFE, Linux won't - * see any packets. So we have to do the same thing in our stats sync message. 
- */ - if (!test_bit(IPS_ASSURED_BIT, &ct->status) && acct) { - u_int64_t reply_pkts = atomic64_read(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets); - - if (reply_pkts != 0) { - unsigned int *timeouts; - struct nf_conntrack_l4proto *l4proto __maybe_unused; - set_bit(IPS_SEEN_REPLY_BIT, &ct->status); - set_bit(IPS_ASSURED_BIT, &ct->status); - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0)) - l4proto = __nf_ct_l4proto_find((sis->is_v6 ? AF_INET6 : AF_INET), IPPROTO_UDP); - timeouts = nf_ct_timeout_lookup(&init_net, ct, l4proto); - spin_lock_bh(&ct->lock); - ct->timeout.expires = jiffies + timeouts[UDP_CT_REPLIED]; - spin_unlock_bh(&ct->lock); -#else - timeouts = nf_ct_timeout_lookup(ct); - if (!timeouts) { - timeouts = udp_get_timeouts(nf_ct_net(ct)); - } - - spin_lock_bh(&ct->lock); - ct->timeout = jiffies + timeouts[UDP_CT_REPLIED]; - spin_unlock_bh(&ct->lock); -#endif - } - } - break; -#endif /*KERNEL_VERSION(3, 4, 0)*/ - } - - /* - * Release connection - */ - nf_ct_put(ct); -} - -/* - * sfe_cm_device_event() - */ -int sfe_cm_device_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = SFE_DEV_EVENT_PTR(ptr); - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv4_destroy_all_rules_for_dev(dev); - sfe_ipv6_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * sfe_cm_inet_event() - */ -static int sfe_cm_inet_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv4_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * sfe_cm_inet6_event() - */ -static int sfe_cm_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev; - - if (dev && (event == NETDEV_DOWN)) { - sfe_ipv6_destroy_all_rules_for_dev(dev); - } - - return NOTIFY_DONE; -} - -/* - * sfe_cm_get_exceptions - * 
dump exception counters - */ -static ssize_t sfe_cm_get_exceptions(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int idx, len; - struct sfe_cm *sc = &__sc; - - spin_lock_bh(&sc->lock); - for (len = 0, idx = 0; idx < SFE_CM_EXCEPTION_MAX; idx++) { - if (sc->exceptions[idx]) { - len += snprintf(buf + len, (ssize_t)(PAGE_SIZE - len), "%s = %d\n", sfe_cm_exception_events_string[idx], sc->exceptions[idx]); - } - } - spin_unlock_bh(&sc->lock); - - return len; -} - -/* - * sysfs attributes. - */ -static const struct device_attribute sfe_cm_exceptions_attr = - __ATTR(exceptions, S_IRUGO, sfe_cm_get_exceptions, NULL); - -/* - * sfe_cm_init() - */ -static int __init sfe_cm_init(void) -{ - struct sfe_cm *sc = &__sc; - int result = -1; - - DEBUG_INFO("SFE CM init\n"); - - /* - * Create sys/sfe_cm - */ - sc->sys_sfe_cm = kobject_create_and_add("sfe_cm", NULL); - if (!sc->sys_sfe_cm) { - DEBUG_ERROR("failed to register sfe_cm\n"); - goto exit1; - } - - /* - * Create sys/sfe_cm/exceptions - */ - result = sysfs_create_file(sc->sys_sfe_cm, &sfe_cm_exceptions_attr.attr); - if (result) { - DEBUG_ERROR("failed to register exceptions file: %d\n", result); - goto exit2; - } - - sc->dev_notifier.notifier_call = sfe_cm_device_event; - sc->dev_notifier.priority = 1; - register_netdevice_notifier(&sc->dev_notifier); - - sc->inet_notifier.notifier_call = sfe_cm_inet_event; - sc->inet_notifier.priority = 1; - register_inetaddr_notifier(&sc->inet_notifier); - - sc->inet6_notifier.notifier_call = sfe_cm_inet6_event; - sc->inet6_notifier.priority = 1; - register_inet6addr_notifier(&sc->inet6_notifier); - /* - * Register our netfilter hooks. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) - result = nf_register_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#else - result = nf_register_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#endif - if (result < 0) { - DEBUG_ERROR("can't register nf post routing hook: %d\n", result); - goto exit3; - } - - /* - * Register a notifier hook to get fast notifications of expired connections. - * Note: In CONFIG_NF_CONNTRACK_CHAIN_EVENTS enabled case, nf_conntrack_register_notifier() - * function always returns 0. - */ -#ifdef CONFIG_NF_CONNTRACK_EVENTS -#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS - (void)nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier); -#else - result = nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier); - if (result < 0) { - DEBUG_ERROR("can't register nf notifier hook: %d\n", result); - goto exit4; - } -#endif -#endif - - spin_lock_init(&sc->lock); - - /* - * Hook the receive path in the network stack. - */ - BUG_ON(athrs_fast_nat_recv); - RCU_INIT_POINTER(athrs_fast_nat_recv, sfe_cm_recv); - - /* - * Hook the shortcut sync callback. 
- */ - sfe_ipv4_register_sync_rule_callback(sfe_cm_sync_rule); - sfe_ipv6_register_sync_rule_callback(sfe_cm_sync_rule); - return 0; - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS -exit4: -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) - nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#else - nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#endif - -#endif -#endif -exit3: - unregister_inet6addr_notifier(&sc->inet6_notifier); - unregister_inetaddr_notifier(&sc->inet_notifier); - unregister_netdevice_notifier(&sc->dev_notifier); -exit2: - kobject_put(sc->sys_sfe_cm); - -exit1: - return result; -} - -/* - * sfe_cm_exit() - */ -static void __exit sfe_cm_exit(void) -{ - struct sfe_cm *sc = &__sc; - - DEBUG_INFO("SFE CM exit\n"); - - /* - * Unregister our sync callback. - */ - sfe_ipv4_register_sync_rule_callback(NULL); - sfe_ipv6_register_sync_rule_callback(NULL); - - /* - * Unregister our receive callback. - */ - RCU_INIT_POINTER(athrs_fast_nat_recv, NULL); - - /* - * Wait for all callbacks to complete. - */ - rcu_barrier(); - - /* - * Destroy all connections. 
- */ - sfe_ipv4_destroy_all_rules_for_dev(NULL); - sfe_ipv6_destroy_all_rules_for_dev(NULL); - -#ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_conntrack_unregister_notifier(&init_net, &sfe_cm_conntrack_notifier); - -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) - nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#else - nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); -#endif - unregister_inet6addr_notifier(&sc->inet6_notifier); - unregister_inetaddr_notifier(&sc->inet_notifier); - unregister_netdevice_notifier(&sc->dev_notifier); - - kobject_put(sc->sys_sfe_cm); -} - -module_init(sfe_cm_init) -module_exit(sfe_cm_exit) - -MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/shortcut-fe/shortcut-fe/sfe_cm.h b/shortcut-fe/shortcut-fe/sfe_cm.h deleted file mode 100644 index 23cbde859..000000000 --- a/shortcut-fe/shortcut-fe/sfe_cm.h +++ /dev/null @@ -1,259 +0,0 @@ -/* - * sfe_cm.h - * Shortcut forwarding engine. - * - * Copyright (c) 2013-2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * connection flags. 
- */ -#define SFE_CREATE_FLAG_NO_SEQ_CHECK BIT(0) - /* Indicates that we should not check sequence numbers */ -#define SFE_CREATE_FLAG_REMARK_PRIORITY BIT(1) - /* Indicates that we should remark priority of skb */ -#define SFE_CREATE_FLAG_REMARK_DSCP BIT(2) - /* Indicates that we should remark DSCP of packet */ - -/* - * IPv6 address structure - */ -struct sfe_ipv6_addr { - __be32 addr[4]; -}; - -typedef union { - __be32 ip; - struct sfe_ipv6_addr ip6[1]; -} sfe_ip_addr_t; - -/* - * connection creation structure. - */ -struct sfe_connection_create { - int protocol; - struct net_device *src_dev; - struct net_device *dest_dev; - u32 flags; - u32 src_mtu; - u32 dest_mtu; - sfe_ip_addr_t src_ip; - sfe_ip_addr_t src_ip_xlate; - sfe_ip_addr_t dest_ip; - sfe_ip_addr_t dest_ip_xlate; - __be16 src_port; - __be16 src_port_xlate; - __be16 dest_port; - __be16 dest_port_xlate; - u8 src_mac[ETH_ALEN]; - u8 src_mac_xlate[ETH_ALEN]; - u8 dest_mac[ETH_ALEN]; - u8 dest_mac_xlate[ETH_ALEN]; - u8 src_td_window_scale; - u32 src_td_max_window; - u32 src_td_end; - u32 src_td_max_end; - u8 dest_td_window_scale; - u32 dest_td_max_window; - u32 dest_td_end; - u32 dest_td_max_end; - u32 mark; -#ifdef CONFIG_XFRM - u32 original_accel; - u32 reply_accel; -#endif - u32 src_priority; - u32 dest_priority; - u32 src_dscp; - u32 dest_dscp; -}; - -/* - * connection destruction structure. - */ -struct sfe_connection_destroy { - int protocol; - sfe_ip_addr_t src_ip; - sfe_ip_addr_t dest_ip; - __be16 src_port; - __be16 dest_port; -}; - -typedef enum sfe_sync_reason { - SFE_SYNC_REASON_STATS, /* Sync is to synchronize stats */ - SFE_SYNC_REASON_FLUSH, /* Sync is to flush a entry */ - SFE_SYNC_REASON_DESTROY /* Sync is to destroy a entry(requested by connection manager) */ -} sfe_sync_reason_t; - -/* - * Structure used to sync connection stats/state back within the system. - * - * NOTE: The addresses here are NON-NAT addresses, i.e. the true endpoint addressing. 
- * 'src' is the creator of the connection. - */ -struct sfe_connection_sync { - struct net_device *src_dev; - struct net_device *dest_dev; - int is_v6; /* Is it for ipv6? */ - int protocol; /* IP protocol number (IPPROTO_...) */ - sfe_ip_addr_t src_ip; /* Non-NAT source address, i.e. the creator of the connection */ - sfe_ip_addr_t src_ip_xlate; /* NATed source address */ - __be16 src_port; /* Non-NAT source port */ - __be16 src_port_xlate; /* NATed source port */ - sfe_ip_addr_t dest_ip; /* Non-NAT destination address, i.e. to whom the connection was created */ - sfe_ip_addr_t dest_ip_xlate; /* NATed destination address */ - __be16 dest_port; /* Non-NAT destination port */ - __be16 dest_port_xlate; /* NATed destination port */ - u32 src_td_max_window; - u32 src_td_end; - u32 src_td_max_end; - u64 src_packet_count; - u64 src_byte_count; - u32 src_new_packet_count; - u32 src_new_byte_count; - u32 dest_td_max_window; - u32 dest_td_end; - u32 dest_td_max_end; - u64 dest_packet_count; - u64 dest_byte_count; - u32 dest_new_packet_count; - u32 dest_new_byte_count; - u32 reason; /* reason for stats sync message, i.e. destroy, flush, period sync */ - u64 delta_jiffies; /* Time to be added to the current timeout to keep the connection alive */ -}; - -/* - * connection mark structure - */ -struct sfe_connection_mark { - int protocol; - sfe_ip_addr_t src_ip; - sfe_ip_addr_t dest_ip; - __be16 src_port; - __be16 dest_port; - u32 mark; -}; - -/* - * Expose the hook for the receive processing. - */ -extern int (*athrs_fast_nat_recv)(struct sk_buff *skb); - -/* - * Expose what should be a static flag in the TCP connection tracker. - */ -extern int nf_ct_tcp_no_window_check; - -/* - * This callback will be called in a timer - * at 100 times per second to sync stats back to - * Linux connection track. - * - * A RCU lock is taken to prevent this callback - * from unregistering. 
- */ -typedef void (*sfe_sync_rule_callback_t)(struct sfe_connection_sync *); - -/* - * IPv4 APIs used by connection manager - */ -int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb); -int sfe_ipv4_create_rule(struct sfe_connection_create *sic); -void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid); -void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev); -void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t callback); -void sfe_ipv4_update_rule(struct sfe_connection_create *sic); -void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark); - -#ifdef SFE_SUPPORT_IPV6 -/* - * IPv6 APIs used by connection manager - */ -int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb); -int sfe_ipv6_create_rule(struct sfe_connection_create *sic); -void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid); -void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev); -void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback); -void sfe_ipv6_update_rule(struct sfe_connection_create *sic); -void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark); -#else -static inline int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb) -{ - return 0; -} - -static inline int sfe_ipv6_create_rule(struct sfe_connection_create *sic) -{ - return 0; -} - -static inline void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid) -{ - return; -} - -static inline void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev) -{ - return; -} - -static inline void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback) -{ - return; -} - -static inline void sfe_ipv6_update_rule(struct sfe_connection_create *sic) -{ - return; -} - -static inline void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark) -{ - return; -} -#endif - -/* - * sfe_ipv6_addr_equal() - * compare ipv6 address - * - * return: 1, equal; 0, no equal - */ -static inline int sfe_ipv6_addr_equal(struct sfe_ipv6_addr *a, - struct 
sfe_ipv6_addr *b) -{ - return a->addr[0] == b->addr[0] && - a->addr[1] == b->addr[1] && - a->addr[2] == b->addr[2] && - a->addr[3] == b->addr[3]; -} - -/* - * sfe_ipv4_addr_equal() - * compare ipv4 address - * - * return: 1, equal; 0, no equal - */ -#define sfe_ipv4_addr_equal(a, b) ((u32)(a) == (u32)(b)) - -/* - * sfe_addr_equal() - * compare ipv4 or ipv6 address - * - * return: 1, equal; 0, no equal - */ -static inline int sfe_addr_equal(sfe_ip_addr_t *a, - sfe_ip_addr_t *b, int is_v4) -{ - return is_v4 ? sfe_ipv4_addr_equal(a->ip, b->ip) : sfe_ipv6_addr_equal(a->ip6, b->ip6); -} diff --git a/shortcut-fe/shortcut-fe/sfe_ipv4.c b/shortcut-fe/shortcut-fe/sfe_ipv4.c deleted file mode 100644 index 9f7ebd1c9..000000000 --- a/shortcut-fe/shortcut-fe/sfe_ipv4.c +++ /dev/null @@ -1,3610 +0,0 @@ -/* - * sfe_ipv4.c - * Shortcut forwarding engine - IPv4 edition. - * - * Copyright (c) 2013-2016, 2019-2020 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "sfe.h" -#include "sfe_cm.h" - -/* - * By default Linux IP header and transport layer header structures are - * unpacked, assuming that such headers should be 32-bit aligned. 
- * Unfortunately some wireless adaptors can't cope with this requirement and - * some CPUs can't handle misaligned accesses. For those platforms we - * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed. - * When we do this the compiler will generate slightly worse code than for the - * aligned case (on most platforms) but will be much quicker than fixing - * things up in an unaligned trap handler. - */ -#define SFE_IPV4_UNALIGNED_IP_HEADER 1 -#if SFE_IPV4_UNALIGNED_IP_HEADER -#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed)) -#else -#define SFE_IPV4_UNALIGNED_STRUCT -#endif - -/* - * An Ethernet header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV4_UNALIGNED_STRUCT) - */ -struct sfe_ipv4_eth_hdr { - __be16 h_dest[ETH_ALEN / 2]; - __be16 h_source[ETH_ALEN / 2]; - __be16 h_proto; -} SFE_IPV4_UNALIGNED_STRUCT; - -#define SFE_IPV4_DSCP_MASK 0x3 -#define SFE_IPV4_DSCP_SHIFT 2 - -/* - * An IPv4 header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV4_UNALIGNED_STRUCT) - */ -struct sfe_ipv4_ip_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 ihl:4, - version:4; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u8 version:4, - ihl:4; -#else -#error "Please fix " -#endif - __u8 tos; - __be16 tot_len; - __be16 id; - __be16 frag_off; - __u8 ttl; - __u8 protocol; - __sum16 check; - __be32 saddr; - __be32 daddr; - - /* - * The options start here. 
- */ -} SFE_IPV4_UNALIGNED_STRUCT; - -/* - * A UDP header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV4_UNALIGNED_STRUCT) - */ -struct sfe_ipv4_udp_hdr { - __be16 source; - __be16 dest; - __be16 len; - __sum16 check; -} SFE_IPV4_UNALIGNED_STRUCT; - -/* - * A TCP header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV4_UNALIGNED_STRUCT) - */ -struct sfe_ipv4_tcp_hdr { - __be16 source; - __be16 dest; - __be32 seq; - __be32 ack_seq; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 res1:4, - doff:4, - fin:1, - syn:1, - rst:1, - psh:1, - ack:1, - urg:1, - ece:1, - cwr:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 doff:4, - res1:4, - cwr:1, - ece:1, - urg:1, - ack:1, - psh:1, - rst:1, - syn:1, - fin:1; -#else -#error "Adjust your defines" -#endif - __be16 window; - __sum16 check; - __be16 urg_ptr; -} SFE_IPV4_UNALIGNED_STRUCT; - -/* - * Specifies the lower bound on ACK numbers carried in the TCP header - */ -#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520 - -/* - * IPv4 TCP connection match additional data. - */ -struct sfe_ipv4_tcp_connection_match { - u8 win_scale; /* Window scale */ - u32 max_win; /* Maximum window size seen */ - u32 end; /* Sequence number of the next byte to send (seq + segment length) */ - u32 max_end; /* Sequence number of the last byte to ack */ -}; - -/* - * Bit flags for IPv4 connection matching entry. 
- */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0) - /* Perform source translation */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1) - /* Perform destination translation */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2) - /* Ignore TCP sequence numbers */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3) - /* Fast Ethernet header write */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4) - /* Fast Ethernet header write */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5) - /* remark priority of SKB */ -#define SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6) - /* remark DSCP of packet */ - -/* - * IPv4 connection matching structure. - */ -struct sfe_ipv4_connection_match { - /* - * References to other objects. - */ - struct sfe_ipv4_connection_match *next; - struct sfe_ipv4_connection_match *prev; - struct sfe_ipv4_connection *connection; - struct sfe_ipv4_connection_match *counter_match; - /* Matches the flow in the opposite direction as the one in *connection */ - struct sfe_ipv4_connection_match *active_next; - struct sfe_ipv4_connection_match *active_prev; - bool active; /* Flag to indicate if we're on the active list */ - - /* - * Characteristics that identify flows that match this rule. - */ - struct net_device *match_dev; /* Network device */ - u8 match_protocol; /* Protocol */ - __be32 match_src_ip; /* Source IP address */ - __be32 match_dest_ip; /* Destination IP address */ - __be16 match_src_port; /* Source port/connection ident */ - __be16 match_dest_port; /* Destination port/connection ident */ - - /* - * Control the operations of the match. - */ - u32 flags; /* Bit flags */ -#ifdef CONFIG_NF_FLOW_COOKIE - u32 flow_cookie; /* used flow cookie, for debug */ -#endif -#ifdef CONFIG_XFRM - u32 flow_accel; /* The flow accelerated or not */ -#endif - - /* - * Connection state that we track once we match. 
- */ - union { /* Protocol-specific state */ - struct sfe_ipv4_tcp_connection_match tcp; - } protocol_state; - /* - * Stats recorded in a sync period. These stats will be added to - * rx_packet_count64/rx_byte_count64 after a sync period. - */ - u32 rx_packet_count; - u32 rx_byte_count; - - /* - * Packet translation information. - */ - __be32 xlate_src_ip; /* Address after source translation */ - __be16 xlate_src_port; /* Port/connection ident after source translation */ - u16 xlate_src_csum_adjustment; - /* Transport layer checksum adjustment after source translation */ - u16 xlate_src_partial_csum_adjustment; - /* Transport layer pseudo header checksum adjustment after source translation */ - - __be32 xlate_dest_ip; /* Address after destination translation */ - __be16 xlate_dest_port; /* Port/connection ident after destination translation */ - u16 xlate_dest_csum_adjustment; - /* Transport layer checksum adjustment after destination translation */ - u16 xlate_dest_partial_csum_adjustment; - /* Transport layer pseudo header checksum adjustment after destination translation */ - - /* - * QoS information - */ - u32 priority; - u32 dscp; - - /* - * Packet transmit information. - */ - struct net_device *xmit_dev; /* Network device on which to transmit */ - unsigned short int xmit_dev_mtu; - /* Interface MTU */ - u16 xmit_dest_mac[ETH_ALEN / 2]; - /* Destination MAC address to use when forwarding */ - u16 xmit_src_mac[ETH_ALEN / 2]; - /* Source MAC address to use when forwarding */ - - /* - * Summary stats. - */ - u64 rx_packet_count64; - u64 rx_byte_count64; -}; - -/* - * Per-connection data structure. 
- */ -struct sfe_ipv4_connection { - struct sfe_ipv4_connection *next; - /* Pointer to the next entry in a hash chain */ - struct sfe_ipv4_connection *prev; - /* Pointer to the previous entry in a hash chain */ - int protocol; /* IP protocol number */ - __be32 src_ip; /* Src IP addr pre-translation */ - __be32 src_ip_xlate; /* Src IP addr post-translation */ - __be32 dest_ip; /* Dest IP addr pre-translation */ - __be32 dest_ip_xlate; /* Dest IP addr post-translation */ - __be16 src_port; /* Src port pre-translation */ - __be16 src_port_xlate; /* Src port post-translation */ - __be16 dest_port; /* Dest port pre-translation */ - __be16 dest_port_xlate; /* Dest port post-translation */ - struct sfe_ipv4_connection_match *original_match; - /* Original direction matching structure */ - struct net_device *original_dev; - /* Original direction source device */ - struct sfe_ipv4_connection_match *reply_match; - /* Reply direction matching structure */ - struct net_device *reply_dev; /* Reply direction source device */ - u64 last_sync_jiffies; /* Jiffies count for the last sync */ - struct sfe_ipv4_connection *all_connections_next; - /* Pointer to the next entry in the list of all connections */ - struct sfe_ipv4_connection *all_connections_prev; - /* Pointer to the previous entry in the list of all connections */ - u32 mark; /* mark for outgoing packet */ - u32 debug_read_seq; /* sequence number for debug dump */ -}; - -/* - * IPv4 connections and hash table size information. 
- */ -#define SFE_IPV4_CONNECTION_HASH_SHIFT 12 -#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT) -#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1) - -#ifdef CONFIG_NF_FLOW_COOKIE -#define SFE_FLOW_COOKIE_SIZE 2048 -#define SFE_FLOW_COOKIE_MASK 0x7ff - -struct sfe_flow_cookie_entry { - struct sfe_ipv4_connection_match *match; - unsigned long last_clean_time; -}; -#endif - -enum sfe_ipv4_exception_events { - SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION, - SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT, - SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL, - SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION, - SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS, - SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS, - SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT, - SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL, - SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION, - SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS, - SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE, - SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS, - SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK, - SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS, - SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE, - SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE, - SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL, - SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION, - SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION, - 
SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH, - SFE_IPV4_EXCEPTION_EVENT_NON_V4, - SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT, - SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE, - SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL, - SFE_IPV4_EXCEPTION_EVENT_LAST -}; - -static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = { - "UDP_HEADER_INCOMPLETE", - "UDP_NO_CONNECTION", - "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT", - "UDP_SMALL_TTL", - "UDP_NEEDS_FRAGMENTATION", - "TCP_HEADER_INCOMPLETE", - "TCP_NO_CONNECTION_SLOW_FLAGS", - "TCP_NO_CONNECTION_FAST_FLAGS", - "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT", - "TCP_SMALL_TTL", - "TCP_NEEDS_FRAGMENTATION", - "TCP_FLAGS", - "TCP_SEQ_EXCEEDS_RIGHT_EDGE", - "TCP_SMALL_DATA_OFFS", - "TCP_BAD_SACK", - "TCP_BIG_DATA_OFFS", - "TCP_SEQ_BEFORE_LEFT_EDGE", - "TCP_ACK_EXCEEDS_RIGHT_EDGE", - "TCP_ACK_BEFORE_LEFT_EDGE", - "ICMP_HEADER_INCOMPLETE", - "ICMP_UNHANDLED_TYPE", - "ICMP_IPV4_HEADER_INCOMPLETE", - "ICMP_IPV4_NON_V4", - "ICMP_IPV4_IP_OPTIONS_INCOMPLETE", - "ICMP_IPV4_UDP_HEADER_INCOMPLETE", - "ICMP_IPV4_TCP_HEADER_INCOMPLETE", - "ICMP_IPV4_UNHANDLED_PROTOCOL", - "ICMP_NO_CONNECTION", - "ICMP_FLUSHED_CONNECTION", - "HEADER_INCOMPLETE", - "BAD_TOTAL_LENGTH", - "NON_V4", - "NON_INITIAL_FRAGMENT", - "DATAGRAM_INCOMPLETE", - "IP_OPTIONS_INCOMPLETE", - "UNHANDLED_PROTOCOL" -}; - -/* - * Per-module structure. 
- */ -struct sfe_ipv4 { - spinlock_t lock; /* Lock for SMP correctness */ - struct sfe_ipv4_connection_match *active_head; - /* Head of the list of recently active connections */ - struct sfe_ipv4_connection_match *active_tail; - /* Tail of the list of recently active connections */ - struct sfe_ipv4_connection *all_connections_head; - /* Head of the list of all connections */ - struct sfe_ipv4_connection *all_connections_tail; - /* Tail of the list of all connections */ - unsigned int num_connections; /* Number of connections */ - struct timer_list timer; /* Timer used for periodic sync ops */ - sfe_sync_rule_callback_t __rcu sync_rule_callback; - /* Callback function registered by a connection manager for stats syncing */ - struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE]; - /* Connection hash table */ - struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE]; - /* Connection match hash table */ -#ifdef CONFIG_NF_FLOW_COOKIE - struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE]; - /* flow cookie table*/ - flow_cookie_set_func_t flow_cookie_set_func; - /* function used to configure flow cookie in hardware*/ - int flow_cookie_enable; - /* Enable/disable flow cookie at runtime */ -#endif - - /* - * Stats recorded in a sync period. These stats will be added to - * connection_xxx64 after a sync period. 
- */ - u32 connection_create_requests; - /* Number of IPv4 connection create requests */ - u32 connection_create_collisions; - /* Number of IPv4 connection create requests that collided with existing hash table entries */ - u32 connection_destroy_requests; - /* Number of IPv4 connection destroy requests */ - u32 connection_destroy_misses; - /* Number of IPv4 connection destroy requests that missed our hash table */ - u32 connection_match_hash_hits; - /* Number of IPv4 connection match hash hits */ - u32 connection_match_hash_reorders; - /* Number of IPv4 connection match hash reorders */ - u32 connection_flushes; /* Number of IPv4 connection flushes */ - u32 packets_forwarded; /* Number of IPv4 packets forwarded */ - u32 packets_not_forwarded; /* Number of IPv4 packets not forwarded */ - u32 exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST]; - - /* - * Summary statistics. - */ - u64 connection_create_requests64; - /* Number of IPv4 connection create requests */ - u64 connection_create_collisions64; - /* Number of IPv4 connection create requests that collided with existing hash table entries */ - u64 connection_destroy_requests64; - /* Number of IPv4 connection destroy requests */ - u64 connection_destroy_misses64; - /* Number of IPv4 connection destroy requests that missed our hash table */ - u64 connection_match_hash_hits64; - /* Number of IPv4 connection match hash hits */ - u64 connection_match_hash_reorders64; - /* Number of IPv4 connection match hash reorders */ - u64 connection_flushes64; /* Number of IPv4 connection flushes */ - u64 packets_forwarded64; /* Number of IPv4 packets forwarded */ - u64 packets_not_forwarded64; - /* Number of IPv4 packets not forwarded */ - u64 exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST]; - - /* - * Control state. 
- */ - struct kobject *sys_sfe_ipv4; /* sysfs linkage */ - int debug_dev; /* Major number of the debug char device */ - u32 debug_read_seq; /* sequence number for debug dump */ -}; - -/* - * Enumeration of the XML output. - */ -enum sfe_ipv4_debug_xml_states { - SFE_IPV4_DEBUG_XML_STATE_START, - SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START, - SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION, - SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END, - SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START, - SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION, - SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END, - SFE_IPV4_DEBUG_XML_STATE_STATS, - SFE_IPV4_DEBUG_XML_STATE_END, - SFE_IPV4_DEBUG_XML_STATE_DONE -}; - -/* - * XML write state. - */ -struct sfe_ipv4_debug_xml_write_state { - enum sfe_ipv4_debug_xml_states state; - /* XML output file state machine state */ - int iter_exception; /* Next exception iterator */ -}; - -typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws); - -static struct sfe_ipv4 __si; - -/* - * sfe_ipv4_gen_ip_csum() - * Generate the IP checksum for an IPv4 header. - * - * Note that this function assumes that we have only 20 bytes of IP header. - */ -static inline u16 sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph) -{ - u32 sum; - u16 *i = (u16 *)iph; - - iph->check = 0; - - /* - * Generate the sum. - */ - sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9]; - - /* - * Fold it to ones-complement form. - */ - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - - return (u16)sum ^ 0xffff; -} - -/* - * sfe_ipv4_get_connection_match_hash() - * Generate the hash used in connection match lookups. 
- */ -static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol, - __be32 src_ip, __be16 src_port, - __be32 dest_ip, __be16 dest_port) -{ - size_t dev_addr = (size_t)dev; - u32 hash = ((u32)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port); - return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK; -} - -/* - * sfe_ipv4_find_sfe_ipv4_connection_match() - * Get the IPv4 flow match info that corresponds to a particular 5-tuple. - * - * On entry we must be holding the lock that protects the hash table. - */ -static struct sfe_ipv4_connection_match * -sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol, - __be32 src_ip, __be16 src_port, - __be32 dest_ip, __be16 dest_port) -{ - struct sfe_ipv4_connection_match *cm; - struct sfe_ipv4_connection_match *head; - unsigned int conn_match_idx; - - conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port); - cm = si->conn_match_hash[conn_match_idx]; - - /* - * If we don't have anything in this chain then bail. - */ - if (unlikely(!cm)) { - return NULL; - } - - /* - * Hopefully the first entry is the one we want. - */ - if ((cm->match_src_port == src_port) - && (cm->match_dest_port == dest_port) - && (cm->match_src_ip == src_ip) - && (cm->match_dest_ip == dest_ip) - && (cm->match_protocol == protocol) - && (cm->match_dev == dev)) { - si->connection_match_hash_hits++; - return cm; - } - - /* - * Unfortunately we didn't find it at head, so we search it in chain and - * move matching entry to the top of the hash chain. We presume that this - * will be reused again very quickly. 
- */ - head = cm; - do { - cm = cm->next; - } while (cm && (cm->match_src_port != src_port - || cm->match_dest_port != dest_port - || cm->match_src_ip != src_ip - || cm->match_dest_ip != dest_ip - || cm->match_protocol != protocol - || cm->match_dev != dev)); - - /* - * Not found then we're done. - */ - if (unlikely(!cm)) { - return NULL; - } - - /* - * We found a match so move it. - */ - if (cm->next) { - cm->next->prev = cm->prev; - } - cm->prev->next = cm->next; - cm->prev = NULL; - cm->next = head; - head->prev = cm; - si->conn_match_hash[conn_match_idx] = cm; - si->connection_match_hash_reorders++; - - return cm; -} - -/* - * sfe_ipv4_connection_match_update_summary_stats() - * Update the summary stats for a connection match entry. - */ -static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm) -{ - cm->rx_packet_count64 += cm->rx_packet_count; - cm->rx_packet_count = 0; - cm->rx_byte_count64 += cm->rx_byte_count; - cm->rx_byte_count = 0; -} - -/* - * sfe_ipv4_connection_match_compute_translations() - * Compute port and address translations for a connection match entry. - */ -static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm) -{ - /* - * Before we insert the entry look to see if this is tagged as doing address - * translations. If it is then work out the adjustment that we need to apply - * to the transport checksum. - */ - if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { - /* - * Precompute an incremental checksum adjustment so we can - * edit packets in this stream very quickly. The algorithm is from RFC1624. 
- */ - u16 src_ip_hi = cm->match_src_ip >> 16; - u16 src_ip_lo = cm->match_src_ip & 0xffff; - u32 xlate_src_ip = ~cm->xlate_src_ip; - u16 xlate_src_ip_hi = xlate_src_ip >> 16; - u16 xlate_src_ip_lo = xlate_src_ip & 0xffff; - u16 xlate_src_port = ~cm->xlate_src_port; - u32 adj; - - /* - * When we compute this fold it down to a 16-bit offset - * as that way we can avoid having to do a double - * folding of the twos-complement result because the - * addition of 2 16-bit values cannot cause a double - * wrap-around! - */ - adj = src_ip_hi + src_ip_lo + cm->match_src_port - + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port; - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_src_csum_adjustment = (u16)adj; - - } - - if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { - /* - * Precompute an incremental checksum adjustment so we can - * edit packets in this stream very quickly. The algorithm is from RFC1624. - */ - u16 dest_ip_hi = cm->match_dest_ip >> 16; - u16 dest_ip_lo = cm->match_dest_ip & 0xffff; - u32 xlate_dest_ip = ~cm->xlate_dest_ip; - u16 xlate_dest_ip_hi = xlate_dest_ip >> 16; - u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff; - u16 xlate_dest_port = ~cm->xlate_dest_port; - u32 adj; - - /* - * When we compute this fold it down to a 16-bit offset - * as that way we can avoid having to do a double - * folding of the twos-complement result because the - * addition of 2 16-bit values cannot cause a double - * wrap-around! 
- */ - adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port - + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port; - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_dest_csum_adjustment = (u16)adj; - } - - if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { - u32 adj = ~cm->match_src_ip + cm->xlate_src_ip; - if (adj < cm->xlate_src_ip) { - adj++; - } - - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_src_partial_csum_adjustment = (u16)adj; - } - - if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { - u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip; - if (adj < cm->xlate_dest_ip) { - adj++; - } - - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_dest_partial_csum_adjustment = (u16)adj; - } - -} - -/* - * sfe_ipv4_update_summary_stats() - * Update the summary stats. - */ -static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si) -{ - int i; - - si->connection_create_requests64 += si->connection_create_requests; - si->connection_create_requests = 0; - si->connection_create_collisions64 += si->connection_create_collisions; - si->connection_create_collisions = 0; - si->connection_destroy_requests64 += si->connection_destroy_requests; - si->connection_destroy_requests = 0; - si->connection_destroy_misses64 += si->connection_destroy_misses; - si->connection_destroy_misses = 0; - si->connection_match_hash_hits64 += si->connection_match_hash_hits; - si->connection_match_hash_hits = 0; - si->connection_match_hash_reorders64 += si->connection_match_hash_reorders; - si->connection_match_hash_reorders = 0; - si->connection_flushes64 += si->connection_flushes; - si->connection_flushes = 0; - si->packets_forwarded64 += si->packets_forwarded; - si->packets_forwarded = 0; - si->packets_not_forwarded64 += si->packets_not_forwarded; - si->packets_not_forwarded = 0; - - for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) { - 
si->exception_events64[i] += si->exception_events[i]; - si->exception_events[i] = 0; - } -} - -/* - * sfe_ipv4_insert_sfe_ipv4_connection_match() - * Insert a connection match into the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, - struct sfe_ipv4_connection_match *cm) -{ - struct sfe_ipv4_connection_match **hash_head; - struct sfe_ipv4_connection_match *prev_head; - unsigned int conn_match_idx - = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol, - cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port); - - hash_head = &si->conn_match_hash[conn_match_idx]; - prev_head = *hash_head; - cm->prev = NULL; - if (prev_head) { - prev_head->prev = cm; - } - - cm->next = prev_head; - *hash_head = cm; - -#ifdef CONFIG_NF_FLOW_COOKIE - if (!si->flow_cookie_enable) - return; - - /* - * Configure hardware to put a flow cookie in packet of this flow, - * then we can accelerate the lookup process when we received this packet. - */ - for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { - struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; - - if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) { - flow_cookie_set_func_t func; - - rcu_read_lock(); - func = rcu_dereference(si->flow_cookie_set_func); - if (func) { - if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) { - entry->match = cm; - cm->flow_cookie = conn_match_idx; - } - } - rcu_read_unlock(); - - break; - } - } -#endif -} - -/* - * sfe_ipv4_remove_sfe_ipv4_connection_match() - * Remove a connection match object from the hash. - * - * On entry we must be holding the lock that protects the hash table. 
- */ -static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm) -{ -#ifdef CONFIG_NF_FLOW_COOKIE - if (si->flow_cookie_enable) { - /* - * Tell hardware that we no longer need a flow cookie in packet of this flow - */ - unsigned int conn_match_idx; - - for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { - struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; - - if (cm == entry->match) { - flow_cookie_set_func_t func; - - rcu_read_lock(); - func = rcu_dereference(si->flow_cookie_set_func); - if (func) { - func(cm->match_protocol, cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port, 0); - } - rcu_read_unlock(); - - cm->flow_cookie = 0; - entry->match = NULL; - entry->last_clean_time = jiffies; - break; - } - } - } -#endif - - /* - * Unlink the connection match entry from the hash. - */ - if (cm->prev) { - cm->prev->next = cm->next; - } else { - unsigned int conn_match_idx - = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol, - cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port); - si->conn_match_hash[conn_match_idx] = cm->next; - } - - if (cm->next) { - cm->next->prev = cm->prev; - } - - /* - * If the connection match entry is in the active list remove it. - */ - if (cm->active) { - if (likely(cm->active_prev)) { - cm->active_prev->active_next = cm->active_next; - } else { - si->active_head = cm->active_next; - } - - if (likely(cm->active_next)) { - cm->active_next->active_prev = cm->active_prev; - } else { - si->active_tail = cm->active_prev; - } - } -} - -/* - * sfe_ipv4_get_connection_hash() - * Generate the hash used in connection lookups. 
- */ -static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port, - __be32 dest_ip, __be16 dest_port) -{ - u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port); - return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK; -} - -/* - * sfe_ipv4_find_sfe_ipv4_connection() - * Get the IPv4 connection info that corresponds to a particular 5-tuple. - * - * On entry we must be holding the lock that protects the hash table. - */ -static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, u32 protocol, - __be32 src_ip, __be16 src_port, - __be32 dest_ip, __be16 dest_port) -{ - struct sfe_ipv4_connection *c; - unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port); - c = si->conn_hash[conn_idx]; - - /* - * If we don't have anything in this chain then bale. - */ - if (unlikely(!c)) { - return NULL; - } - - /* - * Hopefully the first entry is the one we want. - */ - if ((c->src_port == src_port) - && (c->dest_port == dest_port) - && (c->src_ip == src_ip) - && (c->dest_ip == dest_ip) - && (c->protocol == protocol)) { - return c; - } - - /* - * Unfortunately we didn't find it at head, so we search it in chain. 
- */ - do { - c = c->next; - } while (c && (c->src_port != src_port - || c->dest_port != dest_port - || c->src_ip != src_ip - || c->dest_ip != dest_ip - || c->protocol != protocol)); - - /* - * Will need connection entry for next create/destroy metadata, - * So no need to re-order entry for these requests - */ - return c; -} - -/* - * sfe_ipv4_mark_rule() - * Updates the mark for a current offloaded connection - * - * Will take hash lock upon entry - */ -void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark) -{ - struct sfe_ipv4 *si = &__si; - struct sfe_ipv4_connection *c; - - spin_lock_bh(&si->lock); - c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol, - mark->src_ip.ip, mark->src_port, - mark->dest_ip.ip, mark->dest_port); - if (c) { - WARN_ON((0 != c->mark) && (0 == mark->mark)); - c->mark = mark->mark; - } - spin_unlock_bh(&si->lock); - - if (c) { - DEBUG_TRACE("Matching connection found for mark, " - "setting from %08x to %08x\n", - c->mark, mark->mark); - } -} - -/* - * sfe_ipv4_insert_sfe_ipv4_connection() - * Insert a connection into the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) -{ - struct sfe_ipv4_connection **hash_head; - struct sfe_ipv4_connection *prev_head; - unsigned int conn_idx; - - /* - * Insert entry into the connection hash. - */ - conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, - c->dest_ip, c->dest_port); - hash_head = &si->conn_hash[conn_idx]; - prev_head = *hash_head; - c->prev = NULL; - if (prev_head) { - prev_head->prev = c; - } - - c->next = prev_head; - *hash_head = c; - - /* - * Insert entry into the "all connections" list. 
- */ - if (si->all_connections_tail) { - c->all_connections_prev = si->all_connections_tail; - si->all_connections_tail->all_connections_next = c; - } else { - c->all_connections_prev = NULL; - si->all_connections_head = c; - } - - si->all_connections_tail = c; - c->all_connections_next = NULL; - si->num_connections++; - - /* - * Insert the connection match objects too. - */ - sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match); - sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match); -} - -/* - * sfe_ipv4_remove_sfe_ipv4_connection() - * Remove a sfe_ipv4_connection object from the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) -{ - /* - * Remove the connection match objects. - */ - sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match); - sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match); - - /* - * Unlink the connection. - */ - if (c->prev) { - c->prev->next = c->next; - } else { - unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, - c->dest_ip, c->dest_port); - si->conn_hash[conn_idx] = c->next; - } - - if (c->next) { - c->next->prev = c->prev; - } - - /* - * Unlink connection from all_connections list - */ - if (c->all_connections_prev) { - c->all_connections_prev->all_connections_next = c->all_connections_next; - } else { - si->all_connections_head = c->all_connections_next; - } - - if (c->all_connections_next) { - c->all_connections_next->all_connections_prev = c->all_connections_prev; - } else { - si->all_connections_tail = c->all_connections_prev; - } - - si->num_connections--; -} - -/* - * sfe_ipv4_sync_sfe_ipv4_connection() - * Sync a connection. - * - * On entry to this function we expect that the lock for the connection is either - * already held or isn't required. 
- */ -static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c, - struct sfe_connection_sync *sis, sfe_sync_reason_t reason, - u64 now_jiffies) -{ - struct sfe_ipv4_connection_match *original_cm; - struct sfe_ipv4_connection_match *reply_cm; - - /* - * Fill in the update message. - */ - sis->is_v6 = 0; - sis->protocol = c->protocol; - sis->src_ip.ip = c->src_ip; - sis->src_ip_xlate.ip = c->src_ip_xlate; - sis->dest_ip.ip = c->dest_ip; - sis->dest_ip_xlate.ip = c->dest_ip_xlate; - sis->src_port = c->src_port; - sis->src_port_xlate = c->src_port_xlate; - sis->dest_port = c->dest_port; - sis->dest_port_xlate = c->dest_port_xlate; - - original_cm = c->original_match; - reply_cm = c->reply_match; - sis->src_td_max_window = original_cm->protocol_state.tcp.max_win; - sis->src_td_end = original_cm->protocol_state.tcp.end; - sis->src_td_max_end = original_cm->protocol_state.tcp.max_end; - sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win; - sis->dest_td_end = reply_cm->protocol_state.tcp.end; - sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end; - - sis->src_new_packet_count = original_cm->rx_packet_count; - sis->src_new_byte_count = original_cm->rx_byte_count; - sis->dest_new_packet_count = reply_cm->rx_packet_count; - sis->dest_new_byte_count = reply_cm->rx_byte_count; - - sfe_ipv4_connection_match_update_summary_stats(original_cm); - sfe_ipv4_connection_match_update_summary_stats(reply_cm); - - sis->src_dev = original_cm->match_dev; - sis->src_packet_count = original_cm->rx_packet_count64; - sis->src_byte_count = original_cm->rx_byte_count64; - - sis->dest_dev = reply_cm->match_dev; - sis->dest_packet_count = reply_cm->rx_packet_count64; - sis->dest_byte_count = reply_cm->rx_byte_count64; - - sis->reason = reason; - - /* - * Get the time increment since our last sync. 
- */ - sis->delta_jiffies = now_jiffies - c->last_sync_jiffies; - c->last_sync_jiffies = now_jiffies; -} - -/* - * sfe_ipv4_flush_sfe_ipv4_connection() - * Flush a connection and free all associated resources. - * - * We need to be called with bottom halves disabled locally as we need to acquire - * the connection hash lock and release it again. In general we're actually called - * from within a BH and so we're fine, but we're also called when connections are - * torn down. - */ -static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si, - struct sfe_ipv4_connection *c, - sfe_sync_reason_t reason) -{ - struct sfe_connection_sync sis; - u64 now_jiffies; - sfe_sync_rule_callback_t sync_rule_callback; - - rcu_read_lock(); - spin_lock_bh(&si->lock); - si->connection_flushes++; - sync_rule_callback = rcu_dereference(si->sync_rule_callback); - spin_unlock_bh(&si->lock); - - if (sync_rule_callback) { - /* - * Generate a sync message and then sync. - */ - now_jiffies = get_jiffies_64(); - sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies); - sync_rule_callback(&sis); - } - - rcu_read_unlock(); - - /* - * Release our hold of the source and dest devices and free the memory - * for our connection objects. - */ - dev_put(c->original_dev); - dev_put(c->reply_dev); - kfree(c->original_match); - kfree(c->reply_match); - kfree(c); -} - -/* - * sfe_ipv4_recv_udp() - * Handle UDP packet receives and forwarding. - */ -static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find) -{ - struct sfe_ipv4_udp_hdr *udph; - __be32 src_ip; - __be32 dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv4_connection_match *cm; - u8 ttl; - struct net_device *xmit_dev; - - /* - * Is our packet too short to contain a valid UDP header? 
- */ - if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for UDP header\n"); - return 0; - } - - /* - * Read the IP address and port information. Read the IP header data first - * because we've almost certainly got that in the cache. We may not yet have - * the UDP header cached though so allow more time for any prefetching. - */ - src_ip = iph->saddr; - dest_ip = iph->daddr; - - udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl); - src_port = udph->source; - dest_port = udph->dest; - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. - */ -#ifdef CONFIG_NF_FLOW_COOKIE - cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match; - if (unlikely(!cm)) { - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port); - } -#else - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port); -#endif - if (unlikely(!cm)) { - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found\n"); - return 0; - } - - /* - * If our packet has beern marked as "flush on find" we can't actually - * forward it in the fast path, but now that we've found an associated - * connection we can flush that out before we process the packet. 
- */ - if (unlikely(flush_on_find)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("flush on find\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - -#ifdef CONFIG_XFRM - /* - * We can't accelerate the flow on this direction, just let it go - * through the slow path. - */ - if (unlikely(!cm->flow_accel)) { - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - return 0; - } -#endif - - /* - * Does our TTL allow forwarding? - */ - ttl = iph->ttl; - if (unlikely(ttl < 2)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ttl too low\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * If our packet is larger than the MTU of the transmit interface then - * we can't forward it easily. - */ - if (unlikely(len > cm->xmit_dev_mtu)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("larger than mtu\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * From this point on we're good to modify the packet. - */ - - /* - * Check if skb was cloned. If it was, unshare it. Because - * the data area is going to be written in this path and we don't want to - * change the cloned skb's data section. 
- */ - if (unlikely(skb_cloned(skb))) { - DEBUG_TRACE("%px: skb is a cloned skb\n", skb); - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - DEBUG_WARN("Failed to unshare the cloned skb\n"); - return 0; - } - - /* - * Update the iph and udph pointers with the unshared skb's data area. - */ - iph = (struct sfe_ipv4_ip_hdr *)skb->data; - udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl); - } - - /* - * Update DSCP - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { - iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp; - } - - /* - * Decrement our TTL. - */ - iph->ttl = ttl - 1; - - /* - * Do we have to perform translations of the source address/port? - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) { - u16 udp_csum; - - iph->saddr = cm->xlate_src_ip; - udph->source = cm->xlate_src_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - udp_csum = udph->check; - if (likely(udp_csum)) { - u32 sum; - - if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { - sum = udp_csum + cm->xlate_src_partial_csum_adjustment; - } else { - sum = udp_csum + cm->xlate_src_csum_adjustment; - } - - sum = (sum & 0xffff) + (sum >> 16); - udph->check = (u16)sum; - } - } - - /* - * Do we have to perform translations of the destination address/port? - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) { - u16 udp_csum; - - iph->daddr = cm->xlate_dest_ip; - udph->dest = cm->xlate_dest_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - udp_csum = udph->check; - if (likely(udp_csum)) { - u32 sum; - - if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { - sum = udp_csum + cm->xlate_dest_partial_csum_adjustment; - } else { - sum = udp_csum + cm->xlate_dest_csum_adjustment; - } - - sum = (sum & 0xffff) + (sum >> 16); - udph->check = (u16)sum; - } - } - - /* - * Replace the IP checksum. 
- */ - iph->check = sfe_ipv4_gen_ip_csum(iph); - - /* - * Update traffic stats. - */ - cm->rx_packet_count++; - cm->rx_byte_count += len; - - /* - * If we're not already on the active list then insert ourselves at the tail - * of the current list. - */ - if (unlikely(!cm->active)) { - cm->active = true; - cm->active_prev = si->active_tail; - if (likely(si->active_tail)) { - si->active_tail->active_next = cm; - } else { - si->active_head = cm; - } - si->active_tail = cm; - } - - xmit_dev = cm->xmit_dev; - skb->dev = xmit_dev; - - /* - * Check to see if we need to write a header. - */ - if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { - if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { - dev_hard_header(skb, xmit_dev, ETH_P_IP, - cm->xmit_dest_mac, cm->xmit_src_mac, len); - } else { - /* - * For the simple case we write this really fast. - */ - struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN); - eth->h_proto = htons(ETH_P_IP); - eth->h_dest[0] = cm->xmit_dest_mac[0]; - eth->h_dest[1] = cm->xmit_dest_mac[1]; - eth->h_dest[2] = cm->xmit_dest_mac[2]; - eth->h_source[0] = cm->xmit_src_mac[0]; - eth->h_source[1] = cm->xmit_src_mac[1]; - eth->h_source[2] = cm->xmit_src_mac[2]; - } - } - - /* - * Update priority of skb. - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { - skb->priority = cm->priority; - } - - /* - * Mark outgoing packet. - */ - skb->mark = cm->connection->mark; - if (skb->mark) { - DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); - } - - si->packets_forwarded++; - spin_unlock_bh(&si->lock); - - /* - * We're going to check for GSO flags when we transmit the packet so - * start fetching the necessary cache line now. - */ - prefetch(skb_shinfo(skb)); - - /* - * Mark that this packet has been fast forwarded. - */ - skb->fast_forwarded = 1; - - /* - * Send the packet on its way. 
- */ - dev_queue_xmit(skb); - - return 1; -} - -/* - * sfe_ipv4_process_tcp_option_sack() - * Parse TCP SACK option and update ack according - */ -static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const u32 data_offs, - u32 *ack) -{ - u32 length = sizeof(struct sfe_ipv4_tcp_hdr); - u8 *ptr = (u8 *)th + length; - - /* - * Ignore processing if TCP packet has only TIMESTAMP option. - */ - if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1) - && likely(ptr[0] == TCPOPT_NOP) - && likely(ptr[1] == TCPOPT_NOP) - && likely(ptr[2] == TCPOPT_TIMESTAMP) - && likely(ptr[3] == TCPOLEN_TIMESTAMP)) { - return true; - } - - /* - * TCP options. Parse SACK option. - */ - while (length < data_offs) { - u8 size; - u8 kind; - - ptr = (u8 *)th + length; - kind = *ptr; - - /* - * NOP, for padding - * Not in the switch because to fast escape and to not calculate size - */ - if (kind == TCPOPT_NOP) { - length++; - continue; - } - - if (kind == TCPOPT_SACK) { - u32 sack = 0; - u8 re = 1 + 1; - - size = *(ptr + 1); - if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK)) - || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK)) - || (size > (data_offs - length))) { - return false; - } - - re += 4; - while (re < size) { - u32 sack_re; - u8 *sptr = ptr + re; - sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3]; - if (sack_re > sack) { - sack = sack_re; - } - re += TCPOLEN_SACK_PERBLOCK; - } - if (sack > *ack) { - *ack = sack; - } - length += size; - continue; - } - if (kind == TCPOPT_EOL) { - return true; - } - size = *(ptr + 1); - if (size < 2) { - return false; - } - length += size; - } - - return true; -} - -/* - * sfe_ipv4_recv_tcp() - * Handle TCP packet receives and forwarding. 
- */ -static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find) -{ - struct sfe_ipv4_tcp_hdr *tcph; - __be32 src_ip; - __be32 dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv4_connection_match *cm; - struct sfe_ipv4_connection_match *counter_cm; - u8 ttl; - u32 flags; - struct net_device *xmit_dev; - - /* - * Is our packet too short to contain a valid UDP header? - */ - if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for TCP header\n"); - return 0; - } - - /* - * Read the IP address and port information. Read the IP header data first - * because we've almost certainly got that in the cache. We may not yet have - * the TCP header cached though so allow more time for any prefetching. - */ - src_ip = iph->saddr; - dest_ip = iph->daddr; - - tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl); - src_port = tcph->source; - dest_port = tcph->dest; - flags = tcp_flag_word(tcph); - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. - */ -#ifdef CONFIG_NF_FLOW_COOKIE - cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match; - if (unlikely(!cm)) { - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port); - } -#else - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port); -#endif - if (unlikely(!cm)) { - /* - * We didn't get a connection but as TCP is connection-oriented that - * may be because this is a non-fast connection (not running established). - * For diagnostic purposes we differentiate this here. 
- */ - if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) { - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found - fast flags\n"); - return 0; - } - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found - slow flags: 0x%x\n", - flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); - return 0; - } - - /* - * If our packet has beern marked as "flush on find" we can't actually - * forward it in the fast path, but now that we've found an associated - * connection we can flush that out before we process the packet. - */ - if (unlikely(flush_on_find)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("flush on find\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - -#ifdef CONFIG_XFRM - /* - * We can't accelerate the flow on this direction, just let it go - * through the slow path. - */ - if (unlikely(!cm->flow_accel)) { - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - return 0; - } -#endif - /* - * Does our TTL allow forwarding? 
- */ - ttl = iph->ttl; - if (unlikely(ttl < 2)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ttl too low\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * If our packet is larger than the MTU of the transmit interface then - * we can't forward it easily. - */ - if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("larger than mtu\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN - * set is not a fast path packet. - */ - if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP flags: 0x%x are not fast\n", - flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - counter_cm = cm->counter_match; - - /* - * Are we doing sequence number checking? - */ - if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) { - u32 seq; - u32 ack; - u32 sack; - u32 data_offs; - u32 end; - u32 left_edge; - u32 scaled_win; - u32 max_end; - - /* - * Is our sequence fully past the right hand edge of the window? 
- */ - seq = ntohl(tcph->seq); - if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("seq: %u exceeds right edge: %u\n", - seq, cm->protocol_state.tcp.max_end + 1); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Check that our TCP data offset isn't too short. - */ - data_offs = tcph->doff << 2; - if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Update ACK according to any SACK option. - */ - ack = ntohl(tcph->ack_seq); - sack = ack; - if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP option SACK size is wrong\n"); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Check that our TCP data offset isn't past the end of the packet. 
- */ - data_offs += sizeof(struct sfe_ipv4_ip_hdr); - if (unlikely(len < data_offs)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n", - data_offs, len); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - end = seq + len - data_offs; - - /* - * Is our sequence fully before the left hand edge of the window? - */ - if (unlikely((s32)(end - (cm->protocol_state.tcp.end - - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("seq: %u before left edge: %u\n", - end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Are we acking data that is to the right of what has been sent? - */ - if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ack: %u exceeds right edge: %u\n", - sack, counter_cm->protocol_state.tcp.end + 1); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Is our ack too far before the left hand edge of the window? 
- */ - left_edge = counter_cm->protocol_state.tcp.end - - cm->protocol_state.tcp.max_win - - SFE_IPV4_TCP_MAX_ACK_WINDOW - - 1; - if (unlikely((s32)(sack - left_edge) < 0)) { - struct sfe_ipv4_connection *c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge); - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Have we just seen the largest window size yet for this connection? If yes - * then we need to record the new value. - */ - scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale; - scaled_win += (sack - ack); - if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) { - cm->protocol_state.tcp.max_win = scaled_win; - } - - /* - * If our sequence and/or ack numbers have advanced then record the new state. - */ - if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) { - cm->protocol_state.tcp.end = end; - } - - max_end = sack + scaled_win; - if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) { - counter_cm->protocol_state.tcp.max_end = max_end; - } - } - - /* - * From this point on we're good to modify the packet. - */ - - /* - * Check if skb was cloned. If it was, unshare it. Because - * the data area is going to be written in this path and we don't want to - * change the cloned skb's data section. - */ - if (unlikely(skb_cloned(skb))) { - DEBUG_TRACE("%px: skb is a cloned skb\n", skb); - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - DEBUG_WARN("Failed to unshare the cloned skb\n"); - return 0; - } - - /* - * Update the iph and tcph pointers with the unshared skb's data area. 
- */ - iph = (struct sfe_ipv4_ip_hdr *)skb->data; - tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl); - } - - /* - * Update DSCP - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { - iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp; - } - - /* - * Decrement our TTL. - */ - iph->ttl = ttl - 1; - - /* - * Do we have to perform translations of the source address/port? - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) { - u16 tcp_csum; - u32 sum; - - iph->saddr = cm->xlate_src_ip; - tcph->source = cm->xlate_src_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - tcp_csum = tcph->check; - if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { - sum = tcp_csum + cm->xlate_src_partial_csum_adjustment; - } else { - sum = tcp_csum + cm->xlate_src_csum_adjustment; - } - - sum = (sum & 0xffff) + (sum >> 16); - tcph->check = (u16)sum; - } - - /* - * Do we have to perform translations of the destination address/port? - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) { - u16 tcp_csum; - u32 sum; - - iph->daddr = cm->xlate_dest_ip; - tcph->dest = cm->xlate_dest_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - tcp_csum = tcph->check; - if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { - sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment; - } else { - sum = tcp_csum + cm->xlate_dest_csum_adjustment; - } - - sum = (sum & 0xffff) + (sum >> 16); - tcph->check = (u16)sum; - } - - /* - * Replace the IP checksum. - */ - iph->check = sfe_ipv4_gen_ip_csum(iph); - - /* - * Update traffic stats. - */ - cm->rx_packet_count++; - cm->rx_byte_count += len; - - /* - * If we're not already on the active list then insert ourselves at the tail - * of the current list. 
- */ - if (unlikely(!cm->active)) { - cm->active = true; - cm->active_prev = si->active_tail; - if (likely(si->active_tail)) { - si->active_tail->active_next = cm; - } else { - si->active_head = cm; - } - si->active_tail = cm; - } - - xmit_dev = cm->xmit_dev; - skb->dev = xmit_dev; - - /* - * Check to see if we need to write a header. - */ - if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { - if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { - dev_hard_header(skb, xmit_dev, ETH_P_IP, - cm->xmit_dest_mac, cm->xmit_src_mac, len); - } else { - /* - * For the simple case we write this really fast. - */ - struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN); - eth->h_proto = htons(ETH_P_IP); - eth->h_dest[0] = cm->xmit_dest_mac[0]; - eth->h_dest[1] = cm->xmit_dest_mac[1]; - eth->h_dest[2] = cm->xmit_dest_mac[2]; - eth->h_source[0] = cm->xmit_src_mac[0]; - eth->h_source[1] = cm->xmit_src_mac[1]; - eth->h_source[2] = cm->xmit_src_mac[2]; - } - } - - /* - * Update priority of skb. - */ - if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { - skb->priority = cm->priority; - } - - /* - * Mark outgoing packet - */ - skb->mark = cm->connection->mark; - if (skb->mark) { - DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); - } - - si->packets_forwarded++; - spin_unlock_bh(&si->lock); - - /* - * We're going to check for GSO flags when we transmit the packet so - * start fetching the necessary cache line now. - */ - prefetch(skb_shinfo(skb)); - - /* - * Mark that this packet has been fast forwarded. - */ - skb->fast_forwarded = 1; - - /* - * Send the packet on its way. - */ - dev_queue_xmit(skb); - - return 1; -} - -/* - * sfe_ipv4_recv_icmp() - * Handle ICMP packet receives. - * - * ICMP packets aren't handled as a "fast path" and always have us process them - * through the default Linux stack. 
What we do need to do is look for any errors - * about connections we are handling in the fast path. If we find any such - * connections then we want to flush their state so that the ICMP error path - * within Linux has all of the correct state should it need it. - */ -static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl) -{ - struct icmphdr *icmph; - struct sfe_ipv4_ip_hdr *icmp_iph; - unsigned int icmp_ihl_words; - unsigned int icmp_ihl; - u32 *icmp_trans_h; - struct sfe_ipv4_udp_hdr *icmp_udph; - struct sfe_ipv4_tcp_hdr *icmp_tcph; - __be32 src_ip; - __be32 dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv4_connection_match *cm; - struct sfe_ipv4_connection *c; - u32 pull_len = sizeof(struct icmphdr) + ihl; - - /* - * Is our packet too short to contain a valid ICMP header? - */ - len -= ihl; - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for ICMP header\n"); - return 0; - } - - /* - * We only handle "destination unreachable" and "time exceeded" messages. - */ - icmph = (struct icmphdr *)(skb->data + ihl); - if ((icmph->type != ICMP_DEST_UNREACH) - && (icmph->type != ICMP_TIME_EXCEEDED)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type); - return 0; - } - - /* - * Do we have the full embedded IP header? 
- */ - len -= sizeof(struct icmphdr); - pull_len += sizeof(struct sfe_ipv4_ip_hdr); - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Embedded IP header not complete\n"); - return 0; - } - - /* - * Is our embedded IP version wrong? - */ - icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1); - if (unlikely(icmp_iph->version != 4)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("IP version: %u\n", icmp_iph->version); - return 0; - } - - /* - * Do we have the full embedded IP header, including any options? - */ - icmp_ihl_words = icmp_iph->ihl; - icmp_ihl = icmp_ihl_words << 2; - pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr); - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Embedded header not large enough for IP options\n"); - return 0; - } - - len -= icmp_ihl; - icmp_trans_h = ((u32 *)icmp_iph) + icmp_ihl_words; - - /* - * Handle the embedded transport layer header. - */ - switch (icmp_iph->protocol) { - case IPPROTO_UDP: - /* - * We should have 8 bytes of UDP header - that's enough to identify - * the connection. 
- */ - pull_len += 8; - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Incomplete embedded UDP header\n"); - return 0; - } - - icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h; - src_port = icmp_udph->source; - dest_port = icmp_udph->dest; - break; - - case IPPROTO_TCP: - /* - * We should have 8 bytes of TCP header - that's enough to identify - * the connection. - */ - pull_len += 8; - if (!pskb_may_pull(skb, pull_len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Incomplete embedded TCP header\n"); - return 0; - } - - icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h; - src_port = icmp_tcph->source; - dest_port = icmp_tcph->dest; - break; - - default: - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol); - return 0; - } - - src_ip = icmp_iph->saddr; - dest_ip = icmp_iph->daddr; - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. Note that we reverse the source and destination - * here because our embedded message contains a packet that was sent in the - * opposite direction to the one in which we just received it. It will have - * been sent on the interface from which we received it though so that's still - * ok to use. 
- */ - cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port); - if (unlikely(!cm)) { - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found\n"); - return 0; - } - - /* - * We found a connection so now remove it from the connection list and flush - * its state. - */ - c = cm->connection; - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; -} - -/* - * sfe_ipv4_recv() - * Handle packet receives and forwaring. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. - */ -int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb) -{ - struct sfe_ipv4 *si = &__si; - unsigned int len; - unsigned int tot_len; - unsigned int frag_off; - unsigned int ihl; - bool flush_on_find; - bool ip_options; - struct sfe_ipv4_ip_hdr *iph; - u32 protocol; - - /* - * Check that we have space for an IP header here. - */ - len = skb->len; - if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("len: %u is too short\n", len); - return 0; - } - - /* - * Check that our "total length" is large enough for an IP header. - */ - iph = (struct sfe_ipv4_ip_hdr *)skb->data; - tot_len = ntohs(iph->tot_len); - if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("tot_len: %u is too short\n", tot_len); - return 0; - } - - /* - * Is our IP version wrong? 
- */ - if (unlikely(iph->version != 4)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("IP version: %u\n", iph->version); - return 0; - } - - /* - * Does our datagram fit inside the skb? - */ - if (unlikely(tot_len > len)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len); - return 0; - } - - /* - * Do we have a non-initial fragment? - */ - frag_off = ntohs(iph->frag_off); - if (unlikely(frag_off & IP_OFFSET)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("non-initial fragment\n"); - return 0; - } - - /* - * If we have a (first) fragment then mark it to cause any connection to flush. - */ - flush_on_find = unlikely(frag_off & IP_MF) ? true : false; - - /* - * Do we have any IP options? That's definite a slow path! If we do have IP - * options we need to recheck our header size. - */ - ihl = iph->ihl << 2; - ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? 
true : false; - if (unlikely(ip_options)) { - if (unlikely(len < ihl)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl); - return 0; - } - - flush_on_find = true; - } - - protocol = iph->protocol; - if (IPPROTO_UDP == protocol) { - return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find); - } - - if (IPPROTO_TCP == protocol) { - return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find); - } - - if (IPPROTO_ICMP == protocol) { - return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl); - } - - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol); - return 0; -} - -static void -sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c, - struct sfe_connection_create *sic) -{ - struct sfe_ipv4_connection_match *orig_cm; - struct sfe_ipv4_connection_match *repl_cm; - struct sfe_ipv4_tcp_connection_match *orig_tcp; - struct sfe_ipv4_tcp_connection_match *repl_tcp; - - orig_cm = c->original_match; - repl_cm = c->reply_match; - orig_tcp = &orig_cm->protocol_state.tcp; - repl_tcp = &repl_cm->protocol_state.tcp; - - /* update orig */ - if (orig_tcp->max_win < sic->src_td_max_window) { - orig_tcp->max_win = sic->src_td_max_window; - } - if ((s32)(orig_tcp->end - sic->src_td_end) < 0) { - orig_tcp->end = sic->src_td_end; - } - if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) { - orig_tcp->max_end = sic->src_td_max_end; - } - - /* update reply */ - if (repl_tcp->max_win < sic->dest_td_max_window) { - repl_tcp->max_win = sic->dest_td_max_window; - } - if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) { - repl_tcp->end = sic->dest_td_end; - } - if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) { - 
repl_tcp->max_end = sic->dest_td_max_end; - } - - /* update match flags */ - orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { - orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - } -} - -static void -sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c, - struct sfe_connection_create *sic) -{ - switch (sic->protocol) { - case IPPROTO_TCP: - sfe_ipv4_update_tcp_state(c, sic); - break; - } -} - -void sfe_ipv4_update_rule(struct sfe_connection_create *sic) -{ - struct sfe_ipv4_connection *c; - struct sfe_ipv4 *si = &__si; - - spin_lock_bh(&si->lock); - - c = sfe_ipv4_find_sfe_ipv4_connection(si, - sic->protocol, - sic->src_ip.ip, - sic->src_port, - sic->dest_ip.ip, - sic->dest_port); - if (c != NULL) { - sfe_ipv4_update_protocol_state(c, sic); - } - - spin_unlock_bh(&si->lock); -} - -/* - * sfe_ipv4_create_rule() - * Create a forwarding rule. - */ -int sfe_ipv4_create_rule(struct sfe_connection_create *sic) -{ - struct sfe_ipv4 *si = &__si; - struct sfe_ipv4_connection *c; - struct sfe_ipv4_connection_match *original_cm; - struct sfe_ipv4_connection_match *reply_cm; - struct net_device *dest_dev; - struct net_device *src_dev; - - dest_dev = sic->dest_dev; - src_dev = sic->src_dev; - - if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) || - (src_dev->reg_state != NETREG_REGISTERED))) { - return -EINVAL; - } - - spin_lock_bh(&si->lock); - si->connection_create_requests++; - - /* - * Check to see if there is already a flow that matches the rule we're - * trying to create. If there is then we can't create a new one. 
- */ - c = sfe_ipv4_find_sfe_ipv4_connection(si, - sic->protocol, - sic->src_ip.ip, - sic->src_port, - sic->dest_ip.ip, - sic->dest_port); - if (c != NULL) { - si->connection_create_collisions++; - - /* - * If we already have the flow then it's likely that this - * request to create the connection rule contains more - * up-to-date information. Check and update accordingly. - */ - sfe_ipv4_update_protocol_state(c, sic); - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n" - " s: %s:%pxM:%pI4:%u, d: %s:%pxM:%pI4:%u\n", - sic->mark, sic->protocol, - sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port), - sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port)); - return -EADDRINUSE; - } - - /* - * Allocate the various connection tracking objects. - */ - c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC); - if (unlikely(!c)) { - spin_unlock_bh(&si->lock); - return -ENOMEM; - } - - original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC); - if (unlikely(!original_cm)) { - spin_unlock_bh(&si->lock); - kfree(c); - return -ENOMEM; - } - - reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC); - if (unlikely(!reply_cm)) { - spin_unlock_bh(&si->lock); - kfree(original_cm); - kfree(c); - return -ENOMEM; - } - - /* - * Fill in the "original" direction connection matching object. - * Note that the transmit MAC address is "dest_mac_xlate" because - * we always know both ends of a connection by their translated - * addresses and not their public addresses. 
- */ - original_cm->match_dev = src_dev; - original_cm->match_protocol = sic->protocol; - original_cm->match_src_ip = sic->src_ip.ip; - original_cm->match_src_port = sic->src_port; - original_cm->match_dest_ip = sic->dest_ip.ip; - original_cm->match_dest_port = sic->dest_port; - original_cm->xlate_src_ip = sic->src_ip_xlate.ip; - original_cm->xlate_src_port = sic->src_port_xlate; - original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip; - original_cm->xlate_dest_port = sic->dest_port_xlate; - original_cm->rx_packet_count = 0; - original_cm->rx_packet_count64 = 0; - original_cm->rx_byte_count = 0; - original_cm->rx_byte_count64 = 0; - original_cm->xmit_dev = dest_dev; - original_cm->xmit_dev_mtu = sic->dest_mtu; - memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN); - memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN); - original_cm->connection = c; - original_cm->counter_match = reply_cm; - original_cm->flags = 0; - if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { - original_cm->priority = sic->src_priority; - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; - } - if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { - original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT; - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; - } -#ifdef CONFIG_NF_FLOW_COOKIE - original_cm->flow_cookie = 0; -#endif -#ifdef CONFIG_XFRM - original_cm->flow_accel = sic->original_accel; -#endif - original_cm->active_next = NULL; - original_cm->active_prev = NULL; - original_cm->active = false; - - /* - * For PPP links we don't write an L2 header. For everything else we do. - */ - if (!(dest_dev->flags & IFF_POINTOPOINT)) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; - - /* - * If our dev writes Ethernet headers then we can write a really fast - * version. 
- */ - if (dest_dev->header_ops) { - if (dest_dev->header_ops->create == eth_header) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; - } - } - } - - /* - * Fill in the "reply" direction connection matching object. - */ - reply_cm->match_dev = dest_dev; - reply_cm->match_protocol = sic->protocol; - reply_cm->match_src_ip = sic->dest_ip_xlate.ip; - reply_cm->match_src_port = sic->dest_port_xlate; - reply_cm->match_dest_ip = sic->src_ip_xlate.ip; - reply_cm->match_dest_port = sic->src_port_xlate; - reply_cm->xlate_src_ip = sic->dest_ip.ip; - reply_cm->xlate_src_port = sic->dest_port; - reply_cm->xlate_dest_ip = sic->src_ip.ip; - reply_cm->xlate_dest_port = sic->src_port; - reply_cm->rx_packet_count = 0; - reply_cm->rx_packet_count64 = 0; - reply_cm->rx_byte_count = 0; - reply_cm->rx_byte_count64 = 0; - reply_cm->xmit_dev = src_dev; - reply_cm->xmit_dev_mtu = sic->src_mtu; - memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN); - memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN); - reply_cm->connection = c; - reply_cm->counter_match = original_cm; - reply_cm->flags = 0; - if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { - reply_cm->priority = sic->dest_priority; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; - } - if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { - reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; - } -#ifdef CONFIG_NF_FLOW_COOKIE - reply_cm->flow_cookie = 0; -#endif -#ifdef CONFIG_XFRM - reply_cm->flow_accel = sic->reply_accel; -#endif - reply_cm->active_next = NULL; - reply_cm->active_prev = NULL; - reply_cm->active = false; - - /* - * For PPP links we don't write an L2 header. For everything else we do. - */ - if (!(src_dev->flags & IFF_POINTOPOINT)) { - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; - - /* - * If our dev writes Ethernet headers then we can write a really fast - * version. 
- */ - if (src_dev->header_ops) { - if (src_dev->header_ops->create == eth_header) { - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; - } - } - } - - - if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; - } - - if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; - } - - c->protocol = sic->protocol; - c->src_ip = sic->src_ip.ip; - c->src_ip_xlate = sic->src_ip_xlate.ip; - c->src_port = sic->src_port; - c->src_port_xlate = sic->src_port_xlate; - c->original_dev = src_dev; - c->original_match = original_cm; - c->dest_ip = sic->dest_ip.ip; - c->dest_ip_xlate = sic->dest_ip_xlate.ip; - c->dest_port = sic->dest_port; - c->dest_port_xlate = sic->dest_port_xlate; - c->reply_dev = dest_dev; - c->reply_match = reply_cm; - c->mark = sic->mark; - c->debug_read_seq = 0; - c->last_sync_jiffies = get_jiffies_64(); - - /* - * Take hold of our source and dest devices for the duration of the connection. - */ - dev_hold(c->original_dev); - dev_hold(c->reply_dev); - - /* - * Initialize the protocol-specific information that we track. - */ - switch (sic->protocol) { - case IPPROTO_TCP: - original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale; - original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1; - original_cm->protocol_state.tcp.end = sic->src_td_end; - original_cm->protocol_state.tcp.max_end = sic->src_td_max_end; - reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale; - reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? 
sic->dest_td_max_window : 1; - reply_cm->protocol_state.tcp.end = sic->dest_td_end; - reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end; - if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { - original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; - } - break; - } - - sfe_ipv4_connection_match_compute_translations(original_cm); - sfe_ipv4_connection_match_compute_translations(reply_cm); - sfe_ipv4_insert_sfe_ipv4_connection(si, c); - - spin_unlock_bh(&si->lock); - - /* - * We have everything we need! - */ - DEBUG_INFO("new connection - mark: %08x, p: %d\n" - " s: %s:%pxM(%pxM):%pI4(%pI4):%u(%u)\n" - " d: %s:%pxM(%pxM):%pI4(%pI4):%u(%u)\n", - sic->mark, sic->protocol, - sic->src_dev->name, sic->src_mac, sic->src_mac_xlate, - &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate), - dest_dev->name, sic->dest_mac, sic->dest_mac_xlate, - &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate)); - - return 0; -} - -/* - * sfe_ipv4_destroy_rule() - * Destroy a forwarding rule. - */ -void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid) -{ - struct sfe_ipv4 *si = &__si; - struct sfe_ipv4_connection *c; - - spin_lock_bh(&si->lock); - si->connection_destroy_requests++; - - /* - * Check to see if we have a flow that matches the rule we're trying - * to destroy. If there isn't then we can't destroy it. - */ - c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port, - sid->dest_ip.ip, sid->dest_port); - if (!c) { - si->connection_destroy_misses++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n", - sid->protocol, &sid->src_ip, ntohs(sid->src_port), - &sid->dest_ip, ntohs(sid->dest_port)); - return; - } - - /* - * Remove our connection details from the hash tables. 
- */ - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - spin_unlock_bh(&si->lock); - - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY); - - DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n", - sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port), - &sid->dest_ip.ip, ntohs(sid->dest_port)); -} - -/* - * sfe_ipv4_register_sync_rule_callback() - * Register a callback for rule synchronization. - */ -void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback) -{ - struct sfe_ipv4 *si = &__si; - - spin_lock_bh(&si->lock); - rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback); - spin_unlock_bh(&si->lock); -} - -/* - * sfe_ipv4_get_debug_dev() - */ -static ssize_t sfe_ipv4_get_debug_dev(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct sfe_ipv4 *si = &__si; - ssize_t count; - int num; - - spin_lock_bh(&si->lock); - num = si->debug_dev; - spin_unlock_bh(&si->lock); - - count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num); - return count; -} - -/* - * sysfs attributes. - */ -static const struct device_attribute sfe_ipv4_debug_dev_attr = - __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL); - -/* - * sfe_ipv4_destroy_all_rules_for_dev() - * Destroy all connections that match a particular device. - * - * If we pass dev as NULL then this destroys all connections. - */ -void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev) -{ - struct sfe_ipv4 *si = &__si; - struct sfe_ipv4_connection *c; - -another_round: - spin_lock_bh(&si->lock); - - for (c = si->all_connections_head; c; c = c->all_connections_next) { - /* - * Does this connection relate to the device we are destroying? 
- */ - if (!dev - || (dev == c->original_dev) - || (dev == c->reply_dev)) { - break; - } - } - - if (c) { - sfe_ipv4_remove_sfe_ipv4_connection(si, c); - } - - spin_unlock_bh(&si->lock); - - if (c) { - sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY); - goto another_round; - } -} - -/* - * sfe_ipv4_periodic_sync() - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) -static void sfe_ipv4_periodic_sync(unsigned long arg) -#else -static void sfe_ipv4_periodic_sync(struct timer_list *tl) -#endif -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) - struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg; -#else - struct sfe_ipv4 *si = from_timer(si, tl, timer); -#endif - u64 now_jiffies; - int quota; - sfe_sync_rule_callback_t sync_rule_callback; - - now_jiffies = get_jiffies_64(); - - rcu_read_lock(); - sync_rule_callback = rcu_dereference(si->sync_rule_callback); - if (!sync_rule_callback) { - rcu_read_unlock(); - goto done; - } - - spin_lock_bh(&si->lock); - sfe_ipv4_update_summary_stats(si); - - /* - * Get an estimate of the number of connections to parse in this sync. - */ - quota = (si->num_connections + 63) / 64; - - /* - * Walk the "active" list and sync the connection state. - */ - while (quota--) { - struct sfe_ipv4_connection_match *cm; - struct sfe_ipv4_connection_match *counter_cm; - struct sfe_ipv4_connection *c; - struct sfe_connection_sync sis; - - cm = si->active_head; - if (!cm) { - break; - } - - /* - * There's a possibility that our counter match is in the active list too. - * If it is then remove it. - */ - counter_cm = cm->counter_match; - if (counter_cm->active) { - counter_cm->active = false; - - /* - * We must have a connection preceding this counter match - * because that's the one that got us to this point, so we don't have - * to worry about removing the head of the list. 
- */ - counter_cm->active_prev->active_next = counter_cm->active_next; - - if (likely(counter_cm->active_next)) { - counter_cm->active_next->active_prev = counter_cm->active_prev; - } else { - si->active_tail = counter_cm->active_prev; - } - - counter_cm->active_next = NULL; - counter_cm->active_prev = NULL; - } - - /* - * Now remove the head of the active scan list. - */ - cm->active = false; - si->active_head = cm->active_next; - if (likely(cm->active_next)) { - cm->active_next->active_prev = NULL; - } else { - si->active_tail = NULL; - } - cm->active_next = NULL; - - /* - * Sync the connection state. - */ - c = cm->connection; - sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies); - - /* - * We don't want to be holding the lock when we sync! - */ - spin_unlock_bh(&si->lock); - sync_rule_callback(&sis); - spin_lock_bh(&si->lock); - } - - spin_unlock_bh(&si->lock); - rcu_read_unlock(); - -done: - mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); -} - -#define CHAR_DEV_MSG_SIZE 768 - -/* - * sfe_ipv4_debug_dev_read_start() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - si->debug_read_seq++; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_connections_start() - * Generate part of the XML output. 
- */ -static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_connections_connection() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - struct sfe_ipv4_connection *c; - struct sfe_ipv4_connection_match *original_cm; - struct sfe_ipv4_connection_match *reply_cm; - int bytes_read; - int protocol; - struct net_device *src_dev; - __be32 src_ip; - __be32 src_ip_xlate; - __be16 src_port; - __be16 src_port_xlate; - u64 src_rx_packets; - u64 src_rx_bytes; - struct net_device *dest_dev; - __be32 dest_ip; - __be32 dest_ip_xlate; - __be16 dest_port; - __be16 dest_port_xlate; - u64 dest_rx_packets; - u64 dest_rx_bytes; - u64 last_sync_jiffies; - u32 mark, src_priority, dest_priority, src_dscp, dest_dscp; -#ifdef CONFIG_NF_FLOW_COOKIE - int src_flow_cookie, dst_flow_cookie; -#endif - - spin_lock_bh(&si->lock); - - for (c = si->all_connections_head; c; c = c->all_connections_next) { - if (c->debug_read_seq < si->debug_read_seq) { - c->debug_read_seq = si->debug_read_seq; - break; - } - } - - /* - * If there were no connections then move to the next state. 
- */ - if (!c) { - spin_unlock_bh(&si->lock); - ws->state++; - return true; - } - - original_cm = c->original_match; - reply_cm = c->reply_match; - - protocol = c->protocol; - src_dev = c->original_dev; - src_ip = c->src_ip; - src_ip_xlate = c->src_ip_xlate; - src_port = c->src_port; - src_port_xlate = c->src_port_xlate; - src_priority = original_cm->priority; - src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT; - - sfe_ipv4_connection_match_update_summary_stats(original_cm); - sfe_ipv4_connection_match_update_summary_stats(reply_cm); - - src_rx_packets = original_cm->rx_packet_count64; - src_rx_bytes = original_cm->rx_byte_count64; - dest_dev = c->reply_dev; - dest_ip = c->dest_ip; - dest_ip_xlate = c->dest_ip_xlate; - dest_port = c->dest_port; - dest_port_xlate = c->dest_port_xlate; - dest_priority = reply_cm->priority; - dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT; - dest_rx_packets = reply_cm->rx_packet_count64; - dest_rx_bytes = reply_cm->rx_byte_count64; - last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies; - mark = c->mark; -#ifdef CONFIG_NF_FLOW_COOKIE - src_flow_cookie = original_cm->flow_cookie; - dst_flow_cookie = reply_cm->flow_cookie; -#endif - spin_unlock_bh(&si->lock); - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t\n", - protocol, - src_dev->name, - &src_ip, &src_ip_xlate, - ntohs(src_port), ntohs(src_port_xlate), - src_priority, src_dscp, - src_rx_packets, src_rx_bytes, - dest_dev->name, - &dest_ip, &dest_ip_xlate, - ntohs(dest_port), ntohs(dest_port_xlate), - dest_priority, dest_dscp, - dest_rx_packets, dest_rx_bytes, -#ifdef CONFIG_NF_FLOW_COOKIE - src_flow_cookie, dst_flow_cookie, -#endif - last_sync_jiffies, mark); - - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - return true; -} - -/* - * sfe_ipv4_debug_dev_read_connections_end() - * Generate part of the XML output. 
- */ -static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_exceptions_start() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_exceptions_exception() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - u64 ct; - - spin_lock_bh(&si->lock); - ct = si->exception_events64[ws->iter_exception]; - spin_unlock_bh(&si->lock); - - if (ct) { - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, - "\t\t\n", - sfe_ipv4_exception_events_string[ws->iter_exception], - ct); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - } - - ws->iter_exception++; - if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) { - ws->iter_exception = 0; - ws->state++; - } - - return true; -} - -/* - * sfe_ipv4_debug_dev_read_exceptions_end() - * Generate part of the XML output. 
- */ -static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_stats() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - unsigned int num_connections; - u64 packets_forwarded; - u64 packets_not_forwarded; - u64 connection_create_requests; - u64 connection_create_collisions; - u64 connection_destroy_requests; - u64 connection_destroy_misses; - u64 connection_flushes; - u64 connection_match_hash_hits; - u64 connection_match_hash_reorders; - - spin_lock_bh(&si->lock); - sfe_ipv4_update_summary_stats(si); - - num_connections = si->num_connections; - packets_forwarded = si->packets_forwarded64; - packets_not_forwarded = si->packets_not_forwarded64; - connection_create_requests = si->connection_create_requests64; - connection_create_collisions = si->connection_create_collisions64; - connection_destroy_requests = si->connection_destroy_requests64; - connection_destroy_misses = si->connection_destroy_misses64; - connection_flushes = si->connection_flushes64; - connection_match_hash_hits = si->connection_match_hash_hits64; - connection_match_hash_reorders = si->connection_match_hash_reorders64; - spin_unlock_bh(&si->lock); - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n", - num_connections, - packets_forwarded, - packets_not_forwarded, - connection_create_requests, - connection_create_collisions, - connection_destroy_requests, - connection_destroy_misses, - connection_flushes, - 
connection_match_hash_hits, - connection_match_hash_reorders); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv4_debug_dev_read_end() - * Generate part of the XML output. - */ -static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv4_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * Array of write functions that write various XML elements that correspond to - * our XML output state machine. - */ -static sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = { - sfe_ipv4_debug_dev_read_start, - sfe_ipv4_debug_dev_read_connections_start, - sfe_ipv4_debug_dev_read_connections_connection, - sfe_ipv4_debug_dev_read_connections_end, - sfe_ipv4_debug_dev_read_exceptions_start, - sfe_ipv4_debug_dev_read_exceptions_exception, - sfe_ipv4_debug_dev_read_exceptions_end, - sfe_ipv4_debug_dev_read_stats, - sfe_ipv4_debug_dev_read_end, -}; - -/* - * sfe_ipv4_debug_dev_read() - * Send info to userspace upon read request from user - */ -static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset) -{ - char msg[CHAR_DEV_MSG_SIZE]; - int total_read = 0; - struct sfe_ipv4_debug_xml_write_state *ws; - struct sfe_ipv4 *si = &__si; - - ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data; - while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) { - if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) { - continue; - } - } - - return total_read; -} - -/* - * 
sfe_ipv4_debug_dev_write() - * Write to char device resets some stats - */ -static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset) -{ - struct sfe_ipv4 *si = &__si; - - spin_lock_bh(&si->lock); - sfe_ipv4_update_summary_stats(si); - - si->packets_forwarded64 = 0; - si->packets_not_forwarded64 = 0; - si->connection_create_requests64 = 0; - si->connection_create_collisions64 = 0; - si->connection_destroy_requests64 = 0; - si->connection_destroy_misses64 = 0; - si->connection_flushes64 = 0; - si->connection_match_hash_hits64 = 0; - si->connection_match_hash_reorders64 = 0; - spin_unlock_bh(&si->lock); - - return length; -} - -/* - * sfe_ipv4_debug_dev_open() - */ -static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file) -{ - struct sfe_ipv4_debug_xml_write_state *ws; - - ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data; - if (!ws) { - ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL); - if (!ws) { - return -ENOMEM; - } - - ws->state = SFE_IPV4_DEBUG_XML_STATE_START; - file->private_data = ws; - } - - return 0; -} - -/* - * sfe_ipv4_debug_dev_release() - */ -static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file) -{ - struct sfe_ipv4_debug_xml_write_state *ws; - - ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data; - if (ws) { - /* - * We've finished with our output so free the write state. 
- */ - kfree(ws); - } - - return 0; -} - -/* - * File operations used in the debug char device - */ -static struct file_operations sfe_ipv4_debug_dev_fops = { - .read = sfe_ipv4_debug_dev_read, - .write = sfe_ipv4_debug_dev_write, - .open = sfe_ipv4_debug_dev_open, - .release = sfe_ipv4_debug_dev_release -}; - -#ifdef CONFIG_NF_FLOW_COOKIE -/* - * sfe_register_flow_cookie_cb - * register a function in SFE to let SFE use this function to configure flow cookie for a flow - * - * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE - * can use this function to configure flow cookie for a flow. - * return: 0, success; !=0, fail - */ -int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb) -{ - struct sfe_ipv4 *si = &__si; - - BUG_ON(!cb); - - if (si->flow_cookie_set_func) { - return -1; - } - - rcu_assign_pointer(si->flow_cookie_set_func, cb); - return 0; -} - -/* - * sfe_unregister_flow_cookie_cb - * unregister function which is used to configure flow cookie for a flow - * - * return: 0, success; !=0, fail - */ -int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb) -{ - struct sfe_ipv4 *si = &__si; - - RCU_INIT_POINTER(si->flow_cookie_set_func, NULL); - return 0; -} - -/* - * sfe_ipv4_get_flow_cookie() - */ -static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct sfe_ipv4 *si = &__si; - return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable); -} - -/* - * sfe_ipv4_set_flow_cookie() - */ -static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct sfe_ipv4 *si = &__si; - strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable); - - return size; -} - -/* - * sysfs attributes. 
- */ -static const struct device_attribute sfe_ipv4_flow_cookie_attr = - __ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie); -#endif /*CONFIG_NF_FLOW_COOKIE*/ - -/* - * sfe_ipv4_init() - */ -static int __init sfe_ipv4_init(void) -{ - struct sfe_ipv4 *si = &__si; - int result = -1; - - DEBUG_INFO("SFE IPv4 init\n"); - - /* - * Create sys/sfe_ipv4 - */ - si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL); - if (!si->sys_sfe_ipv4) { - DEBUG_ERROR("failed to register sfe_ipv4\n"); - goto exit1; - } - - /* - * Create files, one for each parameter supported by this module. - */ - result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr); - if (result) { - DEBUG_ERROR("failed to register debug dev file: %d\n", result); - goto exit2; - } - -#ifdef CONFIG_NF_FLOW_COOKIE - result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr); - if (result) { - DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result); - goto exit3; - } -#endif /* CONFIG_NF_FLOW_COOKIE */ - - /* - * Register our debug char device. - */ - result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops); - if (result < 0) { - DEBUG_ERROR("Failed to register chrdev: %d\n", result); - goto exit4; - } - - si->debug_dev = result; - - /* - * Create a timer to handle periodic statistics. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) - setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si); -#else - timer_setup(&si->timer, sfe_ipv4_periodic_sync, 0); -#endif - mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); - - spin_lock_init(&si->lock); - - return 0; - -exit4: -#ifdef CONFIG_NF_FLOW_COOKIE - sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr); - -exit3: -#endif /* CONFIG_NF_FLOW_COOKIE */ - sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr); - -exit2: - kobject_put(si->sys_sfe_ipv4); - -exit1: - return result; -} - -/* - * sfe_ipv4_exit() - */ -static void __exit sfe_ipv4_exit(void) -{ - struct sfe_ipv4 *si = &__si; - - DEBUG_INFO("SFE IPv4 exit\n"); - - /* - * Destroy all connections. - */ - sfe_ipv4_destroy_all_rules_for_dev(NULL); - - del_timer_sync(&si->timer); - - unregister_chrdev(si->debug_dev, "sfe_ipv4"); - -#ifdef CONFIG_NF_FLOW_COOKIE - sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr); -#endif /* CONFIG_NF_FLOW_COOKIE */ - sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr); - - kobject_put(si->sys_sfe_ipv4); - -} - -module_init(sfe_ipv4_init) -module_exit(sfe_ipv4_exit) - -EXPORT_SYMBOL(sfe_ipv4_recv); -EXPORT_SYMBOL(sfe_ipv4_create_rule); -EXPORT_SYMBOL(sfe_ipv4_destroy_rule); -EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev); -EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback); -EXPORT_SYMBOL(sfe_ipv4_mark_rule); -EXPORT_SYMBOL(sfe_ipv4_update_rule); -#ifdef CONFIG_NF_FLOW_COOKIE -EXPORT_SYMBOL(sfe_register_flow_cookie_cb); -EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb); -#endif - -MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/shortcut-fe/shortcut-fe/sfe_ipv6.c b/shortcut-fe/shortcut-fe/sfe_ipv6.c deleted file mode 100644 index a7cb811a9..000000000 --- a/shortcut-fe/shortcut-fe/sfe_ipv6.c +++ /dev/null @@ -1,3617 +0,0 @@ -/* - * sfe_ipv6.c - * Shortcut forwarding engine - 
IPv6 support. - * - * Copyright (c) 2015-2016, 2019-2020 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "sfe.h" -#include "sfe_cm.h" - -/* - * By default Linux IP header and transport layer header structures are - * unpacked, assuming that such headers should be 32-bit aligned. - * Unfortunately some wireless adaptors can't cope with this requirement and - * some CPUs can't handle misaligned accesses. For those platforms we - * define SFE_IPV6_UNALIGNED_IP_HEADER and mark the structures as packed. - * When we do this the compiler will generate slightly worse code than for the - * aligned case (on most platforms) but will be much quicker than fixing - * things up in an unaligned trap handler. 
- */ -#define SFE_IPV6_UNALIGNED_IP_HEADER 1 -#if SFE_IPV6_UNALIGNED_IP_HEADER -#define SFE_IPV6_UNALIGNED_STRUCT __attribute__((packed)) -#else -#define SFE_IPV6_UNALIGNED_STRUCT -#endif - -#define CHAR_DEV_MSG_SIZE 768 - -/* - * An Ethernet header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV6_UNALIGNED_STRUCT) - */ -struct sfe_ipv6_eth_hdr { - __be16 h_dest[ETH_ALEN / 2]; - __be16 h_source[ETH_ALEN / 2]; - __be16 h_proto; -} SFE_IPV6_UNALIGNED_STRUCT; - -#define SFE_IPV6_DSCP_MASK 0xf03f -#define SFE_IPV6_DSCP_SHIFT 2 - -/* - * An IPv6 header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV6_UNALIGNED_STRUCT) - */ -struct sfe_ipv6_ip_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 priority:4, - version:4; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u8 version:4, - priority:4; -#else -#error "Please fix " -#endif - __u8 flow_lbl[3]; - __be16 payload_len; - __u8 nexthdr; - __u8 hop_limit; - struct sfe_ipv6_addr saddr; - struct sfe_ipv6_addr daddr; - - /* - * The extension header start here. 
- */ -} SFE_IPV6_UNALIGNED_STRUCT; - -#define SFE_IPV6_EXT_HDR_HOP 0 -#define SFE_IPV6_EXT_HDR_ROUTING 43 -#define SFE_IPV6_EXT_HDR_FRAG 44 -#define SFE_IPV6_EXT_HDR_ESP 50 -#define SFE_IPV6_EXT_HDR_AH 51 -#define SFE_IPV6_EXT_HDR_NONE 59 -#define SFE_IPV6_EXT_HDR_DST 60 -#define SFE_IPV6_EXT_HDR_MH 135 - -/* - * fragmentation header - */ - -struct sfe_ipv6_frag_hdr { - __u8 nexthdr; - __u8 reserved; - __be16 frag_off; - __be32 identification; -}; - -#define SFE_IPV6_FRAG_OFFSET 0xfff8 - -/* - * generic IPv6 extension header - */ -struct sfe_ipv6_ext_hdr { - __u8 next_hdr; - __u8 hdr_len; - __u8 padding[6]; -} SFE_IPV6_UNALIGNED_STRUCT; - -/* - * A UDP header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV6_UNALIGNED_STRUCT) - */ -struct sfe_ipv6_udp_hdr { - __be16 source; - __be16 dest; - __be16 len; - __sum16 check; -} SFE_IPV6_UNALIGNED_STRUCT; - -/* - * A TCP header, but with an optional "packed" attribute to - * help with performance on some platforms (see the definition of - * SFE_IPV6_UNALIGNED_STRUCT) - */ -struct sfe_ipv6_tcp_hdr { - __be16 source; - __be16 dest; - __be32 seq; - __be32 ack_seq; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 res1:4, - doff:4, - fin:1, - syn:1, - rst:1, - psh:1, - ack:1, - urg:1, - ece:1, - cwr:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 doff:4, - res1:4, - cwr:1, - ece:1, - urg:1, - ack:1, - psh:1, - rst:1, - syn:1, - fin:1; -#else -#error "Adjust your defines" -#endif - __be16 window; - __sum16 check; - __be16 urg_ptr; -} SFE_IPV6_UNALIGNED_STRUCT; - -/* - * Specifies the lower bound on ACK numbers carried in the TCP header - */ -#define SFE_IPV6_TCP_MAX_ACK_WINDOW 65520 - -/* - * IPv6 TCP connection match additional data. 
- */ -struct sfe_ipv6_tcp_connection_match { - u8 win_scale; /* Window scale */ - u32 max_win; /* Maximum window size seen */ - u32 end; /* Sequence number of the next byte to send (seq + segment length) */ - u32 max_end; /* Sequence number of the last byte to ack */ -}; - -/* - * Bit flags for IPv6 connection matching entry. - */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0) - /* Perform source translation */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1) - /* Perform destination translation */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2) - /* Ignore TCP sequence numbers */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3) - /* Fast Ethernet header write */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4) - /* Fast Ethernet header write */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5) - /* remark priority of SKB */ -#define SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6) - /* remark DSCP of packet */ - -/* - * IPv6 connection matching structure. - */ -struct sfe_ipv6_connection_match { - /* - * References to other objects. - */ - struct sfe_ipv6_connection_match *next; - struct sfe_ipv6_connection_match *prev; - struct sfe_ipv6_connection *connection; - struct sfe_ipv6_connection_match *counter_match; - /* Matches the flow in the opposite direction as the one in connection */ - struct sfe_ipv6_connection_match *active_next; - struct sfe_ipv6_connection_match *active_prev; - bool active; /* Flag to indicate if we're on the active list */ - - /* - * Characteristics that identify flows that match this rule. 
- */ - struct net_device *match_dev; /* Network device */ - u8 match_protocol; /* Protocol */ - struct sfe_ipv6_addr match_src_ip[1]; /* Source IP address */ - struct sfe_ipv6_addr match_dest_ip[1]; /* Destination IP address */ - __be16 match_src_port; /* Source port/connection ident */ - __be16 match_dest_port; /* Destination port/connection ident */ - - /* - * Control the operations of the match. - */ - u32 flags; /* Bit flags */ -#ifdef CONFIG_NF_FLOW_COOKIE - u32 flow_cookie; /* used flow cookie, for debug */ -#endif -#ifdef CONFIG_XFRM - u32 flow_accel; /* The flow accelerated or not */ -#endif - - /* - * Connection state that we track once we match. - */ - union { /* Protocol-specific state */ - struct sfe_ipv6_tcp_connection_match tcp; - } protocol_state; - /* - * Stats recorded in a sync period. These stats will be added to - * rx_packet_count64/rx_byte_count64 after a sync period. - */ - u32 rx_packet_count; - u32 rx_byte_count; - - /* - * Packet translation information. - */ - struct sfe_ipv6_addr xlate_src_ip[1]; /* Address after source translation */ - __be16 xlate_src_port; /* Port/connection ident after source translation */ - u16 xlate_src_csum_adjustment; - /* Transport layer checksum adjustment after source translation */ - struct sfe_ipv6_addr xlate_dest_ip[1]; /* Address after destination translation */ - __be16 xlate_dest_port; /* Port/connection ident after destination translation */ - u16 xlate_dest_csum_adjustment; - /* Transport layer checksum adjustment after destination translation */ - - /* - * QoS information - */ - u32 priority; - u32 dscp; - - /* - * Packet transmit information. - */ - struct net_device *xmit_dev; /* Network device on which to transmit */ - unsigned short int xmit_dev_mtu; - /* Interface MTU */ - u16 xmit_dest_mac[ETH_ALEN / 2]; - /* Destination MAC address to use when forwarding */ - u16 xmit_src_mac[ETH_ALEN / 2]; - /* Source MAC address to use when forwarding */ - - /* - * Summary stats. 
- */ - u64 rx_packet_count64; - u64 rx_byte_count64; -}; - -/* - * Per-connection data structure. - */ -struct sfe_ipv6_connection { - struct sfe_ipv6_connection *next; - /* Pointer to the next entry in a hash chain */ - struct sfe_ipv6_connection *prev; - /* Pointer to the previous entry in a hash chain */ - int protocol; /* IP protocol number */ - struct sfe_ipv6_addr src_ip[1]; /* Src IP addr pre-translation */ - struct sfe_ipv6_addr src_ip_xlate[1]; /* Src IP addr post-translation */ - struct sfe_ipv6_addr dest_ip[1]; /* Dest IP addr pre-translation */ - struct sfe_ipv6_addr dest_ip_xlate[1]; /* Dest IP addr post-translation */ - __be16 src_port; /* Src port pre-translation */ - __be16 src_port_xlate; /* Src port post-translation */ - __be16 dest_port; /* Dest port pre-translation */ - __be16 dest_port_xlate; /* Dest port post-translation */ - struct sfe_ipv6_connection_match *original_match; - /* Original direction matching structure */ - struct net_device *original_dev; - /* Original direction source device */ - struct sfe_ipv6_connection_match *reply_match; - /* Reply direction matching structure */ - struct net_device *reply_dev; /* Reply direction source device */ - u64 last_sync_jiffies; /* Jiffies count for the last sync */ - struct sfe_ipv6_connection *all_connections_next; - /* Pointer to the next entry in the list of all connections */ - struct sfe_ipv6_connection *all_connections_prev; - /* Pointer to the previous entry in the list of all connections */ - u32 mark; /* mark for outgoing packet */ - u32 debug_read_seq; /* sequence number for debug dump */ -}; - -/* - * IPv6 connections and hash table size information. 
- */ -#define SFE_IPV6_CONNECTION_HASH_SHIFT 12 -#define SFE_IPV6_CONNECTION_HASH_SIZE (1 << SFE_IPV6_CONNECTION_HASH_SHIFT) -#define SFE_IPV6_CONNECTION_HASH_MASK (SFE_IPV6_CONNECTION_HASH_SIZE - 1) - -#ifdef CONFIG_NF_FLOW_COOKIE -#define SFE_FLOW_COOKIE_SIZE 2048 -#define SFE_FLOW_COOKIE_MASK 0x7ff - -struct sfe_ipv6_flow_cookie_entry { - struct sfe_ipv6_connection_match *match; - unsigned long last_clean_time; -}; -#endif - -enum sfe_ipv6_exception_events { - SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION, - SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT, - SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL, - SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION, - SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS, - SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS, - SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT, - SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL, - SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION, - SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS, - SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE, - SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS, - SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK, - SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS, - SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE, - SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE, - SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_IP_OPTIONS_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UDP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_TCP_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL, - SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION, - SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION, - 
SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_BAD_TOTAL_LENGTH, - SFE_IPV6_EXCEPTION_EVENT_NON_V6, - SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT, - SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE, - SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL, - SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL, - SFE_IPV6_EXCEPTION_EVENT_LAST -}; - -static char *sfe_ipv6_exception_events_string[SFE_IPV6_EXCEPTION_EVENT_LAST] = { - "UDP_HEADER_INCOMPLETE", - "UDP_NO_CONNECTION", - "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT", - "UDP_SMALL_TTL", - "UDP_NEEDS_FRAGMENTATION", - "TCP_HEADER_INCOMPLETE", - "TCP_NO_CONNECTION_SLOW_FLAGS", - "TCP_NO_CONNECTION_FAST_FLAGS", - "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT", - "TCP_SMALL_TTL", - "TCP_NEEDS_FRAGMENTATION", - "TCP_FLAGS", - "TCP_SEQ_EXCEEDS_RIGHT_EDGE", - "TCP_SMALL_DATA_OFFS", - "TCP_BAD_SACK", - "TCP_BIG_DATA_OFFS", - "TCP_SEQ_BEFORE_LEFT_EDGE", - "TCP_ACK_EXCEEDS_RIGHT_EDGE", - "TCP_ACK_BEFORE_LEFT_EDGE", - "ICMP_HEADER_INCOMPLETE", - "ICMP_UNHANDLED_TYPE", - "ICMP_IPV6_HEADER_INCOMPLETE", - "ICMP_IPV6_NON_V6", - "ICMP_IPV6_IP_OPTIONS_INCOMPLETE", - "ICMP_IPV6_UDP_HEADER_INCOMPLETE", - "ICMP_IPV6_TCP_HEADER_INCOMPLETE", - "ICMP_IPV6_UNHANDLED_PROTOCOL", - "ICMP_NO_CONNECTION", - "ICMP_FLUSHED_CONNECTION", - "HEADER_INCOMPLETE", - "BAD_TOTAL_LENGTH", - "NON_V6", - "NON_INITIAL_FRAGMENT", - "DATAGRAM_INCOMPLETE", - "IP_OPTIONS_INCOMPLETE", - "UNHANDLED_PROTOCOL", - "FLOW_COOKIE_ADD_FAIL" -}; - -/* - * Per-module structure. 
- */ -struct sfe_ipv6 { - spinlock_t lock; /* Lock for SMP correctness */ - struct sfe_ipv6_connection_match *active_head; - /* Head of the list of recently active connections */ - struct sfe_ipv6_connection_match *active_tail; - /* Tail of the list of recently active connections */ - struct sfe_ipv6_connection *all_connections_head; - /* Head of the list of all connections */ - struct sfe_ipv6_connection *all_connections_tail; - /* Tail of the list of all connections */ - unsigned int num_connections; /* Number of connections */ - struct timer_list timer; /* Timer used for periodic sync ops */ - sfe_sync_rule_callback_t __rcu sync_rule_callback; - /* Callback function registered by a connection manager for stats syncing */ - struct sfe_ipv6_connection *conn_hash[SFE_IPV6_CONNECTION_HASH_SIZE]; - /* Connection hash table */ - struct sfe_ipv6_connection_match *conn_match_hash[SFE_IPV6_CONNECTION_HASH_SIZE]; - /* Connection match hash table */ -#ifdef CONFIG_NF_FLOW_COOKIE - struct sfe_ipv6_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE]; - /* flow cookie table*/ - sfe_ipv6_flow_cookie_set_func_t flow_cookie_set_func; - /* function used to configure flow cookie in hardware*/ - int flow_cookie_enable; - /* Enable/disable flow cookie at runtime */ -#endif - - /* - * Stats recorded in a sync period. These stats will be added to - * connection_xxx64 after a sync period. 
- */ - u32 connection_create_requests; - /* Number of IPv6 connection create requests */ - u32 connection_create_collisions; - /* Number of IPv6 connection create requests that collided with existing hash table entries */ - u32 connection_destroy_requests; - /* Number of IPv6 connection destroy requests */ - u32 connection_destroy_misses; - /* Number of IPv6 connection destroy requests that missed our hash table */ - u32 connection_match_hash_hits; - /* Number of IPv6 connection match hash hits */ - u32 connection_match_hash_reorders; - /* Number of IPv6 connection match hash reorders */ - u32 connection_flushes; /* Number of IPv6 connection flushes */ - u32 packets_forwarded; /* Number of IPv6 packets forwarded */ - u32 packets_not_forwarded; /* Number of IPv6 packets not forwarded */ - u32 exception_events[SFE_IPV6_EXCEPTION_EVENT_LAST]; - - /* - * Summary statistics. - */ - u64 connection_create_requests64; - /* Number of IPv6 connection create requests */ - u64 connection_create_collisions64; - /* Number of IPv6 connection create requests that collided with existing hash table entries */ - u64 connection_destroy_requests64; - /* Number of IPv6 connection destroy requests */ - u64 connection_destroy_misses64; - /* Number of IPv6 connection destroy requests that missed our hash table */ - u64 connection_match_hash_hits64; - /* Number of IPv6 connection match hash hits */ - u64 connection_match_hash_reorders64; - /* Number of IPv6 connection match hash reorders */ - u64 connection_flushes64; /* Number of IPv6 connection flushes */ - u64 packets_forwarded64; /* Number of IPv6 packets forwarded */ - u64 packets_not_forwarded64; - /* Number of IPv6 packets not forwarded */ - u64 exception_events64[SFE_IPV6_EXCEPTION_EVENT_LAST]; - - /* - * Control state. 
- */ - struct kobject *sys_sfe_ipv6; /* sysfs linkage */ - int debug_dev; /* Major number of the debug char device */ - u32 debug_read_seq; /* sequence number for debug dump */ -}; - -/* - * Enumeration of the XML output. - */ -enum sfe_ipv6_debug_xml_states { - SFE_IPV6_DEBUG_XML_STATE_START, - SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_START, - SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_CONNECTION, - SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_END, - SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_START, - SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION, - SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_END, - SFE_IPV6_DEBUG_XML_STATE_STATS, - SFE_IPV6_DEBUG_XML_STATE_END, - SFE_IPV6_DEBUG_XML_STATE_DONE -}; - -/* - * XML write state. - */ -struct sfe_ipv6_debug_xml_write_state { - enum sfe_ipv6_debug_xml_states state; - /* XML output file state machine state */ - int iter_exception; /* Next exception iterator */ -}; - -typedef bool (*sfe_ipv6_debug_xml_write_method_t)(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws); - -static struct sfe_ipv6 __si6; - -/* - * sfe_ipv6_get_debug_dev() - */ -static ssize_t sfe_ipv6_get_debug_dev(struct device *dev, struct device_attribute *attr, char *buf); - -/* - * sysfs attributes. 
- */ -static const struct device_attribute sfe_ipv6_debug_dev_attr = - __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv6_get_debug_dev, NULL); - -/* - * sfe_ipv6_is_ext_hdr() - * check if we recognize ipv6 extension header - */ -static inline bool sfe_ipv6_is_ext_hdr(u8 hdr) -{ - return (hdr == SFE_IPV6_EXT_HDR_HOP) || - (hdr == SFE_IPV6_EXT_HDR_ROUTING) || - (hdr == SFE_IPV6_EXT_HDR_FRAG) || - (hdr == SFE_IPV6_EXT_HDR_AH) || - (hdr == SFE_IPV6_EXT_HDR_DST) || - (hdr == SFE_IPV6_EXT_HDR_MH); -} - -/* - * sfe_ipv6_change_dsfield() - * change dscp field in IPv6 packet - */ -static inline void sfe_ipv6_change_dsfield(struct sfe_ipv6_ip_hdr *iph, u8 dscp) -{ - __be16 *p = (__be16 *)iph; - - *p = ((*p & htons(SFE_IPV6_DSCP_MASK)) | htons((u16)dscp << 4)); -} - -/* - * sfe_ipv6_get_connection_match_hash() - * Generate the hash used in connection match lookups. - */ -static inline unsigned int sfe_ipv6_get_connection_match_hash(struct net_device *dev, u8 protocol, - struct sfe_ipv6_addr *src_ip, __be16 src_port, - struct sfe_ipv6_addr *dest_ip, __be16 dest_port) -{ - u32 idx, hash = 0; - size_t dev_addr = (size_t)dev; - - for (idx = 0; idx < 4; idx++) { - hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx]; - } - hash = ((u32)dev_addr) ^ hash ^ protocol ^ ntohs(src_port ^ dest_port); - return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK; -} - -/* - * sfe_ipv6_find_connection_match() - * Get the IPv6 flow match info that corresponds to a particular 5-tuple. - * - * On entry we must be holding the lock that protects the hash table. 
- */ -static struct sfe_ipv6_connection_match * -sfe_ipv6_find_connection_match(struct sfe_ipv6 *si, struct net_device *dev, u8 protocol, - struct sfe_ipv6_addr *src_ip, __be16 src_port, - struct sfe_ipv6_addr *dest_ip, __be16 dest_port) -{ - struct sfe_ipv6_connection_match *cm; - struct sfe_ipv6_connection_match *head; - unsigned int conn_match_idx; - - conn_match_idx = sfe_ipv6_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port); - cm = si->conn_match_hash[conn_match_idx]; - - /* - * If we don't have anything in this chain then bail. - */ - if (unlikely(!cm)) { - return NULL; - } - - /* - * Hopefully the first entry is the one we want. - */ - if ((cm->match_src_port == src_port) - && (cm->match_dest_port == dest_port) - && (sfe_ipv6_addr_equal(cm->match_src_ip, src_ip)) - && (sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip)) - && (cm->match_protocol == protocol) - && (cm->match_dev == dev)) { - si->connection_match_hash_hits++; - return cm; - } - - /* - * Unfortunately we didn't find it at head, so we search it in chain and - * move matching entry to the top of the hash chain. We presume that this - * will be reused again very quickly. - */ - head = cm; - do { - cm = cm->next; - } while (cm && (cm->match_src_port != src_port - || cm->match_dest_port != dest_port - || !sfe_ipv6_addr_equal(cm->match_src_ip, src_ip) - || !sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip) - || cm->match_protocol != protocol - || cm->match_dev != dev)); - - /* - * Not found then we're done. - */ - if (unlikely(!cm)) { - return NULL; - } - - /* - * We found a match so move it. - */ - if (cm->next) { - cm->next->prev = cm->prev; - } - cm->prev->next = cm->next; - cm->prev = NULL; - cm->next = head; - head->prev = cm; - si->conn_match_hash[conn_match_idx] = cm; - si->connection_match_hash_reorders++; - - return cm; -} - -/* - * sfe_ipv6_connection_match_update_summary_stats() - * Update the summary stats for a connection match entry. 
- */ -static inline void sfe_ipv6_connection_match_update_summary_stats(struct sfe_ipv6_connection_match *cm) -{ - cm->rx_packet_count64 += cm->rx_packet_count; - cm->rx_packet_count = 0; - cm->rx_byte_count64 += cm->rx_byte_count; - cm->rx_byte_count = 0; -} - -/* - * sfe_ipv6_connection_match_compute_translations() - * Compute port and address translations for a connection match entry. - */ -static void sfe_ipv6_connection_match_compute_translations(struct sfe_ipv6_connection_match *cm) -{ - u32 diff[9]; - u32 *idx_32; - u16 *idx_16; - - /* - * Before we insert the entry look to see if this is tagged as doing address - * translations. If it is then work out the adjustment that we need to apply - * to the transport checksum. - */ - if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC) { - u32 adj = 0; - u32 carry = 0; - - /* - * Precompute an incremental checksum adjustment so we can - * edit packets in this stream very quickly. The algorithm is from RFC1624. - */ - idx_32 = diff; - *(idx_32++) = cm->match_src_ip->addr[0]; - *(idx_32++) = cm->match_src_ip->addr[1]; - *(idx_32++) = cm->match_src_ip->addr[2]; - *(idx_32++) = cm->match_src_ip->addr[3]; - - idx_16 = (u16 *)idx_32; - *(idx_16++) = cm->match_src_port; - *(idx_16++) = ~cm->xlate_src_port; - idx_32 = (u32 *)idx_16; - - *(idx_32++) = ~cm->xlate_src_ip->addr[0]; - *(idx_32++) = ~cm->xlate_src_ip->addr[1]; - *(idx_32++) = ~cm->xlate_src_ip->addr[2]; - *(idx_32++) = ~cm->xlate_src_ip->addr[3]; - - /* - * When we compute this fold it down to a 16-bit offset - * as that way we can avoid having to do a double - * folding of the twos-complement result because the - * addition of 2 16-bit values cannot cause a double - * wrap-around! 
- */ - for (idx_32 = diff; idx_32 < diff + 9; idx_32++) { - u32 w = *idx_32; - adj += carry; - adj += w; - carry = (w > adj); - } - adj += carry; - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_src_csum_adjustment = (u16)adj; - } - - if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST) { - u32 adj = 0; - u32 carry = 0; - - /* - * Precompute an incremental checksum adjustment so we can - * edit packets in this stream very quickly. The algorithm is from RFC1624. - */ - idx_32 = diff; - *(idx_32++) = cm->match_dest_ip->addr[0]; - *(idx_32++) = cm->match_dest_ip->addr[1]; - *(idx_32++) = cm->match_dest_ip->addr[2]; - *(idx_32++) = cm->match_dest_ip->addr[3]; - - idx_16 = (u16 *)idx_32; - *(idx_16++) = cm->match_dest_port; - *(idx_16++) = ~cm->xlate_dest_port; - idx_32 = (u32 *)idx_16; - - *(idx_32++) = ~cm->xlate_dest_ip->addr[0]; - *(idx_32++) = ~cm->xlate_dest_ip->addr[1]; - *(idx_32++) = ~cm->xlate_dest_ip->addr[2]; - *(idx_32++) = ~cm->xlate_dest_ip->addr[3]; - - /* - * When we compute this fold it down to a 16-bit offset - * as that way we can avoid having to do a double - * folding of the twos-complement result because the - * addition of 2 16-bit values cannot cause a double - * wrap-around! - */ - for (idx_32 = diff; idx_32 < diff + 9; idx_32++) { - u32 w = *idx_32; - adj += carry; - adj += w; - carry = (w > adj); - } - adj += carry; - adj = (adj & 0xffff) + (adj >> 16); - adj = (adj & 0xffff) + (adj >> 16); - cm->xlate_dest_csum_adjustment = (u16)adj; - } -} - -/* - * sfe_ipv6_update_summary_stats() - * Update the summary stats. 
- */ -static void sfe_ipv6_update_summary_stats(struct sfe_ipv6 *si) -{ - int i; - - si->connection_create_requests64 += si->connection_create_requests; - si->connection_create_requests = 0; - si->connection_create_collisions64 += si->connection_create_collisions; - si->connection_create_collisions = 0; - si->connection_destroy_requests64 += si->connection_destroy_requests; - si->connection_destroy_requests = 0; - si->connection_destroy_misses64 += si->connection_destroy_misses; - si->connection_destroy_misses = 0; - si->connection_match_hash_hits64 += si->connection_match_hash_hits; - si->connection_match_hash_hits = 0; - si->connection_match_hash_reorders64 += si->connection_match_hash_reorders; - si->connection_match_hash_reorders = 0; - si->connection_flushes64 += si->connection_flushes; - si->connection_flushes = 0; - si->packets_forwarded64 += si->packets_forwarded; - si->packets_forwarded = 0; - si->packets_not_forwarded64 += si->packets_not_forwarded; - si->packets_not_forwarded = 0; - - for (i = 0; i < SFE_IPV6_EXCEPTION_EVENT_LAST; i++) { - si->exception_events64[i] += si->exception_events[i]; - si->exception_events[i] = 0; - } -} - -/* - * sfe_ipv6_insert_connection_match() - * Insert a connection match into the hash. - * - * On entry we must be holding the lock that protects the hash table. 
- */ -static inline void sfe_ipv6_insert_connection_match(struct sfe_ipv6 *si, - struct sfe_ipv6_connection_match *cm) -{ - struct sfe_ipv6_connection_match **hash_head; - struct sfe_ipv6_connection_match *prev_head; - unsigned int conn_match_idx - = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol, - cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port); - - hash_head = &si->conn_match_hash[conn_match_idx]; - prev_head = *hash_head; - cm->prev = NULL; - if (prev_head) { - prev_head->prev = cm; - } - - cm->next = prev_head; - *hash_head = cm; - -#ifdef CONFIG_NF_FLOW_COOKIE - if (!si->flow_cookie_enable || !(cm->flags & (SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC | SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST))) - return; - - /* - * Configure hardware to put a flow cookie in packet of this flow, - * then we can accelerate the lookup process when we received this packet. - */ - for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { - struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; - - if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) { - sfe_ipv6_flow_cookie_set_func_t func; - - rcu_read_lock(); - func = rcu_dereference(si->flow_cookie_set_func); - if (func) { - if (!func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port, - cm->match_dest_ip->addr, cm->match_dest_port, conn_match_idx)) { - entry->match = cm; - cm->flow_cookie = conn_match_idx; - } else { - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL]++; - } - } - rcu_read_unlock(); - - break; - } - } -#endif -} - -/* - * sfe_ipv6_remove_connection_match() - * Remove a connection match object from the hash. - * - * On entry we must be holding the lock that protects the hash table. 
- */ -static inline void sfe_ipv6_remove_connection_match(struct sfe_ipv6 *si, struct sfe_ipv6_connection_match *cm) -{ -#ifdef CONFIG_NF_FLOW_COOKIE - if (si->flow_cookie_enable) { - /* - * Tell hardware that we no longer need a flow cookie in packet of this flow - */ - unsigned int conn_match_idx; - - for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { - struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; - - if (cm == entry->match) { - sfe_ipv6_flow_cookie_set_func_t func; - - rcu_read_lock(); - func = rcu_dereference(si->flow_cookie_set_func); - if (func) { - func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port, - cm->match_dest_ip->addr, cm->match_dest_port, 0); - } - rcu_read_unlock(); - - cm->flow_cookie = 0; - entry->match = NULL; - entry->last_clean_time = jiffies; - break; - } - } - } -#endif - - /* - * Unlink the connection match entry from the hash. - */ - if (cm->prev) { - cm->prev->next = cm->next; - } else { - unsigned int conn_match_idx - = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol, - cm->match_src_ip, cm->match_src_port, - cm->match_dest_ip, cm->match_dest_port); - si->conn_match_hash[conn_match_idx] = cm->next; - } - - if (cm->next) { - cm->next->prev = cm->prev; - } - - /* - * If the connection match entry is in the active list remove it. - */ - if (cm->active) { - if (likely(cm->active_prev)) { - cm->active_prev->active_next = cm->active_next; - } else { - si->active_head = cm->active_next; - } - - if (likely(cm->active_next)) { - cm->active_next->active_prev = cm->active_prev; - } else { - si->active_tail = cm->active_prev; - } - } -} - -/* - * sfe_ipv6_get_connection_hash() - * Generate the hash used in connection lookups. 
- */ -static inline unsigned int sfe_ipv6_get_connection_hash(u8 protocol, struct sfe_ipv6_addr *src_ip, __be16 src_port, - struct sfe_ipv6_addr *dest_ip, __be16 dest_port) -{ - u32 idx, hash = 0; - - for (idx = 0; idx < 4; idx++) { - hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx]; - } - hash = hash ^ protocol ^ ntohs(src_port ^ dest_port); - return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK; -} - -/* - * sfe_ipv6_find_connection() - * Get the IPv6 connection info that corresponds to a particular 5-tuple. - * - * On entry we must be holding the lock that protects the hash table. - */ -static inline struct sfe_ipv6_connection *sfe_ipv6_find_connection(struct sfe_ipv6 *si, u32 protocol, - struct sfe_ipv6_addr *src_ip, __be16 src_port, - struct sfe_ipv6_addr *dest_ip, __be16 dest_port) -{ - struct sfe_ipv6_connection *c; - unsigned int conn_idx = sfe_ipv6_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port); - c = si->conn_hash[conn_idx]; - - /* - * If we don't have anything in this chain then bale. - */ - if (unlikely(!c)) { - return NULL; - } - - /* - * Hopefully the first entry is the one we want. - */ - if ((c->src_port == src_port) - && (c->dest_port == dest_port) - && (sfe_ipv6_addr_equal(c->src_ip, src_ip)) - && (sfe_ipv6_addr_equal(c->dest_ip, dest_ip)) - && (c->protocol == protocol)) { - return c; - } - - /* - * Unfortunately we didn't find it at head, so we search it in chain. 
- */ - do { - c = c->next; - } while (c && (c->src_port != src_port - || c->dest_port != dest_port - || !sfe_ipv6_addr_equal(c->src_ip, src_ip) - || !sfe_ipv6_addr_equal(c->dest_ip, dest_ip) - || c->protocol != protocol)); - - /* - * Will need connection entry for next create/destroy metadata, - * So no need to re-order entry for these requests - */ - return c; -} - -/* - * sfe_ipv6_mark_rule() - * Updates the mark for a current offloaded connection - * - * Will take hash lock upon entry - */ -void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark) -{ - struct sfe_ipv6 *si = &__si6; - struct sfe_ipv6_connection *c; - - spin_lock_bh(&si->lock); - c = sfe_ipv6_find_connection(si, mark->protocol, - mark->src_ip.ip6, mark->src_port, - mark->dest_ip.ip6, mark->dest_port); - if (c) { - WARN_ON((0 != c->mark) && (0 == mark->mark)); - c->mark = mark->mark; - } - spin_unlock_bh(&si->lock); - - if (c) { - DEBUG_TRACE("Matching connection found for mark, " - "setting from %08x to %08x\n", - c->mark, mark->mark); - } -} - -/* - * sfe_ipv6_insert_connection() - * Insert a connection into the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static void sfe_ipv6_insert_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c) -{ - struct sfe_ipv6_connection **hash_head; - struct sfe_ipv6_connection *prev_head; - unsigned int conn_idx; - - /* - * Insert entry into the connection hash. - */ - conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port, - c->dest_ip, c->dest_port); - hash_head = &si->conn_hash[conn_idx]; - prev_head = *hash_head; - c->prev = NULL; - if (prev_head) { - prev_head->prev = c; - } - - c->next = prev_head; - *hash_head = c; - - /* - * Insert entry into the "all connections" list. 
- */ - if (si->all_connections_tail) { - c->all_connections_prev = si->all_connections_tail; - si->all_connections_tail->all_connections_next = c; - } else { - c->all_connections_prev = NULL; - si->all_connections_head = c; - } - - si->all_connections_tail = c; - c->all_connections_next = NULL; - si->num_connections++; - - /* - * Insert the connection match objects too. - */ - sfe_ipv6_insert_connection_match(si, c->original_match); - sfe_ipv6_insert_connection_match(si, c->reply_match); -} - -/* - * sfe_ipv6_remove_connection() - * Remove a sfe_ipv6_connection object from the hash. - * - * On entry we must be holding the lock that protects the hash table. - */ -static void sfe_ipv6_remove_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c) -{ - /* - * Remove the connection match objects. - */ - sfe_ipv6_remove_connection_match(si, c->reply_match); - sfe_ipv6_remove_connection_match(si, c->original_match); - - /* - * Unlink the connection. - */ - if (c->prev) { - c->prev->next = c->next; - } else { - unsigned int conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port, - c->dest_ip, c->dest_port); - si->conn_hash[conn_idx] = c->next; - } - - if (c->next) { - c->next->prev = c->prev; - } - - /* - * Unlink connection from all_connections list - */ - if (c->all_connections_prev) { - c->all_connections_prev->all_connections_next = c->all_connections_next; - } else { - si->all_connections_head = c->all_connections_next; - } - - if (c->all_connections_next) { - c->all_connections_next->all_connections_prev = c->all_connections_prev; - } else { - si->all_connections_tail = c->all_connections_prev; - } - - si->num_connections--; -} - -/* - * sfe_ipv6_gen_sync_connection() - * Sync a connection. - * - * On entry to this function we expect that the lock for the connection is either - * already held or isn't required. 
- */ -static void sfe_ipv6_gen_sync_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c, - struct sfe_connection_sync *sis, sfe_sync_reason_t reason, - u64 now_jiffies) -{ - struct sfe_ipv6_connection_match *original_cm; - struct sfe_ipv6_connection_match *reply_cm; - - /* - * Fill in the update message. - */ - sis->is_v6 = 1; - sis->protocol = c->protocol; - sis->src_ip.ip6[0] = c->src_ip[0]; - sis->src_ip_xlate.ip6[0] = c->src_ip_xlate[0]; - sis->dest_ip.ip6[0] = c->dest_ip[0]; - sis->dest_ip_xlate.ip6[0] = c->dest_ip_xlate[0]; - sis->src_port = c->src_port; - sis->src_port_xlate = c->src_port_xlate; - sis->dest_port = c->dest_port; - sis->dest_port_xlate = c->dest_port_xlate; - - original_cm = c->original_match; - reply_cm = c->reply_match; - sis->src_td_max_window = original_cm->protocol_state.tcp.max_win; - sis->src_td_end = original_cm->protocol_state.tcp.end; - sis->src_td_max_end = original_cm->protocol_state.tcp.max_end; - sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win; - sis->dest_td_end = reply_cm->protocol_state.tcp.end; - sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end; - - sis->src_new_packet_count = original_cm->rx_packet_count; - sis->src_new_byte_count = original_cm->rx_byte_count; - sis->dest_new_packet_count = reply_cm->rx_packet_count; - sis->dest_new_byte_count = reply_cm->rx_byte_count; - - sfe_ipv6_connection_match_update_summary_stats(original_cm); - sfe_ipv6_connection_match_update_summary_stats(reply_cm); - - sis->src_dev = original_cm->match_dev; - sis->src_packet_count = original_cm->rx_packet_count64; - sis->src_byte_count = original_cm->rx_byte_count64; - - sis->dest_dev = reply_cm->match_dev; - sis->dest_packet_count = reply_cm->rx_packet_count64; - sis->dest_byte_count = reply_cm->rx_byte_count64; - - sis->reason = reason; - - /* - * Get the time increment since our last sync. 
- */ - sis->delta_jiffies = now_jiffies - c->last_sync_jiffies; - c->last_sync_jiffies = now_jiffies; -} - -/* - * sfe_ipv6_flush_connection() - * Flush a connection and free all associated resources. - * - * We need to be called with bottom halves disabled locally as we need to acquire - * the connection hash lock and release it again. In general we're actually called - * from within a BH and so we're fine, but we're also called when connections are - * torn down. - */ -static void sfe_ipv6_flush_connection(struct sfe_ipv6 *si, - struct sfe_ipv6_connection *c, - sfe_sync_reason_t reason) -{ - struct sfe_connection_sync sis; - u64 now_jiffies; - sfe_sync_rule_callback_t sync_rule_callback; - - rcu_read_lock(); - spin_lock_bh(&si->lock); - si->connection_flushes++; - sync_rule_callback = rcu_dereference(si->sync_rule_callback); - spin_unlock_bh(&si->lock); - - if (sync_rule_callback) { - /* - * Generate a sync message and then sync. - */ - now_jiffies = get_jiffies_64(); - sfe_ipv6_gen_sync_connection(si, c, &sis, reason, now_jiffies); - sync_rule_callback(&sis); - } - - rcu_read_unlock(); - - /* - * Release our hold of the source and dest devices and free the memory - * for our connection objects. - */ - dev_put(c->original_dev); - dev_put(c->reply_dev); - kfree(c->original_match); - kfree(c->reply_match); - kfree(c); -} - -/* - * sfe_ipv6_recv_udp() - * Handle UDP packet receives and forwarding. - */ -static int sfe_ipv6_recv_udp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find) -{ - struct sfe_ipv6_udp_hdr *udph; - struct sfe_ipv6_addr *src_ip; - struct sfe_ipv6_addr *dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv6_connection_match *cm; - struct net_device *xmit_dev; - - /* - * Is our packet too short to contain a valid UDP header? 
- */ - if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_udp_hdr) + ihl))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for UDP header\n"); - return 0; - } - - /* - * Read the IP address and port information. Read the IP header data first - * because we've almost certainly got that in the cache. We may not yet have - * the UDP header cached though so allow more time for any prefetching. - */ - src_ip = &iph->saddr; - dest_ip = &iph->daddr; - - udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl); - src_port = udph->source; - dest_port = udph->dest; - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. - */ -#ifdef CONFIG_NF_FLOW_COOKIE - cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match; - if (unlikely(!cm)) { - cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port); - } -#else - cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port); -#endif - if (unlikely(!cm)) { - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found\n"); - return 0; - } - - /* - * If our packet has beern marked as "flush on find" we can't actually - * forward it in the fast path, but now that we've found an associated - * connection we can flush that out before we process the packet. 
- */ - if (unlikely(flush_on_find)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("flush on find\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - -#ifdef CONFIG_XFRM - /* - * We can't accelerate the flow on this direction, just let it go - * through the slow path. - */ - if (unlikely(!cm->flow_accel)) { - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - return 0; - } -#endif - - /* - * Does our hop_limit allow forwarding? - */ - if (unlikely(iph->hop_limit < 2)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("hop_limit too low\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * If our packet is larger than the MTU of the transmit interface then - * we can't forward it easily. - */ - if (unlikely(len > cm->xmit_dev_mtu)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("larger than mtu\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * From this point on we're good to modify the packet. - */ - - /* - * Check if skb was cloned. If it was, unshare it. Because - * the data area is going to be written in this path and we don't want to - * change the cloned skb's data section. 
- */ - if (unlikely(skb_cloned(skb))) { - DEBUG_TRACE("%px: skb is a cloned skb\n", skb); - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - DEBUG_WARN("Failed to unshare the cloned skb\n"); - return 0; - } - - /* - * Update the iph and udph pointers with the unshared skb's data area. - */ - iph = (struct sfe_ipv6_ip_hdr *)skb->data; - udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl); - } - - /* - * Update DSCP - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { - sfe_ipv6_change_dsfield(iph, cm->dscp); - } - - /* - * Decrement our hop_limit. - */ - iph->hop_limit -= 1; - - /* - * Do we have to perform translations of the source address/port? - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) { - u16 udp_csum; - - iph->saddr = cm->xlate_src_ip[0]; - udph->source = cm->xlate_src_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - udp_csum = udph->check; - if (likely(udp_csum)) { - u32 sum = udp_csum + cm->xlate_src_csum_adjustment; - sum = (sum & 0xffff) + (sum >> 16); - udph->check = (u16)sum; - } - } - - /* - * Do we have to perform translations of the destination address/port? - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) { - u16 udp_csum; - - iph->daddr = cm->xlate_dest_ip[0]; - udph->dest = cm->xlate_dest_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - udp_csum = udph->check; - if (likely(udp_csum)) { - u32 sum = udp_csum + cm->xlate_dest_csum_adjustment; - sum = (sum & 0xffff) + (sum >> 16); - udph->check = (u16)sum; - } - } - - /* - * Update traffic stats. - */ - cm->rx_packet_count++; - cm->rx_byte_count += len; - - /* - * If we're not already on the active list then insert ourselves at the tail - * of the current list. 
- */ - if (unlikely(!cm->active)) { - cm->active = true; - cm->active_prev = si->active_tail; - if (likely(si->active_tail)) { - si->active_tail->active_next = cm; - } else { - si->active_head = cm; - } - si->active_tail = cm; - } - - xmit_dev = cm->xmit_dev; - skb->dev = xmit_dev; - - /* - * Check to see if we need to write a header. - */ - if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { - if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { - dev_hard_header(skb, xmit_dev, ETH_P_IPV6, - cm->xmit_dest_mac, cm->xmit_src_mac, len); - } else { - /* - * For the simple case we write this really fast. - */ - struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN); - eth->h_proto = htons(ETH_P_IPV6); - eth->h_dest[0] = cm->xmit_dest_mac[0]; - eth->h_dest[1] = cm->xmit_dest_mac[1]; - eth->h_dest[2] = cm->xmit_dest_mac[2]; - eth->h_source[0] = cm->xmit_src_mac[0]; - eth->h_source[1] = cm->xmit_src_mac[1]; - eth->h_source[2] = cm->xmit_src_mac[2]; - } - } - - /* - * Update priority of skb. - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { - skb->priority = cm->priority; - } - - /* - * Mark outgoing packet. - */ - skb->mark = cm->connection->mark; - if (skb->mark) { - DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); - } - - si->packets_forwarded++; - spin_unlock_bh(&si->lock); - - /* - * We're going to check for GSO flags when we transmit the packet so - * start fetching the necessary cache line now. - */ - prefetch(skb_shinfo(skb)); - - /* - * Mark that this packet has been fast forwarded. - */ - skb->fast_forwarded = 1; - - /* - * Send the packet on its way. 
- */ - dev_queue_xmit(skb); - - return 1; -} - -/* - * sfe_ipv6_process_tcp_option_sack() - * Parse TCP SACK option and update ack according - */ -static bool sfe_ipv6_process_tcp_option_sack(const struct sfe_ipv6_tcp_hdr *th, const u32 data_offs, - u32 *ack) -{ - u32 length = sizeof(struct sfe_ipv6_tcp_hdr); - u8 *ptr = (u8 *)th + length; - - /* - * Ignore processing if TCP packet has only TIMESTAMP option. - */ - if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1) - && likely(ptr[0] == TCPOPT_NOP) - && likely(ptr[1] == TCPOPT_NOP) - && likely(ptr[2] == TCPOPT_TIMESTAMP) - && likely(ptr[3] == TCPOLEN_TIMESTAMP)) { - return true; - } - - /* - * TCP options. Parse SACK option. - */ - while (length < data_offs) { - u8 size; - u8 kind; - - ptr = (u8 *)th + length; - kind = *ptr; - - /* - * NOP, for padding - * Not in the switch because to fast escape and to not calculate size - */ - if (kind == TCPOPT_NOP) { - length++; - continue; - } - - if (kind == TCPOPT_SACK) { - u32 sack = 0; - u8 re = 1 + 1; - - size = *(ptr + 1); - if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK)) - || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK)) - || (size > (data_offs - length))) { - return false; - } - - re += 4; - while (re < size) { - u32 sack_re; - u8 *sptr = ptr + re; - sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3]; - if (sack_re > sack) { - sack = sack_re; - } - re += TCPOLEN_SACK_PERBLOCK; - } - if (sack > *ack) { - *ack = sack; - } - length += size; - continue; - } - if (kind == TCPOPT_EOL) { - return true; - } - size = *(ptr + 1); - if (size < 2) { - return false; - } - length += size; - } - - return true; -} - -/* - * sfe_ipv6_recv_tcp() - * Handle TCP packet receives and forwarding. 
- */ -static int sfe_ipv6_recv_tcp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find) -{ - struct sfe_ipv6_tcp_hdr *tcph; - struct sfe_ipv6_addr *src_ip; - struct sfe_ipv6_addr *dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv6_connection_match *cm; - struct sfe_ipv6_connection_match *counter_cm; - u32 flags; - struct net_device *xmit_dev; - - /* - * Is our packet too short to contain a valid UDP header? - */ - if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_tcp_hdr) + ihl))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for TCP header\n"); - return 0; - } - - /* - * Read the IP address and port information. Read the IP header data first - * because we've almost certainly got that in the cache. We may not yet have - * the TCP header cached though so allow more time for any prefetching. - */ - src_ip = &iph->saddr; - dest_ip = &iph->daddr; - - tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl); - src_port = tcph->source; - dest_port = tcph->dest; - flags = tcp_flag_word(tcph); - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. - */ -#ifdef CONFIG_NF_FLOW_COOKIE - cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match; - if (unlikely(!cm)) { - cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port); - } -#else - cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port); -#endif - if (unlikely(!cm)) { - /* - * We didn't get a connection but as TCP is connection-oriented that - * may be because this is a non-fast connection (not running established). - * For diagnostic purposes we differentiate this here. 
- */ - if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) { - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found - fast flags\n"); - return 0; - } - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found - slow flags: 0x%x\n", - flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); - return 0; - } - - /* - * If our packet has beern marked as "flush on find" we can't actually - * forward it in the fast path, but now that we've found an associated - * connection we can flush that out before we process the packet. - */ - if (unlikely(flush_on_find)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("flush on find\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - -#ifdef CONFIG_XFRM - /* - * We can't accelerate the flow on this direction, just let it go - * through the slow path. - */ - if (unlikely(!cm->flow_accel)) { - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - return 0; - } -#endif - - /* - * Does our hop_limit allow forwarding? - */ - if (unlikely(iph->hop_limit < 2)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("hop_limit too low\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * If our packet is larger than the MTU of the transmit interface then - * we can't forward it easily. 
- */ - if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("larger than mtu\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN - * set is not a fast path packet. - */ - if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP flags: 0x%x are not fast\n", - flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - counter_cm = cm->counter_match; - - /* - * Are we doing sequence number checking? - */ - if (likely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) { - u32 seq; - u32 ack; - u32 sack; - u32 data_offs; - u32 end; - u32 left_edge; - u32 scaled_win; - u32 max_end; - - /* - * Is our sequence fully past the right hand edge of the window? - */ - seq = ntohl(tcph->seq); - if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("seq: %u exceeds right edge: %u\n", - seq, cm->protocol_state.tcp.max_end + 1); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Check that our TCP data offset isn't too short. 
- */ - data_offs = tcph->doff << 2; - if (unlikely(data_offs < sizeof(struct sfe_ipv6_tcp_hdr))) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Update ACK according to any SACK option. - */ - ack = ntohl(tcph->ack_seq); - sack = ack; - if (unlikely(!sfe_ipv6_process_tcp_option_sack(tcph, data_offs, &sack))) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP option SACK size is wrong\n"); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Check that our TCP data offset isn't past the end of the packet. - */ - data_offs += sizeof(struct sfe_ipv6_ip_hdr); - if (unlikely(len < data_offs)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n", - data_offs, len); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - end = seq + len - data_offs; - - /* - * Is our sequence fully before the left hand edge of the window? 
- */ - if (unlikely((s32)(end - (cm->protocol_state.tcp.end - - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("seq: %u before left edge: %u\n", - end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Are we acking data that is to the right of what has been sent? - */ - if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ack: %u exceeds right edge: %u\n", - sack, counter_cm->protocol_state.tcp.end + 1); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Is our ack too far before the left hand edge of the window? - */ - left_edge = counter_cm->protocol_state.tcp.end - - cm->protocol_state.tcp.max_win - - SFE_IPV6_TCP_MAX_ACK_WINDOW - - 1; - if (unlikely((s32)(sack - left_edge) < 0)) { - struct sfe_ipv6_connection *c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge); - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; - } - - /* - * Have we just seen the largest window size yet for this connection? If yes - * then we need to record the new value. 
- */ - scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale; - scaled_win += (sack - ack); - if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) { - cm->protocol_state.tcp.max_win = scaled_win; - } - - /* - * If our sequence and/or ack numbers have advanced then record the new state. - */ - if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) { - cm->protocol_state.tcp.end = end; - } - - max_end = sack + scaled_win; - if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) { - counter_cm->protocol_state.tcp.max_end = max_end; - } - } - - /* - * From this point on we're good to modify the packet. - */ - - /* - * Check if skb was cloned. If it was, unshare it. Because - * the data area is going to be written in this path and we don't want to - * change the cloned skb's data section. - */ - if (unlikely(skb_cloned(skb))) { - DEBUG_TRACE("%px: skb is a cloned skb\n", skb); - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - DEBUG_WARN("Failed to unshare the cloned skb\n"); - return 0; - } - - /* - * Update the iph and tcph pointers with the unshared skb's data area. - */ - iph = (struct sfe_ipv6_ip_hdr *)skb->data; - tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl); - } - - /* - * Update DSCP - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { - sfe_ipv6_change_dsfield(iph, cm->dscp); - } - - /* - * Decrement our hop_limit. - */ - iph->hop_limit -= 1; - - /* - * Do we have to perform translations of the source address/port? - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) { - u16 tcp_csum; - u32 sum; - - iph->saddr = cm->xlate_src_ip[0]; - tcph->source = cm->xlate_src_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. 
- */ - tcp_csum = tcph->check; - sum = tcp_csum + cm->xlate_src_csum_adjustment; - sum = (sum & 0xffff) + (sum >> 16); - tcph->check = (u16)sum; - } - - /* - * Do we have to perform translations of the destination address/port? - */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) { - u16 tcp_csum; - u32 sum; - - iph->daddr = cm->xlate_dest_ip[0]; - tcph->dest = cm->xlate_dest_port; - - /* - * Do we have a non-zero UDP checksum? If we do then we need - * to update it. - */ - tcp_csum = tcph->check; - sum = tcp_csum + cm->xlate_dest_csum_adjustment; - sum = (sum & 0xffff) + (sum >> 16); - tcph->check = (u16)sum; - } - - /* - * Update traffic stats. - */ - cm->rx_packet_count++; - cm->rx_byte_count += len; - - /* - * If we're not already on the active list then insert ourselves at the tail - * of the current list. - */ - if (unlikely(!cm->active)) { - cm->active = true; - cm->active_prev = si->active_tail; - if (likely(si->active_tail)) { - si->active_tail->active_next = cm; - } else { - si->active_head = cm; - } - si->active_tail = cm; - } - - xmit_dev = cm->xmit_dev; - skb->dev = xmit_dev; - - /* - * Check to see if we need to write a header. - */ - if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { - if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { - dev_hard_header(skb, xmit_dev, ETH_P_IPV6, - cm->xmit_dest_mac, cm->xmit_src_mac, len); - } else { - /* - * For the simple case we write this really fast. - */ - struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN); - eth->h_proto = htons(ETH_P_IPV6); - eth->h_dest[0] = cm->xmit_dest_mac[0]; - eth->h_dest[1] = cm->xmit_dest_mac[1]; - eth->h_dest[2] = cm->xmit_dest_mac[2]; - eth->h_source[0] = cm->xmit_src_mac[0]; - eth->h_source[1] = cm->xmit_src_mac[1]; - eth->h_source[2] = cm->xmit_src_mac[2]; - } - } - - /* - * Update priority of skb. 
- */ - if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { - skb->priority = cm->priority; - } - - /* - * Mark outgoing packet - */ - skb->mark = cm->connection->mark; - if (skb->mark) { - DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); - } - - si->packets_forwarded++; - spin_unlock_bh(&si->lock); - - /* - * We're going to check for GSO flags when we transmit the packet so - * start fetching the necessary cache line now. - */ - prefetch(skb_shinfo(skb)); - - /* - * Mark that this packet has been fast forwarded. - */ - skb->fast_forwarded = 1; - - /* - * Send the packet on its way. - */ - dev_queue_xmit(skb); - - return 1; -} - -/* - * sfe_ipv6_recv_icmp() - * Handle ICMP packet receives. - * - * ICMP packets aren't handled as a "fast path" and always have us process them - * through the default Linux stack. What we do need to do is look for any errors - * about connections we are handling in the fast path. If we find any such - * connections then we want to flush their state so that the ICMP error path - * within Linux has all of the correct state should it need it. - */ -static int sfe_ipv6_recv_icmp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev, - unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl) -{ - struct icmp6hdr *icmph; - struct sfe_ipv6_ip_hdr *icmp_iph; - struct sfe_ipv6_udp_hdr *icmp_udph; - struct sfe_ipv6_tcp_hdr *icmp_tcph; - struct sfe_ipv6_addr *src_ip; - struct sfe_ipv6_addr *dest_ip; - __be16 src_port; - __be16 dest_port; - struct sfe_ipv6_connection_match *cm; - struct sfe_ipv6_connection *c; - u8 next_hdr; - - /* - * Is our packet too short to contain a valid ICMP header? 
- */ - len -= ihl; - if (!pskb_may_pull(skb, ihl + sizeof(struct icmp6hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("packet too short for ICMP header\n"); - return 0; - } - - /* - * We only handle "destination unreachable" and "time exceeded" messages. - */ - icmph = (struct icmp6hdr *)(skb->data + ihl); - if ((icmph->icmp6_type != ICMPV6_DEST_UNREACH) - && (icmph->icmp6_type != ICMPV6_TIME_EXCEED)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->icmp6_type); - return 0; - } - - /* - * Do we have the full embedded IP header? - * We should have 8 bytes of next L4 header - that's enough to identify - * the connection. - */ - len -= sizeof(struct icmp6hdr); - ihl += sizeof(struct icmp6hdr); - if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ip_hdr) + sizeof(struct sfe_ipv6_ext_hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Embedded IP header not complete\n"); - return 0; - } - - /* - * Is our embedded IP version wrong? 
- */ - icmp_iph = (struct sfe_ipv6_ip_hdr *)(icmph + 1); - if (unlikely(icmp_iph->version != 6)) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("IP version: %u\n", icmp_iph->version); - return 0; - } - - len -= sizeof(struct sfe_ipv6_ip_hdr); - ihl += sizeof(struct sfe_ipv6_ip_hdr); - next_hdr = icmp_iph->nexthdr; - while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) { - struct sfe_ipv6_ext_hdr *ext_hdr; - unsigned int ext_hdr_len; - - ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl); - if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) { - struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr; - unsigned int frag_off = ntohs(frag_hdr->frag_off); - - if (frag_off & SFE_IPV6_FRAG_OFFSET) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("non-initial fragment\n"); - return 0; - } - } - - ext_hdr_len = ext_hdr->hdr_len; - ext_hdr_len <<= 3; - ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr); - len -= ext_hdr_len; - ihl += ext_hdr_len; - /* - * We should have 8 bytes of next header - that's enough to identify - * the connection. - */ - if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) { - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("extension header %d not completed\n", next_hdr); - return 0; - } - - next_hdr = ext_hdr->next_hdr; - } - - /* - * Handle the embedded transport layer header. 
- */ - switch (next_hdr) { - case IPPROTO_UDP: - icmp_udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl); - src_port = icmp_udph->source; - dest_port = icmp_udph->dest; - break; - - case IPPROTO_TCP: - icmp_tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl); - src_port = icmp_tcph->source; - dest_port = icmp_tcph->dest; - break; - - default: - spin_lock_bh(&si->lock); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", next_hdr); - return 0; - } - - src_ip = &icmp_iph->saddr; - dest_ip = &icmp_iph->daddr; - - spin_lock_bh(&si->lock); - - /* - * Look for a connection match. Note that we reverse the source and destination - * here because our embedded message contains a packet that was sent in the - * opposite direction to the one in which we just received it. It will have - * been sent on the interface from which we received it though so that's still - * ok to use. - */ - cm = sfe_ipv6_find_connection_match(si, dev, icmp_iph->nexthdr, dest_ip, dest_port, src_ip, src_port); - if (unlikely(!cm)) { - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - DEBUG_TRACE("no connection found\n"); - return 0; - } - - /* - * We found a connection so now remove it from the connection list and flush - * its state. - */ - c = cm->connection; - sfe_ipv6_remove_connection(si, c); - si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++; - si->packets_not_forwarded++; - spin_unlock_bh(&si->lock); - - sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); - return 0; -} - -/* - * sfe_ipv6_recv() - * Handle packet receives and forwaring. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. 
 */
int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb)
{
	struct sfe_ipv6 *si = &__si6;
	unsigned int len;
	unsigned int payload_len;
	unsigned int ihl = sizeof(struct sfe_ipv6_ip_hdr);
	bool flush_on_find = false;
	struct sfe_ipv6_ip_hdr *iph;
	u8 next_hdr;

	/*
	 * Check that we have space for an IP header and an upper-layer header here.
	 */
	len = skb->len;
	if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("len: %u is too short\n", len);
		return 0;
	}

	/*
	 * Is our IP version wrong?
	 */
	iph = (struct sfe_ipv6_ip_hdr *)skb->data;
	if (unlikely(iph->version != 6)) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_V6]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("IP version: %u\n", iph->version);
		return 0;
	}

	/*
	 * Does our datagram fit inside the skb?
	 */
	payload_len = ntohs(iph->payload_len);
	if (unlikely(payload_len > (len - ihl))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("payload_len: %u, exceeds len: %u\n", payload_len, (len - (unsigned int)sizeof(struct sfe_ipv6_ip_hdr)));
		return 0;
	}

	/*
	 * Skip any IPv6 extension headers to find the real transport protocol.
	 * Any packet that needed extension-header processing is flagged with
	 * flush_on_find so the per-protocol handler flushes rather than
	 * fast-forwards it.
	 */
	next_hdr = iph->nexthdr;
	while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) {
		struct sfe_ipv6_ext_hdr *ext_hdr;
		unsigned int ext_hdr_len;

		ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl);
		if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) {
			struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr;
			unsigned int frag_off = ntohs(frag_hdr->frag_off);

			/*
			 * Non-initial fragments carry no transport header, so
			 * there is nothing to match a connection against.
			 */
			if (frag_off & SFE_IPV6_FRAG_OFFSET) {
				spin_lock_bh(&si->lock);
				si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
				si->packets_not_forwarded++;
				spin_unlock_bh(&si->lock);

				DEBUG_TRACE("non-initial fragment\n");
				return 0;
			}
		}

		/* hdr_len is in units of 8 octets, excluding the first 8. */
		ext_hdr_len = ext_hdr->hdr_len;
		ext_hdr_len <<= 3;
		ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr);
		ihl += ext_hdr_len;
		if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
			spin_lock_bh(&si->lock);
			si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			DEBUG_TRACE("extension header %d not completed\n", next_hdr);
			return 0;
		}

		flush_on_find = true;
		next_hdr = ext_hdr->next_hdr;
	}

	if (IPPROTO_UDP == next_hdr) {
		return sfe_ipv6_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_TCP == next_hdr) {
		return sfe_ipv6_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
	}

	if (IPPROTO_ICMPV6 == next_hdr) {
		return sfe_ipv6_recv_icmp(si, skb, dev, len, iph, ihl);
	}

	spin_lock_bh(&si->lock);
	si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
	si->packets_not_forwarded++;
	spin_unlock_bh(&si->lock);

	DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", next_hdr);
	return 0;
}

/*
 * sfe_ipv6_update_tcp_state()
 *	update TCP window variables.
 *
 * Widens each direction's tracked window/sequence state to cover the values
 * supplied in the create message; values only ever grow (serial-number
 * arithmetic via the (s32) casts), never shrink.
 */
static void
sfe_ipv6_update_tcp_state(struct sfe_ipv6_connection *c,
			  struct sfe_connection_create *sic)
{
	struct sfe_ipv6_connection_match *orig_cm;
	struct sfe_ipv6_connection_match *repl_cm;
	struct sfe_ipv6_tcp_connection_match *orig_tcp;
	struct sfe_ipv6_tcp_connection_match *repl_tcp;

	orig_cm = c->original_match;
	repl_cm = c->reply_match;
	orig_tcp = &orig_cm->protocol_state.tcp;
	repl_tcp = &repl_cm->protocol_state.tcp;

	/* update orig */
	if (orig_tcp->max_win < sic->src_td_max_window) {
		orig_tcp->max_win = sic->src_td_max_window;
	}
	if ((s32)(orig_tcp->end - sic->src_td_end) < 0) {
		orig_tcp->end = sic->src_td_end;
	}
	if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
		orig_tcp->max_end = sic->src_td_max_end;
	}

	/* update reply */
	if (repl_tcp->max_win < sic->dest_td_max_window) {
		repl_tcp->max_win = sic->dest_td_max_window;
	}
	if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) {
		repl_tcp->end = sic->dest_td_end;
	}
	if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
		repl_tcp->max_end = sic->dest_td_max_end;
	}

	/* update match flags */
	orig_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
	repl_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
	if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
		orig_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
		repl_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
	}
}

/*
 * sfe_ipv6_update_protocol_state()
 *	update protocol specified state machine.
 */
static void
sfe_ipv6_update_protocol_state(struct sfe_ipv6_connection *c,
			       struct sfe_connection_create *sic)
{
	switch (sic->protocol) {
	case IPPROTO_TCP:
		sfe_ipv6_update_tcp_state(c, sic);
		break;
	}
}

/*
 * sfe_ipv6_update_rule()
 *	update forwarding rule after rule is created.
 *
 * No-op if no matching connection exists.
 */
void sfe_ipv6_update_rule(struct sfe_connection_create *sic)
{
	struct sfe_ipv6_connection *c;
	struct sfe_ipv6 *si = &__si6;

	spin_lock_bh(&si->lock);

	c = sfe_ipv6_find_connection(si,
				     sic->protocol,
				     sic->src_ip.ip6,
				     sic->src_port,
				     sic->dest_ip.ip6,
				     sic->dest_port);
	if (c != NULL) {
		sfe_ipv6_update_protocol_state(c, sic);
	}

	spin_unlock_bh(&si->lock);
}

/*
 * sfe_ipv6_create_rule()
 *	Create a forwarding rule.
 *
 * Returns 0 on success, -EINVAL if either device is not registered,
 * -EADDRINUSE if the flow already exists (its state is refreshed instead),
 * or -ENOMEM on allocation failure.
 */
int sfe_ipv6_create_rule(struct sfe_connection_create *sic)
{
	struct sfe_ipv6 *si = &__si6;
	struct sfe_ipv6_connection *c;
	struct sfe_ipv6_connection_match *original_cm;
	struct sfe_ipv6_connection_match *reply_cm;
	struct net_device *dest_dev;
	struct net_device *src_dev;

	dest_dev = sic->dest_dev;
	src_dev = sic->src_dev;

	if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
		     (src_dev->reg_state != NETREG_REGISTERED))) {
		return -EINVAL;
	}

	spin_lock_bh(&si->lock);
	si->connection_create_requests++;

	/*
	 * Check to see if there is already a flow that matches the rule we're
	 * trying to create. If there is then we can't create a new one.
	 */
	c = sfe_ipv6_find_connection(si,
				     sic->protocol,
				     sic->src_ip.ip6,
				     sic->src_port,
				     sic->dest_ip.ip6,
				     sic->dest_port);
	if (c != NULL) {
		si->connection_create_collisions++;

		/*
		 * If we already have the flow then it's likely that this
		 * request to create the connection rule contains more
		 * up-to-date information. Check and update accordingly.
		 */
		sfe_ipv6_update_protocol_state(c, sic);
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
			    " s: %s:%pxM:%pI6:%u, d: %s:%pxM:%pI6:%u\n",
			    sic->mark, sic->protocol,
			    sic->src_dev->name, sic->src_mac, sic->src_ip.ip6, ntohs(sic->src_port),
			    sic->dest_dev->name, sic->dest_mac, sic->dest_ip.ip6, ntohs(sic->dest_port));
		return -EADDRINUSE;
	}

	/*
	 * Allocate the various connection tracking objects.
	 * GFP_ATOMIC: we are under si->lock (spinlock, bottom halves disabled).
	 */
	c = (struct sfe_ipv6_connection *)kmalloc(sizeof(struct sfe_ipv6_connection), GFP_ATOMIC);
	if (unlikely(!c)) {
		spin_unlock_bh(&si->lock);
		return -ENOMEM;
	}

	original_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC);
	if (unlikely(!original_cm)) {
		spin_unlock_bh(&si->lock);
		kfree(c);
		return -ENOMEM;
	}

	reply_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC);
	if (unlikely(!reply_cm)) {
		spin_unlock_bh(&si->lock);
		kfree(original_cm);
		kfree(c);
		return -ENOMEM;
	}

	/*
	 * Fill in the "original" direction connection matching object.
	 * Note that the transmit MAC address is "dest_mac_xlate" because
	 * we always know both ends of a connection by their translated
	 * addresses and not their public addresses.
	 */
	original_cm->match_dev = src_dev;
	original_cm->match_protocol = sic->protocol;
	original_cm->match_src_ip[0] = sic->src_ip.ip6[0];
	original_cm->match_src_port = sic->src_port;
	original_cm->match_dest_ip[0] = sic->dest_ip.ip6[0];
	original_cm->match_dest_port = sic->dest_port;
	original_cm->xlate_src_ip[0] = sic->src_ip_xlate.ip6[0];
	original_cm->xlate_src_port = sic->src_port_xlate;
	original_cm->xlate_dest_ip[0] = sic->dest_ip_xlate.ip6[0];
	original_cm->xlate_dest_port = sic->dest_port_xlate;
	original_cm->rx_packet_count = 0;
	original_cm->rx_packet_count64 = 0;
	original_cm->rx_byte_count = 0;
	original_cm->rx_byte_count64 = 0;
	original_cm->xmit_dev = dest_dev;
	original_cm->xmit_dev_mtu = sic->dest_mtu;
	memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
	memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
	original_cm->connection = c;
	original_cm->counter_match = reply_cm;
	original_cm->flags = 0;
	if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
		original_cm->priority = sic->src_priority;
		original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
	}
	if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
		original_cm->dscp = sic->src_dscp << SFE_IPV6_DSCP_SHIFT;
		original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK;
	}
#ifdef CONFIG_NF_FLOW_COOKIE
	original_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
	original_cm->flow_accel = sic->original_accel;
#endif
	original_cm->active_next = NULL;
	original_cm->active_prev = NULL;
	original_cm->active = false;

	/*
	 * For PPP links we don't write an L2 header. For everything else we do.
	 */
	if (!(dest_dev->flags & IFF_POINTOPOINT)) {
		original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;

		/*
		 * If our dev writes Ethernet headers then we can write a really fast
		 * version.
		 */
		if (dest_dev->header_ops) {
			if (dest_dev->header_ops->create == eth_header) {
				original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
			}
		}
	}

	/*
	 * Fill in the "reply" direction connection matching object.
	 */
	reply_cm->match_dev = dest_dev;
	reply_cm->match_protocol = sic->protocol;
	reply_cm->match_src_ip[0] = sic->dest_ip_xlate.ip6[0];
	reply_cm->match_src_port = sic->dest_port_xlate;
	reply_cm->match_dest_ip[0] = sic->src_ip_xlate.ip6[0];
	reply_cm->match_dest_port = sic->src_port_xlate;
	reply_cm->xlate_src_ip[0] = sic->dest_ip.ip6[0];
	reply_cm->xlate_src_port = sic->dest_port;
	reply_cm->xlate_dest_ip[0] = sic->src_ip.ip6[0];
	reply_cm->xlate_dest_port = sic->src_port;
	reply_cm->rx_packet_count = 0;
	reply_cm->rx_packet_count64 = 0;
	reply_cm->rx_byte_count = 0;
	reply_cm->rx_byte_count64 = 0;
	reply_cm->xmit_dev = src_dev;
	reply_cm->xmit_dev_mtu = sic->src_mtu;
	memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
	memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
	reply_cm->connection = c;
	reply_cm->counter_match = original_cm;
	reply_cm->flags = 0;
	if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
		reply_cm->priority = sic->dest_priority;
		reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
	}
	if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
		reply_cm->dscp = sic->dest_dscp << SFE_IPV6_DSCP_SHIFT;
		reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK;
	}
#ifdef CONFIG_NF_FLOW_COOKIE
	reply_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
	reply_cm->flow_accel = sic->reply_accel;
#endif
	reply_cm->active_next = NULL;
	reply_cm->active_prev = NULL;
	reply_cm->active = false;

	/*
	 * For PPP links we don't write an L2 header. For everything else we do.
	 */
	if (!(src_dev->flags & IFF_POINTOPOINT)) {
		reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;

		/*
		 * If our dev writes Ethernet headers then we can write a really fast
		 * version.
		 */
		if (src_dev->header_ops) {
			if (src_dev->header_ops->create == eth_header) {
				reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
			}
		}
	}


	/*
	 * Flag which directions need address/port translation; each direction's
	 * "xlate dest" mirrors the other direction's "xlate src".
	 */
	if (!sfe_ipv6_addr_equal(sic->dest_ip.ip6, sic->dest_ip_xlate.ip6) || sic->dest_port != sic->dest_port_xlate) {
		original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST;
		reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC;
	}

	if (!sfe_ipv6_addr_equal(sic->src_ip.ip6, sic->src_ip_xlate.ip6) || sic->src_port != sic->src_port_xlate) {
		original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC;
		reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST;
	}

	c->protocol = sic->protocol;
	c->src_ip[0] = sic->src_ip.ip6[0];
	c->src_ip_xlate[0] = sic->src_ip_xlate.ip6[0];
	c->src_port = sic->src_port;
	c->src_port_xlate = sic->src_port_xlate;
	c->original_dev = src_dev;
	c->original_match = original_cm;
	c->dest_ip[0] = sic->dest_ip.ip6[0];
	c->dest_ip_xlate[0] = sic->dest_ip_xlate.ip6[0];
	c->dest_port = sic->dest_port;
	c->dest_port_xlate = sic->dest_port_xlate;
	c->reply_dev = dest_dev;
	c->reply_match = reply_cm;
	c->mark = sic->mark;
	c->debug_read_seq = 0;
	c->last_sync_jiffies = get_jiffies_64();

	/*
	 * Take hold of our source and dest devices for the duration of the connection.
	 */
	dev_hold(c->original_dev);
	dev_hold(c->reply_dev);

	/*
	 * Initialize the protocol-specific information that we track.
	 */
	switch (sic->protocol) {
	case IPPROTO_TCP:
		original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
		/* max_win of 0 would make the window checks reject everything. */
		original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ?
			sic->src_td_max_window : 1;
		original_cm->protocol_state.tcp.end = sic->src_td_end;
		original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
		reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
		reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
		reply_cm->protocol_state.tcp.end = sic->dest_td_end;
		reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
		if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
			original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
			reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
		}
		break;
	}

	sfe_ipv6_connection_match_compute_translations(original_cm);
	sfe_ipv6_connection_match_compute_translations(reply_cm);
	sfe_ipv6_insert_connection(si, c);

	spin_unlock_bh(&si->lock);

	/*
	 * We have everything we need!
	 */
	DEBUG_INFO("new connection - mark: %08x, p: %d\n"
		   " s: %s:%pxM(%pxM):%pI6(%pI6):%u(%u)\n"
		   " d: %s:%pxM(%pxM):%pI6(%pI6):%u(%u)\n",
		   sic->mark, sic->protocol,
		   sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
		   sic->src_ip.ip6, sic->src_ip_xlate.ip6, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
		   dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
		   sic->dest_ip.ip6, sic->dest_ip_xlate.ip6, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));

	return 0;
}

/*
 * sfe_ipv6_destroy_rule()
 *	Destroy a forwarding rule.
 */
void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid)
{
	struct sfe_ipv6 *si = &__si6;
	struct sfe_ipv6_connection *c;

	spin_lock_bh(&si->lock);
	si->connection_destroy_requests++;

	/*
	 * Check to see if we have a flow that matches the rule we're trying
	 * to destroy. If there isn't then we can't destroy it.
	 */
	c = sfe_ipv6_find_connection(si, sid->protocol, sid->src_ip.ip6, sid->src_port,
				     sid->dest_ip.ip6, sid->dest_port);
	if (!c) {
		si->connection_destroy_misses++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("connection does not exist - p: %d, s: %pI6:%u, d: %pI6:%u\n",
			    sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port),
			    sid->dest_ip.ip6, ntohs(sid->dest_port));
		return;
	}

	/*
	 * Remove our connection details from the hash tables.
	 */
	sfe_ipv6_remove_connection(si, c);
	spin_unlock_bh(&si->lock);

	/* Flush outside the lock - final stats sync may call back out. */
	sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY);

	DEBUG_INFO("connection destroyed - p: %d, s: %pI6:%u, d: %pI6:%u\n",
		   sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port),
		   sid->dest_ip.ip6, ntohs(sid->dest_port));
}

/*
 * sfe_ipv6_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 *
 * Pass NULL to unregister. Readers access the pointer under RCU.
 */
void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv6 *si = &__si6;

	spin_lock_bh(&si->lock);
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}

/*
 * sfe_ipv6_get_debug_dev()
 *	sysfs "show" handler: report the debug char device's major number.
 */
static ssize_t sfe_ipv6_get_debug_dev(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	struct sfe_ipv6 *si = &__si6;
	ssize_t count;
	int num;

	spin_lock_bh(&si->lock);
	num = si->debug_dev;
	spin_unlock_bh(&si->lock);

	count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
	return count;
}

/*
 * sfe_ipv6_destroy_all_rules_for_dev()
 *	Destroy all connections that match a particular device.
 *
 * If we pass dev as NULL then this destroys all connections.
 */
void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev)
{
	struct sfe_ipv6 *si = &__si6;
	struct sfe_ipv6_connection *c;

another_round:
	spin_lock_bh(&si->lock);

	for (c = si->all_connections_head; c; c = c->all_connections_next) {
		/*
		 * Does this connection relate to the device we are destroying?
		 */
		if (!dev
		    || (dev == c->original_dev)
		    || (dev == c->reply_dev)) {
			break;
		}
	}

	if (c) {
		sfe_ipv6_remove_connection(si, c);
	}

	spin_unlock_bh(&si->lock);

	/*
	 * Flush with the lock dropped, then rescan from the head: the list may
	 * have changed while unlocked, so one connection is handled per pass.
	 */
	if (c) {
		sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY);
		goto another_round;
	}
}

/*
 * sfe_ipv6_periodic_sync()
 *	Timer handler: push connection stats to the registered sync callback.
 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0))
static void sfe_ipv6_periodic_sync(unsigned long arg)
#else
static void sfe_ipv6_periodic_sync(struct timer_list *tl)
#endif
{
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0))
	struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg;
#else
	struct sfe_ipv6 *si = from_timer(si, tl, timer);
#endif
	u64 now_jiffies;
	int quota;
	sfe_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv6_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 * Roughly 1/64th of the table per tick keeps each pass bounded.
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv6_connection_match *cm;
		struct sfe_ipv6_connection_match *counter_cm;
		struct sfe_ipv6_connection *c;
		struct sfe_connection_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it.
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv6_gen_sync_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	/* Re-arm: roughly one tick every 10ms. */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}

/*
 * sfe_ipv6_debug_dev_read_start()
 *	Generate part of the XML output.
 *
 * Returns false if the copy to userspace fails; advances ws->state on success.
 */
static bool sfe_ipv6_debug_dev_read_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
					  int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
{
	int bytes_read;

	si->debug_read_seq++;

	/*
	 * NOTE(review): this format string appears truncated - it presumably
	 * contained an XML open tag that was lost in transit. Verify against
	 * the upstream source before relying on the debug output format.
	 */
	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv6_debug_dev_read_connections_start()
 *	Generate part of the XML output.
- */ -static bool sfe_ipv6_debug_dev_read_connections_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_connections_connection() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - struct sfe_ipv6_connection *c; - struct sfe_ipv6_connection_match *original_cm; - struct sfe_ipv6_connection_match *reply_cm; - int bytes_read; - int protocol; - struct net_device *src_dev; - struct sfe_ipv6_addr src_ip; - struct sfe_ipv6_addr src_ip_xlate; - __be16 src_port; - __be16 src_port_xlate; - u64 src_rx_packets; - u64 src_rx_bytes; - struct net_device *dest_dev; - struct sfe_ipv6_addr dest_ip; - struct sfe_ipv6_addr dest_ip_xlate; - __be16 dest_port; - __be16 dest_port_xlate; - u64 dest_rx_packets; - u64 dest_rx_bytes; - u64 last_sync_jiffies; - u32 mark, src_priority, dest_priority, src_dscp, dest_dscp; -#ifdef CONFIG_NF_FLOW_COOKIE - int src_flow_cookie, dst_flow_cookie; -#endif - - spin_lock_bh(&si->lock); - - for (c = si->all_connections_head; c; c = c->all_connections_next) { - if (c->debug_read_seq < si->debug_read_seq) { - c->debug_read_seq = si->debug_read_seq; - break; - } - } - - /* - * If there were no connections then move to the next state. 
- */ - if (!c) { - spin_unlock_bh(&si->lock); - ws->state++; - return true; - } - - original_cm = c->original_match; - reply_cm = c->reply_match; - - protocol = c->protocol; - src_dev = c->original_dev; - src_ip = c->src_ip[0]; - src_ip_xlate = c->src_ip_xlate[0]; - src_port = c->src_port; - src_port_xlate = c->src_port_xlate; - src_priority = original_cm->priority; - src_dscp = original_cm->dscp >> SFE_IPV6_DSCP_SHIFT; - - sfe_ipv6_connection_match_update_summary_stats(original_cm); - sfe_ipv6_connection_match_update_summary_stats(reply_cm); - - src_rx_packets = original_cm->rx_packet_count64; - src_rx_bytes = original_cm->rx_byte_count64; - dest_dev = c->reply_dev; - dest_ip = c->dest_ip[0]; - dest_ip_xlate = c->dest_ip_xlate[0]; - dest_port = c->dest_port; - dest_port_xlate = c->dest_port_xlate; - dest_priority = reply_cm->priority; - dest_dscp = reply_cm->dscp >> SFE_IPV6_DSCP_SHIFT; - dest_rx_packets = reply_cm->rx_packet_count64; - dest_rx_bytes = reply_cm->rx_byte_count64; - last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies; - mark = c->mark; -#ifdef CONFIG_NF_FLOW_COOKIE - src_flow_cookie = original_cm->flow_cookie; - dst_flow_cookie = reply_cm->flow_cookie; -#endif - spin_unlock_bh(&si->lock); - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t\n", - protocol, - src_dev->name, - &src_ip, &src_ip_xlate, - ntohs(src_port), ntohs(src_port_xlate), - src_priority, src_dscp, - src_rx_packets, src_rx_bytes, - dest_dev->name, - &dest_ip, &dest_ip_xlate, - ntohs(dest_port), ntohs(dest_port_xlate), - dest_priority, dest_dscp, - dest_rx_packets, dest_rx_bytes, -#ifdef CONFIG_NF_FLOW_COOKIE - src_flow_cookie, dst_flow_cookie, -#endif - last_sync_jiffies, mark); - - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - return true; -} - -/* - * sfe_ipv6_debug_dev_read_connections_end() - * Generate part of the XML output. 
- */ -static bool sfe_ipv6_debug_dev_read_connections_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_exceptions_start() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_exceptions_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_exceptions_exception() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_exceptions_exception(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - u64 ct; - - spin_lock_bh(&si->lock); - ct = si->exception_events64[ws->iter_exception]; - spin_unlock_bh(&si->lock); - - if (ct) { - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, - "\t\t\n", - sfe_ipv6_exception_events_string[ws->iter_exception], - ct); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - } - - ws->iter_exception++; - if (ws->iter_exception >= SFE_IPV6_EXCEPTION_EVENT_LAST) { - ws->iter_exception = 0; - ws->state++; - } - - return true; -} - -/* - * sfe_ipv6_debug_dev_read_exceptions_end() - * Generate part of the XML output. 
- */ -static bool sfe_ipv6_debug_dev_read_exceptions_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_stats() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_stats(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - unsigned int num_connections; - u64 packets_forwarded; - u64 packets_not_forwarded; - u64 connection_create_requests; - u64 connection_create_collisions; - u64 connection_destroy_requests; - u64 connection_destroy_misses; - u64 connection_flushes; - u64 connection_match_hash_hits; - u64 connection_match_hash_reorders; - - spin_lock_bh(&si->lock); - sfe_ipv6_update_summary_stats(si); - - num_connections = si->num_connections; - packets_forwarded = si->packets_forwarded64; - packets_not_forwarded = si->packets_not_forwarded64; - connection_create_requests = si->connection_create_requests64; - connection_create_collisions = si->connection_create_collisions64; - connection_destroy_requests = si->connection_destroy_requests64; - connection_destroy_misses = si->connection_destroy_misses64; - connection_flushes = si->connection_flushes64; - connection_match_hash_hits = si->connection_match_hash_hits64; - connection_match_hash_reorders = si->connection_match_hash_reorders64; - spin_unlock_bh(&si->lock); - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\n", - num_connections, - packets_forwarded, - packets_not_forwarded, - connection_create_requests, - connection_create_collisions, - connection_destroy_requests, - connection_destroy_misses, - connection_flushes, - 
connection_match_hash_hits, - connection_match_hash_reorders); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * sfe_ipv6_debug_dev_read_end() - * Generate part of the XML output. - */ -static bool sfe_ipv6_debug_dev_read_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, - int *total_read, struct sfe_ipv6_debug_xml_write_state *ws) -{ - int bytes_read; - - bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\n"); - if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) { - return false; - } - - *length -= bytes_read; - *total_read += bytes_read; - - ws->state++; - return true; -} - -/* - * Array of write functions that write various XML elements that correspond to - * our XML output state machine. - */ -static sfe_ipv6_debug_xml_write_method_t sfe_ipv6_debug_xml_write_methods[SFE_IPV6_DEBUG_XML_STATE_DONE] = { - sfe_ipv6_debug_dev_read_start, - sfe_ipv6_debug_dev_read_connections_start, - sfe_ipv6_debug_dev_read_connections_connection, - sfe_ipv6_debug_dev_read_connections_end, - sfe_ipv6_debug_dev_read_exceptions_start, - sfe_ipv6_debug_dev_read_exceptions_exception, - sfe_ipv6_debug_dev_read_exceptions_end, - sfe_ipv6_debug_dev_read_stats, - sfe_ipv6_debug_dev_read_end, -}; - -/* - * sfe_ipv6_debug_dev_read() - * Send info to userspace upon read request from user - */ -static ssize_t sfe_ipv6_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset) -{ - char msg[CHAR_DEV_MSG_SIZE]; - int total_read = 0; - struct sfe_ipv6_debug_xml_write_state *ws; - struct sfe_ipv6 *si = &__si6; - - ws = (struct sfe_ipv6_debug_xml_write_state *)filp->private_data; - while ((ws->state != SFE_IPV6_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) { - if ((sfe_ipv6_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) { - continue; - } - } - - return total_read; -} - -/* - * 
sfe_ipv6_debug_dev_write() - * Write to char device resets some stats - */ -static ssize_t sfe_ipv6_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset) -{ - struct sfe_ipv6 *si = &__si6; - - spin_lock_bh(&si->lock); - sfe_ipv6_update_summary_stats(si); - - si->packets_forwarded64 = 0; - si->packets_not_forwarded64 = 0; - si->connection_create_requests64 = 0; - si->connection_create_collisions64 = 0; - si->connection_destroy_requests64 = 0; - si->connection_destroy_misses64 = 0; - si->connection_flushes64 = 0; - si->connection_match_hash_hits64 = 0; - si->connection_match_hash_reorders64 = 0; - spin_unlock_bh(&si->lock); - - return length; -} - -/* - * sfe_ipv6_debug_dev_open() - */ -static int sfe_ipv6_debug_dev_open(struct inode *inode, struct file *file) -{ - struct sfe_ipv6_debug_xml_write_state *ws; - - ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data; - if (ws) { - return 0; - } - - ws = kzalloc(sizeof(struct sfe_ipv6_debug_xml_write_state), GFP_KERNEL); - if (!ws) { - return -ENOMEM; - } - - ws->state = SFE_IPV6_DEBUG_XML_STATE_START; - file->private_data = ws; - - return 0; -} - -/* - * sfe_ipv6_debug_dev_release() - */ -static int sfe_ipv6_debug_dev_release(struct inode *inode, struct file *file) -{ - struct sfe_ipv6_debug_xml_write_state *ws; - - ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data; - if (ws) { - /* - * We've finished with our output so free the write state. 
- */ - kfree(ws); - } - - return 0; -} - -/* - * File operations used in the debug char device - */ -static struct file_operations sfe_ipv6_debug_dev_fops = { - .read = sfe_ipv6_debug_dev_read, - .write = sfe_ipv6_debug_dev_write, - .open = sfe_ipv6_debug_dev_open, - .release = sfe_ipv6_debug_dev_release -}; - -#ifdef CONFIG_NF_FLOW_COOKIE -/* - * sfe_ipv6_register_flow_cookie_cb - * register a function in SFE to let SFE use this function to configure flow cookie for a flow - * - * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE - * can use this function to configure flow cookie for a flow. - * return: 0, success; !=0, fail - */ -int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb) -{ - struct sfe_ipv6 *si = &__si6; - - BUG_ON(!cb); - - if (si->flow_cookie_set_func) { - return -1; - } - - rcu_assign_pointer(si->flow_cookie_set_func, cb); - return 0; -} - -/* - * sfe_ipv6_unregister_flow_cookie_cb - * unregister function which is used to configure flow cookie for a flow - * - * return: 0, success; !=0, fail - */ -int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb) -{ - struct sfe_ipv6 *si = &__si6; - - RCU_INIT_POINTER(si->flow_cookie_set_func, NULL); - return 0; -} - -/* - * sfe_ipv6_get_flow_cookie() - */ -static ssize_t sfe_ipv6_get_flow_cookie(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct sfe_ipv6 *si = &__si6; - return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable); -} - -/* - * sfe_ipv6_set_flow_cookie() - */ -static ssize_t sfe_ipv6_set_flow_cookie(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct sfe_ipv6 *si = &__si6; - strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable); - - return size; -} - -/* - * sysfs attributes. 
- */ -static const struct device_attribute sfe_ipv6_flow_cookie_attr = - __ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv6_get_flow_cookie, sfe_ipv6_set_flow_cookie); -#endif /*CONFIG_NF_FLOW_COOKIE*/ - -/* - * sfe_ipv6_init() - */ -static int __init sfe_ipv6_init(void) -{ - struct sfe_ipv6 *si = &__si6; - int result = -1; - - DEBUG_INFO("SFE IPv6 init\n"); - - /* - * Create sys/sfe_ipv6 - */ - si->sys_sfe_ipv6 = kobject_create_and_add("sfe_ipv6", NULL); - if (!si->sys_sfe_ipv6) { - DEBUG_ERROR("failed to register sfe_ipv6\n"); - goto exit1; - } - - /* - * Create files, one for each parameter supported by this module. - */ - result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr); - if (result) { - DEBUG_ERROR("failed to register debug dev file: %d\n", result); - goto exit2; - } - -#ifdef CONFIG_NF_FLOW_COOKIE - result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr); - if (result) { - DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result); - goto exit3; - } -#endif /* CONFIG_NF_FLOW_COOKIE */ - - /* - * Register our debug char device. - */ - result = register_chrdev(0, "sfe_ipv6", &sfe_ipv6_debug_dev_fops); - if (result < 0) { - DEBUG_ERROR("Failed to register chrdev: %d\n", result); - goto exit4; - } - - si->debug_dev = result; - - /* - * Create a timer to handle periodic statistics. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)) - setup_timer(&si->timer, sfe_ipv6_periodic_sync, (unsigned long)si); -#else - timer_setup(&si->timer, sfe_ipv6_periodic_sync, 0); -#endif - mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); - - spin_lock_init(&si->lock); - - return 0; - -exit4: -#ifdef CONFIG_NF_FLOW_COOKIE - sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr); - -exit3: -#endif /* CONFIG_NF_FLOW_COOKIE */ - sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr); - -exit2: - kobject_put(si->sys_sfe_ipv6); - -exit1: - return result; -} - -/* - * sfe_ipv6_exit() - */ -static void __exit sfe_ipv6_exit(void) -{ - struct sfe_ipv6 *si = &__si6; - - DEBUG_INFO("SFE IPv6 exit\n"); - - /* - * Destroy all connections. - */ - sfe_ipv6_destroy_all_rules_for_dev(NULL); - - del_timer_sync(&si->timer); - - unregister_chrdev(si->debug_dev, "sfe_ipv6"); - -#ifdef CONFIG_NF_FLOW_COOKIE - sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr); -#endif /* CONFIG_NF_FLOW_COOKIE */ - sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr); - - kobject_put(si->sys_sfe_ipv6); -} - -module_init(sfe_ipv6_init) -module_exit(sfe_ipv6_exit) - -EXPORT_SYMBOL(sfe_ipv6_recv); -EXPORT_SYMBOL(sfe_ipv6_create_rule); -EXPORT_SYMBOL(sfe_ipv6_destroy_rule); -EXPORT_SYMBOL(sfe_ipv6_destroy_all_rules_for_dev); -EXPORT_SYMBOL(sfe_ipv6_register_sync_rule_callback); -EXPORT_SYMBOL(sfe_ipv6_mark_rule); -EXPORT_SYMBOL(sfe_ipv6_update_rule); -#ifdef CONFIG_NF_FLOW_COOKIE -EXPORT_SYMBOL(sfe_ipv6_register_flow_cookie_cb); -EXPORT_SYMBOL(sfe_ipv6_unregister_flow_cookie_cb); -#endif - -MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv6 support"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/shortcut-fe/simulated-driver/sfe_drv.c b/shortcut-fe/simulated-driver/sfe_drv.c deleted file mode 100644 index 98677d911..000000000 --- a/shortcut-fe/simulated-driver/sfe_drv.c +++ /dev/null @@ -1,1323 +0,0 @@ -/* - * sfe_drv.c - * simulated 
sfe driver for shortcut forwarding engine. - * - * Copyright (c) 2015,2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "../shortcut-fe/sfe.h" -#include "../shortcut-fe/sfe_cm.h" -#include "sfe_drv.h" - -typedef enum sfe_drv_exception { - SFE_DRV_EXCEPTION_IPV4_MSG_UNKNOW, - SFE_DRV_EXCEPTION_IPV6_MSG_UNKNOW, - SFE_DRV_EXCEPTION_CONNECTION_INVALID, - SFE_DRV_EXCEPTION_NOT_SUPPORT_BRIDGE, - SFE_DRV_EXCEPTION_TCP_INVALID, - SFE_DRV_EXCEPTION_PROTOCOL_NOT_SUPPORT, - SFE_DRV_EXCEPTION_SRC_DEV_NOT_L3, - SFE_DRV_EXCEPTION_DEST_DEV_NOT_L3, - SFE_DRV_EXCEPTION_CREATE_FAILED, - SFE_DRV_EXCEPTION_ENQUEUE_FAILED, - SFE_DRV_EXCEPTION_NOT_SUPPORT_6RD, - SFE_DRV_EXCEPTION_NO_SYNC_CB, - SFE_DRV_EXCEPTION_MAX -} sfe_drv_exception_t; - -static char *sfe_drv_exception_events_string[SFE_DRV_EXCEPTION_MAX] = { - "IPV4_MSG_UNKNOW", - "IPV6_MSG_UNKNOW", - "CONNECTION_INVALID", - "NOT_SUPPORT_BRIDGE", - "TCP_INVALID", - "PROTOCOL_NOT_SUPPORT", - "SRC_DEV_NOT_L3", - "DEST_DEV_NOT_L3", - "CREATE_FAILED", - "ENQUEUE_FAILED", - "NOT_SUPPORT_6RD", - "NO_SYNC_CB" -}; - -#define SFE_MESSAGE_VERSION 0x1 -#define SFE_MAX_CONNECTION_NUM 65535 -#define sfe_drv_ipv6_addr_copy(src, dest) 
memcpy((void *)(dest), (void *)(src), 16) -#define sfe_drv_ipv4_stopped(CTX) (rcu_dereference((CTX)->ipv4_stats_sync_cb) == NULL) -#define sfe_drv_ipv6_stopped(CTX) (rcu_dereference((CTX)->ipv6_stats_sync_cb) == NULL) - -/* - * message type of queued response message - */ -typedef enum { - SFE_DRV_MSG_TYPE_IPV4, - SFE_DRV_MSG_TYPE_IPV6 -} sfe_drv_msg_types_t; - -/* - * queued response message, - * will be sent back to caller in workqueue - */ -struct sfe_drv_response_msg { - struct list_head node; - sfe_drv_msg_types_t type; - void *msg[0]; -}; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -#define list_first_entry_or_null(ptr, type, member) \ - (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) -#endif - -/* - * sfe driver context instance, private for sfe driver - */ -struct sfe_drv_ctx_instance_internal { - struct sfe_drv_ctx_instance base;/* exported sfe driver context, is public to user of sfe driver*/ - - /* - * Control state. - */ - struct kobject *sys_sfe_drv; /* sysfs linkage */ - - struct list_head msg_queue; /* response message queue*/ - spinlock_t lock; /* Lock to protect message queue */ - - struct work_struct work; /* work to send response message back to caller*/ - - sfe_ipv4_msg_callback_t __rcu ipv4_stats_sync_cb; /* callback to call to sync ipv4 statistics */ - void *ipv4_stats_sync_data; /* argument for above callback: ipv4_stats_sync_cb */ - - sfe_ipv6_msg_callback_t __rcu ipv6_stats_sync_cb; /* callback to call to sync ipv6 statistics */ - void *ipv6_stats_sync_data; /* argument for above callback: ipv6_stats_sync_cb */ - - u32 exceptions[SFE_DRV_EXCEPTION_MAX]; /* statistics for exception */ -}; - -static struct sfe_drv_ctx_instance_internal __sfe_drv_ctx; - -/* - * convert public sfe driver context to internal context - */ -#define SFE_DRV_CTX_TO_PRIVATE(base) (struct sfe_drv_ctx_instance_internal *)(base) -/* - * convert internal sfe driver context to public context - */ -#define SFE_DRV_CTX_TO_PUBLIC(intrv) (struct 
sfe_drv_ctx_instance *)(intrv) - -/* - * sfe_drv_incr_exceptions() - * increase an exception counter. - */ -static inline void sfe_drv_incr_exceptions(sfe_drv_exception_t except) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - - spin_lock_bh(&sfe_drv_ctx->lock); - sfe_drv_ctx->exceptions[except]++; - spin_unlock_bh(&sfe_drv_ctx->lock); -} - -/* - * sfe_drv_dev_is_layer_3_interface() - * check if a network device is ipv4 or ipv6 layer 3 interface - * - * @param dev network device to check - * @param check_v4 check ipv4 layer 3 interface(which have ipv4 address) or ipv6 layer 3 interface(which have ipv6 address) - */ -inline bool sfe_drv_dev_is_layer_3_interface(struct net_device *dev, bool check_v4) -{ - struct in_device *in4_dev; - struct inet6_dev *in6_dev; - - BUG_ON(!dev); - - if (likely(check_v4)) { - /* - * Does our input device support IPv4 processing? - */ - in4_dev = (struct in_device *)dev->ip_ptr; - if (unlikely(!in4_dev)) { - return false; - } - - /* - * Does it have an IPv4 address? If it doesn't then we can't do anything - * interesting here! - */ - if (unlikely(!in4_dev->ifa_list)) { - return false; - } - - return true; - } - - /* - * Does our input device support IPv6 processing? - */ - in6_dev = (struct inet6_dev *)dev->ip6_ptr; - if (unlikely(!in6_dev)) { - return false; - } - - /* - * Does it have an IPv6 address? If it doesn't then we can't do anything - * interesting here! 
- */ - if (unlikely(list_empty(&in6_dev->addr_list))) { - return false; - } - - return true; -} - -/* - * sfe_drv_clean_response_msg_by_type() - * clean response message in queue when ECM exit - * - * @param sfe_drv_ctx sfe driver context - * @param msg_type message type, ipv4 or ipv6 - */ -static void sfe_drv_clean_response_msg_by_type(struct sfe_drv_ctx_instance_internal *sfe_drv_ctx, sfe_drv_msg_types_t msg_type) -{ - struct sfe_drv_response_msg *response, *tmp; - - if (!sfe_drv_ctx) { - return; - } - - spin_lock_bh(&sfe_drv_ctx->lock); - list_for_each_entry_safe(response, tmp, &sfe_drv_ctx->msg_queue, node) { - if (response->type == msg_type) { - list_del(&response->node); - /* - * free response message - */ - kfree(response); - } - } - spin_unlock_bh(&sfe_drv_ctx->lock); - -} - -/* - * sfe_drv_process_response_msg() - * send all pending response message to ECM by calling callback function included in message - * - * @param work work structure - */ -static void sfe_drv_process_response_msg(struct work_struct *work) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = container_of(work, struct sfe_drv_ctx_instance_internal, work); - struct sfe_drv_response_msg *response; - - spin_lock_bh(&sfe_drv_ctx->lock); - while ((response = list_first_entry_or_null(&sfe_drv_ctx->msg_queue, struct sfe_drv_response_msg, node))) { - list_del(&response->node); - spin_unlock_bh(&sfe_drv_ctx->lock); - rcu_read_lock(); - - /* - * send response message back to caller - */ - if ((response->type == SFE_DRV_MSG_TYPE_IPV4) && !sfe_drv_ipv4_stopped(sfe_drv_ctx)) { - struct sfe_ipv4_msg *msg = (struct sfe_ipv4_msg *)response->msg; - sfe_ipv4_msg_callback_t callback = (sfe_ipv4_msg_callback_t)msg->cm.cb; - if (callback) { - callback((void *)msg->cm.app_data, msg); - } - } else if ((response->type == SFE_DRV_MSG_TYPE_IPV6) && !sfe_drv_ipv6_stopped(sfe_drv_ctx)) { - struct sfe_ipv6_msg *msg = (struct sfe_ipv6_msg *)response->msg; - sfe_ipv6_msg_callback_t callback = 
(sfe_ipv6_msg_callback_t)msg->cm.cb; - if (callback) { - callback((void *)msg->cm.app_data, msg); - } - } - - rcu_read_unlock(); - /* - * free response message - */ - kfree(response); - spin_lock_bh(&sfe_drv_ctx->lock); - } - spin_unlock_bh(&sfe_drv_ctx->lock); -} - -/* - * sfe_drv_alloc_response_msg() - * alloc and construct new response message - * - * @param type message type - * @param msg used to construct response message if not NULL - * - * @return !NULL, success; NULL, failed - */ -static struct sfe_drv_response_msg * -sfe_drv_alloc_response_msg(sfe_drv_msg_types_t type, void *msg) -{ - struct sfe_drv_response_msg *response; - int size; - - switch (type) { - case SFE_DRV_MSG_TYPE_IPV4: - size = sizeof(struct sfe_ipv4_msg); - break; - case SFE_DRV_MSG_TYPE_IPV6: - size = sizeof(struct sfe_ipv6_msg); - break; - default: - DEBUG_ERROR("message type %d not supported\n", type); - return NULL; - } - - response = (struct sfe_drv_response_msg *)kzalloc(sizeof(struct sfe_drv_response_msg) + size, GFP_ATOMIC); - if (!response) { - DEBUG_ERROR("allocate memory failed\n"); - return NULL; - } - - response->type = type; - - if (msg) { - memcpy(response->msg, msg, size); - } - - return response; -} - -/* - * sfe_drv_enqueue_msg() - * queue response message - * - * @param sfe_drv_ctx sfe driver context - * @param response response message to be queue - */ -static inline void sfe_drv_enqueue_msg(struct sfe_drv_ctx_instance_internal *sfe_drv_ctx, struct sfe_drv_response_msg *response) -{ - spin_lock_bh(&sfe_drv_ctx->lock); - list_add_tail(&response->node, &sfe_drv_ctx->msg_queue); - spin_unlock_bh(&sfe_drv_ctx->lock); - - schedule_work(&sfe_drv_ctx->work); -} - -/* - * sfe_cmn_msg_init() - * Initialize the common message structure. 
- * - * @param ncm message to init - * @param if_num interface number related with this message - * @param type message type - * @param cb callback function to process repsonse of this message - * @param app_data argument for above callback function - */ -static void sfe_cmn_msg_init(struct sfe_cmn_msg *ncm, u16 if_num, u32 type, u32 len, void *cb, void *app_data) -{ - ncm->interface = if_num; - ncm->version = SFE_MESSAGE_VERSION; - ncm->type = type; - ncm->len = len; - ncm->cb = (sfe_ptr_t)cb; - ncm->app_data = (sfe_ptr_t)app_data; -} - -/* - * sfe_drv_ipv4_stats_sync_callback() - * Synchronize a connection's state. - * - * @param sis SFE statistics from SFE core engine - */ -static void sfe_drv_ipv4_stats_sync_callback(struct sfe_connection_sync *sis) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - struct sfe_ipv4_msg msg; - struct sfe_ipv4_conn_sync *sync_msg; - sfe_ipv4_msg_callback_t sync_cb; - - rcu_read_lock(); - sync_cb = rcu_dereference(sfe_drv_ctx->ipv4_stats_sync_cb); - if (!sync_cb) { - rcu_read_unlock(); - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_NO_SYNC_CB); - return; - } - - sync_msg = &msg.msg.conn_stats; - - memset(&msg, 0, sizeof(msg)); - sfe_cmn_msg_init(&msg.cm, 0, SFE_RX_CONN_STATS_SYNC_MSG, - sizeof(struct sfe_ipv4_conn_sync), NULL, NULL); - - /* - * fill connection specific information - */ - sync_msg->protocol = (u8)sis->protocol; - sync_msg->flow_ip = sis->src_ip.ip; - sync_msg->flow_ip_xlate = sis->src_ip_xlate.ip; - sync_msg->flow_ident = sis->src_port; - sync_msg->flow_ident_xlate = sis->src_port_xlate; - - sync_msg->return_ip = sis->dest_ip.ip; - sync_msg->return_ip_xlate = sis->dest_ip_xlate.ip; - sync_msg->return_ident = sis->dest_port; - sync_msg->return_ident_xlate = sis->dest_port_xlate; - - /* - * fill TCP protocol specific information - */ - if (sis->protocol == IPPROTO_TCP) { - sync_msg->flow_max_window = sis->src_td_max_window; - sync_msg->flow_end = sis->src_td_end; - sync_msg->flow_max_end = 
sis->src_td_max_end; - - sync_msg->return_max_window = sis->dest_td_max_window; - sync_msg->return_end = sis->dest_td_end; - sync_msg->return_max_end = sis->dest_td_max_end; - } - - /* - * fill statistics information - */ - sync_msg->flow_rx_packet_count = sis->src_new_packet_count; - sync_msg->flow_rx_byte_count = sis->src_new_byte_count; - sync_msg->flow_tx_packet_count = sis->dest_new_packet_count; - sync_msg->flow_tx_byte_count = sis->dest_new_byte_count; - - sync_msg->return_rx_packet_count = sis->dest_new_packet_count; - sync_msg->return_rx_byte_count = sis->dest_new_byte_count; - sync_msg->return_tx_packet_count = sis->src_new_packet_count; - sync_msg->return_tx_byte_count = sis->src_new_byte_count; - - /* - * fill expiration time to extend, in unit of msec - */ - sync_msg->inc_ticks = (((u32)sis->delta_jiffies) * MSEC_PER_SEC)/HZ; - - /* - * fill other information - */ - switch (sis->reason) { - case SFE_SYNC_REASON_DESTROY: - sync_msg->reason = SFE_RULE_SYNC_REASON_DESTROY; - break; - case SFE_SYNC_REASON_FLUSH: - sync_msg->reason = SFE_RULE_SYNC_REASON_FLUSH; - break; - default: - sync_msg->reason = SFE_RULE_SYNC_REASON_STATS; - break; - } - - /* - * SFE sync calling is excuted in a timer, so we can redirect it to ECM directly. 
- */ - sync_cb(sfe_drv_ctx->ipv4_stats_sync_data, &msg); - rcu_read_unlock(); -} - -/* - * sfe_drv_create_ipv4_rule_msg() - * convert create message format from ecm to sfe - * - * @param sfe_drv_ctx sfe driver context - * @param msg The IPv4 message - * - * @return sfe_tx_status_t The status of the Tx operation - */ -sfe_tx_status_t sfe_drv_create_ipv4_rule_msg(struct sfe_drv_ctx_instance_internal *sfe_drv_ctx, struct sfe_ipv4_msg *msg) -{ - struct sfe_connection_create sic; - struct net_device *src_dev = NULL; - struct net_device *dest_dev = NULL; - struct sfe_drv_response_msg *response; - enum sfe_cmn_response ret; - - response = sfe_drv_alloc_response_msg(SFE_DRV_MSG_TYPE_IPV4, msg); - if (!response) { - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_ENQUEUE_FAILED); - return SFE_TX_FAILURE_QUEUE; - } - - if (!(msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_CONN_VALID)) { - ret = SFE_CMN_RESPONSE_EMSG; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_CONNECTION_INVALID); - goto failed_ret; - } - - /* - * not support bridged flows now - */ - if (msg->msg.rule_create.rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { - ret = SFE_CMN_RESPONSE_EINTERFACE; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_NOT_SUPPORT_BRIDGE); - goto failed_ret; - } - - sic.protocol = msg->msg.rule_create.tuple.protocol; - sic.src_ip.ip = msg->msg.rule_create.tuple.flow_ip; - sic.dest_ip.ip = msg->msg.rule_create.tuple.return_ip; - sic.src_ip_xlate.ip = msg->msg.rule_create.conn_rule.flow_ip_xlate; - sic.dest_ip_xlate.ip = msg->msg.rule_create.conn_rule.return_ip_xlate; - - sic.flags = 0; - switch (sic.protocol) { - case IPPROTO_TCP: - if (!(msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_TCP_VALID)) { - ret = SFE_CMN_RESPONSE_EMSG; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_TCP_INVALID); - goto failed_ret; - } - - sic.src_port = msg->msg.rule_create.tuple.flow_ident; - sic.dest_port = msg->msg.rule_create.tuple.return_ident; - sic.src_port_xlate = 
msg->msg.rule_create.conn_rule.flow_ident_xlate; - sic.dest_port_xlate = msg->msg.rule_create.conn_rule.return_ident_xlate; - sic.src_td_window_scale = msg->msg.rule_create.tcp_rule.flow_window_scale; - sic.src_td_max_window = msg->msg.rule_create.tcp_rule.flow_max_window; - sic.src_td_end = msg->msg.rule_create.tcp_rule.flow_end; - sic.src_td_max_end = msg->msg.rule_create.tcp_rule.flow_max_end; - sic.dest_td_window_scale = msg->msg.rule_create.tcp_rule.return_window_scale; - sic.dest_td_max_window = msg->msg.rule_create.tcp_rule.return_max_window; - sic.dest_td_end = msg->msg.rule_create.tcp_rule.return_end; - sic.dest_td_max_end = msg->msg.rule_create.tcp_rule.return_max_end; - if (msg->msg.rule_create.rule_flags & SFE_RULE_CREATE_FLAG_NO_SEQ_CHECK) { - sic.flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK; - } - break; - - case IPPROTO_UDP: - sic.src_port = msg->msg.rule_create.tuple.flow_ident; - sic.dest_port = msg->msg.rule_create.tuple.return_ident; - sic.src_port_xlate = msg->msg.rule_create.conn_rule.flow_ident_xlate; - sic.dest_port_xlate = msg->msg.rule_create.conn_rule.return_ident_xlate; - break; - - default: - ret = SFE_CMN_RESPONSE_EMSG; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_PROTOCOL_NOT_SUPPORT); - goto failed_ret; - } - - memcpy(sic.src_mac, msg->msg.rule_create.conn_rule.flow_mac, ETH_ALEN); - memset(sic.src_mac_xlate, 0, ETH_ALEN); - memset(sic.dest_mac, 0, ETH_ALEN); - memcpy(sic.dest_mac_xlate, msg->msg.rule_create.conn_rule.return_mac, ETH_ALEN); - - /* - * Does our input device support IP processing? - */ - src_dev = dev_get_by_index(&init_net, msg->msg.rule_create.conn_rule.flow_top_interface_num); - if (!src_dev || !sfe_drv_dev_is_layer_3_interface(src_dev, true)) { - ret = SFE_CMN_RESPONSE_EINTERFACE; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_SRC_DEV_NOT_L3); - goto failed_ret; - } - - /* - * Does our output device support IP processing? 
- */ - dest_dev = dev_get_by_index(&init_net, msg->msg.rule_create.conn_rule.return_top_interface_num); - if (!dest_dev || !sfe_drv_dev_is_layer_3_interface(dest_dev, true)) { - ret = SFE_CMN_RESPONSE_EINTERFACE; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_DEST_DEV_NOT_L3); - goto failed_ret; - } - - sic.src_dev = src_dev; - sic.dest_dev = dest_dev; - - sic.src_mtu = msg->msg.rule_create.conn_rule.flow_mtu; - sic.dest_mtu = msg->msg.rule_create.conn_rule.return_mtu; - - if (msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_QOS_VALID) { - sic.src_priority = msg->msg.rule_create.qos_rule.flow_qos_tag; - sic.dest_priority = msg->msg.rule_create.qos_rule.return_qos_tag; - sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY; - } - - if (msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_DSCP_MARKING_VALID) { - sic.src_dscp = msg->msg.rule_create.dscp_rule.flow_dscp; - sic.dest_dscp = msg->msg.rule_create.dscp_rule.return_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - -#ifdef CONFIG_XFRM - if (msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_DIRECTION_VALID) { - sic.original_accel = msg->msg.rule_create.direction_rule.flow_accel; - sic.reply_accel = msg->msg.rule_create.direction_rule.return_accel; - } else { - sic.original_accel = sic.reply_accel = 1; - } -#endif - - if (!sfe_ipv4_create_rule(&sic)) { - /* success */ - ret = SFE_CMN_RESPONSE_ACK; - } else { - /* failed */ - ret = SFE_CMN_RESPONSE_EMSG; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_CREATE_FAILED); - } - - /* - * fall through - */ -failed_ret: - if (src_dev) { - dev_put(src_dev); - } - - if (dest_dev) { - dev_put(dest_dev); - } - - /* - * try to queue response message - */ - ((struct sfe_ipv4_msg *)response->msg)->cm.response = msg->cm.response = ret; - sfe_drv_enqueue_msg(sfe_drv_ctx, response); - - return SFE_TX_SUCCESS; -} - -/* - * sfe_drv_destroy_ipv4_rule_msg() - * convert destroy message format from ecm to sfe - * - * @param sfe_drv_ctx sfe driver context - * @param msg The IPv4 message - * - * 
@return sfe_tx_status_t The status of the Tx operation - */ -sfe_tx_status_t sfe_drv_destroy_ipv4_rule_msg(struct sfe_drv_ctx_instance_internal *sfe_drv_ctx, struct sfe_ipv4_msg *msg) -{ - struct sfe_connection_destroy sid; - struct sfe_drv_response_msg *response; - - response = sfe_drv_alloc_response_msg(SFE_DRV_MSG_TYPE_IPV4, msg); - if (!response) { - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_ENQUEUE_FAILED); - return SFE_TX_FAILURE_QUEUE; - } - - sid.protocol = msg->msg.rule_destroy.tuple.protocol; - sid.src_ip.ip = msg->msg.rule_destroy.tuple.flow_ip; - sid.dest_ip.ip = msg->msg.rule_destroy.tuple.return_ip; - sid.src_port = msg->msg.rule_destroy.tuple.flow_ident; - sid.dest_port = msg->msg.rule_destroy.tuple.return_ident; - - sfe_ipv4_destroy_rule(&sid); - - /* - * try to queue response message - */ - ((struct sfe_ipv4_msg *)response->msg)->cm.response = msg->cm.response = SFE_CMN_RESPONSE_ACK; - sfe_drv_enqueue_msg(sfe_drv_ctx, response); - - return SFE_TX_SUCCESS; -} - -/* - * sfe_drv_ipv4_tx() - * Transmit an IPv4 message to the sfe - * - * @param sfe_drv_ctx sfe driver context - * @param msg The IPv4 message - * - * @return sfe_tx_status_t The status of the Tx operation - */ -sfe_tx_status_t sfe_drv_ipv4_tx(struct sfe_drv_ctx_instance *sfe_drv_ctx, struct sfe_ipv4_msg *msg) -{ - switch (msg->cm.type) { - case SFE_TX_CREATE_RULE_MSG: - return sfe_drv_create_ipv4_rule_msg(SFE_DRV_CTX_TO_PRIVATE(sfe_drv_ctx), msg); - case SFE_TX_DESTROY_RULE_MSG: - return sfe_drv_destroy_ipv4_rule_msg(SFE_DRV_CTX_TO_PRIVATE(sfe_drv_ctx), msg); - default: - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_IPV4_MSG_UNKNOW); - return SFE_TX_FAILURE_NOT_ENABLED; - } -} -EXPORT_SYMBOL(sfe_drv_ipv4_tx); - -/* - * sfe_ipv4_msg_init() - * Initialize IPv4 message. 
- */ -void sfe_ipv4_msg_init(struct sfe_ipv4_msg *nim, u16 if_num, u32 type, u32 len, - sfe_ipv4_msg_callback_t cb, void *app_data) -{ - sfe_cmn_msg_init(&nim->cm, if_num, type, len, (void *)cb, app_data); -} -EXPORT_SYMBOL(sfe_ipv4_msg_init); - -/* - * sfe_drv_ipv4_max_conn_count() - * return maximum number of entries SFE supported - */ -int sfe_drv_ipv4_max_conn_count(void) -{ - return SFE_MAX_CONNECTION_NUM; -} -EXPORT_SYMBOL(sfe_drv_ipv4_max_conn_count); - -/* - * sfe_drv_ipv4_notify_register() - * Register a notifier callback for IPv4 messages from sfe driver - * - * @param cb The callback pointer - * @param app_data The application context for this message - * - * @return struct sfe_drv_ctx_instance * The sfe driver context - */ -struct sfe_drv_ctx_instance *sfe_drv_ipv4_notify_register(sfe_ipv4_msg_callback_t cb, void *app_data) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - - spin_lock_bh(&sfe_drv_ctx->lock); - /* - * Hook the shortcut sync callback. - */ - if (cb && !sfe_drv_ctx->ipv4_stats_sync_cb) { - sfe_ipv4_register_sync_rule_callback(sfe_drv_ipv4_stats_sync_callback); - } - - rcu_assign_pointer(sfe_drv_ctx->ipv4_stats_sync_cb, cb); - sfe_drv_ctx->ipv4_stats_sync_data = app_data; - - spin_unlock_bh(&sfe_drv_ctx->lock); - - return SFE_DRV_CTX_TO_PUBLIC(sfe_drv_ctx); -} -EXPORT_SYMBOL(sfe_drv_ipv4_notify_register); - -/* - * sfe_drv_ipv4_notify_unregister() - * Un-Register a notifier callback for IPv4 messages from sfe driver - */ -void sfe_drv_ipv4_notify_unregister(void) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - - spin_lock_bh(&sfe_drv_ctx->lock); - /* - * Unregister our sync callback. 
- */ - if (sfe_drv_ctx->ipv4_stats_sync_cb) { - sfe_ipv4_register_sync_rule_callback(NULL); - rcu_assign_pointer(sfe_drv_ctx->ipv4_stats_sync_cb, NULL); - sfe_drv_ctx->ipv4_stats_sync_data = NULL; - } - spin_unlock_bh(&sfe_drv_ctx->lock); - - sfe_drv_clean_response_msg_by_type(sfe_drv_ctx, SFE_DRV_MSG_TYPE_IPV4); - - return; -} -EXPORT_SYMBOL(sfe_drv_ipv4_notify_unregister); - -/* - * sfe_drv_ipv6_stats_sync_callback() - * Synchronize a connection's state. - */ -static void sfe_drv_ipv6_stats_sync_callback(struct sfe_connection_sync *sis) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - struct sfe_ipv6_msg msg; - struct sfe_ipv6_conn_sync *sync_msg; - sfe_ipv6_msg_callback_t sync_cb; - - rcu_read_lock(); - sync_cb = rcu_dereference(sfe_drv_ctx->ipv6_stats_sync_cb); - if (!sync_cb) { - rcu_read_unlock(); - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_NO_SYNC_CB); - return; - } - - sync_msg = &msg.msg.conn_stats; - - memset(&msg, 0, sizeof(msg)); - sfe_cmn_msg_init(&msg.cm, 0, SFE_RX_CONN_STATS_SYNC_MSG, - sizeof(struct sfe_ipv6_conn_sync), NULL, NULL); - - /* - * fill connection specific information - */ - sync_msg->protocol = (u8)sis->protocol; - sfe_drv_ipv6_addr_copy(sis->src_ip.ip6, sync_msg->flow_ip); - sync_msg->flow_ident = sis->src_port; - - sfe_drv_ipv6_addr_copy(sis->dest_ip.ip6, sync_msg->return_ip); - sync_msg->return_ident = sis->dest_port; - - /* - * fill TCP protocol specific information - */ - if (sis->protocol == IPPROTO_TCP) { - sync_msg->flow_max_window = sis->src_td_max_window; - sync_msg->flow_end = sis->src_td_end; - sync_msg->flow_max_end = sis->src_td_max_end; - - sync_msg->return_max_window = sis->dest_td_max_window; - sync_msg->return_end = sis->dest_td_end; - sync_msg->return_max_end = sis->dest_td_max_end; - } - - /* - * fill statistics information - */ - sync_msg->flow_rx_packet_count = sis->src_new_packet_count; - sync_msg->flow_rx_byte_count = sis->src_new_byte_count; - sync_msg->flow_tx_packet_count = 
sis->dest_new_packet_count; - sync_msg->flow_tx_byte_count = sis->dest_new_byte_count; - - sync_msg->return_rx_packet_count = sis->dest_new_packet_count; - sync_msg->return_rx_byte_count = sis->dest_new_byte_count; - sync_msg->return_tx_packet_count = sis->src_new_packet_count; - sync_msg->return_tx_byte_count = sis->src_new_byte_count; - - /* - * fill expiration time to extend, in unit of msec - */ - sync_msg->inc_ticks = (((u32)sis->delta_jiffies) * MSEC_PER_SEC)/HZ; - - /* - * fill other information - */ - switch (sis->reason) { - case SFE_SYNC_REASON_DESTROY: - sync_msg->reason = SFE_RULE_SYNC_REASON_DESTROY; - break; - case SFE_SYNC_REASON_FLUSH: - sync_msg->reason = SFE_RULE_SYNC_REASON_FLUSH; - break; - default: - sync_msg->reason = SFE_RULE_SYNC_REASON_STATS; - break; - } - - /* - * SFE sync calling is excuted in a timer, so we can redirect it to ECM directly. - */ - sync_cb(sfe_drv_ctx->ipv6_stats_sync_data, &msg); - rcu_read_unlock(); -} - -/* - * sfe_drv_create_ipv6_rule_msg() - * convert create message format from ecm to sfe - * - * @param sfe_drv_ctx sfe driver context - * @param msg The IPv6 message - * - * @return sfe_tx_status_t The status of the Tx operation - */ -sfe_tx_status_t sfe_drv_create_ipv6_rule_msg(struct sfe_drv_ctx_instance_internal *sfe_drv_ctx, struct sfe_ipv6_msg *msg) -{ - struct sfe_connection_create sic; - struct net_device *src_dev = NULL; - struct net_device *dest_dev = NULL; - struct sfe_drv_response_msg *response; - enum sfe_cmn_response ret; - - response = sfe_drv_alloc_response_msg(SFE_DRV_MSG_TYPE_IPV6, msg); - if (!response) { - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_ENQUEUE_FAILED); - return SFE_TX_FAILURE_QUEUE; - } - - if (!(msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_CONN_VALID)) { - ret = SFE_CMN_RESPONSE_EMSG; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_CONNECTION_INVALID); - goto failed_ret; - } - - /* - * not support bridged flows now - */ - if (msg->msg.rule_create.rule_flags & 
SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { - ret = SFE_CMN_RESPONSE_EINTERFACE; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_NOT_SUPPORT_BRIDGE); - goto failed_ret; - } - - sic.protocol = msg->msg.rule_create.tuple.protocol; - sfe_drv_ipv6_addr_copy(msg->msg.rule_create.tuple.flow_ip, sic.src_ip.ip6); - sfe_drv_ipv6_addr_copy(msg->msg.rule_create.tuple.return_ip, sic.dest_ip.ip6); - sfe_drv_ipv6_addr_copy(msg->msg.rule_create.tuple.flow_ip, sic.src_ip_xlate.ip6); - sfe_drv_ipv6_addr_copy(msg->msg.rule_create.tuple.return_ip, sic.dest_ip_xlate.ip6); - - sic.flags = 0; - switch (sic.protocol) { - case IPPROTO_TCP: - if (!(msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_TCP_VALID)) { - ret = SFE_CMN_RESPONSE_EMSG; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_TCP_INVALID); - goto failed_ret; - } - - sic.src_port = msg->msg.rule_create.tuple.flow_ident; - sic.dest_port = msg->msg.rule_create.tuple.return_ident; - sic.src_port_xlate = msg->msg.rule_create.tuple.flow_ident; - sic.dest_port_xlate = msg->msg.rule_create.tuple.return_ident; - sic.src_td_window_scale = msg->msg.rule_create.tcp_rule.flow_window_scale; - sic.src_td_max_window = msg->msg.rule_create.tcp_rule.flow_max_window; - sic.src_td_end = msg->msg.rule_create.tcp_rule.flow_end; - sic.src_td_max_end = msg->msg.rule_create.tcp_rule.flow_max_end; - sic.dest_td_window_scale = msg->msg.rule_create.tcp_rule.return_window_scale; - sic.dest_td_max_window = msg->msg.rule_create.tcp_rule.return_max_window; - sic.dest_td_end = msg->msg.rule_create.tcp_rule.return_end; - sic.dest_td_max_end = msg->msg.rule_create.tcp_rule.return_max_end; - if (msg->msg.rule_create.rule_flags & SFE_RULE_CREATE_FLAG_NO_SEQ_CHECK) { - sic.flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK; - } - break; - - case IPPROTO_UDP: - sic.src_port = msg->msg.rule_create.tuple.flow_ident; - sic.dest_port = msg->msg.rule_create.tuple.return_ident; - sic.src_port_xlate = msg->msg.rule_create.tuple.flow_ident; - sic.dest_port_xlate = 
msg->msg.rule_create.tuple.return_ident; - break; - - default: - ret = SFE_CMN_RESPONSE_EMSG; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_PROTOCOL_NOT_SUPPORT); - goto failed_ret; - } - - memcpy(sic.src_mac, msg->msg.rule_create.conn_rule.flow_mac, ETH_ALEN); - memset(sic.src_mac_xlate, 0, ETH_ALEN); - memset(sic.dest_mac, 0, ETH_ALEN); - memcpy(sic.dest_mac_xlate, msg->msg.rule_create.conn_rule.return_mac, ETH_ALEN); - /* - * Does our input device support IP processing? - */ - src_dev = dev_get_by_index(&init_net, msg->msg.rule_create.conn_rule.flow_top_interface_num); - if (!src_dev || !sfe_drv_dev_is_layer_3_interface(src_dev, false)) { - ret = SFE_CMN_RESPONSE_EINTERFACE; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_SRC_DEV_NOT_L3); - goto failed_ret; - } - - /* - * Does our output device support IP processing? - */ - dest_dev = dev_get_by_index(&init_net, msg->msg.rule_create.conn_rule.return_top_interface_num); - if (!dest_dev || !sfe_drv_dev_is_layer_3_interface(dest_dev, false)) { - ret = SFE_CMN_RESPONSE_EINTERFACE; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_DEST_DEV_NOT_L3); - goto failed_ret; - } - - sic.src_dev = src_dev; - sic.dest_dev = dest_dev; - - sic.src_mtu = msg->msg.rule_create.conn_rule.flow_mtu; - sic.dest_mtu = msg->msg.rule_create.conn_rule.return_mtu; - - if (msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_QOS_VALID) { - sic.src_priority = msg->msg.rule_create.qos_rule.flow_qos_tag; - sic.dest_priority = msg->msg.rule_create.qos_rule.return_qos_tag; - sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY; - } - - if (msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_DSCP_MARKING_VALID) { - sic.src_dscp = msg->msg.rule_create.dscp_rule.flow_dscp; - sic.dest_dscp = msg->msg.rule_create.dscp_rule.return_dscp; - sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; - } - -#ifdef CONFIG_XFRM - if (msg->msg.rule_create.valid_flags & SFE_RULE_CREATE_DIRECTION_VALID) { - sic.original_accel = msg->msg.rule_create.direction_rule.flow_accel; - sic.reply_accel = 
msg->msg.rule_create.direction_rule.return_accel; - } else { - sic.original_accel = sic.reply_accel = 1; - } -#endif - - if (!sfe_ipv6_create_rule(&sic)) { - /* success */ - ret = SFE_CMN_RESPONSE_ACK; - } else { - /* failed */ - ret = SFE_CMN_RESPONSE_EMSG; - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_CREATE_FAILED); - } - - /* - * fall through - */ -failed_ret: - if (src_dev) { - dev_put(src_dev); - } - - if (dest_dev) { - dev_put(dest_dev); - } - - /* - * try to queue response message - */ - ((struct sfe_ipv6_msg *)response->msg)->cm.response = msg->cm.response = ret; - sfe_drv_enqueue_msg(sfe_drv_ctx, response); - - return SFE_TX_SUCCESS; -} - -/* - * sfe_drv_destroy_ipv6_rule_msg() - * convert destroy message format from ecm to sfe - * - * @param sfe_drv_ctx sfe driver context - * @param msg The IPv6 message - * - * @return sfe_tx_status_t The status of the Tx operation - */ -sfe_tx_status_t sfe_drv_destroy_ipv6_rule_msg(struct sfe_drv_ctx_instance_internal *sfe_drv_ctx, struct sfe_ipv6_msg *msg) -{ - struct sfe_connection_destroy sid; - struct sfe_drv_response_msg *response; - - response = sfe_drv_alloc_response_msg(SFE_DRV_MSG_TYPE_IPV6, msg); - if (!response) { - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_ENQUEUE_FAILED); - return SFE_TX_FAILURE_QUEUE; - } - - sid.protocol = msg->msg.rule_destroy.tuple.protocol; - sfe_drv_ipv6_addr_copy(msg->msg.rule_destroy.tuple.flow_ip, sid.src_ip.ip6); - sfe_drv_ipv6_addr_copy(msg->msg.rule_destroy.tuple.return_ip, sid.dest_ip.ip6); - sid.src_port = msg->msg.rule_destroy.tuple.flow_ident; - sid.dest_port = msg->msg.rule_destroy.tuple.return_ident; - - sfe_ipv6_destroy_rule(&sid); - - /* - * try to queue response message - */ - ((struct sfe_ipv6_msg *)response->msg)->cm.response = msg->cm.response = SFE_CMN_RESPONSE_ACK; - sfe_drv_enqueue_msg(sfe_drv_ctx, response); - - return SFE_TX_SUCCESS; -} - -/* - * sfe_drv_ipv6_tx() - * Transmit an IPv6 message to the sfe - * - * @param sfe_drv_ctx sfe driver context - * @param msg 
The IPv6 message - * - * @return sfe_tx_status_t The status of the Tx operation - */ -sfe_tx_status_t sfe_drv_ipv6_tx(struct sfe_drv_ctx_instance *sfe_drv_ctx, struct sfe_ipv6_msg *msg) -{ - switch (msg->cm.type) { - case SFE_TX_CREATE_RULE_MSG: - return sfe_drv_create_ipv6_rule_msg(SFE_DRV_CTX_TO_PRIVATE(sfe_drv_ctx), msg); - case SFE_TX_DESTROY_RULE_MSG: - return sfe_drv_destroy_ipv6_rule_msg(SFE_DRV_CTX_TO_PRIVATE(sfe_drv_ctx), msg); - default: - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_IPV6_MSG_UNKNOW); - return SFE_TX_FAILURE_NOT_ENABLED; - } -} -EXPORT_SYMBOL(sfe_drv_ipv6_tx); - -/* - * sfe_ipv6_msg_init() - * Initialize IPv6 message. - */ -void sfe_ipv6_msg_init(struct sfe_ipv6_msg *nim, u16 if_num, u32 type, u32 len, - sfe_ipv6_msg_callback_t cb, void *app_data) -{ - sfe_cmn_msg_init(&nim->cm, if_num, type, len, (void *)cb, app_data); -} -EXPORT_SYMBOL(sfe_ipv6_msg_init); - -/* - * sfe_drv_ipv6_max_conn_count() - * return maximum number of entries SFE supported - */ -int sfe_drv_ipv6_max_conn_count(void) -{ - return SFE_MAX_CONNECTION_NUM; -} -EXPORT_SYMBOL(sfe_drv_ipv6_max_conn_count); - -/* - * sfe_drv_ipv6_notify_register() - * Register a notifier callback for IPv6 messages from sfe driver - * - * @param cb The callback pointer - * @param app_data The application context for this message - * - * @return struct sfe_drv_ctx_instance * The sfe driver context - */ -struct sfe_drv_ctx_instance *sfe_drv_ipv6_notify_register(sfe_ipv6_msg_callback_t cb, void *app_data) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - - spin_lock_bh(&sfe_drv_ctx->lock); - /* - * Hook the shortcut sync callback. 
- */ - if (cb && !sfe_drv_ctx->ipv6_stats_sync_cb) { - sfe_ipv6_register_sync_rule_callback(sfe_drv_ipv6_stats_sync_callback); - } - - rcu_assign_pointer(sfe_drv_ctx->ipv6_stats_sync_cb, cb); - sfe_drv_ctx->ipv6_stats_sync_data = app_data; - - spin_unlock_bh(&sfe_drv_ctx->lock); - - return SFE_DRV_CTX_TO_PUBLIC(sfe_drv_ctx); -} -EXPORT_SYMBOL(sfe_drv_ipv6_notify_register); - -/* - * sfe_drv_ipv6_notify_unregister() - * Un-Register a notifier callback for IPv6 messages from sfe driver - */ -void sfe_drv_ipv6_notify_unregister(void) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - - spin_lock_bh(&sfe_drv_ctx->lock); - /* - * Unregister our sync callback. - */ - if (sfe_drv_ctx->ipv6_stats_sync_cb) { - sfe_ipv6_register_sync_rule_callback(NULL); - rcu_assign_pointer(sfe_drv_ctx->ipv6_stats_sync_cb, NULL); - sfe_drv_ctx->ipv6_stats_sync_data = NULL; - } - spin_unlock_bh(&sfe_drv_ctx->lock); - - sfe_drv_clean_response_msg_by_type(sfe_drv_ctx, SFE_DRV_MSG_TYPE_IPV6); - - return; -} -EXPORT_SYMBOL(sfe_drv_ipv6_notify_unregister); - -/* - * sfe_tun6rd_tx() - * Transmit a tun6rd message to sfe engine - */ -sfe_tx_status_t sfe_tun6rd_tx(struct sfe_drv_ctx_instance *sfe_drv_ctx, struct sfe_tun6rd_msg *msg) -{ - sfe_drv_incr_exceptions(SFE_DRV_EXCEPTION_NOT_SUPPORT_6RD); - return SFE_TX_FAILURE_NOT_ENABLED; -} -EXPORT_SYMBOL(sfe_tun6rd_tx); - -/* - * sfe_tun6rd_msg_init() - * Initialize sfe_tun6rd msg. - */ -void sfe_tun6rd_msg_init(struct sfe_tun6rd_msg *ncm, u16 if_num, u32 type, u32 len, void *cb, void *app_data) -{ - sfe_cmn_msg_init(&ncm->cm, if_num, type, len, cb, app_data); -} -EXPORT_SYMBOL(sfe_tun6rd_msg_init); - -/* - * sfe_drv_recv() - * Handle packet receives. - * - * Returns 1 if the packet is forwarded or 0 if it isn't. - */ -int sfe_drv_recv(struct sk_buff *skb) -{ - struct net_device *dev; - - /* - * We know that for the vast majority of packets we need the transport - * layer header so we may as well start to fetch it now! 
- */ - prefetch(skb->data + 32); - barrier(); - - dev = skb->dev; - -/* - * TODO: Remove the check when INgress Qdisc is ported to 5.4 kernel. - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) -#ifdef CONFIG_NET_CLS_ACT - /* - * If ingress Qdisc configured, and packet not processed by ingress Qdisc yet - * We can not accelerate this packet. - */ - if (dev->ingress_queue && !(skb->tc_verd & TC_NCLS)) { - return 0; - } -#endif -#endif - - /* - * We're only interested in IPv4 and IPv6 packets. - */ - if (likely(htons(ETH_P_IP) == skb->protocol)) { - if (sfe_drv_dev_is_layer_3_interface(dev, true)) { - return sfe_ipv4_recv(dev, skb); - } else { - DEBUG_TRACE("no IPv4 address for device: %s\n", dev->name); - return 0; - } - } - - if (likely(htons(ETH_P_IPV6) == skb->protocol)) { - if (sfe_drv_dev_is_layer_3_interface(dev, false)) { - return sfe_ipv6_recv(dev, skb); - } else { - DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name); - return 0; - } - } - - DEBUG_TRACE("not IP packet\n"); - return 0; -} - -/* - * sfe_drv_get_exceptions() - * dump exception counters - */ -static ssize_t sfe_drv_get_exceptions(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int idx, len; - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - - spin_lock_bh(&sfe_drv_ctx->lock); - for (len = 0, idx = 0; idx < SFE_DRV_EXCEPTION_MAX; idx++) { - if (sfe_drv_ctx->exceptions[idx]) { - len += snprintf(buf + len, (ssize_t)(PAGE_SIZE - len), "%s = %d\n", sfe_drv_exception_events_string[idx], sfe_drv_ctx->exceptions[idx]); - } - } - spin_unlock_bh(&sfe_drv_ctx->lock); - - return len; -} - -/* - * sysfs attributes. 
- */ -static const struct device_attribute sfe_drv_exceptions_attr = - __ATTR(exceptions, S_IRUGO, sfe_drv_get_exceptions, NULL); - -/* - * sfe_drv_init() - */ -static int __init sfe_drv_init(void) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - int result = -1; - - /* - * Create sys/sfe_drv - */ - sfe_drv_ctx->sys_sfe_drv = kobject_create_and_add("sfe_drv", NULL); - if (!sfe_drv_ctx->sys_sfe_drv) { - DEBUG_ERROR("failed to register sfe_drv\n"); - goto exit1; - } - - /* - * Create sys/sfe_drv/exceptions - */ - result = sysfs_create_file(sfe_drv_ctx->sys_sfe_drv, &sfe_drv_exceptions_attr.attr); - if (result) { - DEBUG_ERROR("failed to register exceptions file: %d\n", result); - goto exit2; - } - - spin_lock_init(&sfe_drv_ctx->lock); - - INIT_LIST_HEAD(&sfe_drv_ctx->msg_queue); - INIT_WORK(&sfe_drv_ctx->work, sfe_drv_process_response_msg); - - /* - * Hook the receive path in the network stack. - */ - BUG_ON(athrs_fast_nat_recv); - RCU_INIT_POINTER(athrs_fast_nat_recv, sfe_drv_recv); - - return 0; -exit2: - kobject_put(sfe_drv_ctx->sys_sfe_drv); -exit1: - return result; -} - -/* - * sfe_drv_exit() - */ -static void __exit sfe_drv_exit(void) -{ - struct sfe_drv_ctx_instance_internal *sfe_drv_ctx = &__sfe_drv_ctx; - - /* - * Unregister our receive callback. - */ - RCU_INIT_POINTER(athrs_fast_nat_recv, NULL); - - /* - * Wait for all callbacks to complete. - */ - rcu_barrier(); - - /* - * Destroy all connections. - */ - sfe_ipv4_destroy_all_rules_for_dev(NULL); - sfe_ipv6_destroy_all_rules_for_dev(NULL); - - /* - * stop work queue, and flush all pending message in queue - */ - cancel_work_sync(&sfe_drv_ctx->work); - sfe_drv_process_response_msg(&sfe_drv_ctx->work); - - /* - * Unregister our sync callback. 
- */ - sfe_drv_ipv4_notify_unregister(); - sfe_drv_ipv6_notify_unregister(); - - kobject_put(sfe_drv_ctx->sys_sfe_drv); - - return; -} - -module_init(sfe_drv_init) -module_exit(sfe_drv_exit) - -MODULE_AUTHOR("Qualcomm Atheros Inc."); -MODULE_DESCRIPTION("Simulated driver for Shortcut Forwarding Engine"); -MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/shortcut-fe/simulated-driver/sfe_drv.h b/shortcut-fe/simulated-driver/sfe_drv.h deleted file mode 100644 index 729caffd3..000000000 --- a/shortcut-fe/simulated-driver/sfe_drv.h +++ /dev/null @@ -1,553 +0,0 @@ -/* - * sfe_drv.h - * simulated driver headers for shortcut forwarding engine. - * - * Copyright (c) 2015,2016 The Linux Foundation. All rights reserved. - * Permission to use, copy, modify, and/or distribute this software for - * any purpose with or without fee is hereby granted, provided that the - * above copyright notice and this permission notice appear in all copies. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#ifndef __SFE_DRV_H -#define __SFE_DRV_H - -#define MAX_VLAN_DEPTH 2 -#define SFE_VLAN_ID_NOT_CONFIGURED 0xfff -#define SFE_MC_IF_MAX 16 - -#define SFE_SPECIAL_INTERFACE_BASE 0x7f00 -#define SFE_SPECIAL_INTERFACE_IPV4 (SFE_SPECIAL_INTERFACE_BASE + 1) -#define SFE_SPECIAL_INTERFACE_IPV6 (SFE_SPECIAL_INTERFACE_BASE + 2) -#define SFE_SPECIAL_INTERFACE_IPSEC (SFE_SPECIAL_INTERFACE_BASE + 3) -#define SFE_SPECIAL_INTERFACE_L2TP (SFE_SPECIAL_INTERFACE_BASE + 4) -#define SFE_SPECIAL_INTERFACE_PPTP (SFE_SPECIAL_INTERFACE_BASE + 5) - -/** - * Rule creation & rule update flags. - */ -#define SFE_RULE_CREATE_FLAG_NO_SEQ_CHECK (1<<0) /**< Do not perform TCP sequence number checks */ -#define SFE_RULE_CREATE_FLAG_BRIDGE_FLOW (1<<1) /**< This is a pure bridge forwarding flow */ -#define SFE_RULE_CREATE_FLAG_ROUTED (1<<2) /**< Rule is for a routed connection */ -#define SFE_RULE_CREATE_FLAG_DSCP_MARKING (1<<3) /**< Rule has for a DSCP marking configured*/ -#define SFE_RULE_CREATE_FLAG_VLAN_MARKING (1<<4) /**< Rule has for a VLAN marking configured*/ -#define SFE_RULE_UPDATE_FLAG_CHANGE_MTU (1<<5) /**< Update MTU of connection interfaces */ -#define SFE_RULE_CREATE_FLAG_ICMP_NO_CME_FLUSH (1<<6)/**< Rule for not flushing CME on ICMP pkt */ -#define SFE_RULE_CREATE_FLAG_L2_ENCAP (1<<7) /**< consists of an encapsulating protocol that carries an IPv4 payload within it. */ -#define SFE_RULE_CREATE_FLAG_MC_JOIN (1<<8) /**< Interface has joined the flow */ -#define SFE_RULE_CREATE_FLAG_MC_LEAVE (1<<9) /**< Interface has left the flow */ -#define SFE_RULE_CREATE_FLAG_MC_UPDATE (1<<10)/**< Multicast Rule update */ -/** - * Rule creation validity flags. 
- */ -#define SFE_RULE_CREATE_CONN_VALID (1<<0) /**< IPv4 Connection is valid */ -#define SFE_RULE_CREATE_TCP_VALID (1<<1) /**< TCP Protocol fields are valid */ -#define SFE_RULE_CREATE_PPPOE_VALID (1<<2) /**< PPPoE fields are valid */ -#define SFE_RULE_CREATE_QOS_VALID (1<<3) /**< QoS fields are valid */ -#define SFE_RULE_CREATE_VLAN_VALID (1<<4) /**< VLAN fields are valid */ -#define SFE_RULE_CREATE_DSCP_MARKING_VALID (1<<5) /**< DSCP marking fields are valid */ -#define SFE_RULE_CREATE_VLAN_MARKING_VALID (1<<6) /**< VLAN marking fields are valid */ -#define SFE_RULE_CREATE_MC_NAT_VALID (1<<7) /**< Interface is configured with Source-NAT */ -#define SFE_RULE_CREATE_DIRECTION_VALID (1<<8) /**< specify acceleration directions */ - -/* - * 32/64 bits pointer type - */ -#ifdef __LP64__ -typedef uint64_t sfe_ptr_t; -#else -typedef uint32_t sfe_ptr_t; -#endif - -typedef enum sfe_rule_sync_reason { - SFE_RULE_SYNC_REASON_STATS, /* Sync is to synchronize stats */ - SFE_RULE_SYNC_REASON_FLUSH, /* Sync is to flush a entry */ - SFE_RULE_SYNC_REASON_EVICT, /* Sync is to evict a entry */ - SFE_RULE_SYNC_REASON_DESTROY /* Sync is to destroy a entry(requested by connection manager) */ - -} sfe_rule_sync_reason_t; - -/** - * Tx command status - */ -typedef enum { - SFE_TX_SUCCESS = 0, /**< Success */ - SFE_TX_FAILURE, /**< Command failure other than descriptor not available */ - SFE_TX_FAILURE_QUEUE, /**< Command failure due to descriptor not available */ - SFE_TX_FAILURE_NOT_READY, /**< Command failure due to SFE state uninitialized */ - SFE_TX_FAILURE_TOO_LARGE, /**< Command is too large to fit in one message */ - SFE_TX_FAILURE_TOO_SHORT, /**< Command/Packet is shorter than expected size */ - SFE_TX_FAILURE_NOT_SUPPORTED, /**< Command/Packet not accepted for forwarding */ - SFE_TX_FAILURE_BAD_PARAM, /**< Command failure due to bad parameters */ - SFE_TX_FAILURE_NOT_ENABLED, /**< Command failure due to SFE feature is not enabled */ -} sfe_tx_status_t; - -/** - * Common 
response structure - */ -enum sfe_cmn_response { - SFE_CMN_RESPONSE_ACK, /**< Message Acknowledge */ - SFE_CMN_RESPONSE_EVERSION, /**< Message Version Error */ - SFE_CMN_RESPONSE_EINTERFACE, /**< Message Interface Error */ - SFE_CMN_RESPONSE_ELENGTH, /**< Message Length Error */ - SFE_CMN_RESPONSE_EMSG, /**< Message Error */ - SFE_CMM_RESPONSE_NOTIFY, /**< Message Independant of Request */ - SFE_CMN_RESPONSE_LAST -}; - -/** - * IPv4 bridge/route rule messages - */ -enum sfe_message_types { - SFE_TX_CREATE_RULE_MSG, /**< IPv4/6 create rule message */ - SFE_TX_DESTROY_RULE_MSG, /**< IPv4/6 destroy rule message */ - SFE_RX_CONN_STATS_SYNC_MSG, /**< IPv4/6 connection stats sync message */ - SFE_TX_CREATE_MC_RULE_MSG, /**< IPv4/6 multicast create rule message */ - SFE_TUN6RD_ADD_UPDATE_PEER, /**< Add/update peer for 6rd tunnel */ - SFE_MAX_MSG_TYPES, /**< IPv4/6 message max type number */ -}; - -/** - * Common message structure - */ -struct sfe_cmn_msg { - u16 version; /**< Version id for main message format */ - u16 interface; /**< Primary Key for all messages */ - enum sfe_cmn_response response; /**< Primary response */ - u32 type; /**< Decetralized request #, to be used to match response # */ - u32 error; /**< Decentralized specific error message, response == EMSG */ - sfe_ptr_t cb; /**< Place for callback pointer */ - sfe_ptr_t app_data; /**< Place for app data */ - u32 len; /**< What is the length of the message excluding this header */ -}; - -/** - * Common 5 tuple structure - */ -struct sfe_ipv4_5tuple { - __be32 flow_ip; /**< Flow IP address */ - __be32 return_ip; /**< Return IP address */ - __be16 flow_ident; /**< Flow ident (e.g. TCP/UDP port) */ - __be16 return_ident; /**< Return ident (e.g. 
TCP/UDP port) */ - u8 protocol; /**< Protocol number */ - u8 reserved[3]; /**< Padded for alignment */ -}; - -/** - * Common 5 tuple structure - */ -struct sfe_ipv6_5tuple { - __be32 flow_ip[4]; /**< Flow IP address */ - __be32 return_ip[4]; /**< Return IP address */ - __be16 flow_ident; /**< Flow ident (e.g. TCP/UDP port) */ - __be16 return_ident; /**< Return ident (e.g. TCP/UDP port) */ - u8 protocol; /**< Protocol number */ - u8 reserved[3]; /**< Padded for alignment */ -}; - -/** - * Connection create structure - */ -struct sfe_ipv4_connection_rule { - u8 flow_mac[6]; /**< Flow MAC address */ - u8 return_mac[6]; /**< Return MAC address */ - s32 flow_interface_num; /**< Flow interface number */ - s32 return_interface_num; /**< Return interface number */ - s32 flow_top_interface_num; /* Top flow interface number */ - s32 return_top_interface_num;/* Top return interface number */ - u32 flow_mtu; /**< Flow interface`s MTU */ - u32 return_mtu; /**< Return interface`s MTU */ - __be32 flow_ip_xlate; /**< Translated flow IP address */ - __be32 return_ip_xlate; /**< Translated return IP address */ - __be16 flow_ident_xlate; /**< Translated flow ident (e.g. port) */ - __be16 return_ident_xlate; /**< Translated return ident (e.g. 
port) */ -}; - -/** - * Connection create structure - */ -struct sfe_ipv6_connection_rule { - u8 flow_mac[6]; /**< Flow MAC address */ - u8 return_mac[6]; /**< Return MAC address */ - s32 flow_interface_num; /**< Flow interface number */ - s32 return_interface_num; /**< Return interface number */ - s32 flow_top_interface_num; /* Top flow interface number */ - s32 return_top_interface_num;/* Top return interface number */ - u32 flow_mtu; /**< Flow interface's MTU */ - u32 return_mtu; /**< Return interface's MTU */ -}; - -/** - * TCP connection rule structure - */ -struct sfe_protocol_tcp_rule { - u32 flow_max_window; /**< Flow direction's largest seen window */ - u32 return_max_window; /**< Return direction's largest seen window */ - u32 flow_end; /**< Flow direction's largest seen sequence + segment length */ - u32 return_end; /**< Return direction's largest seen sequence + segment length */ - u32 flow_max_end; /**< Flow direction's largest seen ack + max(1, win) */ - u32 return_max_end; /**< Return direction's largest seen ack + max(1, win) */ - u8 flow_window_scale; /**< Flow direction's window scaling factor */ - u8 return_window_scale; /**< Return direction's window scaling factor */ - u16 reserved; /**< Padded for alignment */ -}; - -/** - * PPPoE connection rules structure - */ -struct sfe_pppoe_rule { - u16 flow_pppoe_session_id; /**< Flow direction`s PPPoE session ID. */ - u16 flow_pppoe_remote_mac[3]; /**< Flow direction`s PPPoE Server MAC address */ - u16 return_pppoe_session_id; /**< Return direction's PPPoE session ID. 
*/ - u16 return_pppoe_remote_mac[3]; /**< Return direction's PPPoE Server MAC address */ -}; - -/** - * QoS connection rule structure - */ -struct sfe_qos_rule { - u32 flow_qos_tag; /**< QoS tag associated with this rule for flow direction */ - u32 return_qos_tag; /**< QoS tag associated with this rule for return direction */ -}; - -/** - * DSCP connection rule structure - */ -struct sfe_dscp_rule { - u8 flow_dscp; /**< Egress DSCP value for flow direction */ - u8 return_dscp; /**< Egress DSCP value for return direction */ - u8 reserved[2]; /**< Padded for alignment */ -}; - -/** - * VLAN connection rule structure - */ -struct sfe_vlan_rule { - u32 ingress_vlan_tag; /**< VLAN Tag for the ingress packets */ - u32 egress_vlan_tag; /**< VLAN Tag for egress packets */ -}; - -/** - * Acceleration direction rule structure - * Sometimes we just want to accelerate traffic in one direction but not in another. - */ -struct sfe_acceleration_direction_rule { - u8 flow_accel; /**< Accelerate in flow direction */ - u8 return_accel; /**< Accelerate in return direction */ - u8 reserved[2]; /**< Padded for alignment */ -}; - -/** - * The IPv4 rule create sub-message structure. 
- */ -struct sfe_ipv4_rule_create_msg { - /* Request */ - u16 valid_flags; /**< Bit flags associated with the validity of parameters */ - u16 rule_flags; /**< Bit flags associated with the rule */ - - struct sfe_ipv4_5tuple tuple; /**< Holds values of the 5 tuple */ - - struct sfe_ipv4_connection_rule conn_rule; /**< Basic connection specific data */ - struct sfe_protocol_tcp_rule tcp_rule; /**< TCP related accleration parameters */ - struct sfe_pppoe_rule pppoe_rule; /**< PPPoE related accleration parameters */ - struct sfe_qos_rule qos_rule; /**< QoS related accleration parameters */ - struct sfe_dscp_rule dscp_rule; /**< DSCP related accleration parameters */ - struct sfe_vlan_rule vlan_primary_rule; /**< Primary VLAN related accleration parameters */ - struct sfe_vlan_rule vlan_secondary_rule; /**< Secondary VLAN related accleration parameters */ -#ifdef CONFIG_XFRM - struct sfe_acceleration_direction_rule direction_rule;/* Direction related accleration parameters*/ -#endif - /* Response */ - u32 index; /**< Slot ID for cache stats to host OS */ -}; - -/** - * The IPv4 rule destroy sub-message structure. - */ -struct sfe_ipv4_rule_destroy_msg { - struct sfe_ipv4_5tuple tuple; /**< Holds values of the 5 tuple */ -}; - -/** - * The SFE IPv4 rule sync structure. - */ -struct sfe_ipv4_conn_sync { - u32 index; /**< Slot ID for cache stats to host OS */ - u8 protocol; /**< Protocol number */ - __be32 flow_ip; /**< Flow IP address */ - __be32 flow_ip_xlate; /**< Translated flow IP address */ - __be16 flow_ident; /**< Flow ident (e.g. port) */ - __be16 flow_ident_xlate; /**< Translated flow ident (e.g. 
port) */ - u32 flow_max_window; /**< Flow direction's largest seen window */ - u32 flow_end; /**< Flow direction's largest seen sequence + segment length */ - u32 flow_max_end; /**< Flow direction's largest seen ack + max(1, win) */ - u32 flow_rx_packet_count; /**< Flow interface's RX packet count */ - u32 flow_rx_byte_count; /**< Flow interface's RX byte count */ - u32 flow_tx_packet_count; /**< Flow interface's TX packet count */ - u32 flow_tx_byte_count; /**< Flow interface's TX byte count */ - u16 flow_pppoe_session_id; /**< Flow interface`s PPPoE session ID. */ - u16 flow_pppoe_remote_mac[3]; - /**< Flow interface's PPPoE remote server MAC address if there is any */ - __be32 return_ip; /**< Return IP address */ - __be32 return_ip_xlate; /**< Translated return IP address */ - __be16 return_ident; /**< Return ident (e.g. port) */ - __be16 return_ident_xlate; /**< Translated return ident (e.g. port) */ - u32 return_max_window; /**< Return direction's largest seen window */ - u32 return_end; /**< Return direction's largest seen sequence + segment length */ - u32 return_max_end; /**< Return direction's largest seen ack + max(1, win) */ - u32 return_rx_packet_count; - /**< Return interface's RX packet count */ - u32 return_rx_byte_count; /**< Return interface's RX byte count */ - u32 return_tx_packet_count; - /**< Return interface's TX packet count */ - u32 return_tx_byte_count; /**< Return interface's TX byte count */ - u16 return_pppoe_session_id; - /**< Return interface`s PPPoE session ID. 
*/ - u16 return_pppoe_remote_mac[3]; - /**< Return interface's PPPoE remote server MAC address if there is any */ - u32 inc_ticks; /**< Number of ticks since the last sync */ - u32 reason; /**< Reason for the sync */ - - u8 flags; /**< Bit flags associated with the rule */ - u32 qos_tag; /**< QoS Tag */ - u32 cause; /**< Flush Cause */ -}; - -/* - * Message structure to send/receive IPv4 bridge/route commands - */ -struct sfe_ipv4_msg { - struct sfe_cmn_msg cm; /**< Message Header */ - union { - struct sfe_ipv4_rule_create_msg rule_create; /**< Message: rule create */ - struct sfe_ipv4_rule_destroy_msg rule_destroy; /**< Message: rule destroy */ - struct sfe_ipv4_conn_sync conn_stats; /**< Message: connection stats sync */ - } msg; -}; - -/** - * Callback to be called when IPv4 message is received - */ -typedef void (*sfe_ipv4_msg_callback_t)(void *app_data, struct sfe_ipv4_msg *msg); - -/** - * The IPv6 rule create sub-message structure. - */ -struct sfe_ipv6_rule_create_msg { - /* - * Request - */ - u16 valid_flags; /**< Bit flags associated with the validity of parameters */ - u16 rule_flags; /**< Bit flags associated with the rule */ - struct sfe_ipv6_5tuple tuple; /**< Holds values of the 5 tuple */ - struct sfe_ipv6_connection_rule conn_rule; /**< Basic connection specific data */ - struct sfe_protocol_tcp_rule tcp_rule; /**< Protocol related accleration parameters */ - struct sfe_pppoe_rule pppoe_rule; /**< PPPoE related accleration parameters */ - struct sfe_qos_rule qos_rule; /**< QoS related accleration parameters */ - struct sfe_dscp_rule dscp_rule; /**< DSCP related accleration parameters */ - struct sfe_vlan_rule vlan_primary_rule; /**< VLAN related accleration parameters */ - struct sfe_vlan_rule vlan_secondary_rule; /**< VLAN related accleration parameters */ -#ifdef CONFIG_XFRM - struct sfe_acceleration_direction_rule direction_rule;/* Direction related accleration parameters*/ -#endif - /* - * Response - */ - u32 index; /**< Slot ID for cache stats 
to host OS */ -}; - -/** - * The IPv6 rule destroy sub-message structure. - */ -struct sfe_ipv6_rule_destroy_msg { - struct sfe_ipv6_5tuple tuple; /**< Holds values of the 5 tuple */ -}; - -/** - * The SFE IPv6 rule sync structure. - */ -struct sfe_ipv6_conn_sync { - u32 index; /**< Slot ID for cache stats to host OS */ - u8 protocol; /**< Protocol number */ - __be32 flow_ip[4]; /**< Flow IP address */ - __be16 flow_ident; /**< Flow ident (e.g. port) */ - u32 flow_max_window; /**< Flow direction's largest seen window */ - u32 flow_end; /**< Flow direction's largest seen sequence + segment length */ - u32 flow_max_end; /**< Flow direction's largest seen ack + max(1, win) */ - u32 flow_rx_packet_count; /**< Flow interface's RX packet count */ - u32 flow_rx_byte_count; /**< Flow interface's RX byte count */ - u32 flow_tx_packet_count; /**< Flow interface's TX packet count */ - u32 flow_tx_byte_count; /**< Flow interface's TX byte count */ - u16 flow_pppoe_session_id; /**< Flow interface`s PPPoE session ID. */ - u16 flow_pppoe_remote_mac[3]; - /**< Flow interface's PPPoE remote server MAC address if there is any */ - __be32 return_ip[4]; /**< Return IP address */ - __be16 return_ident; /**< Return ident (e.g. port) */ - u32 return_max_window; /**< Return direction's largest seen window */ - u32 return_end; /**< Return direction's largest seen sequence + segment length */ - u32 return_max_end; /**< Return direction's largest seen ack + max(1, win) */ - u32 return_rx_packet_count; - /**< Return interface's RX packet count */ - u32 return_rx_byte_count; /**< Return interface's RX byte count */ - u32 return_tx_packet_count; - /**< Return interface's TX packet count */ - u32 return_tx_byte_count; /**< Return interface's TX byte count */ - u16 return_pppoe_session_id; - /**< Return interface`s PPPoE session ID. 
*/ - u16 return_pppoe_remote_mac[3]; - /**< Return interface's PPPoE remote server MAC address if there is any */ - u32 inc_ticks; /**< Number of ticks since the last sync */ - u32 reason; /**< Reason for the sync */ - u8 flags; /**< Bit flags associated with the rule */ - u32 qos_tag; /**< QoS Tag */ - u32 cause; /**< Flush cause associated with the rule */ -}; - -/** - * Message structure to send/receive IPv6 bridge/route commands - */ -struct sfe_ipv6_msg { - struct sfe_cmn_msg cm; /**< Message Header */ - union { - struct sfe_ipv6_rule_create_msg rule_create; /**< Message: rule create */ - struct sfe_ipv6_rule_destroy_msg rule_destroy; /**< Message: rule destroy */ - struct sfe_ipv6_conn_sync conn_stats; /**< Message: stats sync */ - } msg; -}; - -/** - * Callback to be called when IPv6 message is received - */ -typedef void (*sfe_ipv6_msg_callback_t)(void *app_data, struct sfe_ipv6_msg *msg); - -/** - * 6rd tunnel peer addr. - */ -struct sfe_tun6rd_set_peer_msg { - __be32 ipv6_address[4]; /* The peer's ipv6 addr*/ - __be32 dest; /* The peer's ipv4 addr*/ -}; - -/** - * Message structure to send/receive 6rd tunnel messages - */ -struct sfe_tun6rd_msg { - struct sfe_cmn_msg cm; /* Message Header */ - union { - struct sfe_tun6rd_set_peer_msg peer; /* Message: add/update peer */ - } msg; -}; - -/* - * sfe driver context instance - */ -struct sfe_drv_ctx_instance { - int not_used; -}; - -/* - * sfe_drv_ipv4_max_conn_count() - * Return the maximum number of IPv4 connections that the sfe acceleration engine supports - * - * @return int The number of connections that can be accelerated by the sfe - */ -int sfe_drv_ipv4_max_conn_count(void); - -/* - * sfe_drv_ipv4_tx() - * Transmit an IPv4 message to the sfe - * - * @param sfe_drv_ctx sfe driver context - * @param msg The IPv4 message - * - * @return sfe_tx_status_t The status of the Tx operation - */ -extern sfe_tx_status_t sfe_drv_ipv4_tx(struct sfe_drv_ctx_instance *sfe_drv_ctx, struct sfe_ipv4_msg *msg); - -/* - * 
sfe_drv_ipv4_notify_register() - * Register a notifier callback for IPv4 messages from sfe driver - * - * @param cb The callback pointer - * @param app_data The application context for this message - * - * @return struct sfe_drv_ctx_instance * The sfe driver context - */ -extern struct sfe_drv_ctx_instance *sfe_drv_ipv4_notify_register(sfe_ipv4_msg_callback_t cb, void *app_data); - -/* - * sfe_drv_ipv4_notify_unregister() - * Un-Register a notifier callback for IPv4 messages from sfe driver - */ -extern void sfe_drv_ipv4_notify_unregister(void); - -/* - * sfe_ipv4_msg_init() - * IPv4 message init - */ -extern void sfe_ipv4_msg_init(struct sfe_ipv4_msg *nim, u16 if_num, u32 type, u32 len, - sfe_ipv4_msg_callback_t cb, void *app_data); - -/* - * sfe_drv_ipv6_max_conn_count() - * Return the maximum number of IPv6 connections that the sfe acceleration engine supports - * - * @return int The number of connections that can be accelerated by the sfe - */ -int sfe_drv_ipv6_max_conn_count(void); - -/* - * sfe_drv_ipv6_tx() - * Transmit an IPv6 message to the sfe - * - * @param sfe_drv_ctx sfe driver context - * @param msg The IPv6 message - * - * @return sfe_tx_status_t The status of the Tx operation - */ -extern sfe_tx_status_t sfe_drv_ipv6_tx(struct sfe_drv_ctx_instance *sfe_drv_ctx, struct sfe_ipv6_msg *msg); - -/* - * sfe_drv_ipv6_notify_register() - * Register a notifier callback for IPv6 messages from sfe driver - * - * @param cb The callback pointer - * @param app_data The application context for this message - * - * @return struct sfe_drv_ctx_instance * The sfe driver context - */ -extern struct sfe_drv_ctx_instance *sfe_drv_ipv6_notify_register(sfe_ipv6_msg_callback_t cb, void *app_data); - -/* - * sfe_drv_ipv6_notify_unregister() - * Un-Register a notifier callback for IPv6 messages from sfe driver - */ -extern void sfe_drv_ipv6_notify_unregister(void); - -/* - * sfe_ipv6_msg_init() - * IPv6 message init - */ -extern void sfe_ipv6_msg_init(struct sfe_ipv6_msg 
*nim, u16 if_num, u32 type, u32 len, - sfe_ipv6_msg_callback_t cb, void *app_data); - -/* - * sfe_tun6rd_tx() - * Transmit a tun6rd message to sfe engine - */ -sfe_tx_status_t sfe_tun6rd_tx(struct sfe_drv_ctx_instance *sfe_ctx, struct sfe_tun6rd_msg *msg); - -/* - * sfe_tun6rd_msg_init() - * Initialize sfe_tun6rd msg. - */ -void sfe_tun6rd_msg_init(struct sfe_tun6rd_msg *ncm, u16 if_num, u32 type, u32 len, - void *cb, void *app_data); - -#endif /* __SFE_DRV_H */