/*
 * sfe_ipv4.c
 *	Shortcut forwarding engine - IPv4 edition.
 *
 * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/vxlan.h>
#include <linux/etherdevice.h>
#include <linux/version.h>
#include <linux/lockdep.h>
#include <linux/refcount.h>
#include <linux/netfilter.h>
#include <linux/inetdevice.h>
#include <linux/netfilter_ipv4.h>
#include <linux/seqlock.h>
#include <net/protocol.h>
#include <net/gre.h>

#include "sfe_debug.h"
#include "sfe_api.h"
#include "sfe.h"
#include "sfe_flow_cookie.h"
#include "sfe_ipv4.h"
#include "sfe_ipv4_udp.h"
#include "sfe_ipv4_tcp.h"
#include "sfe_ipv4_icmp.h"
#include "sfe_pppoe.h"
#include "sfe_pppoe_mgr.h"
#include "sfe_ipv4_pppoe_br.h"
#include "sfe_ipv4_gre.h"
#include "sfe_ipv4_tun6rd.h"
#include "sfe_ipv4_esp.h"

static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"HEADER_CSUM_BAD",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL",
	"NO_HEADROOM",
	"INVALID_PPPOE_SESSION",
	"INCORRECT_PPPOE_PARSING",
	"PPPOE_NOT_SET_IN_CME",
	"PPPOE_BR_NOT_IN_CME",
	"INGRESS_VLAN_TAG_MISMATCH",
	"INVALID_SOURCE_INTERFACE",
	"TUN6RD_NO_CONNECTION",
	"TUN6RD_NEEDS_FRAGMENTATION",
	"TUN6RD_SYNC_ON_FIND",
	"GRE_HEADER_INCOMPLETE",
	"GRE_NO_CONNECTION",
	"GRE_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"GRE_SMALL_TTL",
	"GRE_NEEDS_FRAGMENTATION",
	"ESP_NO_CONNECTION",
	"ESP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"ESP_NEEDS_FRAGMENTATION",
	"ESP_SMALL_TTL"
};

static struct sfe_ipv4 __si;
struct sfe_ipv4_msg *sfe_ipv4_sync_many_msg;
uint32_t sfe_ipv4_sync_max_number;

/*
 * sfe_ipv4_gen_ip_csum()
 *	Generate the IP checksum for an IPv4 header.
 *
 * Note that this function assumes that we have only 20 bytes of IP header.
 */
u16 sfe_ipv4_gen_ip_csum(struct iphdr *iph)
{
	u32 sum;
	u16 *i = (u16 *)iph;

	iph->check = 0;

	/*
	 * Generate the sum.
	 */
	sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];

	/*
	 * Fold it to ones-complement form.
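	 *
	 * A worked example with illustrative numbers: if the 32-bit partial
	 * sum is 0x2f0ca, the first fold gives 0xf0ca + 0x2 = 0xf0cc, the
	 * second fold is then a no-op, and the final checksum is
	 * 0xf0cc ^ 0xffff = 0x0f33.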
*/ sum = (sum & 0xffff) + (sum >> 16); sum = (sum & 0xffff) + (sum >> 16); return (u16)sum ^ 0xffff; } /* * sfe_ipv4_get_connection_match_hash() * Generate the hash used in connection match lookups. */ static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol, __be32 src_ip, __be16 src_port, __be32 dest_ip, __be16 dest_port) { u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port); return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK; } /* * sfe_ipv4_find_connection_match_rcu() * Get the IPv4 flow match info that corresponds to a particular 5-tuple. * * On entry we must be holding the lock that protects the hash table. */ struct sfe_ipv4_connection_match * sfe_ipv4_find_connection_match_rcu(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol, __be32 src_ip, __be16 src_port, __be32 dest_ip, __be16 dest_port) { struct sfe_ipv4_connection_match *cm = NULL; unsigned int conn_match_idx; struct hlist_head *lhead; WARN_ON_ONCE(!rcu_read_lock_held()); conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port); lhead = &si->hlist_conn_match_hash_head[conn_match_idx]; hlist_for_each_entry_rcu(cm, lhead, hnode) { if (cm->match_src_port != src_port || cm->match_dest_port != dest_port || cm->match_src_ip != src_ip || cm->match_dest_ip != dest_ip || cm->match_protocol != protocol) { continue; } this_cpu_inc(si->stats_pcpu->connection_match_hash_hits64); break; } return cm; } /* * sfe_ipv4_connection_match_update_summary_stats() * Update the summary stats for a connection match entry. * * Stats are incremented atomically. So use atomic substraction to update summary * stats. */ static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm, u32 *packets, u32 *bytes) { u32 packet_count, byte_count; packet_count = atomic_read(&cm->rx_packet_count); cm->rx_packet_count64 += packet_count; atomic_sub(packet_count, &cm->rx_packet_count); byte_count = atomic_read(&cm->rx_byte_count); cm->rx_byte_count64 += byte_count; atomic_sub(byte_count, &cm->rx_byte_count); *packets = packet_count; *bytes = byte_count; } /* * sfe_ipv4_connection_match_compute_translations() * Compute port and address translations for a connection match entry. */ static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm) { /* * Before we insert the entry look to see if this is tagged as doing address * translations. If it is then work out the adjustment that we need to apply * to the transport checksum. */ if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { /* * Precompute an incremental checksum adjustment so we can * edit packets in this stream very quickly. The algorithm is from RFC1624. */ u16 src_ip_hi = cm->match_src_ip >> 16; u16 src_ip_lo = cm->match_src_ip & 0xffff; u32 xlate_src_ip = ~cm->xlate_src_ip; u16 xlate_src_ip_hi = xlate_src_ip >> 16; u16 xlate_src_ip_lo = xlate_src_ip & 0xffff; u16 xlate_src_port = ~cm->xlate_src_port; u32 adj; /* * When we compute this fold it down to a 16-bit offset * as that way we can avoid having to do a double * folding of the twos-complement result because the * addition of 2 16-bit values cannot cause a double * wrap-around! 
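		 *
		 * To see why the two folds below always suffice: the six
		 * 16-bit terms sum to at most 6 * 0xffff = 0x5fffa, and the
		 * first fold reduces that to at most 0xfffa + 0x5 = 0xffff,
		 * so no further carry is possible. This precomputes, per
		 * flow, the incremental update HC' = ~(~HC + ~m + m') from
		 * RFC 1624.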
*/ adj = src_ip_hi + src_ip_lo + cm->match_src_port + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port; adj = (adj & 0xffff) + (adj >> 16); adj = (adj & 0xffff) + (adj >> 16); cm->xlate_src_csum_adjustment = (u16)adj; } if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { /* * Precompute an incremental checksum adjustment so we can * edit packets in this stream very quickly. The algorithm is from RFC1624. */ u16 dest_ip_hi = cm->match_dest_ip >> 16; u16 dest_ip_lo = cm->match_dest_ip & 0xffff; u32 xlate_dest_ip = ~cm->xlate_dest_ip; u16 xlate_dest_ip_hi = xlate_dest_ip >> 16; u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff; u16 xlate_dest_port = ~cm->xlate_dest_port; u32 adj; /* * When we compute this fold it down to a 16-bit offset * as that way we can avoid having to do a double * folding of the twos-complement result because the * addition of 2 16-bit values cannot cause a double * wrap-around! */ adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port; adj = (adj & 0xffff) + (adj >> 16); adj = (adj & 0xffff) + (adj >> 16); cm->xlate_dest_csum_adjustment = (u16)adj; } if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { u32 adj = ~cm->match_src_ip + cm->xlate_src_ip; if (adj < cm->xlate_src_ip) { adj++; } adj = (adj & 0xffff) + (adj >> 16); adj = (adj & 0xffff) + (adj >> 16); cm->xlate_src_partial_csum_adjustment = (u16)adj; } if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip; if (adj < cm->xlate_dest_ip) { adj++; } adj = (adj & 0xffff) + (adj >> 16); adj = (adj & 0xffff) + (adj >> 16); cm->xlate_dest_partial_csum_adjustment = (u16)adj; } } /* * sfe_ipv4_update_summary_stats() * Update the summary stats. */ static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si, struct sfe_ipv4_stats *stats) { int i = 0; memset(stats, 0, sizeof(*stats)); for_each_possible_cpu(i) { const struct sfe_ipv4_stats *s = per_cpu_ptr(si->stats_pcpu, i); stats->connection_create_requests64 += s->connection_create_requests64; stats->connection_create_collisions64 += s->connection_create_collisions64; stats->connection_create_failures64 += s->connection_create_failures64; stats->connection_destroy_requests64 += s->connection_destroy_requests64; stats->connection_destroy_misses64 += s->connection_destroy_misses64; stats->connection_match_hash_hits64 += s->connection_match_hash_hits64; stats->connection_match_hash_reorders64 += s->connection_match_hash_reorders64; stats->connection_flushes64 += s->connection_flushes64; stats->packets_dropped64 += s->packets_dropped64; stats->packets_forwarded64 += s->packets_forwarded64; stats->packets_fast_xmited64 += s->packets_fast_xmited64; stats->packets_not_forwarded64 += s->packets_not_forwarded64; stats->pppoe_encap_packets_forwarded64 += s->pppoe_encap_packets_forwarded64; stats->pppoe_decap_packets_forwarded64 += s->pppoe_decap_packets_forwarded64; stats->pppoe_bridge_packets_forwarded64 += s->pppoe_bridge_packets_forwarded64; stats->pppoe_bridge_packets_3tuple_forwarded64 += s->pppoe_bridge_packets_3tuple_forwarded64; } } /* * sfe_ipv4_insert_connection_match() * Insert a connection match into the hash. 
 */
static inline void sfe_ipv4_insert_connection_match(struct sfe_ipv4 *si,
						    struct sfe_ipv4_connection_match *cm)
{
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);

	lockdep_assert_held(&si->lock);

	hlist_add_head_rcu(&cm->hnode, &si->hlist_conn_match_hash_head[conn_match_idx]);
#ifdef CONFIG_NF_FLOW_COOKIE
	if (!si->flow_cookie_enable)
		return;

	/*
	 * Configure hardware to put a flow cookie in packet of this flow,
	 * then we can accelerate the lookup process when we received this packet.
	 */
	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
			flow_cookie_set_func_t func;

			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					  cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
					entry->match = cm;
					cm->flow_cookie = conn_match_idx;
				}
			}
			rcu_read_unlock();

			break;
		}
	}
#endif
}

/*
 * sfe_ipv4_remove_connection_match()
 *	Remove a connection match object from the hash.
 */
static inline void sfe_ipv4_remove_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
	lockdep_assert_held(&si->lock);

#ifdef CONFIG_NF_FLOW_COOKIE
	if (si->flow_cookie_enable) {
		/*
		 * Tell hardware that we no longer need a flow cookie in packet of this flow
		 */
		unsigned int conn_match_idx;

		for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
			struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

			if (cm == entry->match) {
				flow_cookie_set_func_t func;

				rcu_read_lock();
				func = rcu_dereference(si->flow_cookie_set_func);
				if (func) {
					func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					     cm->match_dest_ip, cm->match_dest_port, 0);
				}
				rcu_read_unlock();

				cm->flow_cookie = 0;
				entry->match = NULL;
				entry->last_clean_time = jiffies;
				break;
			}
		}
	}
#endif

	hlist_del_init_rcu(&cm->hnode);
}

/*
 * sfe_ipv4_get_connection_hash()
 *	Generate the hash used in connection lookups.
 */
static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port,
							__be32 dest_ip, __be16 dest_port)
{
	u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
	return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
}

/*
 * sfe_ipv4_find_connection()
 *	Get the IPv4 connection info that corresponds to a particular 5-tuple.
 *
 * On entry we must be holding the lock that protects the hash table.
 */
static inline struct sfe_ipv4_connection *sfe_ipv4_find_connection(struct sfe_ipv4 *si, u32 protocol,
								   __be32 src_ip, __be16 src_port,
								   __be32 dest_ip, __be16 dest_port)
{
	struct sfe_ipv4_connection *c;
	unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);

	lockdep_assert_held(&si->lock);

	c = si->conn_hash[conn_idx];

	/*
	 * Will need connection entry for next create/destroy metadata,
	 * So no need to re-order entry for these requests
	 */
	while (c) {
		if ((c->src_port == src_port)
		    && (c->dest_port == dest_port)
		    && (c->src_ip == src_ip)
		    && (c->dest_ip == dest_ip)
		    && (c->protocol == protocol)) {
			return c;
		}

		c = c->next;
	}

	return NULL;
}

/*
 * sfe_ipv4_insert_connection()
 *	Insert a connection into the hash.
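 *
 * Note that sfe_ipv4_insert_connection_match() publishes each match entry
 * with hlist_add_head_rcu(), so a newly inserted connection becomes visible
 * to the lockless lookup in sfe_ipv4_find_connection_match_rcu() as soon as
 * it is linked in here.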
* * On entry we must be holding the lock that protects the hash table. */ static void sfe_ipv4_insert_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) { struct sfe_ipv4_connection **hash_head; struct sfe_ipv4_connection *prev_head; unsigned int conn_idx; lockdep_assert_held(&si->lock); /* * Insert entry into the connection hash. */ conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, c->dest_ip, c->dest_port); hash_head = &si->conn_hash[conn_idx]; prev_head = *hash_head; c->prev = NULL; if (prev_head) { prev_head->prev = c; } c->next = prev_head; *hash_head = c; /* * Insert entry into the "all connections" list. */ if (si->all_connections_tail) { c->all_connections_prev = si->all_connections_tail; si->all_connections_tail->all_connections_next = c; } else { c->all_connections_prev = NULL; si->all_connections_head = c; } si->all_connections_tail = c; c->all_connections_next = NULL; si->num_connections++; /* * Insert the connection match objects too. */ sfe_ipv4_insert_connection_match(si, c->original_match); sfe_ipv4_insert_connection_match(si, c->reply_match); } /* * sfe_ipv4_remove_connection() * Remove a sfe_ipv4_connection object from the hash. */ bool sfe_ipv4_remove_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) { lockdep_assert_held(&si->lock); if (c->removed) { DEBUG_ERROR("%px: Connection has been removed already\n", c); return false; } /* * dereference the decap direction top_interface_dev */ if (c->reply_match->top_interface_dev) { dev_put(c->reply_match->top_interface_dev); } /* * Remove the connection match objects. */ sfe_ipv4_remove_connection_match(si, c->reply_match); sfe_ipv4_remove_connection_match(si, c->original_match); /* * Unlink the connection. */ if (c->prev) { c->prev->next = c->next; } else { unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, c->dest_ip, c->dest_port); si->conn_hash[conn_idx] = c->next; } if (c->next) { c->next->prev = c->prev; } /* * Unlink connection from all_connections list */ if (c->all_connections_prev) { c->all_connections_prev->all_connections_next = c->all_connections_next; } else { si->all_connections_head = c->all_connections_next; } if (c->all_connections_next) { c->all_connections_next->all_connections_prev = c->all_connections_prev; } else { si->all_connections_tail = c->all_connections_prev; } /* * If I am the next sync connection, move the sync to my next or head. */ if (unlikely(si->wc_next == c)) { si->wc_next = c->all_connections_next; } c->removed = true; si->num_connections--; return true; } /* * sfe_ipv4_gen_sync_connection() * Sync a connection. * * On entry to this function we expect that the lock for the connection is either * already held (while called from sfe_ipv4_periodic_sync() or isn't required * (while called from sfe_ipv4_flush_connection()) */ static void sfe_ipv4_gen_sync_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c, struct sfe_connection_sync *sis, sfe_sync_reason_t reason, u64 now_jiffies) { struct sfe_ipv4_connection_match *original_cm; struct sfe_ipv4_connection_match *reply_cm; u32 packet_count, byte_count; /* * Fill in the update message. 
*/ sis->is_v6 = 0; sis->protocol = c->protocol; sis->src_ip.ip = c->src_ip; sis->src_ip_xlate.ip = c->src_ip_xlate; sis->dest_ip.ip = c->dest_ip; sis->dest_ip_xlate.ip = c->dest_ip_xlate; sis->src_port = c->src_port; sis->src_port_xlate = c->src_port_xlate; sis->dest_port = c->dest_port; sis->dest_port_xlate = c->dest_port_xlate; original_cm = c->original_match; reply_cm = c->reply_match; sis->src_td_max_window = original_cm->protocol_state.tcp.max_win; sis->src_td_end = original_cm->protocol_state.tcp.end; sis->src_td_max_end = original_cm->protocol_state.tcp.max_end; sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win; sis->dest_td_end = reply_cm->protocol_state.tcp.end; sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end; sfe_ipv4_connection_match_update_summary_stats(original_cm, &packet_count, &byte_count); sis->src_new_packet_count = packet_count; sis->src_new_byte_count = byte_count; sfe_ipv4_connection_match_update_summary_stats(reply_cm, &packet_count, &byte_count); sis->dest_new_packet_count = packet_count; sis->dest_new_byte_count = byte_count; sis->src_dev = original_cm->match_dev; sis->src_packet_count = original_cm->rx_packet_count64; sis->src_byte_count = original_cm->rx_byte_count64; sis->dest_dev = reply_cm->match_dev; sis->dest_packet_count = reply_cm->rx_packet_count64; sis->dest_byte_count = reply_cm->rx_byte_count64; sis->reason = reason; /* * Get the time increment since our last sync. */ sis->delta_jiffies = now_jiffies - c->last_sync_jiffies; c->last_sync_jiffies = now_jiffies; } /* * sfe_ipv4_free_connection_rcu() * Called at RCU qs state to free the connection object. */ static void sfe_ipv4_free_connection_rcu(struct rcu_head *head) { struct sfe_ipv4_connection *c; struct udp_sock *up; struct sock *sk; /* * We dont need spin lock as the connection is already removed from link list */ c = container_of(head, struct sfe_ipv4_connection, rcu); BUG_ON(!c->removed); DEBUG_TRACE("%px: connecton has been deleted\n", c); /* * Decrease the refcount taken in function sfe_ipv4_create_rule(), * during call of __udp4_lib_lookup() */ up = c->reply_match->up; if (up) { sk = (struct sock *)up; sock_put(sk); } /* * Release our hold of the source and dest devices and free the memory * for our connection objects. */ dev_put(c->original_dev); dev_put(c->reply_dev); kfree(c->original_match); kfree(c->reply_match); kfree(c); } /* * sfe_ipv4_sync_status() * update a connection status to its connection manager. * * si: the ipv4 context * c: which connection to be notified * reason: what kind of notification: flush, stats or destroy */ void sfe_ipv4_sync_status(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c, sfe_sync_reason_t reason) { struct sfe_connection_sync sis; u64 now_jiffies; sfe_sync_rule_callback_t sync_rule_callback; rcu_read_lock(); sync_rule_callback = rcu_dereference(si->sync_rule_callback); rcu_read_unlock(); if (!sync_rule_callback) { return; } /* * Generate a sync message and then sync. */ now_jiffies = get_jiffies_64(); sfe_ipv4_gen_sync_connection(si, c, &sis, reason, now_jiffies); sync_rule_callback(&sis); } /* * sfe_ipv4_flush_connection() * Flush a connection and free all associated resources. * * We need to be called with bottom halves disabled locally as we need to acquire * the connection hash lock and release it again. In general we're actually called * from within a BH and so we're fine, but we're also called when connections are * torn down. 
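 *
 * A typical caller sequence (this is exactly what sfe_ipv4_destroy_rule()
 * below does):
 *
 *	spin_lock_bh(&si->lock);
 *	ret = sfe_ipv4_remove_connection(si, c);
 *	spin_unlock_bh(&si->lock);
 *	if (ret)
 *		sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_DESTROY);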
 */
void sfe_ipv4_flush_connection(struct sfe_ipv4 *si,
			       struct sfe_ipv4_connection *c,
			       sfe_sync_reason_t reason)
{
	BUG_ON(!c->removed);

	this_cpu_inc(si->stats_pcpu->connection_flushes64);
	sfe_ipv4_sync_status(si, c, reason);

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.
	 */
	call_rcu(&c->rcu, sfe_ipv4_free_connection_rcu);
}

/*
 * sfe_ipv4_service_class_stats_inc()
 *	Increment per cpu per service class stats.
 */
void sfe_ipv4_service_class_stats_inc(struct sfe_ipv4 *si, uint8_t sid, uint64_t bytes)
{
	struct sfe_ipv4_service_class_stats_db *sc_stats_db = this_cpu_ptr(si->stats_pcpu_psc);
	struct sfe_ipv4_per_service_class_stats *sc_stats = &sc_stats_db->psc_stats[sid];

	write_seqcount_begin(&sc_stats->seq);
	sc_stats->tx_bytes += bytes;
	sc_stats->tx_packets++;
	write_seqcount_end(&sc_stats->seq);
}

/*
 * sfe_ipv4_exception_stats_inc()
 *	Increment exception stats.
 */
void sfe_ipv4_exception_stats_inc(struct sfe_ipv4 *si, enum sfe_ipv4_exception_events reason)
{
	struct sfe_ipv4_stats *stats = this_cpu_ptr(si->stats_pcpu);

	stats->exception_events64[reason]++;
	stats->packets_not_forwarded64++;
}

/*
 * sfe_ipv4_is_local_ip()
 *	Returns true if IP is local; returns false otherwise.
 */
static bool sfe_ipv4_is_local_ip(struct sfe_ipv4 *si, __be32 ip_addr)
{
	struct net_device *dev;

	dev = ip_dev_find(&init_net, ip_addr);
	if (dev) {
		dev_put(dev);
		return true;
	}

	return false;
}

/*
 * sfe_ipv4_recv()
 *	Handle packet receives and forwarding.
 *
 * Returns 1 if the packet is forwarded or 0 if it isn't.
 */
int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb, struct sfe_l2_info *l2_info, bool tun_outer)
{
	struct sfe_ipv4 *si = &__si;
	unsigned int len;
	unsigned int tot_len;
	unsigned int frag_off;
	unsigned int ihl;
	bool sync_on_find;
	bool ip_options;
	struct iphdr *iph;
	u32 protocol;

	/*
	 * Check that we have space for an IP header here.
	 */
	len = skb->len;
	if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr)))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE);
		DEBUG_TRACE("len: %u is too short\n", len);
		return 0;
	}

	/*
	 * Validate ip csum if necessary. If ip_summed is set to CHECKSUM_UNNECESSARY, it is assumed
	 * that the L3 checksum is validated by the Rx interface or the tunnel interface that has
	 * generated the packet.
	 */
	iph = (struct iphdr *)skb->data;
	if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY) && (ip_fast_csum((u8 *)iph, iph->ihl))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_HEADER_CSUM_BAD);
		DEBUG_TRACE("Bad IPv4 header csum: 0x%x\n", iph->check);
		return 0;
	}

	/*
	 * Check that our "total length" is large enough for an IP header.
	 */
	tot_len = ntohs(iph->tot_len);
	if (unlikely(tot_len < sizeof(struct iphdr))) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH);
		DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
		return 0;
	}

	/*
	 * Is our IP version wrong?
	 */
	if (unlikely(iph->version != 4)) {
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_NON_V4);
		DEBUG_TRACE("IP version: %u\n", iph->version);
		return 0;
	}

	/*
	 * Does our datagram fit inside the skb?
	 */
	if (unlikely(tot_len > len)) {
		DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
		sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE);
		return 0;
	}

	/*
	 * Do we have a non-initial fragment?
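	 *
	 * frag_off carries both flags and the 13-bit fragment offset:
	 * IP_OFFSET (0x1fff) masks the offset, so a non-zero value means
	 * this is not the first fragment, while IP_MF (0x2000) on a
	 * zero-offset packet marks the first fragment of a larger datagram.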
*/ frag_off = ntohs(iph->frag_off); if (unlikely(frag_off & IP_OFFSET)) { sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT); DEBUG_TRACE("non-initial fragment\n"); return 0; } /* * If we have a (first) fragment then mark it to cause any connection to flush. */ sync_on_find = unlikely(frag_off & IP_MF) ? true : false; /* * Do we have any IP options? That's definite a slow path! If we do have IP * options we need to recheck our header size. */ ihl = iph->ihl << 2; ip_options = unlikely(ihl != sizeof(struct iphdr)) ? true : false; if (unlikely(ip_options)) { if (unlikely(len < ihl)) { DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl); sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE); return 0; } sync_on_find = true; } /* * Handle PPPoE bridge packets using 3-tuple acceleration if SFE_PPPOE_BR_ACCEL_MODE_EN_3T */ if (unlikely(sfe_l2_parse_flag_check(l2_info, SFE_L2_PARSE_FLAGS_PPPOE_INGRESS)) && unlikely(sfe_pppoe_get_br_accel_mode() == SFE_PPPOE_BR_ACCEL_MODE_EN_3T)) { struct ethhdr *eth = eth_hdr(skb); if (!sfe_pppoe_mgr_find_session(l2_info->pppoe_session_id, eth->h_source)) { return sfe_ipv4_recv_pppoe_bridge(si, skb, dev, len, iph, ihl, l2_info); } } protocol = iph->protocol; if (IPPROTO_UDP == protocol) { return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, sync_on_find, l2_info, tun_outer); } if (IPPROTO_TCP == protocol) { return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, sync_on_find, l2_info); } if (IPPROTO_ESP == protocol) { return sfe_ipv4_recv_esp(si, skb, dev, len, iph, ihl, sync_on_find, tun_outer); } if (IPPROTO_ICMP == protocol) { return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl); } #ifdef SFE_GRE_TUN_ENABLE if (IPPROTO_GRE == protocol) { return sfe_ipv4_recv_gre(si, skb, dev, len, iph, ihl, sync_on_find, l2_info, tun_outer); } #endif if (IPPROTO_IPV6 == protocol) { return sfe_ipv4_recv_tun6rd(si, skb, dev, len, iph, ihl, sync_on_find, l2_info, true); } sfe_ipv4_exception_stats_inc(si, SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL); DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol); return 0; } static void sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c, struct sfe_ipv4_rule_create_msg *msg) { struct sfe_ipv4_connection_match *orig_cm; struct sfe_ipv4_connection_match *repl_cm; struct sfe_ipv4_tcp_connection_match *orig_tcp; struct sfe_ipv4_tcp_connection_match *repl_tcp; orig_cm = c->original_match; repl_cm = c->reply_match; orig_tcp = &orig_cm->protocol_state.tcp; repl_tcp = &repl_cm->protocol_state.tcp; /* update orig */ if (orig_tcp->max_win < msg->tcp_rule.flow_max_window) { orig_tcp->max_win = msg->tcp_rule.flow_max_window; } if ((s32)(orig_tcp->end - msg->tcp_rule.flow_end) < 0) { orig_tcp->end = msg->tcp_rule.flow_end; } if ((s32)(orig_tcp->max_end - msg->tcp_rule.flow_max_end) < 0) { orig_tcp->max_end = msg->tcp_rule.flow_max_end; } /* update reply */ if (repl_tcp->max_win < msg->tcp_rule.return_max_window) { repl_tcp->max_win = msg->tcp_rule.return_max_window; } if ((s32)(repl_tcp->end - msg->tcp_rule.return_end) < 0) { repl_tcp->end = msg->tcp_rule.return_end; } if ((s32)(repl_tcp->max_end - msg->tcp_rule.return_max_end) < 0) { repl_tcp->max_end = msg->tcp_rule.return_max_end; } /* update match flags */ orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; if (msg->rule_flags & SFE_RULE_CREATE_FLAG_NO_SEQ_CHECK) { orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; repl_cm->flags |= 
SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; } } static void sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c, struct sfe_ipv4_rule_create_msg *msg) { switch (msg->tuple.protocol) { case IPPROTO_TCP: sfe_ipv4_update_tcp_state(c, msg); break; } } /* * sfe_ipv4_match_entry_set_vlan() */ static void sfe_ipv4_match_entry_set_vlan( struct sfe_ipv4_connection_match *cm, u32 primary_ingress_vlan_tag, u32 primary_egress_vlan_tag, u32 secondary_ingress_vlan_tag, u32 secondary_egress_vlan_tag) { u16 tpid; /* * Prevent stacking header counts when updating. */ cm->ingress_vlan_hdr_cnt = 0; cm->egress_vlan_hdr_cnt = 0; memset(cm->ingress_vlan_hdr, 0, sizeof(cm->ingress_vlan_hdr)); memset(cm->egress_vlan_hdr, 0, sizeof(cm->egress_vlan_hdr)); /* * vlan_hdr[0] corresponds to outer tag * vlan_hdr[1] corresponds to inner tag * Extract the vlan information (tpid and tci) from rule message */ if ((primary_ingress_vlan_tag & VLAN_VID_MASK) != SFE_VLAN_ID_NOT_CONFIGURED) { tpid = (u16)(primary_ingress_vlan_tag >> 16); cm->ingress_vlan_hdr[0].tpid = ntohs(tpid); cm->ingress_vlan_hdr[0].tci = (u16)primary_ingress_vlan_tag; cm->ingress_vlan_hdr_cnt++; } if ((secondary_ingress_vlan_tag & VLAN_VID_MASK) != SFE_VLAN_ID_NOT_CONFIGURED) { tpid = (u16)(secondary_ingress_vlan_tag >> 16); cm->ingress_vlan_hdr[1].tpid = ntohs(tpid); cm->ingress_vlan_hdr[1].tci = (u16)secondary_ingress_vlan_tag; cm->ingress_vlan_hdr_cnt++; } if ((primary_egress_vlan_tag & VLAN_VID_MASK) != SFE_VLAN_ID_NOT_CONFIGURED) { tpid = (u16)(primary_egress_vlan_tag >> 16); cm->egress_vlan_hdr[0].tpid = ntohs(tpid); cm->egress_vlan_hdr[0].tci = (u16)primary_egress_vlan_tag; cm->egress_vlan_hdr_cnt++; } if ((secondary_egress_vlan_tag & VLAN_VID_MASK) != SFE_VLAN_ID_NOT_CONFIGURED) { tpid = (u16)(secondary_egress_vlan_tag >> 16); cm->egress_vlan_hdr[1].tpid = ntohs(tpid); cm->egress_vlan_hdr[1].tci = (u16)secondary_egress_vlan_tag; cm->egress_vlan_hdr_cnt++; } } void sfe_ipv4_update_rule(struct sfe_ipv4_rule_create_msg *msg) { struct sfe_ipv4_connection *c; struct sfe_ipv4 *si = &__si; spin_lock_bh(&si->lock); c = sfe_ipv4_find_connection(si, msg->tuple.protocol, msg->tuple.flow_ip, msg->tuple.flow_ident, msg->tuple.return_ip, msg->tuple.return_ident); if (c != NULL) { sfe_ipv4_update_protocol_state(c, msg); } spin_unlock_bh(&si->lock); } /* * sfe_ipv4_mark_rule_update() * Updates the mark values of match entries. */ void sfe_ipv4_mark_rule_update(struct sfe_connection_mark *mark) { struct sfe_ipv4_connection *c; struct sfe_ipv4 *si = &__si; spin_lock_bh(&si->lock); c = sfe_ipv4_find_connection(si, mark->protocol, mark->src_ip[0], mark->src_port, mark->dest_ip[0], mark->dest_port); if (!c) { spin_unlock_bh(&si->lock); DEBUG_WARN("%px: connection not found for mark update\n", mark); return; } c->original_match->mark = mark->mark; c->reply_match->mark = mark->mark; spin_unlock_bh(&si->lock); DEBUG_TRACE("%px: connection mark updated with %d\n", mark, mark->mark); } EXPORT_SYMBOL(sfe_ipv4_mark_rule_update); /* * sfe_ipv4_xmit_eth_type_check() * Checking if MAC header has to be written. */ static inline bool sfe_ipv4_xmit_eth_type_check(struct net_device *dev, u32 cm_flags) { if (!(dev->flags & IFF_NOARP)) { return true; } /* * For PPPoE, since we are now supporting PPPoE encapsulation, we are writing L2 header. */ if (unlikely(cm_flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP)) { return true; } return false; } /* * sfe_ipv4_service_class_stats_pcpu_get() * Gets one CPU's service class statistics. 
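 *
 * This is the reader side of the seqcount used by
 * sfe_ipv4_service_class_stats_inc(): the counters are re-read until a
 * stable snapshot is observed, bounded by SFE_SERVICE_CLASS_STATS_MAX_RETRY
 * so that a continuously-updating writer cannot stall the reader forever.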
*/ static inline bool sfe_ipv4_service_class_stats_pcpu_get(struct sfe_ipv4_per_service_class_stats *sc_stats, uint64_t *bytes, uint64_t *packets) { uint32_t retries = 0; uint32_t seq; uint64_t bytes_tmp, packets_tmp; do { seq = read_seqcount_begin(&sc_stats->seq); bytes_tmp = sc_stats->tx_bytes; packets_tmp = sc_stats->tx_packets; } while (read_seqcount_retry(&sc_stats->seq, seq) && ++retries < SFE_SERVICE_CLASS_STATS_MAX_RETRY); *bytes += bytes_tmp; *packets += packets_tmp; return retries < SFE_SERVICE_CLASS_STATS_MAX_RETRY; } /* * sfe_ipv4_service_class_stats_get() * Copy the ipv4 statistics for the given service class. */ bool sfe_ipv4_service_class_stats_get(uint8_t sid, uint64_t *bytes, uint64_t *packets) { struct sfe_ipv4 *si = &__si; uint32_t cpu = 0; for_each_possible_cpu(cpu) { struct sfe_ipv4_service_class_stats_db *stats_db = per_cpu_ptr(si->stats_pcpu_psc, cpu); struct sfe_ipv4_per_service_class_stats *sc_stats = &stats_db->psc_stats[sid]; if (!sfe_ipv4_service_class_stats_pcpu_get(sc_stats, bytes, packets)) { return false; } } return true; } /* * sfe_ipv4_create_rule() * Create a forwarding rule. */ int sfe_ipv4_create_rule(struct sfe_ipv4_rule_create_msg *msg) { struct sfe_ipv4 *si = &__si; struct sfe_ipv4_connection *c, *c_old; struct sfe_ipv4_connection_match *original_cm; struct sfe_ipv4_connection_match *reply_cm; struct net_device *dest_dev; struct net_device *src_dev; struct sfe_ipv4_5tuple *tuple = &msg->tuple; s32 flow_interface_num = msg->conn_rule.flow_top_interface_num; s32 return_interface_num = msg->conn_rule.return_top_interface_num; struct net *net; struct sock *sk; unsigned int src_if_idx; u32 flow_sawf_tag; u32 return_sawf_tag; if (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_FLOW_BOTTOM_INTERFACE) { flow_interface_num = msg->conn_rule.flow_interface_num; } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_RETURN_BOTTOM_INTERFACE) { return_interface_num = msg->conn_rule.return_interface_num; } src_dev = dev_get_by_index(&init_net, flow_interface_num); if (!src_dev) { DEBUG_WARN("%px: Unable to find src_dev corresponding to %d\n", msg, flow_interface_num); this_cpu_inc(si->stats_pcpu->connection_create_failures64); return -EINVAL; } dest_dev = dev_get_by_index(&init_net, return_interface_num); if (!dest_dev) { DEBUG_WARN("%px: Unable to find dest_dev corresponding to %d\n", msg, return_interface_num); this_cpu_inc(si->stats_pcpu->connection_create_failures64); dev_put(src_dev); return -EINVAL; } if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) || (src_dev->reg_state != NETREG_REGISTERED))) { dev_put(src_dev); dev_put(dest_dev); DEBUG_WARN("%px: src_dev=%s and dest_dev=%s are unregistered\n", msg, src_dev->name, dest_dev->name); this_cpu_inc(si->stats_pcpu->connection_create_failures64); return -EINVAL; } /* * Allocate the various connection tracking objects. 
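	 *
	 * GFP_ATOMIC is used here since the caller cannot sleep, and each
	 * failure path below unwinds in reverse allocation order before
	 * releasing the device references.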
	 */
	c = (struct sfe_ipv4_connection *)kzalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
	if (unlikely(!c)) {
		DEBUG_WARN("%px: memory allocation of connection entry failed\n", msg);
		this_cpu_inc(si->stats_pcpu->connection_create_failures64);
		dev_put(src_dev);
		dev_put(dest_dev);
		return -ENOMEM;
	}

	original_cm = (struct sfe_ipv4_connection_match *)kzalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
	if (unlikely(!original_cm)) {
		DEBUG_WARN("%px: memory allocation of connection match entry failed\n", msg);
		this_cpu_inc(si->stats_pcpu->connection_create_failures64);
		kfree(c);
		dev_put(src_dev);
		dev_put(dest_dev);
		return -ENOMEM;
	}

	reply_cm = (struct sfe_ipv4_connection_match *)kzalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
	if (unlikely(!reply_cm)) {
		DEBUG_WARN("%px: memory allocation of connection match entry failed\n", msg);
		this_cpu_inc(si->stats_pcpu->connection_create_failures64);
		kfree(original_cm);
		kfree(c);
		dev_put(src_dev);
		dev_put(dest_dev);
		return -ENOMEM;
	}

	this_cpu_inc(si->stats_pcpu->connection_create_requests64);

	spin_lock_bh(&si->lock);

	/*
	 * Check to see if there is already a flow that matches the rule we're
	 * trying to create. If there is then we can't create a new one.
	 */
	c_old = sfe_ipv4_find_connection(si, msg->tuple.protocol, msg->tuple.flow_ip, msg->tuple.flow_ident, msg->tuple.return_ip, msg->tuple.return_ident);
	if (c_old != NULL) {
		this_cpu_inc(si->stats_pcpu->connection_create_collisions64);

		/*
		 * If we already have the flow then it's likely that this
		 * request to create the connection rule contains more
		 * up-to-date information. Check and update the existing
		 * connection accordingly.
		 */
		sfe_ipv4_update_protocol_state(c_old, msg);
		spin_unlock_bh(&si->lock);

		kfree(reply_cm);
		kfree(original_cm);
		kfree(c);

		dev_put(src_dev);
		dev_put(dest_dev);

		DEBUG_TRACE("%px: connection already exists - p:%d\n"
			    "  s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
			    msg, tuple->protocol,
			    src_dev->name, msg->conn_rule.flow_mac, &tuple->flow_ip, ntohs(tuple->flow_ident),
			    dest_dev->name, msg->conn_rule.return_mac, &tuple->return_ip, ntohs(tuple->return_ident));

		return -EADDRINUSE;
	}

	/*
	 * Fill in the "original" direction connection matching object.
	 * Note that the transmit MAC address is "dest_mac_xlate" because
	 * we always know both ends of a connection by their translated
	 * addresses and not their public addresses.
	 */
	original_cm->match_dev = src_dev;
	original_cm->match_protocol = tuple->protocol;
	original_cm->match_src_ip = tuple->flow_ip;
	original_cm->match_src_port = netif_is_vxlan(src_dev) ? 0 : tuple->flow_ident;
	original_cm->match_dest_ip = tuple->return_ip;
	original_cm->match_dest_port = tuple->return_ident;

	original_cm->xlate_src_ip = msg->conn_rule.flow_ip_xlate;
	original_cm->xlate_src_port = msg->conn_rule.flow_ident_xlate;
	original_cm->xlate_dest_ip = msg->conn_rule.return_ip_xlate;
	original_cm->xlate_dest_port = msg->conn_rule.return_ident_xlate;

	if (tuple->protocol == IPPROTO_GRE) {
		/*
		 * PPTP uses a 4-tuple lookup.
		 * During the rule lookup the destination call id from the packet
		 * is matched against the destination port in the cm.
		 */
		original_cm->match_src_port = 0;
		original_cm->xlate_src_port = 0;
	}

	original_cm->xmit_dev = dest_dev;
	original_cm->xmit_dev_mtu = msg->conn_rule.return_mtu;

	original_cm->connection = c;
	original_cm->counter_match = reply_cm;

	/*
	 * UDP Socket is valid only in decap direction.
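	 *
	 * (The socket pointer is only looked up and attached to reply_cm
	 * further below, so the original direction never delivers to a
	 * local UDP encap socket.)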
*/ RCU_INIT_POINTER(original_cm->up, NULL); if (msg->valid_flags & SFE_RULE_CREATE_MARK_VALID) { original_cm->mark = msg->mark_rule.flow_mark; original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_MARK; } if (msg->valid_flags & SFE_RULE_CREATE_QOS_VALID) { original_cm->priority = msg->qos_rule.flow_qos_tag; original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; } if (msg->valid_flags & SFE_RULE_CREATE_DSCP_MARKING_VALID) { original_cm->dscp = msg->dscp_rule.flow_dscp << SFE_IPV4_DSCP_SHIFT; original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_BRIDGE_FLOW; } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_FLOW_TRANSMIT_FAST) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT_DEV_ADMISSION; } /* * Mark SAWF metadata if the sawf tag is valid and set. */ original_cm->sawf_valid = false; flow_sawf_tag = SFE_GET_SAWF_TAG(msg->sawf_rule.flow_mark); if (likely(SFE_SAWF_TAG_IS_VALID(flow_sawf_tag))) { original_cm->mark = msg->sawf_rule.flow_mark; original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_MARK; original_cm->sawf_valid = true; } /* * Add VLAN rule to original_cm */ if (msg->valid_flags & SFE_RULE_CREATE_VLAN_VALID) { struct sfe_vlan_rule *vlan_primary_rule = &msg->vlan_primary_rule; struct sfe_vlan_rule *vlan_secondary_rule = &msg->vlan_secondary_rule; sfe_ipv4_match_entry_set_vlan(original_cm, vlan_primary_rule->ingress_vlan_tag, vlan_primary_rule->egress_vlan_tag, vlan_secondary_rule->ingress_vlan_tag, vlan_secondary_rule->egress_vlan_tag); if ((msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_RETURN_BOTTOM_INTERFACE) && original_cm->egress_vlan_hdr_cnt > 0) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_INSERT_EGRESS_VLAN_TAG; original_cm->l2_hdr_size += original_cm->egress_vlan_hdr_cnt * VLAN_HLEN; } } if (((IPPROTO_GRE == tuple->protocol) || (IPPROTO_ESP == tuple->protocol)) && !sfe_ipv4_is_local_ip(si, original_cm->match_dest_ip)) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PASSTHROUGH; } #ifdef CONFIG_NF_FLOW_COOKIE original_cm->flow_cookie = 0; #endif #ifdef CONFIG_XFRM if (msg->valid_flags & SFE_RULE_CREATE_DIRECTION_VALID) { original_cm->flow_accel = msg->direction_rule.flow_accel; } else { original_cm->flow_accel = 1; } #endif /* * If l2_features are disabled and flow uses l2 features such as macvlan/bridge/pppoe/vlan, * bottom interfaces are expected to be disabled in the flow rule and always top interfaces * are used. In such cases, do not use HW csum offload. csum offload is used only when we * are sending directly to the destination interface that supports it. */ if (likely(dest_dev->features & NETIF_F_HW_CSUM) && sfe_dev_has_hw_csum(dest_dev)) { if ((msg->conn_rule.return_top_interface_num == msg->conn_rule.return_interface_num) || (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_RETURN_BOTTOM_INTERFACE)) { /* * Dont enable CSUM offload */ #if 0 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD; #endif } } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_FLOW_SRC_INTERFACE_CHECK) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_SRC_INTERFACE_CHECK; } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_FLOW_SRC_INTERFACE_CHECK_NO_FLUSH) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_SRC_INTERFACE_CHECK_NO_FLUSH; } /* * Adding PPPoE parameters to original and reply entries based on the direction where * PPPoE header is valid in ECM rule. 
* * If PPPoE is valid in flow direction (from interface is PPPoE), then * original cm will have PPPoE at ingress (strip PPPoE header) * reply cm will have PPPoE at egress (add PPPoE header) * * If PPPoE is valid in return direction (to interface is PPPoE), then * original cm will have PPPoE at egress (add PPPoE header) * reply cm will have PPPoE at ingress (strip PPPoE header) */ if (msg->valid_flags & SFE_RULE_CREATE_PPPOE_DECAP_VALID) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_DECAP; original_cm->pppoe_session_id = msg->pppoe_rule.flow_pppoe_session_id; ether_addr_copy(original_cm->pppoe_remote_mac, msg->pppoe_rule.flow_pppoe_remote_mac); reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP; reply_cm->l2_hdr_size += PPPOE_SES_HLEN; reply_cm->pppoe_session_id = msg->pppoe_rule.flow_pppoe_session_id; ether_addr_copy(reply_cm->pppoe_remote_mac, msg->pppoe_rule.flow_pppoe_remote_mac); } if (msg->valid_flags & SFE_RULE_CREATE_PPPOE_ENCAP_VALID) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP; original_cm->l2_hdr_size += PPPOE_SES_HLEN; original_cm->pppoe_session_id = msg->pppoe_rule.return_pppoe_session_id; ether_addr_copy(original_cm->pppoe_remote_mac, msg->pppoe_rule.return_pppoe_remote_mac); reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_DECAP; reply_cm->pppoe_session_id = msg->pppoe_rule.return_pppoe_session_id; ether_addr_copy(reply_cm->pppoe_remote_mac, msg->pppoe_rule.return_pppoe_remote_mac); } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_RETURN_SRC_INTERFACE_CHECK) { reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_SRC_INTERFACE_CHECK; } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_RETURN_SRC_INTERFACE_CHECK_NO_FLUSH) { reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_SRC_INTERFACE_CHECK_NO_FLUSH; } /* * For the non-arp interface, we don't write L2 HDR. */ if (sfe_ipv4_xmit_eth_type_check(dest_dev, original_cm->flags)) { /* * Check whether the rule has configured a specific source MAC address to use. * This is needed when virtual L3 interfaces such as br-lan, macvlan, vlan are used during egress */ if (msg->rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { ether_addr_copy((u8 *)original_cm->xmit_src_mac, (u8 *)msg->conn_rule.flow_mac); } else { if ((msg->valid_flags & SFE_RULE_CREATE_SRC_MAC_VALID) && (msg->src_mac_rule.mac_valid_flags & SFE_SRC_MAC_RETURN_VALID)) { ether_addr_copy((u8 *)original_cm->xmit_src_mac, (u8 *)msg->src_mac_rule.return_src_mac); } else { ether_addr_copy((u8 *)original_cm->xmit_src_mac, (u8 *)dest_dev->dev_addr); } } ether_addr_copy((u8 *)original_cm->xmit_dest_mac, (u8 *)msg->conn_rule.return_mac); original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; original_cm->l2_hdr_size += ETH_HLEN; /* * If our dev writes Ethernet headers then we can write a really fast * version. */ if (dest_dev->header_ops) { if (dest_dev->header_ops->create == eth_header) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; } } } /* * Fill in the "reply" direction connection matching object. */ reply_cm->match_dev = dest_dev; reply_cm->match_protocol = tuple->protocol; reply_cm->match_src_ip = msg->conn_rule.return_ip_xlate; /* * Keep source port as 0 for VxLAN tunnels. 
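	 *
	 * VxLAN encapsulation picks a different UDP source port per inner
	 * flow (derived from a hash of the inner headers), so matching on
	 * it would fragment one tunnel into many connection entries;
	 * wildcarding it with 0 keeps a single match entry per tunnel.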
*/ if (netif_is_vxlan(src_dev) || netif_is_vxlan(dest_dev)) { reply_cm->match_src_port = 0; } else { reply_cm->match_src_port = msg->conn_rule.return_ident_xlate; } reply_cm->match_dest_ip = msg->conn_rule.flow_ip_xlate; reply_cm->match_dest_port = msg->conn_rule.flow_ident_xlate; reply_cm->xlate_src_ip = tuple->return_ip; reply_cm->xlate_src_port = tuple->return_ident; reply_cm->xlate_dest_ip = tuple->flow_ip; reply_cm->xlate_dest_port = tuple->flow_ident; if (tuple->protocol == IPPROTO_GRE) { /* * the PPTP is 4 tuple lookup. * During th rule lookup destination call id from packet * is matched against destination port in cm. */ reply_cm->match_src_port = 0; reply_cm->xlate_src_port = 0; } reply_cm->xmit_dev = src_dev; reply_cm->xmit_dev_mtu = msg->conn_rule.flow_mtu; reply_cm->connection = c; reply_cm->counter_match = original_cm; if (msg->valid_flags & SFE_RULE_CREATE_MARK_VALID) { reply_cm->mark = msg->mark_rule.return_mark; reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_MARK; } if (msg->valid_flags & SFE_RULE_CREATE_QOS_VALID) { reply_cm->priority = msg->qos_rule.return_qos_tag; reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; } if (msg->valid_flags & SFE_RULE_CREATE_DSCP_MARKING_VALID) { reply_cm->dscp = msg->dscp_rule.return_dscp << SFE_IPV4_DSCP_SHIFT; reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_BRIDGE_FLOW; } if (msg->rule_flags & SFE_RULE_CREATE_FLAG_RETURN_TRANSMIT_FAST) { reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT_DEV_ADMISSION; } if (((IPPROTO_GRE == tuple->protocol) || (IPPROTO_ESP == tuple->protocol)) && !sfe_ipv4_is_local_ip(si, reply_cm->match_dest_ip)) { reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PASSTHROUGH; } /* * Mark SAWF metadata in reply match if the sawf tag is valid. */ reply_cm->sawf_valid = false; return_sawf_tag = SFE_GET_SAWF_TAG(msg->sawf_rule.return_mark); if (likely(SFE_SAWF_TAG_IS_VALID(return_sawf_tag))) { reply_cm->mark = msg->sawf_rule.return_mark; reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_MARK; reply_cm->sawf_valid = true; } /* * Setup UDP Socket if found to be valid for decap. */ RCU_INIT_POINTER(reply_cm->up, NULL); net = dev_net(reply_cm->match_dev); src_if_idx = src_dev->ifindex; rcu_read_lock(); /* * Look for the associated sock object. * __udp4_lib_lookup() holds a reference for this sock object, * which will be released in sfe_ipv4_free_connection_rcu() */ #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) sk = __udp4_lib_lookup(net, reply_cm->xlate_src_ip, reply_cm->xlate_src_port, reply_cm->match_dest_ip, reply_cm->match_dest_port, src_if_idx, &udp_table); #else sk = __udp4_lib_lookup(net, reply_cm->xlate_src_ip, reply_cm->xlate_src_port, reply_cm->match_dest_ip, reply_cm->match_dest_port, src_if_idx, 0, &udp_table, NULL); #endif rcu_read_unlock(); /* * We set the UDP sock pointer as valid only for decap direction. 
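	 *
	 * The extra sk_refcnt reference taken below pairs with the
	 * sock_put() in sfe_ipv4_free_connection_rcu(), so the socket
	 * cannot be freed while this connection still points at it.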
*/ if (sk && udp_sk(sk)->encap_type) { #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)) if (!atomic_add_unless(&sk->sk_refcnt, 1, 0)) { #else if (!refcount_inc_not_zero(&sk->sk_refcnt)) { #endif this_cpu_inc(si->stats_pcpu->connection_create_failures64); spin_unlock_bh(&si->lock); kfree(reply_cm); kfree(original_cm); kfree(c); DEBUG_TRACE("%px: sfe: unable to take reference for socket(%px) p:%d\n" " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n", msg, sk, tuple->protocol, src_dev->name, msg->conn_rule.flow_mac, &tuple->flow_ip, ntohs(tuple->flow_ident), dest_dev->name, msg->conn_rule.return_mac, &tuple->return_ip, ntohs(tuple->return_ident)); dev_put(src_dev); dev_put(dest_dev); return -ESHUTDOWN; } rcu_assign_pointer(reply_cm->up, udp_sk(sk)); DEBUG_INFO("%px: Sock(%px) lookup success with reply_cm direction\n", msg, sk); DEBUG_INFO("%px: SFE connection -\n" " s: %s:%pI4(%pI4):%u(%u)\n" " d: %s:%pI4(%pI4):%u(%u)\n", msg, reply_cm->match_dev->name, &reply_cm->match_src_ip, &reply_cm->xlate_src_ip, ntohs(reply_cm->match_src_port), ntohs(reply_cm->xlate_src_port), reply_cm->xmit_dev->name, &reply_cm->match_dest_ip, &reply_cm->xlate_dest_ip, ntohs(reply_cm->match_dest_port), ntohs(reply_cm->xlate_dest_port)); } /* * Add VLAN rule to reply_cm */ if (msg->valid_flags & SFE_RULE_CREATE_VLAN_VALID) { struct sfe_vlan_rule *vlan_primary_rule = &msg->vlan_primary_rule; struct sfe_vlan_rule *vlan_secondary_rule = &msg->vlan_secondary_rule; sfe_ipv4_match_entry_set_vlan(reply_cm, vlan_primary_rule->egress_vlan_tag, vlan_primary_rule->ingress_vlan_tag, vlan_secondary_rule->egress_vlan_tag, vlan_secondary_rule->ingress_vlan_tag); if ((msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_FLOW_BOTTOM_INTERFACE) && reply_cm->egress_vlan_hdr_cnt > 0) { reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_INSERT_EGRESS_VLAN_TAG; reply_cm->l2_hdr_size += reply_cm->egress_vlan_hdr_cnt * VLAN_HLEN; } } /* * the net_protocol handler will be used only in decap path * for non passthrough case. 
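	 *
	 * When set, the receive path can hand the inner packet straight to
	 * the kernel protocol handler (reply_cm->proto->handler(skb)) rather
	 * than re-walking the IP stack; for passthrough flows the tunnel
	 * packet is forwarded as-is, so no handler is needed.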
	 */
	original_cm->proto = NULL;
	reply_cm->proto = NULL;
	original_cm->top_interface_dev = NULL;
	reply_cm->top_interface_dev = NULL;

#ifdef SFE_GRE_TUN_ENABLE
	if ((IPPROTO_GRE == tuple->protocol) && !(reply_cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PASSTHROUGH)) {
		rcu_read_lock();
		reply_cm->proto = rcu_dereference(inet_protos[IPPROTO_GRE]);
		rcu_read_unlock();

		if (unlikely(!reply_cm->proto)) {
			this_cpu_inc(si->stats_pcpu->connection_create_failures64);
			spin_unlock_bh(&si->lock);
			kfree(reply_cm);
			kfree(original_cm);
			kfree(c);
			dev_put(src_dev);
			dev_put(dest_dev);
			DEBUG_WARN("sfe: GRE proto handler is not registered\n");
			return -EPERM;
		}
	}
#endif

	if (IPPROTO_IPV6 == tuple->protocol) {
		original_cm->proto = NULL;
		rcu_read_lock();
		reply_cm->proto = rcu_dereference(inet_protos[IPPROTO_IPV6]);
		rcu_read_unlock();
		reply_cm->top_interface_dev = dev_get_by_index(&init_net, msg->conn_rule.return_top_interface_num);
		if (unlikely(!reply_cm->top_interface_dev)) {
			DEBUG_WARN("%px: Unable to find top_interface_dev corresponding to %d\n", msg,
				   msg->conn_rule.return_top_interface_num);
			this_cpu_inc(si->stats_pcpu->connection_create_failures64);
			spin_unlock_bh(&si->lock);
			kfree(reply_cm);
			kfree(original_cm);
			kfree(c);
			dev_put(src_dev);
			dev_put(dest_dev);
			return -EINVAL;
		}
	}

	if ((IPPROTO_ESP == tuple->protocol) && !(reply_cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PASSTHROUGH)) {
		rcu_read_lock();
		reply_cm->proto = rcu_dereference(inet_protos[IPPROTO_ESP]);
		rcu_read_unlock();

		if (unlikely(!reply_cm->proto)) {
			/*
			 * Match the other failure paths: count the failure and
			 * drop the connection lock before unwinding.
			 */
			this_cpu_inc(si->stats_pcpu->connection_create_failures64);
			spin_unlock_bh(&si->lock);
			kfree(reply_cm);
			kfree(original_cm);
			kfree(c);
			dev_put(src_dev);
			dev_put(dest_dev);
			DEBUG_WARN("sfe: ESP proto handler is not registered\n");
			return -EPERM;
		}
	}

#ifdef CONFIG_NF_FLOW_COOKIE
	reply_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
	if (msg->valid_flags & SFE_RULE_CREATE_DIRECTION_VALID) {
		reply_cm->flow_accel = msg->direction_rule.return_accel;
	} else {
		reply_cm->flow_accel = 1;
	}
#endif

	/*
	 * If l2_features are disabled and flow uses l2 features such as macvlan/bridge/pppoe/vlan,
	 * bottom interfaces are expected to be disabled in the flow rule and always top interfaces
	 * are used. In such cases, do not use HW csum offload. csum offload is used only when we
	 * are sending directly to the destination interface that supports it.
	 */
	if (likely(src_dev->features & NETIF_F_HW_CSUM) && sfe_dev_has_hw_csum(src_dev)) {
		if ((msg->conn_rule.flow_top_interface_num == msg->conn_rule.flow_interface_num)
		    || (msg->rule_flags & SFE_RULE_CREATE_FLAG_USE_FLOW_BOTTOM_INTERFACE)) {
			/*
			 * Don't enable CSUM offload
			 */
#if 0
			reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_CSUM_OFFLOAD;
#endif
		}
	}

	/*
	 * For the non-arp interface, we don't write L2 HDR.
	 */
	if (sfe_ipv4_xmit_eth_type_check(src_dev, reply_cm->flags)) {
		/*
		 * Check whether the rule has configured a specific source MAC address to use.
* This is needed when virtual L3 interfaces such as br-lan, macvlan, vlan are used during egress */ if (msg->rule_flags & SFE_RULE_CREATE_FLAG_BRIDGE_FLOW) { ether_addr_copy((u8 *)reply_cm->xmit_src_mac, (u8 *)msg->conn_rule.return_mac); } else { if ((msg->valid_flags & SFE_RULE_CREATE_SRC_MAC_VALID) && (msg->src_mac_rule.mac_valid_flags & SFE_SRC_MAC_FLOW_VALID)) { ether_addr_copy((u8 *)reply_cm->xmit_src_mac, (u8 *)msg->src_mac_rule.flow_src_mac); } else { ether_addr_copy((u8 *)reply_cm->xmit_src_mac, (u8 *)src_dev->dev_addr); } } ether_addr_copy((u8 *)reply_cm->xmit_dest_mac, (u8 *)msg->conn_rule.flow_mac); reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; reply_cm->l2_hdr_size += ETH_HLEN; /* * If our dev writes Ethernet headers then we can write a really fast * version. */ if (src_dev->header_ops) { if (src_dev->header_ops->create == eth_header) { reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; } } } if ((tuple->return_ip != msg->conn_rule.return_ip_xlate) || (tuple->return_ident != msg->conn_rule.return_ident_xlate)) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; } if ((tuple->flow_ip != msg->conn_rule.flow_ip_xlate) || (tuple->flow_ident != msg->conn_rule.flow_ident_xlate)) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; } /* * Initialize the protocol-specific information that we track. */ switch (tuple->protocol) { case IPPROTO_TCP: original_cm->protocol_state.tcp.win_scale = msg->tcp_rule.flow_window_scale; original_cm->protocol_state.tcp.max_win = msg->tcp_rule.flow_max_window ? msg->tcp_rule.flow_max_window : 1; original_cm->protocol_state.tcp.end = msg->tcp_rule.flow_end; original_cm->protocol_state.tcp.max_end = msg->tcp_rule.flow_max_end; reply_cm->protocol_state.tcp.win_scale = msg->tcp_rule.return_window_scale; reply_cm->protocol_state.tcp.max_win = msg->tcp_rule.return_max_window ? msg->tcp_rule.return_max_window : 1; reply_cm->protocol_state.tcp.end = msg->tcp_rule.return_end; reply_cm->protocol_state.tcp.max_end = msg->tcp_rule.return_max_end; if (msg->rule_flags & SFE_RULE_CREATE_FLAG_NO_SEQ_CHECK) { original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; } break; case IPPROTO_RAW: /* * Set src_port to 0 to avoid hash collision in connection match lookups. */ original_cm->match_src_port = 0; original_cm->xlate_src_port = 0; reply_cm->match_src_port = 0; reply_cm->xlate_src_port = 0; break; } /* * Fill in the ipv4_connection object. */ c->protocol = tuple->protocol; c->src_ip = tuple->flow_ip; c->src_ip_xlate = msg->conn_rule.flow_ip_xlate; c->src_port = tuple->flow_ident; c->src_port_xlate = msg->conn_rule.flow_ident_xlate; c->original_dev = src_dev; c->original_match = original_cm; c->dest_ip = tuple->return_ip; c->dest_ip_xlate = msg->conn_rule.return_ip_xlate; c->dest_port = tuple->return_ident; c->dest_port_xlate = msg->conn_rule.return_ident_xlate; c->reply_dev = dest_dev; c->reply_match = reply_cm; c->debug_read_seq = 0; c->last_sync_jiffies = get_jiffies_64(); c->removed = false; sfe_ipv4_connection_match_compute_translations(original_cm); sfe_ipv4_connection_match_compute_translations(reply_cm); sfe_ipv4_insert_connection(si, c); spin_unlock_bh(&si->lock); /* * We have everything we need! 
*/ DEBUG_INFO("%px: NEW connection - p: %d\n" "original_cm: match_dev=src_dev: %s %d %pM\n" " xmit_dev=dest_dev: %s %d %pM\n" " xmit_src_mac: %pM\n" " xmit_dest_mac: %pM\n" " flags: %x l2_hdr: %u\n" "flow_ip: %pI4:%u\n" "flow_ip_xlate: %pI4:%u\n" "flow_mac: %pM\n" "reply_cm: match_dev=dest_dev: %s %d %pM\n" " xmit_dev=src_dev: %s %d %pM\n" " xmit_src_mac: %pM\n" " xmit_dest_mac: %pM\n" " flags: %x l2_hdr: %u\n" "return_ip: %pI4:%u\n" "return_ip_xlate: %pI4:%u\n" "return_mac: %pM\n" "flags: valid=%x src_mac_valid=%x\n", c, tuple->protocol, original_cm->match_dev->name, original_cm->match_dev->ifindex, original_cm->match_dev->dev_addr, original_cm->xmit_dev->name, original_cm->xmit_dev->ifindex, original_cm->xmit_dev->dev_addr, original_cm->xmit_src_mac, original_cm->xmit_dest_mac, original_cm->flags, original_cm->l2_hdr_size, &tuple->flow_ip, ntohs(tuple->flow_ident), &msg->conn_rule.flow_ip_xlate, ntohs(msg->conn_rule.flow_ident_xlate), msg->conn_rule.flow_mac, reply_cm->match_dev->name, reply_cm->match_dev->ifindex, reply_cm->match_dev->dev_addr, reply_cm->xmit_dev->name, reply_cm->xmit_dev->ifindex, reply_cm->xmit_dev->dev_addr, reply_cm->xmit_src_mac, reply_cm->xmit_dest_mac, reply_cm->flags, reply_cm->l2_hdr_size, &tuple->return_ip, ntohs(tuple->return_ident), &msg->conn_rule.return_ip_xlate, ntohs(msg->conn_rule.return_ident_xlate), msg->conn_rule.return_mac, msg->valid_flags, msg->src_mac_rule.mac_valid_flags); return 0; } /* * sfe_ipv4_destroy_rule() * Destroy a forwarding rule. */ void sfe_ipv4_destroy_rule(struct sfe_ipv4_rule_destroy_msg *msg) { struct sfe_ipv4 *si = &__si; struct sfe_ipv4_connection *c; bool ret; struct sfe_ipv4_5tuple *tuple = &msg->tuple; this_cpu_inc(si->stats_pcpu->connection_destroy_requests64); spin_lock_bh(&si->lock); /* * Check to see if we have a flow that matches the rule we're trying * to destroy. If there isn't then we can't destroy it. */ c = sfe_ipv4_find_connection(si, tuple->protocol, tuple->flow_ip, tuple->flow_ident, tuple->return_ip, tuple->return_ident); if (!c) { spin_unlock_bh(&si->lock); this_cpu_inc(si->stats_pcpu->connection_destroy_misses64); DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n", tuple->protocol, &tuple->flow_ip, ntohs(tuple->flow_ident), &tuple->return_ip, ntohs(tuple->return_ident)); return; } /* * Remove our connection details from the hash tables. */ ret = sfe_ipv4_remove_connection(si, c); spin_unlock_bh(&si->lock); if (ret) { sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_DESTROY); } DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n", tuple->protocol, &tuple->flow_ip, ntohs(tuple->flow_ident), &tuple->return_ip, ntohs(tuple->return_ident)); } /* * sfe_ipv4_sync_invoke() * Schedule many sync stats. */ bool sfe_ipv4_sync_invoke(uint16_t index) { struct sfe_ipv4 *si = &__si; DEBUG_INFO("Request for a sync with index[%d]\n", index); return schedule_delayed_work_on(si->work_cpu, &(si->sync_dwork), 0); } /* * sfe_ipv4_register_sync_rule_callback() * Register a callback for many rule synchronization. */ void sfe_ipv4_register_many_sync_callback(sfe_ipv4_many_sync_callback_t cb) { struct sfe_ipv4 *si = &__si; spin_lock_bh(&si->lock); rcu_assign_pointer(si->many_sync_callback, cb); spin_unlock_bh(&si->lock); } /* * sfe_ipv4_register_sync_rule_callback() * Register a callback for rule synchronization. 
/*
 * sfe_ipv4_register_sync_rule_callback()
 *	Register a callback for rule synchronization.
 */
void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
{
	struct sfe_ipv4 *si = &__si;

	spin_lock_bh(&si->lock);
	rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
	spin_unlock_bh(&si->lock);
}

/*
 * sfe_ipv4_get_debug_dev()
 */
static ssize_t sfe_ipv4_get_debug_dev(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct sfe_ipv4 *si = &__si;
	ssize_t count;
	int num;

	spin_lock_bh(&si->lock);
	num = si->debug_dev;
	spin_unlock_bh(&si->lock);

	count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
	return count;
}

/*
 * sysfs attributes.
 */
static const struct device_attribute sfe_ipv4_debug_dev_attr =
	__ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);

/*
 * sfe_ipv4_destroy_all_rules_for_dev()
 *	Destroy all connections that match a particular device.
 *
 * If we pass dev as NULL then this destroys all connections.
 */
void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
{
	struct sfe_ipv4 *si = &__si;
	struct sfe_ipv4_connection *c;
	bool ret;

another_round:
	spin_lock_bh(&si->lock);

	for (c = si->all_connections_head; c; c = c->all_connections_next) {
		/*
		 * Does this connection relate to the device we are destroying?
		 */
		if (!dev
		    || (dev == c->original_dev)
		    || (dev == c->reply_dev)) {
			break;
		}
	}

	if (c) {
		ret = sfe_ipv4_remove_connection(si, c);
	}

	spin_unlock_bh(&si->lock);

	if (c) {
		if (ret) {
			sfe_ipv4_flush_connection(si, c, SFE_SYNC_REASON_DESTROY);
		}
		goto another_round;
	}
}

/*
 * sfe_ipv4_periodic_sync()
 */
static void sfe_ipv4_periodic_sync(struct work_struct *work)
{
	struct sfe_ipv4 *si = container_of((struct delayed_work *)work, struct sfe_ipv4, sync_dwork);
	u64 now_jiffies;
	int quota, count;
	sfe_ipv4_many_sync_callback_t sync_rule_callback;
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_conn_sync *conn_sync;

	now_jiffies = get_jiffies_64();

	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->many_sync_callback);
	rcu_read_unlock();
	if (!sync_rule_callback) {
		return;
	}

	spin_lock_bh(&si->lock);

	/*
	 * If we have reached the end of the connection list, walk from
	 * the connection head.
	 */
	c = si->wc_next;
	if (unlikely(!c)) {
		c = si->all_connections_head;
	}

	/*
	 * Get the max number of connections to be put in this sync msg.
	 */
	quota = sfe_ipv4_sync_max_number;
	conn_sync = sfe_ipv4_sync_many_msg->msg.conn_stats_many.conn_sync;

	/*
	 * Walk the "all connection" list and sync the connection state.
	 */
	while (likely(c && quota)) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_connection_sync sis;

		cm = c->original_match;
		counter_cm = c->reply_match;

		/*
		 * If no packets were received in either the original or the
		 * reply direction, move on to the next connection.
		 */
		if ((!atomic_read(&cm->rx_packet_count)) && !(atomic_read(&counter_cm->rx_packet_count))) {
			c = c->all_connections_next;
			continue;
		}

		sfe_ipv4_gen_sync_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);
		sfe_ipv4_stats_convert(conn_sync, &sis);

		quota--;
		conn_sync++;
		c = c->all_connections_next;
	}

	/*
	 * Remember where we stopped so that the next sync round resumes
	 * from this connection.
	 */
	si->wc_next = c;
	spin_unlock_bh(&si->lock);

	count = sfe_ipv4_sync_max_number - quota;

	/*
	 * If we reached the end of the connection list, tell ECM that the
	 * sync round is done; otherwise report how many entries this
	 * message carries.
	 */
	if (c == NULL) {
		DEBUG_INFO("Synced all connections.\n");
		sfe_ipv4_sync_many_msg->msg.conn_stats_many.next = 0;
	} else {
		DEBUG_INFO("Some connections left.\n");
		sfe_ipv4_sync_many_msg->msg.conn_stats_many.next = count;
	}
	DEBUG_INFO("Sync %d connections\n", count);
	sfe_ipv4_sync_many_msg->msg.conn_stats_many.count = count;
	sfe_ipv4_sync_many_msg->cm.response = SFE_CMN_RESPONSE_ACK;

	sync_rule_callback(sfe_ipv4_sync_many_msg);
}
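/*
 * Illustrative sketch (compiled out, hypothetical consumer): a many-sync
 * callback receives the message filled in by sfe_ipv4_periodic_sync(), walks
 * the conn_sync records, and requests another round while "next" is
 * non-zero. Only the fields used above are assumed; the real consumer is
 * ECM and this is not its actual code (the callback's return type is an
 * assumption here).
 */
#if 0
static void example_many_sync_callback(struct sfe_ipv4_msg *msg)
{
	struct sfe_ipv4_conn_sync_many_msg *nicsm = &msg->msg.conn_stats_many;
	uint32_t i;

	for (i = 0; i < nicsm->count; i++) {
		struct sfe_ipv4_conn_sync *sync = &nicsm->conn_sync[i];

		/* Consume one connection's stats snapshot here. */
		(void)sync;
	}

	if (nicsm->next) {
		/* More connections remain: schedule another round. */
		sfe_ipv4_sync_invoke(nicsm->next);
	}
}

/* Registration: sfe_ipv4_register_many_sync_callback(example_many_sync_callback); */
#endif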
#define CHAR_DEV_MSG_SIZE 768

/*
 * sfe_ipv4_debug_dev_read_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	si->debug_read_seq++;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_connections_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						      int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}
/*
 * sfe_ipv4_debug_dev_read_connections_connection()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
							   int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	struct sfe_ipv4_connection *c;
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;
	int bytes_read;
	int protocol;
	struct net_device *src_dev;
	__be32 src_ip;
	__be32 src_ip_xlate;
	__be16 src_port;
	__be16 src_port_xlate;
	u64 src_rx_packets;
	u64 src_rx_bytes;
	struct net_device *dest_dev;
	__be32 dest_ip;
	__be32 dest_ip_xlate;
	__be16 dest_port;
	__be16 dest_port_xlate;
	u64 dest_rx_packets;
	u64 dest_rx_bytes;
	u64 last_sync_jiffies;
	u32 src_mark, dest_mark, src_priority, dest_priority, src_dscp, dest_dscp;
	bool original_cm_sawf_valid, reply_cm_sawf_valid;
	u32 flow_service_class, return_service_class;
	u32 flow_msduq, return_msduq;
	u32 packet, byte, original_cm_flags;
	u16 pppoe_session_id;
	u8 pppoe_remote_mac[ETH_ALEN];
	u32 original_fast_xmit, reply_fast_xmit;
#ifdef CONFIG_NF_FLOW_COOKIE
	int src_flow_cookie, dst_flow_cookie;
#endif

	spin_lock_bh(&si->lock);

	for (c = si->all_connections_head; c; c = c->all_connections_next) {
		if (c->debug_read_seq < si->debug_read_seq) {
			c->debug_read_seq = si->debug_read_seq;
			break;
		}
	}

	/*
	 * If there were no connections then move to the next state.
	 */
	if (!c || c->removed) {
		spin_unlock_bh(&si->lock);
		ws->state++;
		return true;
	}

	original_cm = c->original_match;
	reply_cm = c->reply_match;

	protocol = c->protocol;
	src_dev = c->original_dev;
	src_ip = c->src_ip;
	src_ip_xlate = c->src_ip_xlate;
	src_port = c->src_port;
	src_port_xlate = c->src_port_xlate;
	src_priority = original_cm->priority;
	src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT;

	sfe_ipv4_connection_match_update_summary_stats(original_cm, &packet, &byte);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm, &packet, &byte);

	src_rx_packets = original_cm->rx_packet_count64;
	src_rx_bytes = original_cm->rx_byte_count64;
	src_mark = original_cm->mark;
	original_fast_xmit = (original_cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT);
	dest_dev = c->reply_dev;
	dest_ip = c->dest_ip;
	dest_ip_xlate = c->dest_ip_xlate;
	dest_port = c->dest_port;
	dest_port_xlate = c->dest_port_xlate;
	dest_priority = reply_cm->priority;
	dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
	dest_rx_packets = reply_cm->rx_packet_count64;
	dest_rx_bytes = reply_cm->rx_byte_count64;
	dest_mark = reply_cm->mark;
	reply_fast_xmit = (reply_cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_FAST_XMIT);
	last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
	original_cm_flags = original_cm->flags;
	pppoe_session_id = original_cm->pppoe_session_id;
	ether_addr_copy(pppoe_remote_mac, original_cm->pppoe_remote_mac);
	original_cm_sawf_valid = original_cm->sawf_valid;
	reply_cm_sawf_valid = reply_cm->sawf_valid;
	flow_service_class = SFE_GET_SAWF_SERVICE_CLASS(original_cm->mark);
	flow_msduq = SFE_GET_SAWF_MSDUQ(original_cm->mark);
	return_service_class = SFE_GET_SAWF_SERVICE_CLASS(reply_cm->mark);
	return_msduq = SFE_GET_SAWF_MSDUQ(reply_cm->mark);
#ifdef CONFIG_NF_FLOW_COOKIE
	src_flow_cookie = original_cm->flow_cookie;
	dst_flow_cookie = reply_cm->flow_cookie;
#endif
	spin_unlock_bh(&si->lock);

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
				"protocol=\"%u\" "
				"src_dev=\"%s\" "
				"src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
				"src_port=\"%u\" src_port_xlate=\"%u\" "
				"src_priority=\"%u\" src_dscp=\"%u\" "
				"src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
				"src_mark=\"%08x\" "
				"src_fast_xmit=\"%s\" "
				"dest_dev=\"%s\" "
				"dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
				"dest_port=\"%u\" dest_port_xlate=\"%u\" "
				"dest_priority=\"%u\" dest_dscp=\"%u\" "
				"dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
				"dest_mark=\"%08x\" "
				"reply_fast_xmit=\"%s\" "
#ifdef CONFIG_NF_FLOW_COOKIE
				"src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
#endif
				"last_sync=\"%llu\" ",
				protocol,
				src_dev->name,
				&src_ip, &src_ip_xlate,
				ntohs(src_port), ntohs(src_port_xlate),
				src_priority, src_dscp,
				src_rx_packets, src_rx_bytes,
				src_mark,
				original_fast_xmit ? "Yes" : "No",
				dest_dev->name,
				&dest_ip, &dest_ip_xlate,
				ntohs(dest_port), ntohs(dest_port_xlate),
				dest_priority, dest_dscp,
				dest_rx_packets, dest_rx_bytes,
				dest_mark,
				reply_fast_xmit ? "Yes" : "No",
#ifdef CONFIG_NF_FLOW_COOKIE
				src_flow_cookie, dst_flow_cookie,
#endif
				last_sync_jiffies);

	if (original_cm_flags & (SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_DECAP | SFE_IPV4_CONNECTION_MATCH_FLAG_PPPOE_ENCAP)) {
		bytes_read += snprintf(msg + bytes_read, CHAR_DEV_MSG_SIZE - bytes_read,
					"pppoe_session_id=\"%u\" pppoe_server_mac=\"%pM\" ",
					pppoe_session_id, pppoe_remote_mac);
	}

	if (original_cm_sawf_valid) {
		bytes_read += snprintf(msg + bytes_read, CHAR_DEV_MSG_SIZE - bytes_read,
					"flow_service_class=\"%d\" flow_msduq=\"0x%x\" ",
					flow_service_class, flow_msduq);
	}

	if (reply_cm_sawf_valid) {
		bytes_read += snprintf(msg + bytes_read, CHAR_DEV_MSG_SIZE - bytes_read,
					"return_service_class=\"%d\" return_msduq=\"0x%x\" ",
					return_service_class, return_msduq);
	}

	bytes_read += snprintf(msg + bytes_read, CHAR_DEV_MSG_SIZE - bytes_read, "/>\n");

	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	return true;
}
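/*
 * For reference, one emitted <connection .../> record looks roughly like the
 * line below (illustrative values only, wrapped here for readability):
 *
 *	<connection protocol="6" src_dev="eth0"
 *	    src_ip="192.168.1.10" src_ip_xlate="192.168.1.10"
 *	    src_port="34567" src_port_xlate="34567"
 *	    ... dest_dev="eth1" ... last_sync="42" />
 */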
/*
 * sfe_ipv4_debug_dev_read_connections_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						    int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_start()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						     int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_exception()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
							 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int i;
	u64 val = 0;

	for_each_possible_cpu(i) {
		const struct sfe_ipv4_stats *s = per_cpu_ptr(si->stats_pcpu, i);
		val += s->exception_events64[ws->iter_exception];
	}

	if (val) {
		int bytes_read;

		bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
				      "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
				      sfe_ipv4_exception_events_string[ws->iter_exception],
				      val);
		if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
			return false;
		}

		*length -= bytes_read;
		*total_read += bytes_read;
	}

	ws->iter_exception++;
	if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
		ws->iter_exception = 0;
		ws->state++;
	}

	return true;
}

/*
 * sfe_ipv4_debug_dev_read_exceptions_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						   int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * sfe_ipv4_debug_dev_read_stats()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;
	struct sfe_ipv4_stats stats;
	unsigned int num_conn;

	sfe_ipv4_update_summary_stats(si, &stats);

	spin_lock_bh(&si->lock);
	num_conn = si->num_connections;
	spin_unlock_bh(&si->lock);

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
			      "num_connections=\"%u\" "
			      "pkts_dropped=\"%llu\" "
			      "pkts_fast_xmited=\"%llu\" "
			      "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
			      "create_requests=\"%llu\" create_collisions=\"%llu\" "
			      "create_failures=\"%llu\" "
			      "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
			      "flushes=\"%llu\" "
			      "hash_hits=\"%llu\" hash_reorders=\"%llu\" "
			      "pppoe_encap_pkts_fwded=\"%llu\" "
			      "pppoe_decap_pkts_fwded=\"%llu\" "
			      "pppoe_bridge_pkts_fwded=\"%llu\" "
			      "pppoe_bridge_pkts_3tuple_fwded=\"%llu\" />\n",
			      num_conn,
			      stats.packets_dropped64,
			      stats.packets_fast_xmited64,
			      stats.packets_forwarded64,
			      stats.packets_not_forwarded64,
			      stats.connection_create_requests64,
			      stats.connection_create_collisions64,
			      stats.connection_create_failures64,
			      stats.connection_destroy_requests64,
			      stats.connection_destroy_misses64,
			      stats.connection_flushes64,
			      stats.connection_match_hash_hits64,
			      stats.connection_match_hash_reorders64,
			      stats.pppoe_encap_packets_forwarded64,
			      stats.pppoe_decap_packets_forwarded64,
			      stats.pppoe_bridge_packets_forwarded64,
			      stats.pppoe_bridge_packets_3tuple_forwarded64);
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}
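/*
 * Putting the pieces together, a full read of the debug device yields a
 * document shaped like this (illustrative values):
 *
 *	<sfe_ipv4>
 *		<connections>
 *			<connection protocol="6" ... />
 *		</connections>
 *		<exceptions>
 *			<exception name="UDP_NO_CONNECTION" count="3" />
 *		</exceptions>
 *		<stats num_connections="1" ... />
 *	</sfe_ipv4>
 */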
/*
 * sfe_ipv4_debug_dev_read_end()
 *	Generate part of the XML output.
 */
static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
					int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
{
	int bytes_read;

	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
		return false;
	}

	*length -= bytes_read;
	*total_read += bytes_read;

	ws->state++;
	return true;
}

/*
 * Array of write functions that write various XML elements that correspond to
 * our XML output state machine.
 */
static sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
	sfe_ipv4_debug_dev_read_start,
	sfe_ipv4_debug_dev_read_connections_start,
	sfe_ipv4_debug_dev_read_connections_connection,
	sfe_ipv4_debug_dev_read_connections_end,
	sfe_ipv4_debug_dev_read_exceptions_start,
	sfe_ipv4_debug_dev_read_exceptions_exception,
	sfe_ipv4_debug_dev_read_exceptions_end,
	sfe_ipv4_debug_dev_read_stats,
	sfe_ipv4_debug_dev_read_end,
};

/*
 * sfe_ipv4_debug_dev_read()
 *	Send info to userspace upon read request from user
 */
static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
{
	char msg[CHAR_DEV_MSG_SIZE];
	int total_read = 0;
	struct sfe_ipv4_debug_xml_write_state *ws;
	struct sfe_ipv4 *si = &__si;

	ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
	while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
		if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
			continue;
		}

		/*
		 * A write method failed (copy_to_user); stop rather than
		 * spin on the same state forever.
		 */
		break;
	}

	return total_read;
}

/*
 * sfe_ipv4_debug_dev_open()
 */
static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (!ws) {
		ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
		if (!ws) {
			return -ENOMEM;
		}

		ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
		file->private_data = ws;
	}

	return 0;
}

/*
 * sfe_ipv4_debug_dev_release()
 */
static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
{
	struct sfe_ipv4_debug_xml_write_state *ws;

	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
	if (ws) {
		/*
		 * We've finished with our output so free the write state.
		 */
		kfree(ws);
		file->private_data = NULL;
	}

	return 0;
}

/*
 * File operations used in the debug char device
 */
static struct file_operations sfe_ipv4_debug_dev_fops = {
	.read = sfe_ipv4_debug_dev_read,
	.open = sfe_ipv4_debug_dev_open,
	.release = sfe_ipv4_debug_dev_release
};
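/*
 * Illustrative userspace sketch (not kernel code, hence compiled out): the
 * char device major is dynamic and is published via /sys/sfe_ipv4/debug_dev.
 * A hypothetical reader fetches the major, creates a node for it, and reads
 * the XML in one pass. The node path is an assumption for the example; the
 * read buffer must exceed CHAR_DEV_MSG_SIZE (768) or the read loop above
 * makes no progress.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	FILE *f = fopen("/sys/sfe_ipv4/debug_dev", "r");
	int major, fd;
	char buf[1024];
	ssize_t n;

	if (!f || fscanf(f, "%d", &major) != 1)
		return 1;
	fclose(f);

	/* Hypothetical node path; any path works. */
	mknod("/tmp/sfe_ipv4_debug", S_IFCHR | 0600, makedev(major, 0));

	fd = open("/tmp/sfe_ipv4_debug", O_RDONLY);
	if (fd < 0)
		return 1;

	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(fd);
	return 0;
}
#endif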
#ifdef CONFIG_NF_FLOW_COOKIE
/*
 * sfe_register_flow_cookie_cb()
 *	Register a function that SFE can use to configure the flow cookie for a flow.
 *
 * A hardware driver that supports flow cookies should register a callback
 * function with SFE; SFE then calls it to configure the flow cookie for
 * each flow.
 *
 * return: 0, success; !=0, fail
 */
int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	BUG_ON(!cb);

	if (si->flow_cookie_set_func) {
		return -1;
	}

	rcu_assign_pointer(si->flow_cookie_set_func, cb);
	return 0;
}

/*
 * sfe_unregister_flow_cookie_cb()
 *	Unregister the function used to configure flow cookies.
 *
 * return: 0, success; !=0, fail
 */
int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
{
	struct sfe_ipv4 *si = &__si;

	RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
	return 0;
}

/*
 * sfe_ipv4_get_flow_cookie()
 */
static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct sfe_ipv4 *si = &__si;

	return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
}

/*
 * sfe_ipv4_set_flow_cookie()
 */
static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev, struct device_attribute *attr, const char *buf, size_t size)
{
	struct sfe_ipv4 *si = &__si;

	si->flow_cookie_enable = simple_strtol(buf, NULL, 0);
	return size;
}

/*
 * sysfs attributes.
 */
static const struct device_attribute sfe_ipv4_flow_cookie_attr =
	__ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie);
#endif /*CONFIG_NF_FLOW_COOKIE*/

/*
 * sfe_ipv4_get_cpu()
 */
static ssize_t sfe_ipv4_get_cpu(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct sfe_ipv4 *si = &__si;

	return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->work_cpu);
}

/*
 * sfe_ipv4_set_cpu()
 *	Set the CPU that runs the stats-sync work. NR_CPUS is accepted too,
 *	since it equals WORK_CPU_UNBOUND.
 */
static ssize_t sfe_ipv4_set_cpu(struct device *dev, struct device_attribute *attr, const char *buf, size_t size)
{
	struct sfe_ipv4 *si = &__si;
	int work_cpu;

	work_cpu = simple_strtol(buf, NULL, 0);
	if ((work_cpu >= 0) && (work_cpu <= NR_CPUS)) {
		si->work_cpu = work_cpu;
	} else {
		dev_err(dev, "%s is not in valid range [0,%d]\n", buf, NR_CPUS);
	}
	return size;
}

/*
 * sysfs attributes.
 */
static const struct device_attribute sfe_ipv4_cpu_attr =
	__ATTR(stats_work_cpu, S_IWUSR | S_IRUGO, sfe_ipv4_get_cpu, sfe_ipv4_set_cpu);

/*
 * sfe_ipv4_conn_match_hash_init()
 *	Initialize conn match hash lists
 */
static void sfe_ipv4_conn_match_hash_init(struct sfe_ipv4 *si, int len)
{
	struct hlist_head *hash_list = si->hlist_conn_match_hash_head;
	int i;

	for (i = 0; i < len; i++) {
		INIT_HLIST_HEAD(&hash_list[i]);
	}
}

#ifdef SFE_PROCESS_LOCAL_OUT
/*
 * sfe_ipv4_local_out()
 *	Called for packets from ip_local_out() - post encapsulation & other packets
 */
static unsigned int sfe_ipv4_local_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs)
{
	struct sfe_l2_info l2_info = {0};

	DEBUG_TRACE("%px: sfe: sfe_ipv4_local_out hook called.\n", skb);

	if (likely(skb->skb_iif)) {
		return sfe_ipv4_recv(skb->dev, skb, &l2_info, true) ? NF_STOLEN : NF_ACCEPT;
	}

	return NF_ACCEPT;
}

/*
 * struct nf_hook_ops sfe_ipv4_ops_local_out[]
 *	Hooks into netfilter local out packet monitoring points.
 */
static struct nf_hook_ops sfe_ipv4_ops_local_out[] __read_mostly = {
	/*
	 * Local out routing hook is used to monitor packets.
	 */
	{
		.hook = sfe_ipv4_local_out,
		.pf = PF_INET,
		.hooknum = NF_INET_LOCAL_OUT,
		.priority = NF_IP_PRI_FIRST,
	},
};
#endif
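/*
 * Usage note (illustrative): both knobs above appear under /sys/sfe_ipv4/.
 * For example, to move the stats-sync work onto CPU 2 and read it back:
 *
 *	echo 2 > /sys/sfe_ipv4/stats_work_cpu
 *	cat /sys/sfe_ipv4/stats_work_cpu
 */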
/*
 * sfe_ipv4_init()
 */
int sfe_ipv4_init(void)
{
	struct sfe_ipv4 *si = &__si;
	int result = -1;

	DEBUG_INFO("SFE IPv4 init\n");

	sfe_ipv4_conn_match_hash_init(si, ARRAY_SIZE(si->hlist_conn_match_hash_head));

	si->stats_pcpu = alloc_percpu_gfp(struct sfe_ipv4_stats, GFP_KERNEL | __GFP_ZERO);
	if (!si->stats_pcpu) {
		DEBUG_ERROR("failed to allocate stats memory for sfe_ipv4\n");
		goto exit0;
	}

	/*
	 * Allocate per-CPU, per-service-class memory.
	 */
	si->stats_pcpu_psc = alloc_percpu_gfp(struct sfe_ipv4_service_class_stats_db, GFP_KERNEL | __GFP_ZERO);
	if (!si->stats_pcpu_psc) {
		DEBUG_ERROR("failed to allocate per-CPU per-service-class stats memory\n");
		goto exit1;
	}

	/*
	 * Create /sys/sfe_ipv4
	 */
	si->sys_ipv4 = kobject_create_and_add("sfe_ipv4", NULL);
	if (!si->sys_ipv4) {
		DEBUG_ERROR("failed to register sfe_ipv4\n");
		goto exit2;
	}

	/*
	 * Create files, one for each parameter supported by this module.
	 */
	result = sysfs_create_file(si->sys_ipv4, &sfe_ipv4_debug_dev_attr.attr);
	if (result) {
		DEBUG_ERROR("failed to register debug dev file: %d\n", result);
		goto exit3;
	}

	result = sysfs_create_file(si->sys_ipv4, &sfe_ipv4_cpu_attr.attr);
	if (result) {
		DEBUG_ERROR("failed to register stats_work_cpu file: %d\n", result);
		goto exit4;
	}

#ifdef CONFIG_NF_FLOW_COOKIE
	result = sysfs_create_file(si->sys_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
	if (result) {
		DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
		goto exit5;
	}
#endif /* CONFIG_NF_FLOW_COOKIE */

#ifdef SFE_PROCESS_LOCAL_OUT
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0))
	result = nf_register_hooks(sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out));
#else
	result = nf_register_net_hooks(&init_net, sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out));
#endif
	if (result < 0) {
		DEBUG_ERROR("can't register nf local out hook: %d\n", result);
		goto exit6;
	}
	DEBUG_INFO("Register nf local out hook success: %d\n", result);
#endif

	/*
	 * Register our debug char device.
	 */
	result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
	if (result < 0) {
		DEBUG_ERROR("Failed to register chrdev: %d\n", result);
		goto exit7;
	}

	si->debug_dev = result;
	si->work_cpu = WORK_CPU_UNBOUND;

	/*
	 * Create a work item to handle stats-sync pull messages from ECM.
	 */
	INIT_DELAYED_WORK(&(si->sync_dwork), sfe_ipv4_periodic_sync);

	/*
	 * Allocate a message for the many-connection stats sync.
	 */
	sfe_ipv4_sync_many_msg = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sfe_ipv4_sync_many_msg) {
		goto exit8;
	}

	sfe_ipv4_msg_init(sfe_ipv4_sync_many_msg, SFE_SPECIAL_INTERFACE_IPV4,
			  SFE_TX_CONN_STATS_SYNC_MANY_MSG,
			  sizeof(struct sfe_ipv4_conn_sync_many_msg),
			  NULL, NULL);
	sfe_ipv4_sync_max_number = (PAGE_SIZE - sizeof(struct sfe_ipv4_msg)) / sizeof(struct sfe_ipv4_conn_sync);

	spin_lock_init(&si->lock);

	return 0;

exit8:
	unregister_chrdev(si->debug_dev, "sfe_ipv4");

exit7:
#ifdef SFE_PROCESS_LOCAL_OUT
	DEBUG_TRACE("sfe: Unregister local out hook\n");
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0))
	nf_unregister_hooks(sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out));
#else
	nf_unregister_net_hooks(&init_net, sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out));
#endif
exit6:
#endif
#ifdef CONFIG_NF_FLOW_COOKIE
	sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_flow_cookie_attr.attr);

exit5:
#endif /* CONFIG_NF_FLOW_COOKIE */
	sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_cpu_attr.attr);

exit4:
	sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_debug_dev_attr.attr);

exit3:
	kobject_put(si->sys_ipv4);

exit2:
	free_percpu(si->stats_pcpu_psc);

exit1:
	free_percpu(si->stats_pcpu);

exit0:
	return result;
}
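/*
 * Worked example for sfe_ipv4_sync_max_number (structure sizes are
 * hypothetical; the real values depend on the message layouts): with
 * PAGE_SIZE = 4096 and assuming sizeof(struct sfe_ipv4_msg) = 416 and
 * sizeof(struct sfe_ipv4_conn_sync) = 112, the page-sized sync message
 * carries (4096 - 416) / 112 = 32 connection records per round (integer
 * division).
 */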
/*
 * sfe_ipv4_exit()
 */
void sfe_ipv4_exit(void)
{
	struct sfe_ipv4 *si = &__si;

	DEBUG_INFO("SFE IPv4 exit\n");

	/*
	 * Destroy all connections.
	 */
	sfe_ipv4_destroy_all_rules_for_dev(NULL);

	cancel_delayed_work_sync(&si->sync_dwork);

	/*
	 * Free the many-connection sync message allocated in sfe_ipv4_init()
	 * so module unload does not leak it.
	 */
	kfree(sfe_ipv4_sync_many_msg);

	unregister_chrdev(si->debug_dev, "sfe_ipv4");

#ifdef SFE_PROCESS_LOCAL_OUT
	DEBUG_TRACE("sfe: Unregister local out hook\n");
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0))
	nf_unregister_hooks(sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out));
#else
	nf_unregister_net_hooks(&init_net, sfe_ipv4_ops_local_out, ARRAY_SIZE(sfe_ipv4_ops_local_out));
#endif
#endif

#ifdef CONFIG_NF_FLOW_COOKIE
	sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
#endif /* CONFIG_NF_FLOW_COOKIE */
	sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_debug_dev_attr.attr);
	sysfs_remove_file(si->sys_ipv4, &sfe_ipv4_cpu_attr.attr);

	kobject_put(si->sys_ipv4);

	free_percpu(si->stats_pcpu);
	free_percpu(si->stats_pcpu_psc);
}

#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif