From 0b03f823c2bd08964e4f2ea3802388bc787834b0 Mon Sep 17 00:00:00 2001 From: suyuan168 <175338101@qq.com> Date: Fri, 29 Apr 2022 16:13:26 +0800 Subject: [PATCH] fix 60xx --- fast-classifier/Makefile | 92 + fast-classifier/src/Makefile | 10 + fast-classifier/src/fast-classifier.c | 1976 +++++++++ fast-classifier/src/fast-classifier.h | 57 + fast-classifier/src/nl_classifier_test.c | 281 ++ fast-classifier/src/sfe.h | 114 + fast-classifier/src/sfe_backport.h | 195 + fast-classifier/src/sfe_cm.h | 259 ++ fast-classifier/src/userspace_example.c | 232 ++ golang-protobuf/Makefile | 2 +- iproute2/Makefile | 55 +- iproute2/patches/170-ip_tiny.patch | 110 - .../patches/195-build_variant_ip_tc.patch | 22 - lcd4linux/Config.in | 490 +++ lcd4linux/Makefile | 306 ++ lcd4linux/files/lcd4linux.init | 15 + .../120-remove-as-needed-linker-option.patch | 11 + lcd4linux/patches/140-no_repnop_T6963.patch | 22 + lcd4linux/patches/150-addlibmpdclient.patch | 2624 ++++++++++++ lcd4linux/patches/160-uinput_defs.patch | 24 + .../170-add-generic-spidev-driver.patch | 195 + .../patches/173-glcd2usb-bigendian-fix.patch | 20 + .../root/etc/init.d/openmptcprouter | 4 +- modemmanager/Makefile | 4 +- modemmanager/files/modemmanager.common | 43 +- modemmanager/files/modemmanager.init | 4 - modemmanager/files/modemmanager.proto | 21 +- .../files/usr/sbin/ModemManager-wrapper | 0 netifd/Makefile | 6 +- openmptcprouter-full/Makefile | 5 +- .../files/etc/uci-defaults/2060-omr-system | 4 +- shadowsocks-v2ray-plugin/Makefile | 2 +- shortcut-fe/Makefile | 77 + shortcut-fe/files/usr/bin/sfe_dump | 35 + shortcut-fe/src/Kconfig | 15 + shortcut-fe/src/Makefile | 23 + shortcut-fe/src/sfe.h | 114 + shortcut-fe/src/sfe_backport.h | 195 + shortcut-fe/src/sfe_cm.c | 1146 ++++++ shortcut-fe/src/sfe_cm.h | 259 ++ shortcut-fe/src/sfe_ipv4.c | 3621 ++++++++++++++++ shortcut-fe/src/sfe_ipv6.c | 3628 +++++++++++++++++ 42 files changed, 16095 insertions(+), 223 deletions(-) create mode 100755 fast-classifier/Makefile create mode 100755 fast-classifier/src/Makefile create mode 100755 fast-classifier/src/fast-classifier.c create mode 100755 fast-classifier/src/fast-classifier.h create mode 100755 fast-classifier/src/nl_classifier_test.c create mode 100755 fast-classifier/src/sfe.h create mode 100755 fast-classifier/src/sfe_backport.h create mode 100755 fast-classifier/src/sfe_cm.h create mode 100755 fast-classifier/src/userspace_example.c delete mode 100644 iproute2/patches/170-ip_tiny.patch delete mode 100644 iproute2/patches/195-build_variant_ip_tc.patch create mode 100755 lcd4linux/Config.in create mode 100755 lcd4linux/Makefile create mode 100755 lcd4linux/files/lcd4linux.init create mode 100755 lcd4linux/patches/120-remove-as-needed-linker-option.patch create mode 100755 lcd4linux/patches/140-no_repnop_T6963.patch create mode 100755 lcd4linux/patches/150-addlibmpdclient.patch create mode 100755 lcd4linux/patches/160-uinput_defs.patch create mode 100755 lcd4linux/patches/170-add-generic-spidev-driver.patch create mode 100755 lcd4linux/patches/173-glcd2usb-bigendian-fix.patch mode change 100755 => 100644 modemmanager/files/modemmanager.init mode change 100644 => 100755 modemmanager/files/usr/sbin/ModemManager-wrapper create mode 100755 shortcut-fe/Makefile create mode 100755 shortcut-fe/files/usr/bin/sfe_dump create mode 100755 shortcut-fe/src/Kconfig create mode 100755 shortcut-fe/src/Makefile create mode 100755 shortcut-fe/src/sfe.h create mode 100755 shortcut-fe/src/sfe_backport.h create mode 100755 shortcut-fe/src/sfe_cm.c create mode 
100755 shortcut-fe/src/sfe_cm.h create mode 100755 shortcut-fe/src/sfe_ipv4.c create mode 100755 shortcut-fe/src/sfe_ipv6.c diff --git a/fast-classifier/Makefile b/fast-classifier/Makefile new file mode 100755 index 000000000..29d024e7b --- /dev/null +++ b/fast-classifier/Makefile @@ -0,0 +1,92 @@ +include $(TOPDIR)/rules.mk +include $(INCLUDE_DIR)/kernel.mk + +PKG_NAME:=fast-classifier +PKG_RELEASE:=1 +PKG_CONFIG_DEPENDS := CONFIG_IPV6 + +include $(INCLUDE_DIR)/package.mk + +define KernelPackage/$(PKG_NAME)/Default + SECTION:=kernel + CATEGORY:=Kernel modules + SUBMENU:=Network Support + DEPENDS:=+kmod-ipt-conntrack +kmod-shortcut-fe + TITLE:=Kernel driver for FAST Classifier + FILES:=$(PKG_BUILD_DIR)/fast-classifier.ko + KCONFIG:=CONFIG_NF_CONNTRACK_CHAIN_EVENTS=y CONFIG_NF_CONNTRACK_MARK=y + PROVIDES:=$(PKG_NAME) +endef + +define KernelPackage/$(PKG_NAME) + $(call KernelPackage/$(PKG_NAME)/Default) +endef + +define KernelPackage/$(PKG_NAME)-noload + $(call KernelPackage/$(PKG_NAME)/Default) +endef + +define KernelPackage/$(PKG_NAME)/Default/description +FAST Classifier talks to SFE to make decisions about offloading connections +endef + +define KernelPackage/$(PKG_NAME)/description +$(call KernelPackage/$(PKG_NAME)/Default/description) +endef + +define KernelPackage/$(PKG_NAME)-noload/description +$(call KernelPackage/$(PKG_NAME)/Default/description) + +This package does not load $(PKG_NAME) at boot by default +endef + +define Package/fast-classifier-example + TITLE:=Example user space program for fast-classifier + DEPENDS:=+libnl +kmod-fast-classifier +endef + +define Package/fast-classifier-example/description +Example user space program that communicates with fast +classifier kernel module +endef + +SFE_MAKE_OPTS:=SFE_SUPPORT_IPV6=$(if $(CONFIG_IPV6),y,n) + +define Build/Compile/kmod + +$(MAKE) $(PKG_JOBS) -C "$(LINUX_DIR)" $(SFE_MAKE_OPTS) \ + $(KERNEL_MAKE_FLAGS) \ + $(PKG_MAKE_FLAGS) \ + M="$(PKG_BUILD_DIR)" \ + CONFIG_FAST_CLASSIFIER=m \ + EXTRA_CFLAGS="$(EXTRA_CFLAGS)" \ + modules +endef + +define Build/Compile/example + $(TARGET_CC) -o $(PKG_BUILD_DIR)/userspace_fast_classifier \ + -I $(PKG_BUILD_DIR) \ + -I$(STAGING_DIR)/usr/include/libnl \ + -I$(STAGING_DIR)/usr/include/libnl3 \ + -lnl-genl-3 -lnl-3 \ + $(PKG_BUILD_DIR)/nl_classifier_test.c +endef + +define Build/Compile + $(Build/Compile/kmod) + $(if $(CONFIG_PACKAGE_fast-classifier-example),$(Build/Compile/example)) +endef + +define Build/InstallDev + $(INSTALL_DIR) $(1)/usr/include + $(CP) $(PKG_BUILD_DIR)/fast-classifier.h $(1)/usr/include/ +endef + + +define Package/fast-classifier-example/install + $(INSTALL_DIR) $(1)/sbin + $(CP) $(PKG_BUILD_DIR)/userspace_fast_classifier $(1)/sbin/ +endef + +$(eval $(call KernelPackage,$(PKG_NAME))) +$(eval $(call KernelPackage,$(PKG_NAME)-noload)) +#$(eval $(call BuildPackage,fast-classifier-example)) diff --git a/fast-classifier/src/Makefile b/fast-classifier/src/Makefile new file mode 100755 index 000000000..58dd06e01 --- /dev/null +++ b/fast-classifier/src/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_FAST_CLASSIFIER) += fast-classifier.o + +ifeq ($(SFE_SUPPORT_IPV6),) +SFE_SUPPORT_IPV6=y +endif +ccflags-$(SFE_SUPPORT_IPV6) += -DSFE_SUPPORT_IPV6 + +ccflags-y += -I$(obj)/../shortcut-fe + +obj ?= . diff --git a/fast-classifier/src/fast-classifier.c b/fast-classifier/src/fast-classifier.c new file mode 100755 index 000000000..7ca5d973b --- /dev/null +++ b/fast-classifier/src/fast-classifier.c @@ -0,0 +1,1976 @@ +/* + * fast-classifier.c + * Shortcut forwarding engine connection manager. 
+ * fast-classifier
+ *
+ * Copyright (c) 2013-2018 The Linux Foundation. All rights reserved.
+ * Permission to use, copy, modify, and/or distribute this software for
+ * any purpose with or without fee is hereby granted, provided that the
+ * above copyright notice and this permission notice appear in all copies.
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/sysfs.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/hashtable.h>
+#include <linux/inetdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/xt_dscp.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/dsfield.h>
+#include <net/genetlink.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+#include "sfe_backport.h"
+#include "sfe.h"
+#include "sfe_cm.h"
+#include "fast-classifier.h"
+
+typedef enum fast_classifier_exception {
+	FAST_CL_EXCEPTION_PACKET_BROADCAST,
+	FAST_CL_EXCEPTION_PACKET_MULTICAST,
+	FAST_CL_EXCEPTION_NO_IIF,
+	FAST_CL_EXCEPTION_NO_CT,
+	FAST_CL_EXCEPTION_CT_NO_TRACK,
+	FAST_CL_EXCEPTION_CT_NO_CONFIRM,
+	FAST_CL_EXCEPTION_CT_IS_ALG,
+	FAST_CL_EXCEPTION_IS_IPV4_MCAST,
+	FAST_CL_EXCEPTION_IS_IPV6_MCAST,
+	FAST_CL_EXCEPTION_TCP_NOT_ASSURED,
+	FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED,
+	FAST_CL_EXCEPTION_UNKNOW_PROTOCOL,
+	FAST_CL_EXCEPTION_NO_SRC_DEV,
+	FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV,
+	FAST_CL_EXCEPTION_NO_DEST_DEV,
+	FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV,
+	FAST_CL_EXCEPTION_NO_BRIDGE,
+	FAST_CL_EXCEPTION_LOCAL_OUT,
+	FAST_CL_EXCEPTION_WAIT_FOR_ACCELERATION,
+	FAST_CL_EXCEPTION_UPDATE_PROTOCOL_FAIL,
+	FAST_CL_EXCEPTION_CT_DESTROY_MISS,
+	FAST_CL_EXCEPTION_MAX
+} fast_classifier_exception_t;
+
+static char *fast_classifier_exception_events_string[FAST_CL_EXCEPTION_MAX] = {
+	"PACKET_BROADCAST",
+	"PACKET_MULTICAST",
+	"NO_IIF",
+	"NO_CT",
+	"CT_NO_TRACK",
+	"CT_NO_CONFIRM",
+	"CT_IS_ALG",
+	"IS_IPV4_MCAST",
+	"IS_IPV6_MCAST",
+	"TCP_NOT_ASSURED",
+	"TCP_NOT_ESTABLISHED",
+	"UNKNOW_PROTOCOL",
+	"NO_SRC_DEV",
+	"NO_SRC_XLATE_DEV",
+	"NO_DEST_DEV",
+	"NO_DEST_XLATE_DEV",
+	"NO_BRIDGE",
+	"LOCAL_OUT",
+	"WAIT_FOR_ACCELERATION",
+	"UPDATE_PROTOCOL_FAIL",
+	"CT_DESTROY_MISS",
+};
+
+/*
+ * Per-module structure.
+ */
+struct fast_classifier {
+	spinlock_t lock;		/* Lock for SMP correctness */
+
+	/*
+	 * Control state.
+	 */
+	struct kobject *sys_fast_classifier;	/* sysfs linkage */
+
+	/*
+	 * Callback notifiers.
+	 */
+	struct notifier_block dev_notifier;	/* Device notifier */
+	struct notifier_block inet_notifier;	/* IPv4 notifier */
+	struct notifier_block inet6_notifier;	/* IPv6 notifier */
+	u32 exceptions[FAST_CL_EXCEPTION_MAX];
+};
+
+static struct fast_classifier __sc;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
+static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = {
+	[FAST_CLASSIFIER_A_TUPLE] = {
+		.type = NLA_UNSPEC,
+		.len = sizeof(struct fast_classifier_tuple)
+	},
+};
+#endif /*KERNEL_VERSION(5, 2, 0)*/
+
+static struct genl_multicast_group fast_classifier_genl_mcgrp[] = {
+	{
+		.name = FAST_CLASSIFIER_GENL_MCGRP,
+	},
+};
+
+static int fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info);
+static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb, struct netlink_callback *cb);
+
+static struct genl_ops fast_classifier_gnl_ops[] = {
+	{
+		.cmd = FAST_CLASSIFIER_C_OFFLOAD,
+		.flags = 0,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
+		.policy = fast_classifier_genl_policy,
+#endif /*KERNEL_VERSION(5, 2, 0)*/
+		.doit = fast_classifier_offload_genl_msg,
+		.dumpit = NULL,
+	},
+	{
+		.cmd = FAST_CLASSIFIER_C_OFFLOADED,
+		.flags = 0,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
+		.policy = fast_classifier_genl_policy,
+#endif /*KERNEL_VERSION(5, 2, 0)*/
+		.doit = NULL,
+		.dumpit = fast_classifier_nl_genl_msg_DUMP,
+	},
+	{
+		.cmd = FAST_CLASSIFIER_C_DONE,
+		.flags = 0,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
+		.policy = fast_classifier_genl_policy,
+#endif /*KERNEL_VERSION(5, 2, 0)*/
+		.doit = NULL,
+		.dumpit = fast_classifier_nl_genl_msg_DUMP,
+	},
+};
+
+static struct genl_family fast_classifier_gnl_family = {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	.id = GENL_ID_GENERATE,
+#endif /*KERNEL_VERSION(4, 10, 0)*/
+	.hdrsize = FAST_CLASSIFIER_GENL_HDRSIZE,
+	.name = FAST_CLASSIFIER_GENL_NAME,
+	.version = FAST_CLASSIFIER_GENL_VERSION,
+	.maxattr = FAST_CLASSIFIER_A_MAX,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	.ops = fast_classifier_gnl_ops,
+	.n_ops = ARRAY_SIZE(fast_classifier_gnl_ops),
+	.mcgrps = fast_classifier_genl_mcgrp,
+	.n_mcgrps = ARRAY_SIZE(fast_classifier_genl_mcgrp),
+#endif /*KERNEL_VERSION(4, 10, 0)*/
+};
+
+static atomic_t offload_msgs = ATOMIC_INIT(0);
+static atomic_t offload_no_match_msgs = ATOMIC_INIT(0);
+static atomic_t offloaded_msgs = ATOMIC_INIT(0);
+static atomic_t done_msgs = ATOMIC_INIT(0);
+
+static atomic_t offloaded_fail_msgs = ATOMIC_INIT(0);
+static atomic_t done_fail_msgs = ATOMIC_INIT(0);
+
+/*
+ * Accelerate incoming packets destined for a bridge device.
+ * If an incoming packet is ultimately destined for
+ * a bridge device we will first see the packet coming
+ * from the physical device; we can skip straight to
+ * processing the packet as if it came from the bridge,
+ * for some more performance gains.
+ *
+ * This only works when the hook is above the bridge. We
+ * only implement ingress for now, because for egress we
+ * want to have the bridge devices qdiscs be used.
+ */
+static bool skip_to_bridge_ingress;
+
+/*
+ * fast_classifier_incr_exceptions()
+ *	Increase an exception counter.
+ */
+static inline void fast_classifier_incr_exceptions(fast_classifier_exception_t except)
+{
+	struct fast_classifier *sc = &__sc;
+
+	spin_lock_bh(&sc->lock);
+	sc->exceptions[except]++;
+	spin_unlock_bh(&sc->lock);
+}
+
+/*
+ * fast_classifier_recv()
+ *	Handle packet receives.
+ *
+ * Returns 1 if the packet is forwarded or 0 if it isn't.
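+ *
+ * Minimal sketch of the intended call site (hypothetical pseudo-code;
+ * the real hook-up is the athrs_fast_nat_recv RCU pointer that
+ * fast_classifier_init() publishes and the patched network stack calls):
+ *
+ *	rcv = rcu_dereference(athrs_fast_nat_recv);
+ *	if (!rcv || !rcv(skb))
+ *		(packet was not offloaded - continue up the normal stack)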
+ */ +int fast_classifier_recv(struct sk_buff *skb) +{ + struct net_device *dev; + struct net_device *master_dev = NULL; + int ret = 0; + + /* + * We know that for the vast majority of packets we need the transport + * layer header so we may as well start to fetch it now! + */ + prefetch(skb->data + 32); + barrier(); + + dev = skb->dev; + + /* + * Process packet like it arrived on the bridge device + */ + if (skip_to_bridge_ingress && + (dev->priv_flags & IFF_BRIDGE_PORT)) { + master_dev = sfe_dev_get_master(dev); + if (!master_dev) { + DEBUG_WARN("master dev is NULL %s\n", dev->name); + goto rx_exit; + } + dev = master_dev; + } + + /* + * We're only interested in IPv4 and IPv6 packets. + */ + if (likely(htons(ETH_P_IP) == skb->protocol)) { + struct in_device *in_dev; + + /* + * Does our input device support IP processing? + */ + in_dev = (struct in_device *)dev->ip_ptr; + if (unlikely(!in_dev)) { + DEBUG_TRACE("no IP processing for device: %s\n", dev->name); + goto rx_exit; + } + + /* + * Does it have an IP address? If it doesn't then we can't do anything + * interesting here! + */ + if (unlikely(!in_dev->ifa_list)) { + DEBUG_TRACE("no IP address for device: %s\n", dev->name); + goto rx_exit; + } + + ret = sfe_ipv4_recv(dev, skb); + + } else if (likely(htons(ETH_P_IPV6) == skb->protocol)) { + struct inet6_dev *in_dev; + + /* + * Does our input device support IPv6 processing? + */ + in_dev = (struct inet6_dev *)dev->ip6_ptr; + if (unlikely(!in_dev)) { + DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name); + goto rx_exit; + } + + /* + * Does it have an IPv6 address? If it doesn't then we can't do anything + * interesting here! + */ + if (unlikely(list_empty(&in_dev->addr_list))) { + DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name); + goto rx_exit; + } + + ret = sfe_ipv6_recv(dev, skb); + + } else { + DEBUG_TRACE("not IP packet\n"); + } + +rx_exit: + if (master_dev) { + dev_put(master_dev); + } + + return ret; +} + +/* + * fast_classifier_find_dev_and_mac_addr() + * Find the device and MAC address for a given IPv4 address. + * + * Returns true if we find the device and MAC address, otherwise false. + * + * We look up the rtable entry for the address and, from its neighbour + * structure, obtain the hardware address. This means this function also + * works if the neighbours are routers too. + */ +static bool fast_classifier_find_dev_and_mac_addr(struct sk_buff *skb, sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, bool is_v4) +{ + struct neighbour *neigh; + struct rtable *rt; + struct rt6_info *rt6; + struct dst_entry *dst; + struct net_device *mac_dev; + + /* + * If we have skb provided, use it as the original code is unable + * to lookup routes that are policy routed. + */ + if (unlikely(skb)) { + dst = skb_dst(skb); + goto skip_dst_lookup; + } + + /* + * Look up the rtable entry for the IP address then get the hardware + * address from its neighbour structure. This means this works when the + * neighbours are routers too. 
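+ *
+ * For example, for a destination reached via a default route the
+ * neighbour entry belongs to the next-hop gateway, so mac_addr ends up
+ * holding the gateway's MAC address rather than that of the final
+ * host, which is exactly what the forwarding rule needs.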
+ */ + if (likely(is_v4)) { + rt = ip_route_output(&init_net, addr->ip, 0, 0, 0); + if (unlikely(IS_ERR(rt))) { + goto ret_fail; + } + + dst = (struct dst_entry *)rt; + } +#ifdef SFE_SUPPORT_IPV6 + else { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)) + rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, NULL, 0); +#else + rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0); +#endif /*KERNEL_VERSION(4, 17, 0)*/ + if (!rt6) { + goto ret_fail; + } + + dst = (struct dst_entry *)rt6; + } +#endif + +skip_dst_lookup: + rcu_read_lock(); + neigh = sfe_dst_get_neighbour(dst, addr); + if (unlikely(!neigh)) { + rcu_read_unlock(); + if (likely(!skb)) + dst_release(dst); + + goto ret_fail; + } + + if (unlikely(!(neigh->nud_state & NUD_VALID))) { + rcu_read_unlock(); + neigh_release(neigh); + if (likely(!skb)) + dst_release(dst); + + goto ret_fail; + } + + mac_dev = neigh->dev; + if (!mac_dev) { + rcu_read_unlock(); + neigh_release(neigh); + if (likely(!skb)) + dst_release(dst); + + goto ret_fail; + } + + memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len); + + dev_hold(mac_dev); + *dev = mac_dev; + rcu_read_unlock(); + neigh_release(neigh); + if (likely(!skb)) + dst_release(dst); + + return true; + +ret_fail: + if (is_v4) { + DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", addr); + + } else { + DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr); + } + + return false; +} + +static DEFINE_SPINLOCK(sfe_connections_lock); + +struct sfe_connection { + struct hlist_node hl; + struct sfe_connection_create *sic; + struct nf_conn *ct; + int hits; + int offload_permit; + int offloaded; + bool is_v4; + unsigned char smac[ETH_ALEN]; + unsigned char dmac[ETH_ALEN]; +}; + +static int sfe_connections_size; + +#define FC_CONN_HASH_ORDER 13 +static DEFINE_HASHTABLE(fc_conn_ht, FC_CONN_HASH_ORDER); + +static u32 fc_conn_hash(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr, + unsigned short sport, unsigned short dport, bool is_v4) +{ + u32 idx, cnt = ((is_v4 ? sizeof(saddr->ip) : sizeof(saddr->ip6))/sizeof(u32)); + u32 hash = 0; + + for (idx = 0; idx < cnt; idx++) { + hash ^= ((u32 *)saddr)[idx] ^ ((u32 *)daddr)[idx]; + } + + return hash ^ (sport | (dport << 16)); +} + +/* + * fast_classifier_update_protocol() + * Update sfe_ipv4_create struct with new protocol information before we offload + */ +static int fast_classifier_update_protocol(struct sfe_connection_create *p_sic, struct nf_conn *ct) +{ + switch (p_sic->protocol) { + case IPPROTO_TCP: + p_sic->src_td_window_scale = ct->proto.tcp.seen[0].td_scale; + p_sic->src_td_max_window = ct->proto.tcp.seen[0].td_maxwin; + p_sic->src_td_end = ct->proto.tcp.seen[0].td_end; + p_sic->src_td_max_end = ct->proto.tcp.seen[0].td_maxend; + p_sic->dest_td_window_scale = ct->proto.tcp.seen[1].td_scale; + p_sic->dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin; + p_sic->dest_td_end = ct->proto.tcp.seen[1].td_end; + p_sic->dest_td_max_end = ct->proto.tcp.seen[1].td_maxend; + + if (nf_ct_tcp_no_window_check + || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL) + || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) { + p_sic->flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK; + } + + /* + * If the connection is shutting down do not manage it. + * state can not be SYN_SENT, SYN_RECV because connection is assured + * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE. 
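+ * e.g. a flow that has already moved to FIN_WAIT must stay on the slow
+ * path so conntrack can observe the FIN/ACK exchange and retire the
+ * connection normally instead of it being frozen in the offload engine.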
+	 */
+	spin_lock(&ct->lock);
+	if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) {
+		spin_unlock(&ct->lock);
+		fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED);
+		DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n",
+			    ct->proto.tcp.state, &p_sic->src_ip, ntohs(p_sic->src_port),
+			    &p_sic->dest_ip, ntohs(p_sic->dest_port));
+		return 0;
+	}
+	spin_unlock(&ct->lock);
+	break;
+
+	case IPPROTO_UDP:
+		break;
+
+	default:
+		fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UNKNOW_PROTOCOL);
+		DEBUG_TRACE("unhandled protocol %d\n", p_sic->protocol);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * fast_classifier_send_genl_msg()
+ *	Function to send a generic netlink message
+ */
+static void fast_classifier_send_genl_msg(int msg, struct fast_classifier_tuple *fc_msg)
+{
+	struct sk_buff *skb;
+	int rc;
+	int buf_len;
+	int total_len;
+	void *msg_head;
+
+	/*
+	 * Calculate our packet payload size.
+	 * Start with our family header.
+	 */
+	buf_len = fast_classifier_gnl_family.hdrsize;
+
+	/*
+	 * Add the nla_total_size of each attribute we're going to nla_put().
+	 */
+	buf_len += nla_total_size(sizeof(*fc_msg));
+
+	/*
+	 * Lastly we need to add space for the NL message header since
+	 * genlmsg_new only accounts for the GENL header and not the
+	 * outer NL header. To do this, we use a NL helper function which
+	 * calculates the total size of a netlink message given a payload size.
+	 * Note this value does not include the GENL header, but that's
+	 * added automatically by genlmsg_new.
+	 */
+	total_len = nlmsg_total_size(buf_len);
+	skb = genlmsg_new(total_len, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	msg_head = genlmsg_put(skb, 0, 0, &fast_classifier_gnl_family, 0, msg);
+	if (!msg_head) {
+		nlmsg_free(skb);
+		return;
+	}
+
+	rc = nla_put(skb, FAST_CLASSIFIER_A_TUPLE, sizeof(struct fast_classifier_tuple), fc_msg);
+	if (rc != 0) {
+		genlmsg_cancel(skb, msg_head);
+		nlmsg_free(skb);
+		return;
+	}
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 19, 0))
+	rc = genlmsg_end(skb, msg_head);
+	if (rc < 0) {
+		genlmsg_cancel(skb, msg_head);
+		nlmsg_free(skb);
+		return;
+	}
+#else
+	genlmsg_end(skb, msg_head);
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+	rc = genlmsg_multicast(&fast_classifier_gnl_family, skb, 0, 0, GFP_ATOMIC);
+#else
+	rc = genlmsg_multicast(skb, 0, fast_classifier_genl_mcgrp[0].id, GFP_ATOMIC);
+#endif
+	switch (msg) {
+	case FAST_CLASSIFIER_C_OFFLOADED:
+		if (rc == 0) {
+			atomic_inc(&offloaded_msgs);
+		} else {
+			atomic_inc(&offloaded_fail_msgs);
+		}
+		break;
+	case FAST_CLASSIFIER_C_DONE:
+		if (rc == 0) {
+			atomic_inc(&done_msgs);
+		} else {
+			atomic_inc(&done_fail_msgs);
+		}
+		break;
+	default:
+		DEBUG_ERROR("fast-classifier: Unknown message type sent!\n");
+		break;
+	}
+
+	DEBUG_TRACE("Notify NL message %d ", msg);
+	if (fc_msg->ethertype == AF_INET) {
+		DEBUG_TRACE("sip=%pI4 dip=%pI4 ", &fc_msg->src_saddr, &fc_msg->dst_saddr);
+	} else {
+		DEBUG_TRACE("sip=%pI6 dip=%pI6 ", &fc_msg->src_saddr, &fc_msg->dst_saddr);
+	}
+	DEBUG_TRACE("protocol=%d sport=%d dport=%d smac=%pM dmac=%pM\n",
+		    fc_msg->proto, fc_msg->sport, fc_msg->dport, fc_msg->smac, fc_msg->dmac);
+}
+
+/*
+ * fast_classifier_find_conn()
+ *	find a connection object in the hash table
+ *	@pre the sfe_connection_lock must be held before calling this function
+ */
+static struct sfe_connection *
+fast_classifier_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr,
+			  unsigned short sport, unsigned short dport,
+			  unsigned char proto, bool is_v4)
+{
+	struct
sfe_connection_create *p_sic; + struct sfe_connection *conn; + u32 key; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) + struct hlist_node *node; +#endif + + key = fc_conn_hash(saddr, daddr, sport, dport, is_v4); + + sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { + if (conn->is_v4 != is_v4) { + continue; + } + + p_sic = conn->sic; + + if (p_sic->protocol == proto && + p_sic->src_port == sport && + p_sic->dest_port == dport && + sfe_addr_equal(&p_sic->src_ip, saddr, is_v4) && + sfe_addr_equal(&p_sic->dest_ip, daddr, is_v4)) { + return conn; + } + } + + DEBUG_TRACE("connection not found\n"); + return NULL; +} + +/* + * fast_classifier_sb_find_conn() + * find a connection object in the hash table according to information of packet + * if not found, reverse the tuple and try again. + * @pre the sfe_connection_lock must be held before calling this function + */ +static struct sfe_connection * +fast_classifier_sb_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr, + unsigned short sport, unsigned short dport, + unsigned char proto, bool is_v4) +{ + struct sfe_connection_create *p_sic; + struct sfe_connection *conn; + u32 key; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) + struct hlist_node *node; +#endif + + key = fc_conn_hash(saddr, daddr, sport, dport, is_v4); + + sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { + if (conn->is_v4 != is_v4) { + continue; + } + + p_sic = conn->sic; + + if (p_sic->protocol == proto && + p_sic->src_port == sport && + p_sic->dest_port_xlate == dport && + sfe_addr_equal(&p_sic->src_ip, saddr, is_v4) && + sfe_addr_equal(&p_sic->dest_ip_xlate, daddr, is_v4)) { + return conn; + } + } + + /* + * Reverse the tuple and try again + */ + key = fc_conn_hash(daddr, saddr, dport, sport, is_v4); + + sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) { + if (conn->is_v4 != is_v4) { + continue; + } + + p_sic = conn->sic; + + if (p_sic->protocol == proto && + p_sic->src_port == dport && + p_sic->dest_port_xlate == sport && + sfe_addr_equal(&p_sic->src_ip, daddr, is_v4) && + sfe_addr_equal(&p_sic->dest_ip_xlate, saddr, is_v4)) { + return conn; + } + } + + DEBUG_TRACE("connection not found\n"); + return NULL; +} + +/* + * fast_classifier_add_conn() + * add a connection object in the hash table if no duplicate + * @conn connection to add + * @return conn if successful, NULL if duplicate + */ +static struct sfe_connection * +fast_classifier_add_conn(struct sfe_connection *conn) +{ + struct sfe_connection_create *sic = conn->sic; + u32 key; + + spin_lock_bh(&sfe_connections_lock); + if (fast_classifier_find_conn(&sic->src_ip, &sic->dest_ip, sic->src_port, + sic->dest_port, sic->protocol, conn->is_v4)) { + spin_unlock_bh(&sfe_connections_lock); + return NULL; + } + + key = fc_conn_hash(&sic->src_ip, &sic->dest_ip, + sic->src_port, sic->dest_port, conn->is_v4); + + hash_add(fc_conn_ht, &conn->hl, key); + sfe_connections_size++; + spin_unlock_bh(&sfe_connections_lock); + + DEBUG_TRACE(" -> adding item to sfe_connections, new size: %d\n", sfe_connections_size); + + if (conn->is_v4) { + DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", + key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port); + } else { + DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", + key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port); + } + + return conn; +} + +/* + * 
fast_classifier_offload_genl_msg() + * Called from user space to offload a connection + */ +static int +fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *na; + struct fast_classifier_tuple *fc_msg; + struct sfe_connection *conn; + + na = info->attrs[FAST_CLASSIFIER_A_TUPLE]; + fc_msg = nla_data(na); + + if (fc_msg->ethertype == AF_INET) { + DEBUG_TRACE("want to offload: %d-%d, %pI4, %pI4, %d, %d SMAC=%pM DMAC=%pM\n", + fc_msg->ethertype, + fc_msg->proto, + &fc_msg->src_saddr, + &fc_msg->dst_saddr, + fc_msg->sport, + fc_msg->dport, + fc_msg->smac, + fc_msg->dmac); + } else { + DEBUG_TRACE("want to offload: %d-%d, %pI6, %pI6, %d, %d SMAC=%pM DMAC=%pM\n", + fc_msg->ethertype, + fc_msg->proto, + &fc_msg->src_saddr, + &fc_msg->dst_saddr, + fc_msg->sport, + fc_msg->dport, + fc_msg->smac, + fc_msg->dmac); + } + + spin_lock_bh(&sfe_connections_lock); + conn = fast_classifier_sb_find_conn((sfe_ip_addr_t *)&fc_msg->src_saddr, + (sfe_ip_addr_t *)&fc_msg->dst_saddr, + fc_msg->sport, + fc_msg->dport, + fc_msg->proto, + (fc_msg->ethertype == AF_INET)); + if (!conn) { + spin_unlock_bh(&sfe_connections_lock); + DEBUG_TRACE("REQUEST OFFLOAD NO MATCH\n"); + atomic_inc(&offload_no_match_msgs); + return 0; + } + + conn->offload_permit = 1; + spin_unlock_bh(&sfe_connections_lock); + atomic_inc(&offload_msgs); + + DEBUG_TRACE("INFO: calling sfe rule creation!\n"); + return 0; +} + +/* + * fast_classifier_nl_genl_msg_DUMP() + * ignore fast_classifier_messages OFFLOADED and DONE + */ +static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return 0; +} + +/* auto offload connection once we have this many packets*/ +static int offload_at_pkts = 128; + +/* + * fast_classifier_post_routing() + * Called for packets about to leave the box - either locally generated or forwarded from another interface + */ +static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4) +{ + int ret; + struct sfe_connection_create sic; + struct sfe_connection_create *p_sic; + struct net_device *in; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct net_device *dev; + struct net_device *src_dev; + struct net_device *dest_dev; + struct net_device *src_dev_tmp; + struct net_device *dest_dev_tmp; + struct net_device *src_br_dev = NULL; + struct net_device *dest_br_dev = NULL; + struct nf_conntrack_tuple orig_tuple; + struct nf_conntrack_tuple reply_tuple; + struct sfe_connection *conn; + struct sk_buff *tmp_skb = NULL; + + /* + * Don't process broadcast or multicast packets. + */ + if (unlikely(skb->pkt_type == PACKET_BROADCAST)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_PACKET_BROADCAST); + DEBUG_TRACE("broadcast, ignoring\n"); + return NF_ACCEPT; + } + if (unlikely(skb->pkt_type == PACKET_MULTICAST)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_PACKET_MULTICAST); + DEBUG_TRACE("multicast, ignoring\n"); + return NF_ACCEPT; + } + + /* + * Don't process packets that are not being forwarded. + */ + in = dev_get_by_index(&init_net, skb->skb_iif); + if (!in) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_IIF); + DEBUG_TRACE("packet not forwarding\n"); + return NF_ACCEPT; + } + + dev_put(in); + + /* + * Don't process packets that aren't being tracked by conntrack. 
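+ *
+ * (This is one of several cheap early gates: broadcast, multicast and
+ * non-forwarded packets were already bounced above, and untracked,
+ * unconfirmed and helper/ALG flows are bounced just below; each
+ * rejection bumps its counter via fast_classifier_incr_exceptions().)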
+ */ + ct = nf_ct_get(skb, &ctinfo); + if (unlikely(!ct)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_CT); + DEBUG_TRACE("no conntrack connection, ignoring\n"); + return NF_ACCEPT; + } + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) + /* + * Don't process untracked connections. + */ + if (unlikely(nf_ct_is_untracked(ct))) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_NO_TRACK); + DEBUG_TRACE("untracked connection\n"); + return NF_ACCEPT; + } +#endif /*KERNEL_VERSION(4, 12, 0)*/ + + /* + * Unconfirmed connection may be dropped by Linux at the final step, + * So we don't process unconfirmed connections. + */ + if (!nf_ct_is_confirmed(ct)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_NO_CONFIRM); + DEBUG_TRACE("unconfirmed connection\n"); + return NF_ACCEPT; + } + + /* + * Don't process connections that require support from a 'helper' (typically a NAT ALG). + */ + if (unlikely(nfct_help(ct))) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_IS_ALG); + DEBUG_TRACE("connection has helper\n"); + return NF_ACCEPT; + } + + memset(&sic, 0, sizeof(sic)); + + /* + * Look up the details of our connection in conntrack. + * + * Note that the data we get from conntrack is for the "ORIGINAL" direction + * but our packet may actually be in the "REPLY" direction. + */ + orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + sic.protocol = (s32)orig_tuple.dst.protonum; + + sic.flags = 0; + + /* + * Get addressing information, non-NAT first + */ + if (likely(is_v4)) { + u32 dscp; + + sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip; + sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; + + if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_IS_IPV4_MCAST); + DEBUG_TRACE("multicast address\n"); + return NF_ACCEPT; + } + + /* + * NAT'ed addresses - note these are as seen from the 'reply' direction + * When NAT does not apply to this connection these will be identical to the above. + */ + sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip; + sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip; + + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; + if (dscp) { + sic.dest_dscp = dscp; + sic.src_dscp = sic.dest_dscp; + sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; + } + } else { + u32 dscp; + + sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); + sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); + + if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) || + ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_IS_IPV6_MCAST); + DEBUG_TRACE("multicast address\n"); + return NF_ACCEPT; + } + + /* + * NAT'ed addresses - note these are as seen from the 'reply' direction + * When NAT does not apply to this connection these will be identical to the above. 
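+ *
+ * Hypothetical example: with NPTv6 translating fd00::a to 2001:db8::a,
+ * the ORIGINAL tuple carries fd00::a while the reply tuple's dst
+ * carries 2001:db8::a, so src_ip_xlate picks up the translated
+ * address; with no NAT66/NPTv6 in play the two pairs are identical.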
+ */ + sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6); + sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6); + + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; + if (dscp) { + sic.dest_dscp = dscp; + sic.src_dscp = sic.dest_dscp; + sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; + } + } + + switch (sic.protocol) { + case IPPROTO_TCP: + sic.src_port = orig_tuple.src.u.tcp.port; + sic.dest_port = orig_tuple.dst.u.tcp.port; + sic.src_port_xlate = reply_tuple.dst.u.tcp.port; + sic.dest_port_xlate = reply_tuple.src.u.tcp.port; + + /* + * Don't try to manage a non-established connection. + */ + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ASSURED); + DEBUG_TRACE("non-established connection\n"); + return NF_ACCEPT; + } + + break; + + case IPPROTO_UDP: + sic.src_port = orig_tuple.src.u.udp.port; + sic.dest_port = orig_tuple.dst.u.udp.port; + sic.src_port_xlate = reply_tuple.dst.u.udp.port; + sic.dest_port_xlate = reply_tuple.src.u.udp.port; + + /* + * Somehow, SFE is not playing nice with IPSec traffic. + * Do not accelerate for now. + */ + if (ntohs(sic.dest_port) == 4500 || ntohs(sic.dest_port) == 500) { + if (likely(is_v4)) + DEBUG_TRACE("quarkysg:: IPsec bypass: %pI4:%d(%pI4:%d) to %pI4:%d(%pI4:%d)\n", + &sic.src_ip.ip, ntohs(sic.src_port), &sic.src_ip_xlate.ip, ntohs(sic.src_port_xlate), + &sic.dest_ip.ip, ntohs(sic.dest_port), &sic.dest_ip_xlate.ip, ntohs(sic.dest_port_xlate)); + else + DEBUG_TRACE("quarkysg:: IPsec bypass: %pI6:%d to %pI6:%d\n", + &sic.src_ip.ip6, ntohs(sic.src_port), &sic.dest_ip.ip6, ntohs(sic.dest_port)); + return NF_ACCEPT; + } + break; + + default: + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UNKNOW_PROTOCOL); + DEBUG_TRACE("unhandled protocol %d\n", sic.protocol); + return NF_ACCEPT; + } + +#ifdef CONFIG_XFRM + sic.original_accel = 1; + sic.reply_accel = 1; +#endif + + /* + * Get QoS information + */ + if (skb->priority) { + sic.dest_priority = skb->priority; + sic.src_priority = sic.dest_priority; + sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY; + } + + if (is_v4) { + DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", + sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port); + } else { + DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", + sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port); + } + + /* + * If we already have this connection in our list, skip it + * XXX: this may need to be optimized + */ + spin_lock_bh(&sfe_connections_lock); + + conn = fast_classifier_find_conn(&sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port, sic.protocol, is_v4); + if (conn) { + conn->hits++; + + if (!conn->offloaded) { + if (conn->offload_permit || conn->hits >= offload_at_pkts) { + DEBUG_TRACE("OFFLOADING CONNECTION, TOO MANY HITS\n"); + + if (fast_classifier_update_protocol(conn->sic, conn->ct) == 0) { + spin_unlock_bh(&sfe_connections_lock); + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UPDATE_PROTOCOL_FAIL); + DEBUG_TRACE("UNKNOWN PROTOCOL OR CONNECTION CLOSING, SKIPPING\n"); + return NF_ACCEPT; + } + + DEBUG_TRACE("INFO: calling sfe rule creation!\n"); + spin_unlock_bh(&sfe_connections_lock); + + ret = is_v4 ? 
sfe_ipv4_create_rule(conn->sic) : sfe_ipv6_create_rule(conn->sic); + if ((ret == 0) || (ret == -EADDRINUSE)) { + struct fast_classifier_tuple fc_msg; + + if (is_v4) { + fc_msg.ethertype = AF_INET; + fc_msg.src_saddr.in = *((struct in_addr *)&sic.src_ip); + fc_msg.dst_saddr.in = *((struct in_addr *)&sic.dest_ip_xlate); + } else { + fc_msg.ethertype = AF_INET6; + fc_msg.src_saddr.in6 = *((struct in6_addr *)&sic.src_ip); + fc_msg.dst_saddr.in6 = *((struct in6_addr *)&sic.dest_ip_xlate); + } + + fc_msg.proto = sic.protocol; + fc_msg.sport = sic.src_port; + fc_msg.dport = sic.dest_port_xlate; + memcpy(fc_msg.smac, conn->smac, ETH_ALEN); + memcpy(fc_msg.dmac, conn->dmac, ETH_ALEN); + fast_classifier_send_genl_msg(FAST_CLASSIFIER_C_OFFLOADED, &fc_msg); + conn->offloaded = 1; + } + + return NF_ACCEPT; + } + } + + spin_unlock_bh(&sfe_connections_lock); + if (conn->offloaded) { + is_v4 ? sfe_ipv4_update_rule(conn->sic) : sfe_ipv6_update_rule(conn->sic); + } + + DEBUG_TRACE("FOUND, SKIPPING\n"); + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_WAIT_FOR_ACCELERATION); + return NF_ACCEPT; + } + + spin_unlock_bh(&sfe_connections_lock); + + /* + * Get the net device and MAC addresses that correspond to the various source and + * destination host addresses. + */ + if (!fast_classifier_find_dev_and_mac_addr(NULL, &sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_DEV); + return NF_ACCEPT; + } + src_dev = src_dev_tmp; + + if (!fast_classifier_find_dev_and_mac_addr(NULL, &sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV); + goto done1; + } + dev_put(dev); + + if (unlikely(!is_v4)) + tmp_skb = skb; + + if (!fast_classifier_find_dev_and_mac_addr(tmp_skb, &sic.dest_ip, &dev, sic.dest_mac, is_v4)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_DEV); + goto done1; + } + dev_put(dev); + + if (!fast_classifier_find_dev_and_mac_addr(skb, &sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV); + goto done1; + } + dest_dev = dest_dev_tmp; + + /* + * Our devices may actually be part of a bridge interface. If that's + * the case then find the bridge interface instead. 
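+ *
+ * e.g. (hypothetical topology) if dest_dev resolves to eth1 but eth1
+ * is a port of br-lan, the rule is created against br-lan instead, so
+ * the offload keeps working as traffic moves between bridge ports.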
+ */ + if (src_dev->priv_flags & IFF_BRIDGE_PORT) { + src_br_dev = sfe_dev_get_master(src_dev); + if (!src_br_dev) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_BRIDGE); + DEBUG_TRACE("no bridge found for: %s\n", src_dev->name); + goto done2; + } + src_dev = src_br_dev; + } + + if (dest_dev->priv_flags & IFF_BRIDGE_PORT) { + dest_br_dev = sfe_dev_get_master(dest_dev); + if (!dest_br_dev) { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_BRIDGE); + DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name); + goto done3; + } + dest_dev = dest_br_dev; + } + + sic.src_dev = src_dev; + sic.dest_dev = dest_dev; + + sic.src_mtu = src_dev->mtu; + sic.dest_mtu = dest_dev->mtu; + + if (skb->mark) { + DEBUG_TRACE("SKB MARK NON ZERO %x\n", skb->mark); + } + sic.mark = skb->mark; + + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); + if (!conn) { + printk(KERN_CRIT "ERROR: no memory for sfe\n"); + goto done4; + } + conn->hits = 0; + conn->offload_permit = 0; + conn->offloaded = 0; + conn->is_v4 = is_v4; + DEBUG_TRACE("Source MAC=%pM\n", sic.src_mac); + memcpy(conn->smac, sic.src_mac, ETH_ALEN); + memcpy(conn->dmac, sic.dest_mac_xlate, ETH_ALEN); + + p_sic = kmalloc(sizeof(*p_sic), GFP_ATOMIC); + if (!p_sic) { + printk(KERN_CRIT "ERROR: no memory for sfe\n"); + kfree(conn); + goto done4; + } + + memcpy(p_sic, &sic, sizeof(sic)); + conn->sic = p_sic; + conn->ct = ct; + + if (!fast_classifier_add_conn(conn)) { + kfree(conn->sic); + kfree(conn); + } + + /* + * If we had bridge ports then release them too. + */ +done4: + if (dest_br_dev) { + dev_put(dest_br_dev); + } +done3: + if (src_br_dev) { + dev_put(src_br_dev); + } +done2: + dev_put(dest_dev_tmp); +done1: + dev_put(src_dev_tmp); + + return NF_ACCEPT; +} + +/* + * fast_classifier_ipv4_post_routing_hook() + * Called for packets about to leave the box - either locally generated or forwarded from another interface + */ +fast_classifier_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) +{ + return fast_classifier_post_routing(skb, true); +} + +/* + * fast_classifier_ipv6_post_routing_hook() + * Called for packets about to leave the box - either locally generated or forwarded from another interface + */ +fast_classifier_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) +{ + return fast_classifier_post_routing(skb, false); +} + +/* + * fast_classifier_update_mark() + * updates the mark for a fast-classifier connection + */ +static void fast_classifier_update_mark(struct sfe_connection_mark *mark, bool is_v4) +{ + struct sfe_connection *conn; + + spin_lock_bh(&sfe_connections_lock); + + conn = fast_classifier_find_conn(&mark->src_ip, &mark->dest_ip, + mark->src_port, mark->dest_port, + mark->protocol, is_v4); + if (conn) { + conn->sic->mark = mark->mark; + } + + spin_unlock_bh(&sfe_connections_lock); +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +/* + * fast_classifier_conntrack_event() + * Callback event invoked when a conntrack connection's state changes. 
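+ *
+ * 'events' is a bitmask of IPCT_* bits; this handler only acts on
+ * (1 << IPCT_MARK), to propagate firewall mark changes into SFE, and
+ * (1 << IPCT_DESTROY), to tear down the offloaded rule when conntrack
+ * drops the connection.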
+ */ +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS +static int fast_classifier_conntrack_event(struct notifier_block *this, + unsigned long events, void *ptr) +#else +static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_event *item) +#endif +{ +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + struct nf_ct_event *item = ptr; +#endif + struct sfe_connection_destroy sid; + struct nf_conn *ct = item->ct; + struct nf_conntrack_tuple orig_tuple; + struct sfe_connection *conn; + struct fast_classifier_tuple fc_msg; + int offloaded = 0; + bool is_v4; + + /* + * If we don't have a conntrack entry then we're done. + */ + if (unlikely(!ct)) { + DEBUG_WARN("no ct in conntrack event callback\n"); + return NOTIFY_DONE; + } + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) + /* + * If this is an untracked connection then we can't have any state either. + */ + if (unlikely(nf_ct_is_untracked(ct))) { + DEBUG_TRACE("ignoring untracked conn\n"); + return NOTIFY_DONE; + } +#endif /*KERNEL_VERSION(4, 12, 0)*/ + + orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + sid.protocol = (s32)orig_tuple.dst.protonum; + + /* + * Extract information from the conntrack connection. We're only interested + * in nominal connection information (i.e. we're ignoring any NAT information). + */ + if (likely(nf_ct_l3num(ct) == AF_INET)) { + sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip; + sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; + is_v4 = true; + } else if (likely(nf_ct_l3num(ct) == AF_INET6)) { + sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); + sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); + is_v4 = false; + } else { + DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n"); + return NOTIFY_DONE; + } + + switch (sid.protocol) { + case IPPROTO_TCP: + sid.src_port = orig_tuple.src.u.tcp.port; + sid.dest_port = orig_tuple.dst.u.tcp.port; + break; + + case IPPROTO_UDP: + sid.src_port = orig_tuple.src.u.udp.port; + sid.dest_port = orig_tuple.dst.u.udp.port; + break; + + default: + DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol); + return NOTIFY_DONE; + } + + /* + * Check for an updated mark + */ + if ((events & (1 << IPCT_MARK)) && (ct->mark != 0)) { + struct sfe_connection_mark mark; + + mark.protocol = sid.protocol; + mark.src_ip = sid.src_ip; + mark.dest_ip = sid.dest_ip; + mark.src_port = sid.src_port; + mark.dest_port = sid.dest_port; + mark.mark = ct->mark; + + is_v4 ? 
sfe_ipv4_mark_rule(&mark) : sfe_ipv6_mark_rule(&mark); + fast_classifier_update_mark(&mark, is_v4); + } + + /* + * We're only interested in destroy events at this point + */ + if (unlikely(!(events & (1 << IPCT_DESTROY)))) { + DEBUG_TRACE("ignoring non-destroy event\n"); + return NOTIFY_DONE; + } + + if (is_v4) { + DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n", + sid.protocol, &sid.src_ip, &sid.dest_ip, ntohs(sid.src_port), ntohs(sid.dest_port)); + } else { + DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n", + sid.protocol, &sid.src_ip, &sid.dest_ip, ntohs(sid.src_port), ntohs(sid.dest_port)); + } + + spin_lock_bh(&sfe_connections_lock); + + conn = fast_classifier_find_conn(&sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port, sid.protocol, is_v4); + if (conn && conn->offloaded) { + if (is_v4) { + fc_msg.ethertype = AF_INET; + fc_msg.src_saddr.in = *((struct in_addr *)&conn->sic->src_ip); + fc_msg.dst_saddr.in = *((struct in_addr *)&conn->sic->dest_ip_xlate); + } else { + fc_msg.ethertype = AF_INET6; + fc_msg.src_saddr.in6 = *((struct in6_addr *)&conn->sic->src_ip); + fc_msg.dst_saddr.in6 = *((struct in6_addr *)&conn->sic->dest_ip_xlate); + } + + fc_msg.proto = conn->sic->protocol; + fc_msg.sport = conn->sic->src_port; + fc_msg.dport = conn->sic->dest_port_xlate; + memcpy(fc_msg.smac, conn->smac, ETH_ALEN); + memcpy(fc_msg.dmac, conn->dmac, ETH_ALEN); + offloaded = 1; + } + + if (conn) { + DEBUG_TRACE("Free connection\n"); + + hash_del(&conn->hl); + sfe_connections_size--; + kfree(conn->sic); + kfree(conn); + } else { + fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_DESTROY_MISS); + } + + spin_unlock_bh(&sfe_connections_lock); + + is_v4 ? sfe_ipv4_destroy_rule(&sid) : sfe_ipv6_destroy_rule(&sid); + + if (offloaded) { + fast_classifier_send_genl_msg(FAST_CLASSIFIER_C_DONE, &fc_msg); + } + + return NOTIFY_DONE; +} + +/* + * Netfilter conntrack event system to monitor connection tracking changes + */ +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS +static struct notifier_block fast_classifier_conntrack_notifier = { + .notifier_call = fast_classifier_conntrack_event, +}; +#else +static struct nf_ct_event_notifier fast_classifier_conntrack_notifier = { + .fcn = fast_classifier_conntrack_event, +}; +#endif +#endif + +/* + * Structure to establish a hook into the post routing netfilter point - this + * will pick up local outbound and packets going from one interface to another. + * + * Note: see include/linux/netfilter_ipv4.h for info related to priority levels. + * We want to examine packets after NAT translation and any ALG processing. + */ +static struct nf_hook_ops fast_classifier_ops_post_routing[] __read_mostly = { + SFE_IPV4_NF_POST_ROUTING_HOOK(__fast_classifier_ipv4_post_routing_hook), + SFE_IPV6_NF_POST_ROUTING_HOOK(__fast_classifier_ipv6_post_routing_hook), +}; + +/* + * fast_classifier_sync_rule() + * Synchronize a connection's state. 
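+ *
+ * SFE invokes this callback with byte/packet deltas for an offloaded
+ * flow. Folding the deltas back into conntrack's accounting and
+ * refreshing its timeout keeps tools such as 'conntrack -L' accurate
+ * even though the offloaded packets no longer traverse netfilter.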
+	 */
+static void fast_classifier_sync_rule(struct sfe_connection_sync *sis)
+{
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conn *ct;
+	SFE_NF_CONN_ACCT(acct);
+
+	/*
+	 * Create a tuple so as to be able to look up a connection
+	 */
+	memset(&tuple, 0, sizeof(tuple));
+	tuple.src.u.all = (__be16)sis->src_port;
+	tuple.dst.dir = IP_CT_DIR_ORIGINAL;
+	tuple.dst.protonum = (u8)sis->protocol;
+	tuple.dst.u.all = (__be16)sis->dest_port;
+
+	if (sis->is_v6) {
+		tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6);
+		tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6);
+		tuple.src.l3num = AF_INET6;
+
+		DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n",
+			    (int)tuple.dst.protonum,
+			    &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all),
+			    &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all));
+	} else {
+		tuple.src.u3.ip = sis->src_ip.ip;
+		tuple.dst.u3.ip = sis->dest_ip.ip;
+		tuple.src.l3num = AF_INET;
+
+		DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n",
+			    (int)tuple.dst.protonum,
+			    &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all),
+			    &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all));
+	}
+
+	/*
+	 * Update packet count for ingress on bridge device
+	 */
+	if (skip_to_bridge_ingress) {
+		struct rtnl_link_stats64 nlstats;
+		nlstats.tx_packets = 0;
+		nlstats.tx_bytes = 0;
+
+		if (sis->src_dev && (sis->src_dev->priv_flags & IFF_EBRIDGE) &&
+		    (sis->src_new_packet_count || sis->src_new_byte_count)) {
+			nlstats.rx_packets = sis->src_new_packet_count;
+			nlstats.rx_bytes = sis->src_new_byte_count;
+			spin_lock_bh(&sfe_connections_lock);
+			br_dev_update_stats(sis->src_dev, &nlstats);
+			spin_unlock_bh(&sfe_connections_lock);
+		}
+		if (sis->dest_dev && (sis->dest_dev->priv_flags & IFF_EBRIDGE) &&
+		    (sis->dest_new_packet_count || sis->dest_new_byte_count)) {
+			nlstats.rx_packets = sis->dest_new_packet_count;
+			nlstats.rx_bytes = sis->dest_new_byte_count;
+			spin_lock_bh(&sfe_connections_lock);
+			br_dev_update_stats(sis->dest_dev, &nlstats);
+			spin_unlock_bh(&sfe_connections_lock);
+		}
+	}
+
+	/*
+	 * Look up conntrack connection
+	 */
+	h = nf_conntrack_find_get(&init_net, SFE_NF_CT_DEFAULT_ZONE, &tuple);
+	if (unlikely(!h)) {
+		DEBUG_TRACE("no connection found\n");
+		return;
+	}
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0))
+	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
+#endif /*KERNEL_VERSION(4, 9, 0)*/
+
+	/*
+	 * Only update if this is not a fixed timeout
+	 */
+	if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
+		spin_lock_bh(&ct->lock);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0))
+		ct->timeout += sis->delta_jiffies;
+#else
+		ct->timeout.expires += sis->delta_jiffies;
+#endif /*KERNEL_VERSION(4, 9, 0)*/
+		spin_unlock_bh(&ct->lock);
+	}
+
+	acct = nf_conn_acct_find(ct);
+	if (acct) {
+		spin_lock_bh(&ct->lock);
+		atomic64_add(sis->src_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets);
+		atomic64_add(sis->src_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes);
+		atomic64_add(sis->dest_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets);
+		atomic64_add(sis->dest_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes);
+		spin_unlock_bh(&ct->lock);
+	}
+
+	switch (sis->protocol) {
+	case IPPROTO_TCP:
+		spin_lock_bh(&ct->lock);
+		if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) {
+			ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window;
+		}
+		if ((s32)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) {
+			ct->proto.tcp.seen[0].td_end =
sis->src_td_end; + } + if ((s32)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) { + ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end; + } + if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) { + ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window; + } + if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) { + ct->proto.tcp.seen[1].td_end = sis->dest_td_end; + } + if ((s32)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) { + ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end; + } + spin_unlock_bh(&ct->lock); + break; + } + + /* + * Release connection + */ + nf_ct_put(ct); +} + +/* + * fast_classifier_device_event() + */ +static int fast_classifier_device_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = SFE_DEV_EVENT_PTR(ptr); + + if (dev && (event == NETDEV_DOWN)) { + sfe_ipv4_destroy_all_rules_for_dev(dev); + sfe_ipv6_destroy_all_rules_for_dev(dev); + } + + return NOTIFY_DONE; +} + +/* + * fast_classifier_inet_event() + */ +static int fast_classifier_inet_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + + if (dev && (event == NETDEV_DOWN)) { + sfe_ipv4_destroy_all_rules_for_dev(dev); + } + + return NOTIFY_DONE; +} + +/* + * fast_classifier_inet6_event() + */ +static int fast_classifier_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev; + + if (dev && (event == NETDEV_DOWN)) { + sfe_ipv6_destroy_all_rules_for_dev(dev); + } + + return NOTIFY_DONE; +} + +/* + * fast_classifier_get_offload_at_pkts() + */ +static ssize_t fast_classifier_get_offload_at_pkts(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", offload_at_pkts); +} + +/* + * fast_classifier_set_offload_at_pkts() + */ +static ssize_t fast_classifier_set_offload_at_pkts(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + long new; + int ret; + + ret = kstrtol(buf, 0, &new); + if (ret == -EINVAL || ((int)new != new)) + return -EINVAL; + + offload_at_pkts = new; + + return size; +} + +/* + * fast_classifier_get_debug_info() + */ +static ssize_t fast_classifier_get_debug_info(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + size_t len = 0; + struct sfe_connection *conn; + u32 i; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)) + struct hlist_node *node; +#endif + + spin_lock_bh(&sfe_connections_lock); + len += scnprintf(buf, PAGE_SIZE - len, "size=%d offload=%d offload_no_match=%d" + " offloaded=%d done=%d offloaded_fail=%d done_fail=%d\n", + sfe_connections_size, + atomic_read(&offload_msgs), + atomic_read(&offload_no_match_msgs), + atomic_read(&offloaded_msgs), + atomic_read(&done_msgs), + atomic_read(&offloaded_fail_msgs), + atomic_read(&done_fail_msgs)); + sfe_hash_for_each(fc_conn_ht, i, node, conn, hl) { + len += scnprintf(buf + len, PAGE_SIZE - len, + (conn->is_v4 ? 
"o=%d, p=%d [%pM]:%pI4:%u %pI4:%u:[%pM] m=%08x h=%d\n" : "o=%d, p=%d [%pM]:%pI6:%u %pI6:%u:[%pM] m=%08x h=%d\n"), + conn->offloaded, + conn->sic->protocol, + conn->sic->src_mac, + &conn->sic->src_ip, + ntohs(conn->sic->src_port), + &conn->sic->dest_ip, + ntohs(conn->sic->dest_port), + conn->sic->dest_mac_xlate, + conn->sic->mark, + conn->hits); + } + spin_unlock_bh(&sfe_connections_lock); + + return len; +} + +/* + * fast_classifier_get_skip_bridge_ingress() + */ +static ssize_t fast_classifier_get_skip_bridge_ingress(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", skip_to_bridge_ingress); +} + +/* + * fast_classifier_set_skip_bridge_ingress() + */ +static ssize_t fast_classifier_set_skip_bridge_ingress(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + long new; + int ret; + + ret = kstrtol(buf, 0, &new); + if (ret == -EINVAL || ((int)new != new)) + return -EINVAL; + + skip_to_bridge_ingress = new ? 1 : 0; + + return size; +} + +/* + * fast_classifier_get_exceptions + * dump exception counters + */ +static ssize_t fast_classifier_get_exceptions(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int idx, len; + struct fast_classifier *sc = &__sc; + + spin_lock_bh(&sc->lock); + for (len = 0, idx = 0; idx < FAST_CL_EXCEPTION_MAX; idx++) { + if (sc->exceptions[idx]) { + len += snprintf(buf + len, (ssize_t)(PAGE_SIZE - len), "%s = %d\n", fast_classifier_exception_events_string[idx], sc->exceptions[idx]); + } + } + spin_unlock_bh(&sc->lock); + + return len; +} + +/* + * sysfs attributes. + */ +static const struct device_attribute fast_classifier_offload_at_pkts_attr = + __ATTR(offload_at_pkts, S_IWUSR | S_IRUGO, fast_classifier_get_offload_at_pkts, fast_classifier_set_offload_at_pkts); +static const struct device_attribute fast_classifier_debug_info_attr = + __ATTR(debug_info, S_IRUGO, fast_classifier_get_debug_info, NULL); +static const struct device_attribute fast_classifier_skip_bridge_ingress = + __ATTR(skip_to_bridge_ingress, S_IWUSR | S_IRUGO, fast_classifier_get_skip_bridge_ingress, fast_classifier_set_skip_bridge_ingress); +static const struct device_attribute fast_classifier_exceptions_attr = + __ATTR(exceptions, S_IRUGO, fast_classifier_get_exceptions, NULL); + +/* + * fast_classifier_init() + */ +static int __init fast_classifier_init(void) +{ + struct fast_classifier *sc = &__sc; + int result = -1; + + printk(KERN_ALERT "fast-classifier (PBR safe v2.1.4a): starting up\n"); + DEBUG_INFO("SFE CM init\n"); + + hash_init(fc_conn_ht); + + /* + * Create sys/fast_classifier + */ + sc->sys_fast_classifier = kobject_create_and_add("fast_classifier", NULL); + if (!sc->sys_fast_classifier) { + DEBUG_ERROR("failed to register fast_classifier\n"); + goto exit1; + } + + result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); + if (result) { + DEBUG_ERROR("failed to register offload at pkgs: %d\n", result); + goto exit2; + } + + result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); + if (result) { + DEBUG_ERROR("failed to register debug dev: %d\n", result); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); + goto exit2; + } + + result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); + if (result) { + DEBUG_ERROR("failed to register skip bridge on ingress: %d\n", result); + 
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); + goto exit2; + } + + result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr); + if (result) { + DEBUG_ERROR("failed to register exceptions file: %d\n", result); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); + goto exit2; + } + + sc->dev_notifier.notifier_call = fast_classifier_device_event; + sc->dev_notifier.priority = 1; + register_netdevice_notifier(&sc->dev_notifier); + + sc->inet_notifier.notifier_call = fast_classifier_inet_event; + sc->inet_notifier.priority = 1; + register_inetaddr_notifier(&sc->inet_notifier); + + sc->inet6_notifier.notifier_call = fast_classifier_inet6_event; + sc->inet6_notifier.priority = 1; + register_inet6addr_notifier(&sc->inet6_notifier); + + /* + * Register our netfilter hooks. + */ + result = nf_register_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); + if (result < 0) { + DEBUG_ERROR("can't register nf post routing hook: %d\n", result); + goto exit3; + } + + /* + * Register a notifier hook to get fast notifications of expired connections. + */ +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + result = nf_conntrack_register_chain_notifier(&init_net, &fast_classifier_conntrack_notifier); +#else + result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier); + if (result < 0) { + DEBUG_ERROR("can't register nf notifier hook: %d\n", result); + goto exit4; + } +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) + result = genl_register_family(&fast_classifier_gnl_family); + if (result) { + DEBUG_ERROR("failed to register genl family: %d\n", result); + goto exit5; + } +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) + result = genl_register_family_with_ops_groups(&fast_classifier_gnl_family, + fast_classifier_gnl_ops, + fast_classifier_genl_mcgrp); + if (result) { + DEBUG_ERROR("failed to register genl ops: %d\n", result); + goto exit5; + } +#else + result = genl_register_family(&fast_classifier_gnl_family); + if (result) { + printk(KERN_CRIT "unable to register genl family\n"); + goto exit5; + } + + result = genl_register_ops(&fast_classifier_gnl_family, fast_classifier_gnl_ops); + if (result) { + printk(KERN_CRIT "unable to register ops\n"); + goto exit6; + } + + result = genl_register_mc_group(&fast_classifier_gnl_family, + fast_classifier_genl_mcgrp); + if (result) { + printk(KERN_CRIT "unable to register multicast group\n"); + goto exit6; + } +#endif + + printk(KERN_ALERT "fast-classifier: registered\n"); + + spin_lock_init(&sc->lock); + + /* + * Hook the receive path in the network stack. + */ + BUG_ON(athrs_fast_nat_recv); + RCU_INIT_POINTER(athrs_fast_nat_recv, fast_classifier_recv); + + /* + * Hook the shortcut sync callback. 
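+ * Both engines will invoke fast_classifier_sync_rule() to push connection stats back into conntrack (see the notes in sfe_cm.h).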
+ */ + sfe_ipv4_register_sync_rule_callback(fast_classifier_sync_rule); + sfe_ipv6_register_sync_rule_callback(fast_classifier_sync_rule); + return 0; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)) +exit6: + genl_unregister_family(&fast_classifier_gnl_family); +#endif + +exit5: +#ifdef CONFIG_NF_CONNTRACK_EVENTS +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + nf_conntrack_unregister_chain_notifier(&init_net, &fast_classifier_conntrack_notifier); +#else + nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier); +#endif + +exit4: +#endif + nf_unregister_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); + +exit3: + unregister_inetaddr_notifier(&sc->inet_notifier); + unregister_inet6addr_notifier(&sc->inet6_notifier); + unregister_netdevice_notifier(&sc->dev_notifier); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr); + sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr); + +exit2: + kobject_put(sc->sys_fast_classifier); + +exit1: + return result; +} + +/* + * fast_classifier_exit() + */ +static void __exit fast_classifier_exit(void) +{ + struct fast_classifier *sc = &__sc; + int result = -1; + + DEBUG_INFO("SFE CM exit\n"); + printk(KERN_ALERT "fast-classifier: shutting down\n"); + + /* + * Unregister our sync callback. + */ + sfe_ipv4_register_sync_rule_callback(NULL); + sfe_ipv6_register_sync_rule_callback(NULL); + + /* + * Unregister our receive callback. + */ + RCU_INIT_POINTER(athrs_fast_nat_recv, NULL); + + /* + * Wait for all callbacks to complete. + */ + rcu_barrier(); + + /* + * Destroy all connections. + */ + sfe_ipv4_destroy_all_rules_for_dev(NULL); + sfe_ipv6_destroy_all_rules_for_dev(NULL); + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)) + result = genl_unregister_ops(&fast_classifier_gnl_family, fast_classifier_gnl_ops); + if (result != 0) { + printk(KERN_CRIT "Unable to unregister genl_ops\n"); + } +#endif + + result = genl_unregister_family(&fast_classifier_gnl_family); + if (result != 0) { + printk(KERN_CRIT "Unable to unregister genl_family\n"); + } + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + nf_conntrack_unregister_chain_notifier(&init_net, &fast_classifier_conntrack_notifier); +#else + nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier); +#endif +#endif + nf_unregister_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing)); + + unregister_inet6addr_notifier(&sc->inet6_notifier); + unregister_inetaddr_notifier(&sc->inet_notifier); + unregister_netdevice_notifier(&sc->dev_notifier); + + kobject_put(sc->sys_fast_classifier); +} + +module_init(fast_classifier_init) +module_exit(fast_classifier_exit) + +MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fast-classifier/src/fast-classifier.h b/fast-classifier/src/fast-classifier.h new file mode 100755 index 000000000..6b7a18cf6 --- /dev/null +++ b/fast-classifier/src/fast-classifier.h @@ -0,0 +1,57 @@ +/* + * User space header to send messages to the fast classifier + * + * Copyright (c) 2013,2016 The Linux Foundation. All rights reserved.
+ * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <linux/if_ether.h> + +#define FAST_CLASSIFIER_GENL_VERSION (1) +#define FAST_CLASSIFIER_GENL_NAME "FC" +#define FAST_CLASSIFIER_GENL_MCGRP "FC_MCGRP" +#define FAST_CLASSIFIER_GENL_HDRSIZE (0) + +enum { + FAST_CLASSIFIER_A_UNSPEC, + FAST_CLASSIFIER_A_TUPLE, + __FAST_CLASSIFIER_A_MAX, +}; + +#define FAST_CLASSIFIER_A_MAX (__FAST_CLASSIFIER_A_MAX - 1) + +enum { + FAST_CLASSIFIER_C_UNSPEC, + FAST_CLASSIFIER_C_OFFLOAD, + FAST_CLASSIFIER_C_OFFLOADED, + FAST_CLASSIFIER_C_DONE, + __FAST_CLASSIFIER_C_MAX, +}; + +#define FAST_CLASSIFIER_C_MAX (__FAST_CLASSIFIER_C_MAX - 1) + +struct fast_classifier_tuple { + unsigned short ethertype; + unsigned char proto; + union { + struct in_addr in; + struct in6_addr in6; + } src_saddr; + union { + struct in_addr in; + struct in6_addr in6; + } dst_saddr; + unsigned short sport; + unsigned short dport; + unsigned char smac[ETH_ALEN]; + unsigned char dmac[ETH_ALEN]; +}; diff --git a/fast-classifier/src/nl_classifier_test.c b/fast-classifier/src/nl_classifier_test.c new file mode 100755 index 000000000..639417964 --- /dev/null +++ b/fast-classifier/src/nl_classifier_test.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2016 The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <arpa/inet.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/ctrl.h> + +#define NL_CLASSIFIER_GENL_VERSION 1 +#define NL_CLASSIFIER_GENL_FAMILY "FC" +#define NL_CLASSIFIER_GENL_GROUP "FC_MCGRP" +#define NL_CLASSIFIER_GENL_HDRSIZE 0 + +enum NL_CLASSIFIER_CMD { + NL_CLASSIFIER_CMD_UNSPEC, + NL_CLASSIFIER_CMD_ACCEL, + NL_CLASSIFIER_CMD_ACCEL_OK, + NL_CLASSIFIER_CMD_CONNECTION_CLOSED, + NL_CLASSIFIER_CMD_MAX, +}; + +enum NL_CLASSIFIER_ATTR { + NL_CLASSIFIER_ATTR_UNSPEC, + NL_CLASSIFIER_ATTR_TUPLE, + NL_CLASSIFIER_ATTR_MAX, +}; + +union nl_classifier_tuple_ip { + struct in_addr in; + struct in6_addr in6; +}; + +struct nl_classifier_tuple { + unsigned short af; + unsigned char proto; + union nl_classifier_tuple_ip src_ip; + union nl_classifier_tuple_ip dst_ip; + unsigned short sport; + unsigned short dport; + unsigned char smac[6]; + unsigned char dmac[6]; +}; + +struct nl_classifier_instance { + struct nl_sock *sock; + int family_id; + int group_id; + int stop; +}; + +struct nl_classifier_instance nl_cls_inst; + +static struct nla_policy nl_classifier_genl_policy[(NL_CLASSIFIER_ATTR_MAX+1)] = { + [NL_CLASSIFIER_ATTR_TUPLE] = { .type = NLA_UNSPEC }, +}; + +void nl_classifier_dump_nl_tuple(struct nl_classifier_tuple *tuple) +{ + char ip_str[64]; + + printf("protocol = %s\n", (tuple->proto == IPPROTO_UDP) ? "udp" : ((tuple->proto == IPPROTO_TCP) ? "tcp" : "unknown")); + printf("source ip = %s\n", inet_ntop(tuple->af, &tuple->src_ip, ip_str, sizeof(ip_str))); + printf("destination ip = %s\n", inet_ntop(tuple->af, &tuple->dst_ip, ip_str, sizeof(ip_str))); + printf("source port = %d\n", ntohs(tuple->sport)); + printf("destination port = %d\n", ntohs(tuple->dport)); +} + +int nl_classifier_msg_recv(struct nl_msg *msg, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(msg); + struct genlmsghdr *gnlh = nlmsg_data(nlh); + struct nlattr *attrs[(NL_CLASSIFIER_ATTR_MAX+1)]; + + genlmsg_parse(nlh, NL_CLASSIFIER_GENL_HDRSIZE, attrs, NL_CLASSIFIER_ATTR_MAX, nl_classifier_genl_policy); + + switch (gnlh->cmd) { + case NL_CLASSIFIER_CMD_ACCEL_OK: + printf("Acceleration successful:\n"); + nl_classifier_dump_nl_tuple(nla_data(attrs[NL_CLASSIFIER_ATTR_TUPLE])); + return NL_OK; + case NL_CLASSIFIER_CMD_CONNECTION_CLOSED: + printf("Connection is closed:\n"); + nl_classifier_dump_nl_tuple(nla_data(attrs[NL_CLASSIFIER_ATTR_TUPLE])); + return NL_OK; + default: + printf("nl classifier received unknown message %d\n", gnlh->cmd); + } + + return NL_SKIP; +} + +void nl_classifier_offload(struct nl_classifier_instance *inst, + unsigned char proto, unsigned long *src_saddr, + unsigned long *dst_saddr, unsigned short sport, + unsigned short dport, int af) +{ + struct nl_msg *msg; + int ret; + struct nl_classifier_tuple classifier_msg; + + memset(&classifier_msg, 0, sizeof(classifier_msg)); + classifier_msg.af = af; + classifier_msg.proto = proto; + memcpy(&classifier_msg.src_ip, src_saddr, (af == AF_INET ? 4 : 16)); + memcpy(&classifier_msg.dst_ip, dst_saddr, (af == AF_INET ?
4 : 16)); + classifier_msg.sport = sport; + classifier_msg.dport = dport; + + msg = nlmsg_alloc(); + if (!msg) { + printf("Unable to allocate message\n"); + return; + } + + genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, inst->family_id, + NL_CLASSIFIER_GENL_HDRSIZE, NLM_F_REQUEST, + NL_CLASSIFIER_CMD_ACCEL, NL_CLASSIFIER_GENL_VERSION); + nla_put(msg, NL_CLASSIFIER_ATTR_TUPLE, sizeof(classifier_msg), &classifier_msg); + + ret = nl_send_auto(inst->sock, msg); + if (ret < 0) { + printf("send netlink message failed.\n"); + nlmsg_free(msg); + return; + } + + nlmsg_free(msg); + printf("nl classifier offload connection successful\n"); +} + +int nl_classifier_init(struct nl_classifier_instance *inst) +{ + int ret; + + inst->sock = nl_socket_alloc(); + if (!inst->sock) { + printf("Unable to allocate socket.\n"); + return -1; + } + genl_connect(inst->sock); + + inst->family_id = genl_ctrl_resolve(inst->sock, NL_CLASSIFIER_GENL_FAMILY); + if (inst->family_id < 0) { + printf("Unable to resolve family %s\n", NL_CLASSIFIER_GENL_FAMILY); + goto init_failed; + } + + inst->group_id = genl_ctrl_resolve_grp(inst->sock, NL_CLASSIFIER_GENL_FAMILY, NL_CLASSIFIER_GENL_GROUP); + if (inst->group_id < 0) { + printf("Unable to resolve mcast group %s\n", NL_CLASSIFIER_GENL_GROUP); + goto init_failed; + } + + ret = nl_socket_add_membership(inst->sock, inst->group_id); + if (ret < 0) { + printf("Unable to add membership\n"); + goto init_failed; + } + + nl_socket_disable_seq_check(inst->sock); + nl_socket_modify_cb(inst->sock, NL_CB_VALID, NL_CB_CUSTOM, nl_classifier_msg_recv, NULL); + + printf("nl classifier init successful\n"); + return 0; + +init_failed: + if (inst->sock) { + nl_close(inst->sock); + nl_socket_free(inst->sock); + inst->sock = NULL; + } + return -1; +} + +void nl_classifier_exit(struct nl_classifier_instance *inst) +{ + if (inst->sock) { + nl_close(inst->sock); + nl_socket_free(inst->sock); + inst->sock = NULL; + } + printf("nl classifier exit successful\n"); +} + +int nl_classifier_parse_arg(int argc, char *argv[], unsigned char *proto, unsigned long *src_saddr, + unsigned long *dst_saddr, unsigned short *sport, unsigned short *dport, int *af) +{ + int ret; + unsigned short port; + + if (argc < 7) { + printf("help: nl_classifier <v4|v6> <udp|tcp> <src ip> <dst ip> <src port> <dst port>\n"); + return -1; + } + + if (0 == strncmp(argv[1], "v4", 2)) { + *af = AF_INET; + } else if (0 == strncmp(argv[1], "v6", 2)) { + *af = AF_INET6; + } else { + printf("Address family is not supported\n"); + return -1; + } + + if (0 == strncmp(argv[2], "udp", 3)) { + *proto = IPPROTO_UDP; + } else if (0 == strncmp(argv[2], "tcp", 3)) { + *proto = IPPROTO_TCP; + } else { + printf("Protocol is not supported\n"); + return -1; + } + + ret = inet_pton(*af, argv[3], src_saddr); + if (ret <= 0) { + printf("source ip has wrong format\n"); + return -1; + } + + ret = inet_pton(*af, argv[4], dst_saddr); + if (ret <= 0) { + printf("destination ip has wrong format\n"); + return -1; + } + + port = strtol(argv[5], NULL, 0); + *sport = htons(port); + port = strtol(argv[6], NULL, 0); + *dport = htons(port); + + printf("nl classifier parse arguments successful\n"); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct nl_classifier_instance *inst = &nl_cls_inst; + unsigned char proto; + unsigned long src_addr[4]; + unsigned long dst_addr[4]; + unsigned short sport; + unsigned short dport; + int af; + int ret; + + ret = nl_classifier_parse_arg(argc, argv, &proto, src_addr, dst_addr, &sport, &dport, &af); + if (ret < 0) { + printf("Failed to parse arguments\n"); + return ret; + } + + ret =
nl_classifier_init(inst); + if (ret < 0) { + printf("Unable to init generic netlink\n"); + return ret; + } + + nl_classifier_offload(inst, proto, src_addr, dst_addr, sport, dport, af); + + /* main loop, listening for messages */ + while (!inst->stop) { + nl_recvmsgs_default(inst->sock); + } + + nl_classifier_exit(inst); + + return 0; +} diff --git a/fast-classifier/src/sfe.h b/fast-classifier/src/sfe.h new file mode 100755 index 000000000..279e7b3dc --- /dev/null +++ b/fast-classifier/src/sfe.h @@ -0,0 +1,114 @@ +/* + * sfe.h + * Shortcut forwarding engine. + * + * Copyright (c) 2013-2017 The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + + +/* + * The following are debug macros used throughout the SFE. + * + * DEBUG_LEVEL enables the following, based on its value, + * when the dynamic debug option is disabled: + * + * 0 = OFF + * 1 = ASSERTS / ERRORS + * 2 = 1 + WARN + * 3 = 2 + INFO + * 4 = 3 + TRACE + */ +#define DEBUG_LEVEL 2 + +#if (DEBUG_LEVEL < 1) +#define DEBUG_ASSERT(s, ...) +#define DEBUG_ERROR(s, ...) +#else +#define DEBUG_ASSERT(c, s, ...) if (!(c)) { pr_emerg("ASSERT: %s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__); BUG(); } +#define DEBUG_ERROR(s, ...) pr_err("%s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#endif + +#if defined(CONFIG_DYNAMIC_DEBUG) +/* + * Compile messages for dynamic enable/disable + */ +#define DEBUG_WARN(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#define DEBUG_INFO(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#define DEBUG_TRACE(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#else + +/* + * Statically compile messages at different levels + */ +#if (DEBUG_LEVEL < 2) +#define DEBUG_WARN(s, ...) +#else +#define DEBUG_WARN(s, ...) pr_warn("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#endif + +#if (DEBUG_LEVEL < 3) +#define DEBUG_INFO(s, ...) +#else +#define DEBUG_INFO(s, ...) pr_notice("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#endif + +#if (DEBUG_LEVEL < 4) +#define DEBUG_TRACE(s, ...) +#else +#define DEBUG_TRACE(s, ...) pr_info("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#endif +#endif + +#ifdef CONFIG_NF_FLOW_COOKIE +typedef int (*flow_cookie_set_func_t)(u32 protocol, __be32 src_ip, __be16 src_port, + __be32 dst_ip, __be16 dst_port, u16 flow_cookie); +/* + * sfe_register_flow_cookie_cb + * register a function with SFE that SFE can use to configure the flow cookie for a flow + * + * Hardware drivers which support flow cookies should register a callback function with SFE. SFE + * can then use this function to configure the flow cookie for a flow.
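+ * (A driver would typically register its callback once at probe time, passing a function that programs its hardware flow table.)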
+ * return: 0, success; !=0, fail + */ +int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb); + +/* + * sfe_unregister_flow_cookie_cb + * unregister the function used to configure the flow cookie for a flow + * + * return: 0, success; !=0, fail + */ +int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb); + +typedef int (*sfe_ipv6_flow_cookie_set_func_t)(u32 protocol, __be32 src_ip[4], __be16 src_port, + __be32 dst_ip[4], __be16 dst_port, u16 flow_cookie); + +/* + * sfe_ipv6_register_flow_cookie_cb + * register a function with SFE that SFE can use to configure the flow cookie for a flow + * + * Hardware drivers which support flow cookies should register a callback function with SFE. SFE + * can then use this function to configure the flow cookie for a flow. + * return: 0, success; !=0, fail + */ +int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb); + +/* + * sfe_ipv6_unregister_flow_cookie_cb + * unregister the function used to configure the flow cookie for a flow + * + * return: 0, success; !=0, fail + */ +int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb); + +#endif /*CONFIG_NF_FLOW_COOKIE*/ diff --git a/fast-classifier/src/sfe_backport.h b/fast-classifier/src/sfe_backport.h new file mode 100755 index 000000000..2f8c8ca3c --- /dev/null +++ b/fast-classifier/src/sfe_backport.h @@ -0,0 +1,195 @@ +/* + * sfe_backport.h + * Shortcut forwarding engine compatibility header file. + * + * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#include <linux/version.h> + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) +#include <net/netfilter/nf_conntrack_timeout.h> +#else +enum udp_conntrack { + UDP_CT_UNREPLIED, + UDP_CT_REPLIED, + UDP_CT_MAX +}; + +static inline unsigned int * +nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, + struct nf_conntrack_l4proto *l4proto) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + struct nf_conn_timeout *timeout_ext; + unsigned int *timeouts; + + timeout_ext = nf_ct_timeout_find(ct); + if (timeout_ext) + timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); + else + timeouts = l4proto->get_timeouts(net); + + return timeouts; +#else + return l4proto->get_timeouts(net); +#endif /*CONFIG_NF_CONNTRACK_TIMEOUT*/ +} +#endif /*KERNEL_VERSION(3, 7, 0)*/ +#endif /*KERNEL_VERSION(3, 4, 0)*/ + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) +#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ +static unsigned int FN_NAME(void *priv, \ + struct sk_buff *SKB, \ + const struct nf_hook_state *state) +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ +static unsigned int FN_NAME(const struct nf_hook_ops *OPS, \ + struct sk_buff *SKB, \ + const struct net_device *UNUSED, \ + const struct net_device *OUT, \ + int (*OKFN)(struct sk_buff *)) +#else +#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ +static unsigned int FN_NAME(unsigned int HOOKNUM, \ + struct sk_buff *SKB, \ + const struct net_device *UNUSED, \ + const struct net_device *OUT, \ + int (*OKFN)(struct sk_buff *)) +#endif + +#define sfe_cm_ipv4_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ + sfe_define_post_routing_hook(__sfe_cm_ipv4_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) +#define sfe_cm_ipv6_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ + sfe_define_post_routing_hook(__sfe_cm_ipv6_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) +#define fast_classifier_ipv4_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ + sfe_define_post_routing_hook(__fast_classifier_ipv4_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) +#define fast_classifier_ipv6_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \ + sfe_define_post_routing_hook(__fast_classifier_ipv6_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) +#define SFE_IPV4_NF_POST_ROUTING_HOOK(fn) \ + { \ + .hook = fn, \ + .pf = NFPROTO_IPV4, \ + .hooknum = NF_INET_POST_ROUTING, \ + .priority = NF_IP_PRI_NAT_SRC + 1, \ + } +#else +#define SFE_IPV4_NF_POST_ROUTING_HOOK(fn) \ + { \ + .hook = fn, \ + .owner = THIS_MODULE, \ + .pf = NFPROTO_IPV4, \ + .hooknum = NF_INET_POST_ROUTING, \ + .priority = NF_IP_PRI_NAT_SRC + 1, \ + } +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) +#define SFE_IPV6_NF_POST_ROUTING_HOOK(fn) \ + { \ + .hook = fn, \ + .pf = NFPROTO_IPV6, \ + .hooknum = NF_INET_POST_ROUTING, \ + .priority = NF_IP_PRI_NAT_SRC + 1, \ + } +#else +#define SFE_IPV6_NF_POST_ROUTING_HOOK(fn) \ + { \ + .hook = fn, \ + .owner = THIS_MODULE, \ + .pf = NFPROTO_IPV6, \ + .hooknum = NF_INET_POST_ROUTING, \ + .priority = NF_IP6_PRI_NAT_SRC + 1, \ + } +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)) +#define SFE_NF_CT_DEFAULT_ZONE (&nf_ct_zone_dflt) +#else +#define SFE_NF_CT_DEFAULT_ZONE NF_CT_DEFAULT_ZONE +#endif + +/* + * sfe_dev_get_master + * get master of bridge port, and hold it + */ +static
inline struct net_device *sfe_dev_get_master(struct net_device *dev) +{ + struct net_device *master; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) + rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); + if (master) + dev_hold(master); + + rcu_read_unlock(); +#else + master = dev->master; + if (master) + dev_hold(master); +#endif + return master; +} + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) +#define SFE_DEV_EVENT_PTR(PTR) netdev_notifier_info_to_dev(PTR) +#else +#define SFE_DEV_EVENT_PTR(PTR) (struct net_device *)(PTR) +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#define SFE_NF_CONN_ACCT(NM) struct nf_conn_acct *NM +#else +#define SFE_NF_CONN_ACCT(NM) struct nf_conn_counter *NM +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#define SFE_ACCT_COUNTER(NM) ((NM)->counter) +#else +#define SFE_ACCT_COUNTER(NM) (NM) +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +#define sfe_hash_for_each_possible(name, obj, node, member, key) \ + hash_for_each_possible(name, obj, member, key) +#else +#define sfe_hash_for_each_possible(name, obj, node, member, key) \ + hash_for_each_possible(name, obj, node, member, key) +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +#define sfe_hash_for_each(name, bkt, node, obj, member) \ + hash_for_each(name, bkt, obj, member) +#else +#define sfe_hash_for_each(name, bkt, node, obj, member) \ + hash_for_each(name, bkt, node, obj, member) +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) +#define sfe_dst_get_neighbour(dst, daddr) dst_neigh_lookup(dst, daddr) +#else +static inline struct neighbour * +sfe_dst_get_neighbour(struct dst_entry *dst, void *daddr) +{ + struct neighbour *neigh = dst_get_neighbour_noref(dst); + + if (neigh) + neigh_hold(neigh); + + return neigh; +} +#endif diff --git a/fast-classifier/src/sfe_cm.h b/fast-classifier/src/sfe_cm.h new file mode 100755 index 000000000..23cbde859 --- /dev/null +++ b/fast-classifier/src/sfe_cm.h @@ -0,0 +1,259 @@ +/* + * sfe_cm.h + * Shortcut forwarding engine. + * + * Copyright (c) 2013-2016 The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * connection flags. + */ +#define SFE_CREATE_FLAG_NO_SEQ_CHECK BIT(0) + /* Indicates that we should not check sequence numbers */ +#define SFE_CREATE_FLAG_REMARK_PRIORITY BIT(1) + /* Indicates that we should remark priority of skb */ +#define SFE_CREATE_FLAG_REMARK_DSCP BIT(2) + /* Indicates that we should remark DSCP of packet */ + +/* + * IPv6 address structure + */ +struct sfe_ipv6_addr { + __be32 addr[4]; +}; + +typedef union { + __be32 ip; + struct sfe_ipv6_addr ip6[1]; +} sfe_ip_addr_t; + +/* + * connection creation structure. 
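+ * Filled in by the connection manager from the conntrack entry; the _xlate fields carry the post-NAT addresses and ports, and the structure is passed to sfe_ipv4_create_rule() or sfe_ipv6_create_rule().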
+ */ +struct sfe_connection_create { + int protocol; + struct net_device *src_dev; + struct net_device *dest_dev; + u32 flags; + u32 src_mtu; + u32 dest_mtu; + sfe_ip_addr_t src_ip; + sfe_ip_addr_t src_ip_xlate; + sfe_ip_addr_t dest_ip; + sfe_ip_addr_t dest_ip_xlate; + __be16 src_port; + __be16 src_port_xlate; + __be16 dest_port; + __be16 dest_port_xlate; + u8 src_mac[ETH_ALEN]; + u8 src_mac_xlate[ETH_ALEN]; + u8 dest_mac[ETH_ALEN]; + u8 dest_mac_xlate[ETH_ALEN]; + u8 src_td_window_scale; + u32 src_td_max_window; + u32 src_td_end; + u32 src_td_max_end; + u8 dest_td_window_scale; + u32 dest_td_max_window; + u32 dest_td_end; + u32 dest_td_max_end; + u32 mark; +#ifdef CONFIG_XFRM + u32 original_accel; + u32 reply_accel; +#endif + u32 src_priority; + u32 dest_priority; + u32 src_dscp; + u32 dest_dscp; +}; + +/* + * connection destruction structure. + */ +struct sfe_connection_destroy { + int protocol; + sfe_ip_addr_t src_ip; + sfe_ip_addr_t dest_ip; + __be16 src_port; + __be16 dest_port; +}; + +typedef enum sfe_sync_reason { + SFE_SYNC_REASON_STATS, /* Sync is to synchronize stats */ + SFE_SYNC_REASON_FLUSH, /* Sync is to flush an entry */ + SFE_SYNC_REASON_DESTROY /* Sync is to destroy an entry (requested by the connection manager) */ +} sfe_sync_reason_t; + +/* + * Structure used to sync connection stats/state back within the system. + * + * NOTE: The addresses here are NON-NAT addresses, i.e. the true endpoint addressing. + * 'src' is the creator of the connection. + */ +struct sfe_connection_sync { + struct net_device *src_dev; + struct net_device *dest_dev; + int is_v6; /* Is it for ipv6? */ + int protocol; /* IP protocol number (IPPROTO_...) */ + sfe_ip_addr_t src_ip; /* Non-NAT source address, i.e. the creator of the connection */ + sfe_ip_addr_t src_ip_xlate; /* NATed source address */ + __be16 src_port; /* Non-NAT source port */ + __be16 src_port_xlate; /* NATed source port */ + sfe_ip_addr_t dest_ip; /* Non-NAT destination address, i.e. to whom the connection was created */ + sfe_ip_addr_t dest_ip_xlate; /* NATed destination address */ + __be16 dest_port; /* Non-NAT destination port */ + __be16 dest_port_xlate; /* NATed destination port */ + u32 src_td_max_window; + u32 src_td_end; + u32 src_td_max_end; + u64 src_packet_count; + u64 src_byte_count; + u32 src_new_packet_count; + u32 src_new_byte_count; + u32 dest_td_max_window; + u32 dest_td_end; + u32 dest_td_max_end; + u64 dest_packet_count; + u64 dest_byte_count; + u32 dest_new_packet_count; + u32 dest_new_byte_count; + u32 reason; /* reason for stats sync message, i.e. destroy, flush, periodic sync */ + u64 delta_jiffies; /* Time to be added to the current timeout to keep the connection alive */ +}; + +/* + * connection mark structure + */ +struct sfe_connection_mark { + int protocol; + sfe_ip_addr_t src_ip; + sfe_ip_addr_t dest_ip; + __be16 src_port; + __be16 dest_port; + u32 mark; +}; + +/* + * Expose the hook for the receive processing. + */ +extern int (*athrs_fast_nat_recv)(struct sk_buff *skb); + +/* + * Expose what should be a static flag in the TCP connection tracker. + */ +extern int nf_ct_tcp_no_window_check; + +/* + * This callback is called from a timer + * 100 times per second to sync stats back to + * the Linux connection tracker. + * + * An RCU lock is taken to prevent this callback + * from being unregistered while it is running.
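+ * Implementations must therefore not block; fast_classifier_sync_rule() is one such callback.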
+ */ +typedef void (*sfe_sync_rule_callback_t)(struct sfe_connection_sync *); + +/* + * IPv4 APIs used by connection manager + */ +int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb); +int sfe_ipv4_create_rule(struct sfe_connection_create *sic); +void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid); +void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev); +void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t callback); +void sfe_ipv4_update_rule(struct sfe_connection_create *sic); +void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark); + +#ifdef SFE_SUPPORT_IPV6 +/* + * IPv6 APIs used by connection manager + */ +int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb); +int sfe_ipv6_create_rule(struct sfe_connection_create *sic); +void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid); +void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev); +void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback); +void sfe_ipv6_update_rule(struct sfe_connection_create *sic); +void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark); +#else +static inline int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb) +{ + return 0; +} + +static inline int sfe_ipv6_create_rule(struct sfe_connection_create *sic) +{ + return 0; +} + +static inline void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid) +{ + return; +} + +static inline void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev) +{ + return; +} + +static inline void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback) +{ + return; +} + +static inline void sfe_ipv6_update_rule(struct sfe_connection_create *sic) +{ + return; +} + +static inline void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark) +{ + return; +} +#endif + +/* + * sfe_ipv6_addr_equal() + * compare two IPv6 addresses + * + * return: 1, equal; 0, not equal + */ +static inline int sfe_ipv6_addr_equal(struct sfe_ipv6_addr *a, + struct sfe_ipv6_addr *b) +{ + return a->addr[0] == b->addr[0] && + a->addr[1] == b->addr[1] && + a->addr[2] == b->addr[2] && + a->addr[3] == b->addr[3]; +} + +/* + * sfe_ipv4_addr_equal() + * compare two IPv4 addresses + * + * return: 1, equal; 0, not equal + */ +#define sfe_ipv4_addr_equal(a, b) ((u32)(a) == (u32)(b)) + +/* + * sfe_addr_equal() + * compare two IPv4 or IPv6 addresses + * + * return: 1, equal; 0, not equal + */ +static inline int sfe_addr_equal(sfe_ip_addr_t *a, + sfe_ip_addr_t *b, int is_v4) +{ + return is_v4 ? sfe_ipv4_addr_equal(a->ip, b->ip) : sfe_ipv6_addr_equal(a->ip6, b->ip6); +} diff --git a/fast-classifier/src/userspace_example.c b/fast-classifier/src/userspace_example.c new file mode 100755 index 000000000..4f4113d99 --- /dev/null +++ b/fast-classifier/src/userspace_example.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2013,2016 The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <arpa/inet.h> +#include <netlink/genl/genl.h> + +#include <fast-classifier.h> + +static struct nl_sock *sock; +static struct nl_sock *sock_event; +static int family; +static int grp_id; + +static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = { + [FAST_CLASSIFIER_A_TUPLE] = { .type = NLA_UNSPEC }, +}; + +void dump_fc_tuple(struct fast_classifier_tuple *fc_msg) +{ + char src_str[INET_ADDRSTRLEN]; + char dst_str[INET_ADDRSTRLEN]; + + printf("TUPLE: %d, %s, %s, %d, %d" + " SMAC=%02x:%02x:%02x:%02x:%02x:%02x" + " DMAC=%02x:%02x:%02x:%02x:%02x:%02x\n", + fc_msg->proto, + inet_ntop(AF_INET, + &fc_msg->src_saddr.in.s_addr, + src_str, + INET_ADDRSTRLEN), + inet_ntop(AF_INET, + &fc_msg->dst_saddr.in.s_addr, + dst_str, + INET_ADDRSTRLEN), + fc_msg->sport, fc_msg->dport, + fc_msg->smac[0], fc_msg->smac[1], fc_msg->smac[2], + fc_msg->smac[3], fc_msg->smac[4], fc_msg->smac[5], + fc_msg->dmac[0], fc_msg->dmac[1], fc_msg->dmac[2], + fc_msg->dmac[3], fc_msg->dmac[4], fc_msg->dmac[5]); +} + +static int parse_cb(struct nl_msg *msg, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(msg); + struct genlmsghdr *gnlh = nlmsg_data(nlh); + struct nlattr *attrs[FAST_CLASSIFIER_A_MAX + 1]; + + genlmsg_parse(nlh, 0, attrs, FAST_CLASSIFIER_A_MAX, fast_classifier_genl_policy); + + switch (gnlh->cmd) { + case FAST_CLASSIFIER_C_OFFLOADED: + printf("Got an offloaded message\n"); + dump_fc_tuple(nla_data(attrs[FAST_CLASSIFIER_A_TUPLE])); + return NL_OK; + case FAST_CLASSIFIER_C_DONE: + printf("Got a done message\n"); + dump_fc_tuple(nla_data(attrs[FAST_CLASSIFIER_A_TUPLE])); + return NL_OK; + } + + return NL_SKIP; +} + +int fast_classifier_init(void) +{ + int err; + + sock = nl_socket_alloc(); + if (!sock) { + printf("Unable to allocate socket.\n"); + return -1; + } + genl_connect(sock); + + sock_event = nl_socket_alloc(); + if (!sock_event) { + nl_close(sock); + nl_socket_free(sock); + printf("Unable to allocate socket.\n"); + return -1; + } + genl_connect(sock_event); + + family = genl_ctrl_resolve(sock, FAST_CLASSIFIER_GENL_NAME); + if (family < 0) { + nl_close(sock_event); + nl_close(sock); + nl_socket_free(sock); + nl_socket_free(sock_event); + printf("Unable to resolve family\n"); + return -1; + } + + grp_id = genl_ctrl_resolve_grp(sock, FAST_CLASSIFIER_GENL_NAME, + FAST_CLASSIFIER_GENL_MCGRP); + if (grp_id < 0) { + printf("Unable to resolve mcast group\n"); + return -1; + } + + err = nl_socket_add_membership(sock_event, grp_id); + if (err < 0) { + printf("Unable to add membership\n"); + return -1; + } + + nl_socket_disable_seq_check(sock_event); + nl_socket_modify_cb(sock_event, NL_CB_VALID, NL_CB_CUSTOM, parse_cb, NULL); + + return 0; +} + +void fast_classifier_close(void) +{ + nl_close(sock_event); + nl_close(sock); + nl_socket_free(sock_event); + nl_socket_free(sock); +} + +void fast_classifier_ipv4_offload(unsigned char proto, unsigned long src_saddr, + unsigned long dst_saddr, unsigned short sport, + unsigned short dport) +{ + struct nl_msg *msg; + int ret; +#ifdef DEBUG + char src_str[INET_ADDRSTRLEN]; + char dst_str[INET_ADDRSTRLEN]; +#endif + struct fast_classifier_tuple fc_msg; + +#ifdef DEBUG + printf("DEBUG: would offload: %d, %s, %s, %d, %d\n", proto, +
inet_ntop(AF_INET, &src_saddr, src_str, INET_ADDRSTRLEN), + inet_ntop(AF_INET, &dst_saddr, dst_str, INET_ADDRSTRLEN), + sport, dport); +#endif + + fc_msg.proto = proto; + fc_msg.src_saddr.in.s_addr = src_saddr; + fc_msg.dst_saddr.in.s_addr = dst_saddr; + fc_msg.sport = sport; + fc_msg.dport = dport; + fc_msg.smac[0] = 'a'; + fc_msg.smac[1] = 'b'; + fc_msg.smac[2] = 'c'; + fc_msg.smac[3] = 'd'; + fc_msg.smac[4] = 'e'; + fc_msg.smac[5] = 'f'; + fc_msg.dmac[0] = 'f'; + fc_msg.dmac[1] = 'e'; + fc_msg.dmac[2] = 'd'; + fc_msg.dmac[3] = 'c'; + fc_msg.dmac[4] = 'b'; + fc_msg.dmac[5] = 'a'; + + if (fast_classifier_init() < 0) { + printf("Unable to init generic netlink\n"); + exit(1); + } + + msg = nlmsg_alloc(); + if (!msg) { + nl_socket_free(sock); + printf("Unable to allocate message\n"); + return; + } + + genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, family, + FAST_CLASSIFIER_GENL_HDRSIZE, NLM_F_REQUEST, + FAST_CLASSIFIER_C_OFFLOAD, FAST_CLASSIFIER_GENL_VERSION); + nla_put(msg, FAST_CLASSIFIER_A_TUPLE, sizeof(fc_msg), &fc_msg); + + ret = nl_send_auto_complete(sock, msg); + + nlmsg_free(msg); + if (ret < 0) { + printf("nl_send_auto_complete failed\n"); + nl_close(sock); + nl_socket_free(sock); + return; + } + + ret = nl_wait_for_ack(sock); + if (ret < 0) { + printf("wait for ack failed\n"); + nl_close(sock); + nl_socket_free(sock); + return; + } +} + +void fast_classifier_listen_for_messages(void) +{ + printf("waiting for netlink events\n"); + + while (1) { + nl_recvmsgs_default(sock_event); + } +} + +int main(int argc, char *argv[]) +{ + if (fast_classifier_init() < 0) { + printf("Unable to init generic netlink\n"); + exit(1); + } + + fast_classifier_ipv4_offload('a', 0, 0, 0, 0); + + /* this never returns */ + fast_classifier_listen_for_messages(); + + fast_classifier_close(); + + return 0; +} diff --git a/golang-protobuf/Makefile b/golang-protobuf/Makefile index 1dd992c0f..04cc930f4 100755 --- a/golang-protobuf/Makefile +++ b/golang-protobuf/Makefile @@ -25,7 +25,7 @@ GO_PKG:=github.com/golang/protobuf GO_PKG_SOURCE_ONLY:=1 include $(INCLUDE_DIR)/package.mk -include $(TOPDIR)/feeds/packages/lang/golang/golang-package.mk +include ../golang/golang-package.mk define Package/golang-protobuf-dev $(call GoPackage/GoSubMenu) diff --git a/iproute2/Makefile b/iproute2/Makefile index 55c00a0d6..1405f0752 100644 --- a/iproute2/Makefile +++ b/iproute2/Makefile @@ -8,12 +8,13 @@ include $(TOPDIR)/rules.mk PKG_NAME:=iproute2 -PKG_VERSION:=5.15.0 PKG_RELEASE:=$(AUTORELEASE) -PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.xz -PKG_SOURCE_URL:=@KERNEL/linux/utils/net/iproute2 -PKG_HASH:=38e3e4a5f9a7f5575c015027a10df097c149111eeb739993128e5b2b35b291ff +PKG_SOURCE_PROTO:=git +PKG_SOURCE_URL:=git://git.kernel.org/pub/scm/network/iproute2/iproute2.git +PKG_SOURCE_VERSION:=29da83f89f6e1fe528c59131a01f5d43bcd0a000 +PKG_VERSION:=5.16.0-$(PKG_SOURCE_VERSION) + PKG_BUILD_PARALLEL:=1 PKG_BUILD_DEPENDS:=iptables PKG_LICENSE:=GPL-2.0 @@ -57,16 +58,7 @@ $(call Package/iproute2/Default) DEFAULT_VARIANT:=1 PROVIDES:=tc ALTERNATIVES:=200:/sbin/tc:/usr/libexec/tc-tiny - DEPENDS:=+kmod-sched-core +(PACKAGE_devlink||PACKAGE_rdma):libmnl -endef - -define Package/tc-bpf -$(call Package/iproute2/Default) - TITLE:=Traffic control utility (bpf) - VARIANT:=tcbpf - PROVIDES:=tc - ALTERNATIVES:=300:/sbin/tc:/usr/libexec/tc-bpf - DEPENDS:=+kmod-sched-core +(PACKAGE_devlink||PACKAGE_rdma):libmnl +libbpf + DEPENDS:=+kmod-sched-core +libxtables +tc-mod-iptables +(PACKAGE_devlink||PACKAGE_rdma):libmnl endef define Package/tc-full @@ -74,14 +66,13 @@ $(call Package/iproute2/Default)
TITLE:=Traffic control utility (full) VARIANT:=tcfull PROVIDES:=tc - ALTERNATIVES:=400:/sbin/tc:/usr/libexec/tc-full - DEPENDS:=+kmod-sched-core +(PACKAGE_devlink||PACKAGE_rdma):libmnl +libbpf +libxtables +tc-mod-iptables + ALTERNATIVES:=300:/sbin/tc:/usr/libexec/tc-full + DEPENDS:=+kmod-sched-core +libxtables +tc-mod-iptables +libbpf +(PACKAGE_devlink||PACKAGE_rdma):libmnl endef define Package/tc-mod-iptables $(call Package/iproute2/Default) TITLE:=Traffic control module - iptables action - VARIANT:=tcfull DEPENDS:=+libxtables endef @@ -133,29 +124,13 @@ endif ifeq ($(BUILD_VARIANT),tctiny) LIBBPF_FORCE:=off -endif - -ifeq ($(BUILD_VARIANT),tcbpf) - HAVE_ELF:=y - LIBBPF_FORCE:=on SHARED_LIBS:=y endif ifeq ($(BUILD_VARIANT),tcfull) - #enable iptables/xtables requirement only if tciptables variant is selected - TC_CONFIG_XT:=y - TC_CONFIG_XT_OLD:=y - TC_CONFIG_XT_OLD_H:=y - TC_CONFIG_IPSET:=y HAVE_ELF:=y LIBBPF_FORCE:=on SHARED_LIBS:=y -else - #disable iptables requirement by default - TC_CONFIG_XT:=n - TC_CONFIG_XT_OLD:=n - TC_CONFIG_XT_OLD_H:=n - TC_CONFIG_IPSET:=n endif ifdef CONFIG_PACKAGE_devlink @@ -186,10 +161,6 @@ MAKE_FLAGS += \ HAVE_CAP=$(HAVE_CAP) \ IPT_LIB_DIR=/usr/lib/iptables \ XT_LIB_DIR=/usr/lib/iptables \ - TC_CONFIG_XT=$(TC_CONFIG_XT) \ - TC_CONFIG_XT_OLD=$(TC_CONFIG_XT_OLD) \ - TC_CONFIG_XT_OLD_H=$(TC_CONFIG_XT_OLD_H) \ - TC_CONFIG_IPSET=$(TC_CONFIG_IPSET) \ FPIC="$(FPIC)" \ $(if $(findstring c,$(OPENWRT_VERBOSE)),V=1,V='') @@ -220,11 +191,6 @@ define Package/tc-tiny/install $(INSTALL_BIN) $(PKG_BUILD_DIR)/tc/tc $(1)/usr/libexec/tc-tiny endef -define Package/tc-bpf/install - $(INSTALL_DIR) $(1)/usr/libexec - $(INSTALL_BIN) $(PKG_BUILD_DIR)/tc/tc $(1)/usr/libexec/tc-bpf -endef - define Package/tc-full/install $(INSTALL_DIR) $(1)/usr/libexec $(INSTALL_BIN) $(PKG_BUILD_DIR)/tc/tc $(1)/usr/libexec/tc-full @@ -265,13 +231,12 @@ define Package/rdma/install $(INSTALL_BIN) $(PKG_BUILD_DIR)/rdma/rdma $(1)/usr/sbin/ endef -$(eval $(call BuildPackage,ip-tiny)) +#$(eval $(call BuildPackage,ip-tiny)) $(eval $(call BuildPackage,ip-full)) # build tc-mod-iptables before its dependents, to avoid # spurious rebuilds when building multiple variants. $(eval $(call BuildPackage,tc-mod-iptables)) -$(eval $(call BuildPackage,tc-tiny)) -$(eval $(call BuildPackage,tc-bpf)) +#$(eval $(call BuildPackage,tc-tiny)) $(eval $(call BuildPackage,tc-full)) $(eval $(call BuildPackage,genl)) $(eval $(call BuildPackage,ip-bridge)) diff --git a/iproute2/patches/170-ip_tiny.patch b/iproute2/patches/170-ip_tiny.patch deleted file mode 100644 index cd687e760..000000000 --- a/iproute2/patches/170-ip_tiny.patch +++ /dev/null @@ -1,110 +0,0 @@ ---- a/ip/Makefile -+++ b/ip/Makefile -@@ -17,6 +17,13 @@ RTMONOBJ=rtmon.o - - include ../config.mk - -+STATIC_SYM_FILTER:= -+ifeq ($(IP_CONFIG_TINY),y) -+ STATIC_SYM_FILTER:=iplink_can.c iplink_ipoib.c iplink_vxlan.c -+ CFLAGS += -DIPROUTE2_TINY -+endif -+STATIC_SYM_SOURCES:=$(filter-out $(STATIC_SYM_FILTER),$(wildcard *.c)) -+ - ALLOBJ=$(IPOBJ) $(RTMONOBJ) - SCRIPTS=ifcfg rtpr routel routef - TARGETS=ip rtmon -@@ -46,7 +53,7 @@ else - - ip: static-syms.o - static-syms.o: static-syms.h --static-syms.h: $(wildcard *.c) -+static-syms.h: $(STATIC_SYM_SOURCES) - files="$^" ; \ - for s in `grep -B 3 '\ +# +# This is free software, licensed under the GNU General Public License v2. +# See /LICENSE for more information. 
+# + +include $(TOPDIR)/rules.mk + +PKG_NAME:=lcd4linux +PKG_REV:=f13470faf00e52d1458f2a88d498716240edc272 +PKG_VERSION:=r$(PKG_REV) +PKG_RELEASE:=4 + +PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.bz2 +#PKG_SOURCE_URL:=https://ssl.bulix.org/svn/lcd4linux/trunk/ +PKG_SOURCE_URL:=https://github.com/redblue-pkt/lcd4linux.git +#PKG_SOURCE_SUBDIR:=lcd4linux-$(PKG_VERSION) +PKG_SOURCE_VERSION:=$(PKG_REV) +PKG_SOURCE_PROTO:=git + +LCD4LINUX_DRIVERS:= \ + ASTUSB \ + BeckmannEgle \ + BWCT \ + CrystalFontz \ + Curses \ + Cwlinux \ + D4D \ + DPF \ + EA232graphic \ + EFN \ + FutabaVFD \ + FW8888 \ + GLCD2USB \ + IRLCD \ + $(if $(CONFIG_BROKEN),HD44780) \ + $(if $(CONFIG_BROKEN),HD44780-I2C) \ + LCD2USB \ + $(if $(CONFIG_BROKEN),LCDLinux) \ + LCDTerm \ + LEDMatrix \ + LPH7508 \ + $(if $(CONFIG_BROKEN),LUIse) \ + LW_ABP \ + M50530 \ + MatrixOrbital \ + MatrixOrbitalGX \ + MilfordInstruments \ + Newhaven \ + Noritake \ + NULL \ + Pertelian \ + PHAnderson \ + PICGraphic \ + picoLCD \ + picoLCDGraphic \ + PNG \ + PPM \ + $(if $(CONFIG_TARGET_rb532),RouterBoard) \ + $(if $(CONFIG_BROKEN),SamsungSPF) \ + ShuttleVFD \ + SimpleLCD \ + st2205 \ + T6963 \ + TeakLCM \ + $(if $(CONFIG_TARGET_ar71xx),TEW673GRU) \ + Trefon \ + USBHUB \ + USBLCD \ + VNC \ + WincorNixdorf \ + serdisplib \ +# G15 \ +# ULA200 \ +# X11 \ + +LCD4LINUX_PLUGINS:= \ + apm \ + asterisk \ + button_exec \ + cpuinfo \ + dbus \ + diskstats \ + dvb \ + event \ + exec \ + fifo \ + file \ + gps \ + hddtemp \ + huawei \ + i2c_sensors \ + iconv \ + imon \ + isdn \ + kvv \ + loadavg \ + netdev \ + netinfo \ + meminfo \ + mpris_dbus \ + netdev \ + pop3 \ + ppp \ + proc_stat \ + qnaplog \ + seti \ + statfs \ + uname \ + uptime \ + w1retap \ + $(if $(CONFIG_BROKEN),wireless) \ + xmms \ +# mpd \ +# mysql \ +# python \ + +PKG_FIXUP:=autoreconf +PKG_INSTALL:=1 + +PKG_BUILD_DIR:=$(BUILD_DIR)/$(PKG_NAME)-$(BUILD_VARIANT)/$(PKG_NAME)-$(PKG_VERSION) + +PKG_BUILD_DEPENDS:= \ +# ppp \ +# libftdi \ +# libX11 \ +# python \ + +PKG_CONFIG_DEPENDS:= \ + $(patsubst %,CONFIG_LCD4LINUX_CUSTOM_DRIVER_%,$(LCD4LINUX_DRIVERS)) \ + $(patsubst %,CONFIG_LCD4LINUX_CUSTOM_PLUGIN_%,$(LCD4LINUX_PLUGINS)) \ + +include $(INCLUDE_DIR)/package.mk +include $(INCLUDE_DIR)/nls.mk + +define Package/lcd4linux/Default + SECTION:=utils + CATEGORY:=Utilities + PKG_MAINTAINER:=Jonathan McCrohan + TITLE:=LCD display utility + URL:=http://lcd4linux.bulix.org/ +endef + +define Package/lcd4linux/Default/description + LCD4Linux is a small program that grabs information from the kernel and + some subsystems and displays it on an external liquid crystal display. +endef + + +define Package/lcd4linux-custom +$(call Package/lcd4linux/Default) + DEPENDS:= \ + +LCD4LINUX_CUSTOM_NEEDS_libdbus:libdbus \ + +LCD4LINUX_CUSTOM_NEEDS_libgd:libgd \ + $(if $(ICONV_FULL),+LCD4LINUX_CUSTOM_NEEDS_libiconv:libiconv-full) \ + +LCD4LINUX_CUSTOM_NEEDS_libjpeg:libjpeg \ + +LCD4LINUX_CUSTOM_NEEDS_libncurses:libncurses \ + +LCD4LINUX_CUSTOM_NEEDS_libsqlite3:libsqlite3 \ + +LCD4LINUX_CUSTOM_NEEDS_libusb:libusb-compat \ +# +LCD4LINUX_CUSTOM_NEEDS_libmpdclient:libmpdclient \ +# +LCD4LINUX_CUSTOM_NEEDS_libmysqlclient:libmysqlclient \ +# +LCD4LINUX_CUSTOM_NEEDS_libftdi:libftdi \ +# +LCD4LINUX_CUSTOM_NEEDS_libX11:libX11 \ +# +LCD4LINUX_CUSTOM_NEEDS_python:python + MENU:=1 + PROVIDES:=lcd4linux + VARIANT=custom +endef + +define Package/lcd4linux-custom/config + source "$(SOURCE)/Config.in" +endef + +define Package/lcd4linux-custom/description +$(call Package/lcd4linux/Default/description) + . 
+ This package contains a customized version of LCD4Linux. +endef + + +define Package/lcd4linux-full +$(call Package/lcd4linux/Default) + DEPENDS:= \ + +libdbus \ + +libgd \ + $(if $(ICONV_FULL),+libiconv-full) \ + +libncurses \ + +libsqlite3 \ + +libusb-compat \ + +serdisplib +# +libmpdclient \ +# +libmysqlclient \ +# +libftdi \ +# +libX11 \ +# +python + PROVIDES:=lcd4linux + VARIANT=full +endef + +define Package/lcd4linux-full/description +$(call Package/lcd4linux/Default/description) + . + This package contains a version of LCD4Linux built with all supported + drivers and plugins. +endef + + +CONFIGURE_ARGS+= \ + --disable-rpath \ + +EXTRA_LDFLAGS+= -Wl,-rpath-link,$(STAGING_DIR)/usr/lib + +ifeq ($(BUILD_VARIANT),custom) + + LCD4LINUX_CUSTOM_DRIVERS:= $(strip $(foreach c, $(LCD4LINUX_DRIVERS), \ + $(if $(CONFIG_LCD4LINUX_CUSTOM_DRIVER_$(c)),$(c),) \ + )) + ifeq ($(LCD4LINUX_CUSTOM_DRIVERS),) + LCD4LINUX_CUSTOM_DRIVERS:=Sample + endif + + LCD4LINUX_CUSTOM_PLUGINS:= $(strip $(foreach c, $(LCD4LINUX_PLUGINS), \ + $(if $(CONFIG_LCD4LINUX_CUSTOM_PLUGIN_$(c)),$(c)) \ + )) + ifeq ($(LCD4LINUX_CUSTOM_PLUGINS),) + LCD4LINUX_CUSTOM_PLUGINS:=sample + endif + + CONFIGURE_ARGS+= \ + --with-drivers="$(LCD4LINUX_CUSTOM_DRIVERS)" \ + --with-plugins="$(LCD4LINUX_CUSTOM_PLUGINS)" \ + + ifneq ($(CONFIG_LCD4LINUX_CUSTOM_NEEDS_libiconv),) + CONFIGURE_ARGS+= --with-libiconv-prefix="$(ICONV_PREFIX)" + else + CONFIGURE_ARGS+= --without-libiconv-prefix + endif + + ifneq ($(CONFIG_LCD4LINUX_CUSTOM_NEEDS_libmysqlclient),) + EXTRA_LDFLAGS+= -L$(STAGING_DIR)/usr/lib/mysql + endif + +# ifneq ($(CONFIG_LCD4LINUX_CUSTOM_NEEDS_python),) +# CONFIGURE_ARGS+= --with-python +# else + CONFIGURE_ARGS+= --without-python +# endif + +# ifneq ($(CONFIG_LCD4LINUX_CUSTOM_NEEDS_libX11),) +# CONFIGURE_ARGS+= --with-x +# else + CONFIGURE_ARGS+= --without-x +# endif + +endif + +ifeq ($(BUILD_VARIANT),full) + + LCD4LINUX_FULL_DRIVERS:= $(strip $(foreach c, $(LCD4LINUX_DRIVERS), \ + $(c) \ + )) + + LCD4LINUX_FULL_PLUGINS:= $(strip $(foreach c, $(LCD4LINUX_PLUGINS), \ + $(c) \ + )) + + CONFIGURE_ARGS+= \ + --with-drivers="$(LCD4LINUX_FULL_DRIVERS)" \ + --with-plugins="$(LCD4LINUX_FULL_PLUGINS)" \ + --with-libiconv-prefix="$(ICONV_PREFIX)" \ + --without-python \ + --without-x \ + + EXTRA_LDFLAGS+= -L$(STAGING_DIR)/usr/lib/mysql + +endif + + +define Package/lcd4linux/conffiles +/etc/lcd4linux.conf +endef + +define Package/lcd4linux/install + $(INSTALL_DIR) $(1)/usr/bin + $(CP) $(PKG_INSTALL_DIR)/usr/bin/lcd4linux $(1)/usr/bin/ + $(INSTALL_DIR) $(1)/etc + $(INSTALL_CONF) $(PKG_BUILD_DIR)/lcd4linux.conf.sample $(1)/etc/lcd4linux.conf + $(INSTALL_DIR) $(1)/etc/init.d + $(INSTALL_BIN) ./files/lcd4linux.init $(1)/etc/init.d/lcd4linux + $(SED) "s|^\(Display 'GLCD2USB'\)|#\1|g" \ + -e "s|^\(Layout 'TestLayer'\)|#\1|g" \ + -e "s|^#\(Display 'Image'\)|\1|g" \ + -e "s|^#\(Layout 'Default'\)|\1|g" \ + $(1)/etc/lcd4linux.conf +endef + +Package/lcd4linux-custom/conffiles = $(Package/lcd4linux/conffiles) +Package/lcd4linux-custom/install = $(Package/lcd4linux/install) + +Package/lcd4linux-full/conffiles = $(Package/lcd4linux/conffiles) +Package/lcd4linux-full/install = $(Package/lcd4linux/install) + +$(eval $(call BuildPackage,lcd4linux-custom)) +$(eval $(call BuildPackage,lcd4linux-full)) diff --git a/lcd4linux/files/lcd4linux.init b/lcd4linux/files/lcd4linux.init new file mode 100755 index 000000000..25033f382 --- /dev/null +++ b/lcd4linux/files/lcd4linux.init @@ -0,0 +1,15 @@ +#!/bin/sh /etc/rc.common +# Copyright (C) 2007-2015 OpenWrt.org + 
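+# The sample config installed above is patched to use the Image display, so lcd4linux renders to /tmp/lcd4linux.png (-o); -q keeps console output quiet.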
+START=98 + +SERVICE_USE_PID=1 + +start() { + service_start /usr/bin/lcd4linux -o /tmp/lcd4linux.png -q +} + +stop() { + service_stop /usr/bin/lcd4linux +} + diff --git a/lcd4linux/patches/120-remove-as-needed-linker-option.patch b/lcd4linux/patches/120-remove-as-needed-linker-option.patch new file mode 100755 index 000000000..b5e56fe7b --- /dev/null +++ b/lcd4linux/patches/120-remove-as-needed-linker-option.patch @@ -0,0 +1,11 @@ +--- a/Makefile.am ++++ b/Makefile.am +@@ -18,7 +18,7 @@ ACLOCAL_AMFLAGS=-I m4 + # use this for lots of warnings + #AM_CFLAGS = -D_GNU_SOURCE -std=c99 -m64 -Wall -W -pedantic -Wno-variadic-macros -fno-strict-aliasing + +-lcd4linux_LDFLAGS ="-Wl,--as-needed" ++lcd4linux_LDFLAGS = + lcd4linux_LDADD = @DRIVERS@ @PLUGINS@ @DRVLIBS@ @PLUGINLIBS@ + lcd4linux_DEPENDENCIES = @DRIVERS@ @PLUGINS@ + diff --git a/lcd4linux/patches/140-no_repnop_T6963.patch b/lcd4linux/patches/140-no_repnop_T6963.patch new file mode 100755 index 000000000..85be2c376 --- /dev/null +++ b/lcd4linux/patches/140-no_repnop_T6963.patch @@ -0,0 +1,22 @@ +--- a/drv_T6963.c ++++ b/drv_T6963.c +@@ -114,7 +114,9 @@ static void drv_T6_status1(void) + /* wait for STA0=1 and STA1=1 */ + n = 0; + do { ++#if 0 + rep_nop(); ++#endif + if (++n > 1000) { + debug("hang in status1"); + bug = 1; +@@ -150,7 +152,9 @@ static void drv_T6_status2(void) + /* wait for STA3=1 */ + n = 0; + do { ++#if 0 + rep_nop(); ++#endif + if (++n > 1000) { + debug("hang in status2"); + bug = 1; diff --git a/lcd4linux/patches/150-addlibmpdclient.patch b/lcd4linux/patches/150-addlibmpdclient.patch new file mode 100755 index 000000000..0e51f6760 --- /dev/null +++ b/lcd4linux/patches/150-addlibmpdclient.patch @@ -0,0 +1,2624 @@ +--- /dev/null ++++ b/libmpdclient.c +@@ -0,0 +1,1957 @@ ++/* libmpdclient ++ (c)2003-2006 by Warren Dukes (warren.dukes@gmail.com) ++ This project's homepage is: http://www.musicpd.org ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ ++ - Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ ++ - Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ ++ - Neither the name of the Music Player Daemon nor the names of its ++ contributors may be used to endorse or promote products derived from ++ this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR ++ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++*/ ++ ++#include "libmpdclient.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef WIN32 ++# include ++# include ++#else ++# include ++# include ++# include ++# include ++#endif ++ ++/* (bits+1)/3 (plus the sign character) */ ++#define INTLEN ((sizeof(int) * CHAR_BIT + 1) / 3 + 1) ++#define LONGLONGLEN ((sizeof(long long) * CHAR_BIT + 1) / 3 + 1) ++ ++#define COMMAND_LIST 1 ++#define COMMAND_LIST_OK 2 ++ ++#ifndef MPD_NO_GAI ++# ifdef AI_ADDRCONFIG ++# define MPD_HAVE_GAI ++# endif ++#endif ++ ++#ifndef MSG_DONTWAIT ++# define MSG_DONTWAIT 0 ++#endif ++ ++#ifdef WIN32 ++# define SELECT_ERRNO_IGNORE (errno == WSAEINTR || errno == WSAEINPROGRESS) ++# define SENDRECV_ERRNO_IGNORE SELECT_ERRNO_IGNORE ++#else ++# define SELECT_ERRNO_IGNORE (errno == EINTR) ++# define SENDRECV_ERRNO_IGNORE (errno == EINTR || errno == EAGAIN) ++# define winsock_dll_error(c) 0 ++# define closesocket(s) close(s) ++# define WSACleanup() do { /* nothing */ } while (0) ++#endif ++ ++#ifdef WIN32 ++static int winsock_dll_error(mpd_Connection * connection) ++{ ++ WSADATA wsaData; ++ if ((WSAStartup(MAKEWORD(2, 2), &wsaData)) != 0 || LOBYTE(wsaData.wVersion) != 2 || HIBYTE(wsaData.wVersion) != 2) { ++ strcpy(connection->errorStr, "Could not find usable WinSock DLL."); ++ connection->error = MPD_ERROR_SYSTEM; ++ return 1; ++ } ++ return 0; ++} ++ ++static int do_connect_fail(mpd_Connection * connection, const struct sockaddr *serv_addr, int addrlen) ++{ ++ int iMode = 1; /* 0 = blocking, else non-blocking */ ++ ioctlsocket(connection->sock, FIONBIO, (u_long FAR *) & iMode); ++ return (connect(connection->sock, serv_addr, addrlen) == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK); ++} ++#else /* !WIN32 (sane operating systems) */ ++static int do_connect_fail(mpd_Connection * connection, const struct sockaddr *serv_addr, int addrlen) ++{ ++ int flags = fcntl(connection->sock, F_GETFL, 0); ++ fcntl(connection->sock, F_SETFL, flags | O_NONBLOCK); ++ return (connect(connection->sock, serv_addr, addrlen) < 0 && errno != EINPROGRESS); ++} ++#endif /* !WIN32 */ ++ ++#ifdef MPD_HAVE_GAI ++static int mpd_connect(mpd_Connection * connection, const char *host, int port, float timeout) ++{ ++ int error; ++ char service[INTLEN + 1]; ++ struct addrinfo hints; ++ struct addrinfo *res = NULL; ++ struct addrinfo *addrinfo = NULL; ++ ++ /** ++ * Setup hints ++ */ ++ hints.ai_flags = AI_ADDRCONFIG; ++ hints.ai_family = PF_UNSPEC; ++ hints.ai_socktype = SOCK_STREAM; ++ hints.ai_protocol = IPPROTO_TCP; ++ hints.ai_addrlen = 0; ++ hints.ai_addr = NULL; ++ hints.ai_canonname = NULL; ++ hints.ai_next = NULL; ++ ++ snprintf(service, sizeof(service), "%i", port); ++ ++ error = getaddrinfo(host, service, &hints, &addrinfo); ++ ++ if (error) { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, "host \"%s\" not found: %s", host, gai_strerror(error)); ++ connection->error = MPD_ERROR_UNKHOST; ++ return -1; ++ } ++ ++ for (res = addrinfo; res; res = res->ai_next) { ++ /* create socket */ ++ connection->sock = socket(res->ai_family, SOCK_STREAM, res->ai_protocol); ++ if (connection->sock < 0) { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, "problems creating socket: %s", strerror(errno)); ++ connection->error = MPD_ERROR_SYSTEM; ++ freeaddrinfo(addrinfo); ++ return -1; ++ } ++ ++ mpd_setConnectionTimeout(connection, timeout); ++ ++ /* connect stuff */ ++ if (do_connect_fail(connection, res->ai_addr, res->ai_addrlen)) { ++ /* try the next address 
family */ ++ closesocket(connection->sock); ++ connection->sock = -1; ++ continue; ++ } ++ } ++ ++ freeaddrinfo(addrinfo); ++ ++ if (connection->sock < 0) { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, ++ "problems connecting to \"%s\" on port %i: %s", host, port, strerror(errno)); ++ connection->error = MPD_ERROR_CONNPORT; ++ ++ return -1; ++ } ++ ++ return 0; ++} ++#else /* !MPD_HAVE_GAI */ ++static int mpd_connect(mpd_Connection * connection, const char *host, int port, float timeout) ++{ ++ struct hostent *he; ++ struct sockaddr *dest; ++ int destlen; ++ struct sockaddr_in sin; ++ ++ if (!(he = gethostbyname(host))) { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, "host \"%s\" not found", host); ++ connection->error = MPD_ERROR_UNKHOST; ++ return -1; ++ } ++ ++ memset(&sin, 0, sizeof(struct sockaddr_in)); ++ /*dest.sin_family = he->h_addrtype; */ ++ sin.sin_family = AF_INET; ++ sin.sin_port = htons(port); ++ ++ switch (he->h_addrtype) { ++ case AF_INET: ++ memcpy((char *) &sin.sin_addr.s_addr, (char *) he->h_addr, he->h_length); ++ dest = (struct sockaddr *) &sin; ++ destlen = sizeof(struct sockaddr_in); ++ break; ++ default: ++ strcpy(connection->errorStr, "address type is not IPv4"); ++ connection->error = MPD_ERROR_SYSTEM; ++ return -1; ++ break; ++ } ++ ++ if ((connection->sock = socket(dest->sa_family, SOCK_STREAM, 0)) < 0) { ++ strcpy(connection->errorStr, "problems creating socket"); ++ connection->error = MPD_ERROR_SYSTEM; ++ return -1; ++ } ++ ++ mpd_setConnectionTimeout(connection, timeout); ++ ++ /* connect stuff */ ++ if (do_connect_fail(connection, dest, destlen)) { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, ++ "problems connecting to \"%s\" on port" " %i", host, port); ++ connection->error = MPD_ERROR_CONNPORT; ++ return -1; ++ } ++ ++ return 0; ++} ++#endif /* !MPD_HAVE_GAI */ ++ ++char *mpdTagItemKeys[MPD_TAG_NUM_OF_ITEM_TYPES] = { ++ "Artist", ++ "Album", ++ "Title", ++ "Track", ++ "Name", ++ "Genre", ++ "Date", ++ "Composer", ++ "Performer", ++ "Comment", ++ "Disc", ++ "Filename", ++ "Any" ++}; ++ ++static char *mpd_sanitizeArg(const char *arg) ++{ ++ size_t i; ++ char *ret; ++ register const char *c; ++ register char *rc; ++ ++ /* instead of counting in that loop above, just ++ * use a bit more memory and half running time ++ */ ++ ret = malloc(strlen(arg) * 2 + 1); ++ ++ c = arg; ++ rc = ret; ++ for (i = strlen(arg) + 1; i != 0; --i) { ++ if (*c == '"' || *c == '\\') ++ *rc++ = '\\'; ++ *(rc++) = *(c++); ++ } ++ ++ return ret; ++} ++ ++static mpd_ReturnElement *mpd_newReturnElement(const char *name, const char *value) ++{ ++ mpd_ReturnElement *ret = malloc(sizeof(mpd_ReturnElement)); ++ ++ ret->name = strdup(name); ++ ret->value = strdup(value); ++ ++ return ret; ++} ++ ++static void mpd_freeReturnElement(mpd_ReturnElement * re) ++{ ++ free(re->name); ++ free(re->value); ++ free(re); ++} ++ ++void mpd_setConnectionTimeout(mpd_Connection * connection, float timeout) ++{ ++ connection->timeout.tv_sec = (int) timeout; ++ connection->timeout.tv_usec = (int) (timeout * 1e6 - connection->timeout.tv_sec * 1000000 + 0.5); ++} ++ ++static int mpd_parseWelcome(mpd_Connection * connection, const char *host, int port, char *rt, char *output) ++{ ++ char *tmp; ++ char *test; ++ int i; ++ ++ if (strncmp(output, MPD_WELCOME_MESSAGE, strlen(MPD_WELCOME_MESSAGE))) { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, ++ "mpd not running on port %i on host \"%s\"", port, host); ++ connection->error = MPD_ERROR_NOTMPD; ++ return 1; ++ } 
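A minimal sketch of how the mpd_sanitizeArg helper above is meant to be used (the input string is hypothetical): it returns a malloc'd copy with every '"' and '\' prefixed by a backslash, so the result can safely be embedded in a double-quoted protocol argument, and the caller must free it:

    /* sketch only: escape a user-supplied name before quoting it */
    char *clean = mpd_sanitizeArg("a \"quoted\" name");
    /* clean now holds: a \"quoted\" name */
    free(clean);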
++ ++ tmp = &output[strlen(MPD_WELCOME_MESSAGE)]; ++ ++ for (i = 0; i < 3; i++) { ++ if (tmp) ++ connection->version[i] = strtol(tmp, &test, 10); ++ ++ if (!tmp || (test[0] != '.' && test[0] != '\0')) { ++ snprintf(connection->errorStr, ++ MPD_ERRORSTR_MAX_LENGTH, ++ "error parsing version number at " "\"%s\"", &output[strlen(MPD_WELCOME_MESSAGE)]); ++ connection->error = MPD_ERROR_NOTMPD; ++ return 1; ++ } ++ tmp = ++test; ++ } ++ ++ return 0; ++} ++ ++mpd_Connection *mpd_newConnection(const char *host, int port, float timeout) ++{ ++ int err; ++ char *rt; ++ char *output = NULL; ++ mpd_Connection *connection = malloc(sizeof(mpd_Connection)); ++ struct timeval tv; ++ fd_set fds; ++ strcpy(connection->buffer, ""); ++ connection->buflen = 0; ++ connection->bufstart = 0; ++ strcpy(connection->errorStr, ""); ++ connection->error = 0; ++ connection->doneProcessing = 0; ++ connection->commandList = 0; ++ connection->listOks = 0; ++ connection->doneListOk = 0; ++ connection->returnElement = NULL; ++ connection->request = NULL; ++ ++ if (winsock_dll_error(connection)) ++ return connection; ++ ++ if (mpd_connect(connection, host, port, timeout) < 0) ++ return connection; ++ ++ while (!(rt = strstr(connection->buffer, "\n"))) { ++ tv.tv_sec = connection->timeout.tv_sec; ++ tv.tv_usec = connection->timeout.tv_usec; ++ FD_ZERO(&fds); ++ FD_SET(connection->sock, &fds); ++ if ((err = select(connection->sock + 1, &fds, NULL, NULL, &tv)) == 1) { ++ int readed; ++ readed = recv(connection->sock, ++ &(connection->buffer[connection->buflen]), MPD_BUFFER_MAX_LENGTH - connection->buflen, 0); ++ if (readed <= 0) { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, ++ "problems getting a response from" " \"%s\" on port %i : %s", host, port, strerror(errno)); ++ connection->error = MPD_ERROR_NORESPONSE; ++ return connection; ++ } ++ connection->buflen += readed; ++ connection->buffer[connection->buflen] = '\0'; ++ } else if (err < 0) { ++ if (SELECT_ERRNO_IGNORE) ++ continue; ++ snprintf(connection->errorStr, ++ MPD_ERRORSTR_MAX_LENGTH, "problems connecting to \"%s\" on port" " %i", host, port); ++ connection->error = MPD_ERROR_CONNPORT; ++ return connection; ++ } else { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, ++ "timeout in attempting to get a response from" " \"%s\" on port %i", host, port); ++ connection->error = MPD_ERROR_NORESPONSE; ++ return connection; ++ } ++ } ++ ++ *rt = '\0'; ++ output = strdup(connection->buffer); ++ strcpy(connection->buffer, rt + 1); ++ connection->buflen = strlen(connection->buffer); ++ ++ if (mpd_parseWelcome(connection, host, port, rt, output) == 0) ++ connection->doneProcessing = 1; ++ ++ free(output); ++ ++ return connection; ++} ++ ++void mpd_clearError(mpd_Connection * connection) ++{ ++ connection->error = 0; ++ connection->errorStr[0] = '\0'; ++} ++ ++void mpd_closeConnection(mpd_Connection * connection) ++{ ++ closesocket(connection->sock); ++ if (connection->returnElement) ++ free(connection->returnElement); ++ if (connection->request) ++ free(connection->request); ++ free(connection); ++ WSACleanup(); ++} ++ ++static void mpd_executeCommand(mpd_Connection * connection, char *command) ++{ ++ int ret; ++ struct timeval tv; ++ fd_set fds; ++ char *commandPtr = command; ++ int commandLen = strlen(command); ++ ++ if (!connection->doneProcessing && !connection->commandList) { ++ strcpy(connection->errorStr, "not done processing current command"); ++ connection->error = 1; ++ return; ++ } ++ ++ mpd_clearError(connection); ++ ++ FD_ZERO(&fds); ++ 
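A usage sketch for the connection lifecycle defined above (host, port, and timeout values are hypothetical). Note that mpd_newConnection allocates and returns a connection object even when it fails, so the caller checks the error field and must still call mpd_closeConnection:

    /* sketch only, assuming MPD on localhost:6600 with a 10 s timeout */
    mpd_Connection *conn = mpd_newConnection("localhost", 6600, 10.0);
    if (conn->error) {
        fprintf(stderr, "mpd: %s\n", conn->errorStr);
        mpd_closeConnection(conn);  /* frees the object even after an error */
        return 1;
    }
    /* ... send commands, read results ... */
    mpd_closeConnection(conn);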
FD_SET(connection->sock, &fds); ++ tv.tv_sec = connection->timeout.tv_sec; ++ tv.tv_usec = connection->timeout.tv_usec; ++ ++ while ((ret = select(connection->sock + 1, NULL, &fds, NULL, &tv) == 1) || (ret == -1 && SELECT_ERRNO_IGNORE)) { ++ ret = send(connection->sock, commandPtr, commandLen, MSG_DONTWAIT); ++ if (ret <= 0) { ++ if (SENDRECV_ERRNO_IGNORE) ++ continue; ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, "problems giving command \"%s\"", command); ++ connection->error = MPD_ERROR_SENDING; ++ return; ++ } else { ++ commandPtr += ret; ++ commandLen -= ret; ++ } ++ ++ if (commandLen <= 0) ++ break; ++ } ++ ++ if (commandLen > 0) { ++ perror(""); ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, "timeout sending command \"%s\"", command); ++ connection->error = MPD_ERROR_TIMEOUT; ++ return; ++ } ++ ++ if (!connection->commandList) ++ connection->doneProcessing = 0; ++ else if (connection->commandList == COMMAND_LIST_OK) { ++ connection->listOks++; ++ } ++} ++ ++static void mpd_getNextReturnElement(mpd_Connection * connection) ++{ ++ char *output = NULL; ++ char *rt = NULL; ++ char *name = NULL; ++ char *value = NULL; ++ fd_set fds; ++ struct timeval tv; ++ char *tok = NULL; ++ int readed; ++ char *bufferCheck = NULL; ++ int err; ++ int pos; ++ ++ if (connection->returnElement) ++ mpd_freeReturnElement(connection->returnElement); ++ connection->returnElement = NULL; ++ ++ if (connection->doneProcessing || (connection->listOks && connection->doneListOk)) { ++ strcpy(connection->errorStr, "already done processing current command"); ++ connection->error = 1; ++ return; ++ } ++ ++ bufferCheck = connection->buffer + connection->bufstart; ++ while (connection->bufstart >= connection->buflen || !(rt = strchr(bufferCheck, '\n'))) { ++ if (connection->buflen >= MPD_BUFFER_MAX_LENGTH) { ++ memmove(connection->buffer, ++ connection->buffer + connection->bufstart, connection->buflen - connection->bufstart + 1); ++ connection->buflen -= connection->bufstart; ++ connection->bufstart = 0; ++ } ++ if (connection->buflen >= MPD_BUFFER_MAX_LENGTH) { ++ strcpy(connection->errorStr, "buffer overrun"); ++ connection->error = MPD_ERROR_BUFFEROVERRUN; ++ connection->doneProcessing = 1; ++ connection->doneListOk = 0; ++ return; ++ } ++ bufferCheck = connection->buffer + connection->buflen; ++ tv.tv_sec = connection->timeout.tv_sec; ++ tv.tv_usec = connection->timeout.tv_usec; ++ FD_ZERO(&fds); ++ FD_SET(connection->sock, &fds); ++ if ((err = select(connection->sock + 1, &fds, NULL, NULL, &tv) == 1)) { ++ readed = recv(connection->sock, ++ connection->buffer + connection->buflen, ++ MPD_BUFFER_MAX_LENGTH - connection->buflen, MSG_DONTWAIT); ++ if (readed < 0 && SENDRECV_ERRNO_IGNORE) { ++ continue; ++ } ++ if (readed <= 0) { ++ strcpy(connection->errorStr, "connection" " closed"); ++ connection->error = MPD_ERROR_CONNCLOSED; ++ connection->doneProcessing = 1; ++ connection->doneListOk = 0; ++ return; ++ } ++ connection->buflen += readed; ++ connection->buffer[connection->buflen] = '\0'; ++ } else if (err < 0 && SELECT_ERRNO_IGNORE) ++ continue; ++ else { ++ strcpy(connection->errorStr, "connection timeout"); ++ connection->error = MPD_ERROR_TIMEOUT; ++ connection->doneProcessing = 1; ++ connection->doneListOk = 0; ++ return; ++ } ++ } ++ ++ *rt = '\0'; ++ output = connection->buffer + connection->bufstart; ++ connection->bufstart = rt - connection->buffer + 1; ++ ++ if (strcmp(output, "OK") == 0) { ++ if (connection->listOks > 0) { ++ strcpy(connection->errorStr, "expected more 
list_OK's"); ++ connection->error = 1; ++ } ++ connection->listOks = 0; ++ connection->doneProcessing = 1; ++ connection->doneListOk = 0; ++ return; ++ } ++ ++ if (strcmp(output, "list_OK") == 0) { ++ if (!connection->listOks) { ++ strcpy(connection->errorStr, "got an unexpected list_OK"); ++ connection->error = 1; ++ } else { ++ connection->doneListOk = 1; ++ connection->listOks--; ++ } ++ return; ++ } ++ ++ if (strncmp(output, "ACK", strlen("ACK")) == 0) { ++ char *test; ++ char *needle; ++ int val; ++ ++ strcpy(connection->errorStr, output); ++ connection->error = MPD_ERROR_ACK; ++ connection->errorCode = MPD_ACK_ERROR_UNK; ++ connection->errorAt = MPD_ERROR_AT_UNK; ++ connection->doneProcessing = 1; ++ connection->doneListOk = 0; ++ ++ needle = strchr(output, '['); ++ if (!needle) ++ return; ++ val = strtol(needle + 1, &test, 10); ++ if (*test != '@') ++ return; ++ connection->errorCode = val; ++ val = strtol(test + 1, &test, 10); ++ if (*test != ']') ++ return; ++ connection->errorAt = val; ++ return; ++ } ++ ++ tok = strchr(output, ':'); ++ if (!tok) ++ return; ++ pos = tok - output; ++ value = ++tok; ++ name = output; ++ name[pos] = '\0'; ++ ++ if (value[0] == ' ') { ++ connection->returnElement = mpd_newReturnElement(name, &(value[1])); ++ } else { ++ snprintf(connection->errorStr, MPD_ERRORSTR_MAX_LENGTH, "error parsing: %s:%s", name, value); ++ connection->error = 1; ++ } ++} ++ ++void mpd_finishCommand(mpd_Connection * connection) ++{ ++ while (!connection->doneProcessing) { ++ if (connection->doneListOk) ++ connection->doneListOk = 0; ++ mpd_getNextReturnElement(connection); ++ } ++} ++ ++static void mpd_finishListOkCommand(mpd_Connection * connection) ++{ ++ while (!connection->doneProcessing && connection->listOks && !connection->doneListOk) { ++ mpd_getNextReturnElement(connection); ++ } ++} ++ ++int mpd_nextListOkCommand(mpd_Connection * connection) ++{ ++ mpd_finishListOkCommand(connection); ++ if (!connection->doneProcessing) ++ connection->doneListOk = 0; ++ if (connection->listOks == 0 || connection->doneProcessing) ++ return -1; ++ return 0; ++} ++ ++void mpd_sendStatusCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "status\n"); ++} ++ ++mpd_Status *mpd_getStatus(mpd_Connection * connection) ++{ ++ mpd_Status *status; ++ ++ /*mpd_executeCommand(connection,"status\n"); ++ ++ if(connection->error) return NULL; */ ++ ++ if (connection->doneProcessing || (connection->listOks && connection->doneListOk)) { ++ return NULL; ++ } ++ ++ if (!connection->returnElement) ++ mpd_getNextReturnElement(connection); ++ ++ status = malloc(sizeof(mpd_Status)); ++ status->volume = -1; ++ status->repeat = 0; ++ status->random = 0; ++ status->playlist = -1; ++ status->playlistLength = -1; ++ status->state = -1; ++ status->song = 0; ++ status->songid = 0; ++ status->elapsedTime = 0; ++ status->totalTime = 0; ++ status->bitRate = 0; ++ status->sampleRate = 0; ++ status->bits = 0; ++ status->channels = 0; ++ status->crossfade = -1; ++ status->error = NULL; ++ status->updatingDb = 0; ++ ++ if (connection->error) { ++ free(status); ++ return NULL; ++ } ++ while (connection->returnElement) { ++ mpd_ReturnElement *re = connection->returnElement; ++ if (strcmp(re->name, "volume") == 0) { ++ status->volume = atoi(re->value); ++ } else if (strcmp(re->name, "repeat") == 0) { ++ status->repeat = atoi(re->value); ++ } else if (strcmp(re->name, "random") == 0) { ++ status->random = atoi(re->value); ++ } else if (strcmp(re->name, "playlist") == 0) { ++ status->playlist = 
strtol(re->value, NULL, 10); ++ } else if (strcmp(re->name, "playlistlength") == 0) { ++ status->playlistLength = atoi(re->value); ++ } else if (strcmp(re->name, "bitrate") == 0) { ++ status->bitRate = atoi(re->value); ++ } else if (strcmp(re->name, "state") == 0) { ++ if (strcmp(re->value, "play") == 0) { ++ status->state = MPD_STATUS_STATE_PLAY; ++ } else if (strcmp(re->value, "stop") == 0) { ++ status->state = MPD_STATUS_STATE_STOP; ++ } else if (strcmp(re->value, "pause") == 0) { ++ status->state = MPD_STATUS_STATE_PAUSE; ++ } else { ++ status->state = MPD_STATUS_STATE_UNKNOWN; ++ } ++ } else if (strcmp(re->name, "song") == 0) { ++ status->song = atoi(re->value); ++ } else if (strcmp(re->name, "songid") == 0) { ++ status->songid = atoi(re->value); ++ } else if (strcmp(re->name, "time") == 0) { ++ char *tok = strchr(re->value, ':'); ++ /* the second strchr below is a safety check */ ++ if (tok && (strchr(tok, 0) > (tok + 1))) { ++ /* atoi stops at the first non-[0-9] char: */ ++ status->elapsedTime = atoi(re->value); ++ status->totalTime = atoi(tok + 1); ++ } ++ } else if (strcmp(re->name, "error") == 0) { ++ status->error = strdup(re->value); ++ } else if (strcmp(re->name, "xfade") == 0) { ++ status->crossfade = atoi(re->value); ++ } else if (strcmp(re->name, "updating_db") == 0) { ++ status->updatingDb = atoi(re->value); ++ } else if (strcmp(re->name, "audio") == 0) { ++ char *tok = strchr(re->value, ':'); ++ if (tok && (strchr(tok, 0) > (tok + 1))) { ++ status->sampleRate = atoi(re->value); ++ status->bits = atoi(++tok); ++ tok = strchr(tok, ':'); ++ if (tok && (strchr(tok, 0) > (tok + 1))) ++ status->channels = atoi(tok + 1); ++ } ++ } ++ ++ mpd_getNextReturnElement(connection); ++ if (connection->error) { ++ free(status); ++ return NULL; ++ } ++ } ++ ++ if (connection->error) { ++ free(status); ++ return NULL; ++ } else if (status->state < 0) { ++ strcpy(connection->errorStr, "state not found"); ++ connection->error = 1; ++ free(status); ++ return NULL; ++ } ++ ++ return status; ++} ++ ++void mpd_freeStatus(mpd_Status * status) ++{ ++ if (status->error) ++ free(status->error); ++ free(status); ++} ++ ++void mpd_sendStatsCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "stats\n"); ++} ++ ++mpd_Stats *mpd_getStats(mpd_Connection * connection) ++{ ++ mpd_Stats *stats; ++ ++ /*mpd_executeCommand(connection,"stats\n"); ++ ++ if(connection->error) return NULL; */ ++ ++ if (connection->doneProcessing || (connection->listOks && connection->doneListOk)) { ++ return NULL; ++ } ++ ++ if (!connection->returnElement) ++ mpd_getNextReturnElement(connection); ++ ++ stats = malloc(sizeof(mpd_Stats)); ++ stats->numberOfArtists = 0; ++ stats->numberOfAlbums = 0; ++ stats->numberOfSongs = 0; ++ stats->uptime = 0; ++ stats->dbUpdateTime = 0; ++ stats->playTime = 0; ++ stats->dbPlayTime = 0; ++ ++ if (connection->error) { ++ free(stats); ++ return NULL; ++ } ++ while (connection->returnElement) { ++ mpd_ReturnElement *re = connection->returnElement; ++ if (strcmp(re->name, "artists") == 0) { ++ stats->numberOfArtists = atoi(re->value); ++ } else if (strcmp(re->name, "albums") == 0) { ++ stats->numberOfAlbums = atoi(re->value); ++ } else if (strcmp(re->name, "songs") == 0) { ++ stats->numberOfSongs = atoi(re->value); ++ } else if (strcmp(re->name, "uptime") == 0) { ++ stats->uptime = strtol(re->value, NULL, 10); ++ } else if (strcmp(re->name, "db_update") == 0) { ++ stats->dbUpdateTime = strtol(re->value, NULL, 10); ++ } else if (strcmp(re->name, "playtime") == 0) { ++ 
stats->playTime = strtol(re->value, NULL, 10); ++ } else if (strcmp(re->name, "db_playtime") == 0) { ++ stats->dbPlayTime = strtol(re->value, NULL, 10); ++ } ++ ++ mpd_getNextReturnElement(connection); ++ if (connection->error) { ++ free(stats); ++ return NULL; ++ } ++ } ++ ++ if (connection->error) { ++ free(stats); ++ return NULL; ++ } ++ ++ return stats; ++} ++ ++void mpd_freeStats(mpd_Stats * stats) ++{ ++ free(stats); ++} ++ ++mpd_SearchStats *mpd_getSearchStats(mpd_Connection * connection) ++{ ++ mpd_SearchStats *stats; ++ mpd_ReturnElement *re; ++ ++ if (connection->doneProcessing || (connection->listOks && connection->doneListOk)) { ++ return NULL; ++ } ++ ++ if (!connection->returnElement) ++ mpd_getNextReturnElement(connection); ++ ++ if (connection->error) ++ return NULL; ++ ++ stats = malloc(sizeof(mpd_SearchStats)); ++ stats->numberOfSongs = 0; ++ stats->playTime = 0; ++ ++ while (connection->returnElement) { ++ re = connection->returnElement; ++ ++ if (strcmp(re->name, "songs") == 0) { ++ stats->numberOfSongs = atoi(re->value); ++ } else if (strcmp(re->name, "playtime") == 0) { ++ stats->playTime = strtol(re->value, NULL, 10); ++ } ++ ++ mpd_getNextReturnElement(connection); ++ if (connection->error) { ++ free(stats); ++ return NULL; ++ } ++ } ++ ++ if (connection->error) { ++ free(stats); ++ return NULL; ++ } ++ ++ return stats; ++} ++ ++void mpd_freeSearchStats(mpd_SearchStats * stats) ++{ ++ free(stats); ++} ++ ++static void mpd_initSong(mpd_Song * song) ++{ ++ song->file = NULL; ++ song->artist = NULL; ++ song->album = NULL; ++ song->track = NULL; ++ song->title = NULL; ++ song->name = NULL; ++ song->date = NULL; ++ /* added by Qball */ ++ song->genre = NULL; ++ song->composer = NULL; ++ song->performer = NULL; ++ song->disc = NULL; ++ song->comment = NULL; ++ ++ song->time = MPD_SONG_NO_TIME; ++ song->pos = MPD_SONG_NO_NUM; ++ song->id = MPD_SONG_NO_ID; ++} ++ ++static void mpd_finishSong(mpd_Song * song) ++{ ++ if (song->file) ++ free(song->file); ++ if (song->artist) ++ free(song->artist); ++ if (song->album) ++ free(song->album); ++ if (song->title) ++ free(song->title); ++ if (song->track) ++ free(song->track); ++ if (song->name) ++ free(song->name); ++ if (song->date) ++ free(song->date); ++ if (song->genre) ++ free(song->genre); ++ if (song->composer) ++ free(song->composer); ++ if (song->disc) ++ free(song->disc); ++ if (song->comment) ++ free(song->comment); ++} ++ ++mpd_Song *mpd_newSong(void) ++{ ++ mpd_Song *ret = malloc(sizeof(mpd_Song)); ++ ++ mpd_initSong(ret); ++ ++ return ret; ++} ++ ++void mpd_freeSong(mpd_Song * song) ++{ ++ mpd_finishSong(song); ++ free(song); ++} ++ ++mpd_Song *mpd_songDup(mpd_Song * song) ++{ ++ mpd_Song *ret = mpd_newSong(); ++ ++ if (song->file) ++ ret->file = strdup(song->file); ++ if (song->artist) ++ ret->artist = strdup(song->artist); ++ if (song->album) ++ ret->album = strdup(song->album); ++ if (song->title) ++ ret->title = strdup(song->title); ++ if (song->track) ++ ret->track = strdup(song->track); ++ if (song->name) ++ ret->name = strdup(song->name); ++ if (song->date) ++ ret->date = strdup(song->date); ++ if (song->genre) ++ ret->genre = strdup(song->genre); ++ if (song->composer) ++ ret->composer = strdup(song->composer); ++ if (song->disc) ++ ret->disc = strdup(song->disc); ++ if (song->comment) ++ ret->comment = strdup(song->comment); ++ ret->time = song->time; ++ ret->pos = song->pos; ++ ret->id = song->id; ++ ++ return ret; ++} ++ ++static void mpd_initDirectory(mpd_Directory * directory) ++{ ++ directory->path = 
NULL; ++} ++ ++static void mpd_finishDirectory(mpd_Directory * directory) ++{ ++ if (directory->path) ++ free(directory->path); ++} ++ ++mpd_Directory *mpd_newDirectory(void) ++{ ++ mpd_Directory *directory = malloc(sizeof(mpd_Directory));; ++ ++ mpd_initDirectory(directory); ++ ++ return directory; ++} ++ ++void mpd_freeDirectory(mpd_Directory * directory) ++{ ++ mpd_finishDirectory(directory); ++ ++ free(directory); ++} ++ ++mpd_Directory *mpd_directoryDup(mpd_Directory * directory) ++{ ++ mpd_Directory *ret = mpd_newDirectory(); ++ ++ if (directory->path) ++ ret->path = strdup(directory->path); ++ ++ return ret; ++} ++ ++static void mpd_initPlaylistFile(mpd_PlaylistFile * playlist) ++{ ++ playlist->path = NULL; ++} ++ ++static void mpd_finishPlaylistFile(mpd_PlaylistFile * playlist) ++{ ++ if (playlist->path) ++ free(playlist->path); ++} ++ ++mpd_PlaylistFile *mpd_newPlaylistFile(void) ++{ ++ mpd_PlaylistFile *playlist = malloc(sizeof(mpd_PlaylistFile)); ++ ++ mpd_initPlaylistFile(playlist); ++ ++ return playlist; ++} ++ ++void mpd_freePlaylistFile(mpd_PlaylistFile * playlist) ++{ ++ mpd_finishPlaylistFile(playlist); ++ free(playlist); ++} ++ ++mpd_PlaylistFile *mpd_playlistFileDup(mpd_PlaylistFile * playlist) ++{ ++ mpd_PlaylistFile *ret = mpd_newPlaylistFile(); ++ ++ if (playlist->path) ++ ret->path = strdup(playlist->path); ++ ++ return ret; ++} ++ ++static void mpd_initInfoEntity(mpd_InfoEntity * entity) ++{ ++ entity->info.directory = NULL; ++} ++ ++static void mpd_finishInfoEntity(mpd_InfoEntity * entity) ++{ ++ if (entity->info.directory) { ++ if (entity->type == MPD_INFO_ENTITY_TYPE_DIRECTORY) { ++ mpd_freeDirectory(entity->info.directory); ++ } else if (entity->type == MPD_INFO_ENTITY_TYPE_SONG) { ++ mpd_freeSong(entity->info.song); ++ } else if (entity->type == MPD_INFO_ENTITY_TYPE_PLAYLISTFILE) { ++ mpd_freePlaylistFile(entity->info.playlistFile); ++ } ++ } ++} ++ ++mpd_InfoEntity *mpd_newInfoEntity(void) ++{ ++ mpd_InfoEntity *entity = malloc(sizeof(mpd_InfoEntity)); ++ ++ mpd_initInfoEntity(entity); ++ ++ return entity; ++} ++ ++void mpd_freeInfoEntity(mpd_InfoEntity * entity) ++{ ++ mpd_finishInfoEntity(entity); ++ free(entity); ++} ++ ++static void mpd_sendInfoCommand(mpd_Connection * connection, char *command) ++{ ++ mpd_executeCommand(connection, command); ++} ++ ++mpd_InfoEntity *mpd_getNextInfoEntity(mpd_Connection * connection) ++{ ++ mpd_InfoEntity *entity = NULL; ++ ++ if (connection->doneProcessing || (connection->listOks && connection->doneListOk)) { ++ return NULL; ++ } ++ ++ if (!connection->returnElement) ++ mpd_getNextReturnElement(connection); ++ ++ if (connection->returnElement) { ++ if (strcmp(connection->returnElement->name, "file") == 0) { ++ entity = mpd_newInfoEntity(); ++ entity->type = MPD_INFO_ENTITY_TYPE_SONG; ++ entity->info.song = mpd_newSong(); ++ entity->info.song->file = strdup(connection->returnElement->value); ++ } else if (strcmp(connection->returnElement->name, "directory") == 0) { ++ entity = mpd_newInfoEntity(); ++ entity->type = MPD_INFO_ENTITY_TYPE_DIRECTORY; ++ entity->info.directory = mpd_newDirectory(); ++ entity->info.directory->path = strdup(connection->returnElement->value); ++ } else if (strcmp(connection->returnElement->name, "playlist") == 0) { ++ entity = mpd_newInfoEntity(); ++ entity->type = MPD_INFO_ENTITY_TYPE_PLAYLISTFILE; ++ entity->info.playlistFile = mpd_newPlaylistFile(); ++ entity->info.playlistFile->path = strdup(connection->returnElement->value); ++ } else if (strcmp(connection->returnElement->name, "cpos") == 0) 
{ ++ entity = mpd_newInfoEntity(); ++ entity->type = MPD_INFO_ENTITY_TYPE_SONG; ++ entity->info.song = mpd_newSong(); ++ entity->info.song->pos = atoi(connection->returnElement->value); ++ } else { ++ connection->error = 1; ++ strcpy(connection->errorStr, "problem parsing song info"); ++ return NULL; ++ } ++ } else ++ return NULL; ++ ++ mpd_getNextReturnElement(connection); ++ while (connection->returnElement) { ++ mpd_ReturnElement *re = connection->returnElement; ++ ++ if (strcmp(re->name, "file") == 0) ++ return entity; ++ else if (strcmp(re->name, "directory") == 0) ++ return entity; ++ else if (strcmp(re->name, "playlist") == 0) ++ return entity; ++ else if (strcmp(re->name, "cpos") == 0) ++ return entity; ++ ++ if (entity->type == MPD_INFO_ENTITY_TYPE_SONG && strlen(re->value)) { ++ if (!entity->info.song->artist && strcmp(re->name, "Artist") == 0) { ++ entity->info.song->artist = strdup(re->value); ++ } else if (!entity->info.song->album && strcmp(re->name, "Album") == 0) { ++ entity->info.song->album = strdup(re->value); ++ } else if (!entity->info.song->title && strcmp(re->name, "Title") == 0) { ++ entity->info.song->title = strdup(re->value); ++ } else if (!entity->info.song->track && strcmp(re->name, "Track") == 0) { ++ entity->info.song->track = strdup(re->value); ++ } else if (!entity->info.song->name && strcmp(re->name, "Name") == 0) { ++ entity->info.song->name = strdup(re->value); ++ } else if (entity->info.song->time == MPD_SONG_NO_TIME && strcmp(re->name, "Time") == 0) { ++ entity->info.song->time = atoi(re->value); ++ } else if (entity->info.song->pos == MPD_SONG_NO_NUM && strcmp(re->name, "Pos") == 0) { ++ entity->info.song->pos = atoi(re->value); ++ } else if (entity->info.song->id == MPD_SONG_NO_ID && strcmp(re->name, "Id") == 0) { ++ entity->info.song->id = atoi(re->value); ++ } else if (!entity->info.song->date && strcmp(re->name, "Date") == 0) { ++ entity->info.song->date = strdup(re->value); ++ } else if (!entity->info.song->genre && strcmp(re->name, "Genre") == 0) { ++ entity->info.song->genre = strdup(re->value); ++ } else if (!entity->info.song->composer && strcmp(re->name, "Composer") == 0) { ++ entity->info.song->composer = strdup(re->value); ++ } else if (!entity->info.song->performer && strcmp(re->name, "Performer") == 0) { ++ entity->info.song->performer = strdup(re->value); ++ } else if (!entity->info.song->disc && strcmp(re->name, "Disc") == 0) { ++ entity->info.song->disc = strdup(re->value); ++ } else if (!entity->info.song->comment && strcmp(re->name, "Comment") == 0) { ++ entity->info.song->comment = strdup(re->value); ++ } ++ } else if (entity->type == MPD_INFO_ENTITY_TYPE_DIRECTORY) { ++ } else if (entity->type == MPD_INFO_ENTITY_TYPE_PLAYLISTFILE) { ++ } ++ ++ mpd_getNextReturnElement(connection); ++ } ++ ++ return entity; ++} ++ ++static char *mpd_getNextReturnElementNamed(mpd_Connection * connection, const char *name) ++{ ++ if (connection->doneProcessing || (connection->listOks && connection->doneListOk)) { ++ return NULL; ++ } ++ ++ mpd_getNextReturnElement(connection); ++ while (connection->returnElement) { ++ mpd_ReturnElement *re = connection->returnElement; ++ ++ if (strcmp(re->name, name) == 0) ++ return strdup(re->value); ++ mpd_getNextReturnElement(connection); ++ } ++ ++ return NULL; ++} ++ ++char *mpd_getNextTag(mpd_Connection * connection, int type) ++{ ++ if (type < 0 || type >= MPD_TAG_NUM_OF_ITEM_TYPES || type == MPD_TAG_ITEM_ANY) ++ return NULL; ++ if (type == MPD_TAG_ITEM_FILENAME) ++ return 
mpd_getNextReturnElementNamed(connection, "file"); ++ return mpd_getNextReturnElementNamed(connection, mpdTagItemKeys[type]); ++} ++ ++char *mpd_getNextArtist(mpd_Connection * connection) ++{ ++ return mpd_getNextReturnElementNamed(connection, "Artist"); ++} ++ ++char *mpd_getNextAlbum(mpd_Connection * connection) ++{ ++ return mpd_getNextReturnElementNamed(connection, "Album"); ++} ++ ++void mpd_sendPlaylistInfoCommand(mpd_Connection * connection, int songPos) ++{ ++ int len = strlen("playlistinfo") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "playlistinfo \"%i\"\n", songPos); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendPlaylistIdCommand(mpd_Connection * connection, int id) ++{ ++ int len = strlen("playlistid") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "playlistid \"%i\"\n", id); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendPlChangesCommand(mpd_Connection * connection, long long playlist) ++{ ++ int len = strlen("plchanges") + 2 + LONGLONGLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "plchanges \"%lld\"\n", playlist); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendPlChangesPosIdCommand(mpd_Connection * connection, long long playlist) ++{ ++ int len = strlen("plchangesposid") + 2 + LONGLONGLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "plchangesposid \"%lld\"\n", playlist); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendListallCommand(mpd_Connection * connection, const char *dir) ++{ ++ char *sDir = mpd_sanitizeArg(dir); ++ int len = strlen("listall") + 2 + strlen(sDir) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "listall \"%s\"\n", sDir); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++ free(sDir); ++} ++ ++void mpd_sendListallInfoCommand(mpd_Connection * connection, const char *dir) ++{ ++ char *sDir = mpd_sanitizeArg(dir); ++ int len = strlen("listallinfo") + 2 + strlen(sDir) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "listallinfo \"%s\"\n", sDir); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++ free(sDir); ++} ++ ++void mpd_sendLsInfoCommand(mpd_Connection * connection, const char *dir) ++{ ++ char *sDir = mpd_sanitizeArg(dir); ++ int len = strlen("lsinfo") + 2 + strlen(sDir) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "lsinfo \"%s\"\n", sDir); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++ free(sDir); ++} ++ ++void mpd_sendCurrentSongCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "currentsong\n"); ++} ++ ++void mpd_sendSearchCommand(mpd_Connection * connection, int table, const char *str) ++{ ++ mpd_startSearch(connection, 0); ++ mpd_addConstraintSearch(connection, table, str); ++ mpd_commitSearch(connection); ++} ++ ++void mpd_sendFindCommand(mpd_Connection * connection, int table, const char *str) ++{ ++ mpd_startSearch(connection, 1); ++ mpd_addConstraintSearch(connection, table, str); ++ mpd_commitSearch(connection); ++} ++ ++void mpd_sendListCommand(mpd_Connection * connection, int table, const char *arg1) ++{ ++ char st[10]; ++ int len; ++ char *string; ++ if (table == MPD_TABLE_ARTIST) ++ strcpy(st, "artist"); ++ else if (table == MPD_TABLE_ALBUM) ++ strcpy(st, "album"); ++ else { ++ connection->error = 1; ++ strcpy(connection->errorStr, "unknown table for list"); ++ return; ++ } ++ if (arg1) { ++ char 
*sanitArg1 = mpd_sanitizeArg(arg1); ++ len = strlen("list") + 1 + strlen(sanitArg1) + 2 + strlen(st) + 3; ++ string = malloc(len); ++ snprintf(string, len, "list %s \"%s\"\n", st, sanitArg1); ++ free(sanitArg1); ++ } else { ++ len = strlen("list") + 1 + strlen(st) + 2; ++ string = malloc(len); ++ snprintf(string, len, "list %s\n", st); ++ } ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendAddCommand(mpd_Connection * connection, const char *file) ++{ ++ char *sFile = mpd_sanitizeArg(file); ++ int len = strlen("add") + 2 + strlen(sFile) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "add \"%s\"\n", sFile); ++ mpd_executeCommand(connection, string); ++ free(string); ++ free(sFile); ++} ++ ++int mpd_sendAddIdCommand(mpd_Connection * connection, const char *file) ++{ ++ int retval = -1; ++ char *sFile = mpd_sanitizeArg(file); ++ int len = strlen("addid") + 2 + strlen(sFile) + 3; ++ char *string = malloc(len); ++ ++ snprintf(string, len, "addid \"%s\"\n", sFile); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++ free(sFile); ++ ++ string = mpd_getNextReturnElementNamed(connection, "Id"); ++ if (string) { ++ retval = atoi(string); ++ free(string); ++ } ++ ++ return retval; ++} ++ ++void mpd_sendDeleteCommand(mpd_Connection * connection, int songPos) ++{ ++ int len = strlen("delete") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "delete \"%i\"\n", songPos); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendDeleteIdCommand(mpd_Connection * connection, int id) ++{ ++ int len = strlen("deleteid") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "deleteid \"%i\"\n", id); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendSaveCommand(mpd_Connection * connection, const char *name) ++{ ++ char *sName = mpd_sanitizeArg(name); ++ int len = strlen("save") + 2 + strlen(sName) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "save \"%s\"\n", sName); ++ mpd_executeCommand(connection, string); ++ free(string); ++ free(sName); ++} ++ ++void mpd_sendLoadCommand(mpd_Connection * connection, const char *name) ++{ ++ char *sName = mpd_sanitizeArg(name); ++ int len = strlen("load") + 2 + strlen(sName) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "load \"%s\"\n", sName); ++ mpd_executeCommand(connection, string); ++ free(string); ++ free(sName); ++} ++ ++void mpd_sendRmCommand(mpd_Connection * connection, const char *name) ++{ ++ char *sName = mpd_sanitizeArg(name); ++ int len = strlen("rm") + 2 + strlen(sName) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "rm \"%s\"\n", sName); ++ mpd_executeCommand(connection, string); ++ free(string); ++ free(sName); ++} ++ ++void mpd_sendRenameCommand(mpd_Connection * connection, const char *from, const char *to) ++{ ++ char *sFrom = mpd_sanitizeArg(from); ++ char *sTo = mpd_sanitizeArg(to); ++ int len = strlen("rename") + 2 + strlen(sFrom) + 3 + strlen(sTo) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "rename \"%s\" \"%s\"\n", sFrom, sTo); ++ mpd_executeCommand(connection, string); ++ free(string); ++ free(sFrom); ++ free(sTo); ++} ++ ++void mpd_sendShuffleCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "shuffle\n"); ++} ++ ++void mpd_sendClearCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "clear\n"); ++} ++ ++void mpd_sendPlayCommand(mpd_Connection * connection, int songPos) ++{ ++ int len 
= strlen("play") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "play \"%i\"\n", songPos); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendPlayIdCommand(mpd_Connection * connection, int id) ++{ ++ int len = strlen("playid") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "playid \"%i\"\n", id); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendStopCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "stop\n"); ++} ++ ++void mpd_sendPauseCommand(mpd_Connection * connection, int pauseMode) ++{ ++ int len = strlen("pause") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "pause \"%i\"\n", pauseMode); ++ mpd_executeCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendNextCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "next\n"); ++} ++ ++void mpd_sendMoveCommand(mpd_Connection * connection, int from, int to) ++{ ++ int len = strlen("move") + 2 + INTLEN + 3 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "move \"%i\" \"%i\"\n", from, to); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendMoveIdCommand(mpd_Connection * connection, int id, int to) ++{ ++ int len = strlen("moveid") + 2 + INTLEN + 3 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "moveid \"%i\" \"%i\"\n", id, to); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendSwapCommand(mpd_Connection * connection, int song1, int song2) ++{ ++ int len = strlen("swap") + 2 + INTLEN + 3 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "swap \"%i\" \"%i\"\n", song1, song2); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendSwapIdCommand(mpd_Connection * connection, int id1, int id2) ++{ ++ int len = strlen("swapid") + 2 + INTLEN + 3 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "swapid \"%i\" \"%i\"\n", id1, id2); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendSeekCommand(mpd_Connection * connection, int song, int time) ++{ ++ int len = strlen("seek") + 2 + INTLEN + 3 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "seek \"%i\" \"%i\"\n", song, time); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendSeekIdCommand(mpd_Connection * connection, int id, int time) ++{ ++ int len = strlen("seekid") + 2 + INTLEN + 3 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "seekid \"%i\" \"%i\"\n", id, time); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendUpdateCommand(mpd_Connection * connection, char *path) ++{ ++ char *sPath = mpd_sanitizeArg(path); ++ int len = strlen("update") + 2 + strlen(sPath) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "update \"%s\"\n", sPath); ++ mpd_sendInfoCommand(connection, string); ++ free(string); ++ free(sPath); ++} ++ ++int mpd_getUpdateId(mpd_Connection * connection) ++{ ++ char *jobid; ++ int ret = 0; ++ ++ jobid = mpd_getNextReturnElementNamed(connection, "updating_db"); ++ if (jobid) { ++ ret = atoi(jobid); ++ free(jobid); ++ } ++ ++ return ret; ++} ++ ++void mpd_sendPrevCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "previous\n"); ++} ++ ++void mpd_sendRepeatCommand(mpd_Connection * connection, int repeatMode) ++{ ++ int len = 
strlen("repeat") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "repeat \"%i\"\n", repeatMode); ++ mpd_executeCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendRandomCommand(mpd_Connection * connection, int randomMode) ++{ ++ int len = strlen("random") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "random \"%i\"\n", randomMode); ++ mpd_executeCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendSetvolCommand(mpd_Connection * connection, int volumeChange) ++{ ++ int len = strlen("setvol") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "setvol \"%i\"\n", volumeChange); ++ mpd_executeCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendVolumeCommand(mpd_Connection * connection, int volumeChange) ++{ ++ int len = strlen("volume") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "volume \"%i\"\n", volumeChange); ++ mpd_executeCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendCrossfadeCommand(mpd_Connection * connection, int seconds) ++{ ++ int len = strlen("crossfade") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "crossfade \"%i\"\n", seconds); ++ mpd_executeCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendPasswordCommand(mpd_Connection * connection, const char *pass) ++{ ++ char *sPass = mpd_sanitizeArg(pass); ++ int len = strlen("password") + 2 + strlen(sPass) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "password \"%s\"\n", sPass); ++ mpd_executeCommand(connection, string); ++ free(string); ++ free(sPass); ++} ++ ++void mpd_sendCommandListBegin(mpd_Connection * connection) ++{ ++ if (connection->commandList) { ++ strcpy(connection->errorStr, "already in command list mode"); ++ connection->error = 1; ++ return; ++ } ++ connection->commandList = COMMAND_LIST; ++ mpd_executeCommand(connection, "command_list_begin\n"); ++} ++ ++void mpd_sendCommandListOkBegin(mpd_Connection * connection) ++{ ++ if (connection->commandList) { ++ strcpy(connection->errorStr, "already in command list mode"); ++ connection->error = 1; ++ return; ++ } ++ connection->commandList = COMMAND_LIST_OK; ++ mpd_executeCommand(connection, "command_list_ok_begin\n"); ++ connection->listOks = 0; ++} ++ ++void mpd_sendCommandListEnd(mpd_Connection * connection) ++{ ++ if (!connection->commandList) { ++ strcpy(connection->errorStr, "not in command list mode"); ++ connection->error = 1; ++ return; ++ } ++ connection->commandList = 0; ++ mpd_executeCommand(connection, "command_list_end\n"); ++} ++ ++void mpd_sendOutputsCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "outputs\n"); ++} ++ ++mpd_OutputEntity *mpd_getNextOutput(mpd_Connection * connection) ++{ ++ mpd_OutputEntity *output = NULL; ++ ++ if (connection->doneProcessing || (connection->listOks && connection->doneListOk)) { ++ return NULL; ++ } ++ ++ if (connection->error) ++ return NULL; ++ ++ output = malloc(sizeof(mpd_OutputEntity)); ++ output->id = -10; ++ output->name = NULL; ++ output->enabled = 0; ++ ++ if (!connection->returnElement) ++ mpd_getNextReturnElement(connection); ++ ++ while (connection->returnElement) { ++ mpd_ReturnElement *re = connection->returnElement; ++ if (strcmp(re->name, "outputid") == 0) { ++ if (output != NULL && output->id >= 0) ++ return output; ++ output->id = atoi(re->value); ++ } else if (strcmp(re->name, "outputname") == 0) { ++ output->name = strdup(re->value); ++ } else if 
(strcmp(re->name, "outputenabled") == 0) { ++ output->enabled = atoi(re->value); ++ } ++ ++ mpd_getNextReturnElement(connection); ++ if (connection->error) { ++ free(output); ++ return NULL; ++ } ++ ++ } ++ ++ return output; ++} ++ ++void mpd_sendEnableOutputCommand(mpd_Connection * connection, int outputId) ++{ ++ int len = strlen("enableoutput") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "enableoutput \"%i\"\n", outputId); ++ mpd_executeCommand(connection, string); ++ free(string); ++} ++ ++void mpd_sendDisableOutputCommand(mpd_Connection * connection, int outputId) ++{ ++ int len = strlen("disableoutput") + 2 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "disableoutput \"%i\"\n", outputId); ++ mpd_executeCommand(connection, string); ++ free(string); ++} ++ ++void mpd_freeOutputElement(mpd_OutputEntity * output) ++{ ++ free(output->name); ++ free(output); ++} ++ ++/** ++ * mpd_sendNotCommandsCommand ++ * odd naming, but it gets the not allowed commands ++ */ ++ ++void mpd_sendNotCommandsCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "notcommands\n"); ++} ++ ++/** ++ * mpd_sendCommandsCommand ++ * odd naming, but it gets the allowed commands ++ */ ++void mpd_sendCommandsCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "commands\n"); ++} ++ ++/** ++ * Get the next returned command ++ */ ++char *mpd_getNextCommand(mpd_Connection * connection) ++{ ++ return mpd_getNextReturnElementNamed(connection, "command"); ++} ++ ++void mpd_sendUrlHandlersCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "urlhandlers\n"); ++} ++ ++char *mpd_getNextHandler(mpd_Connection * connection) ++{ ++ return mpd_getNextReturnElementNamed(connection, "handler"); ++} ++ ++void mpd_sendTagTypesCommand(mpd_Connection * connection) ++{ ++ mpd_executeCommand(connection, "tagtypes\n"); ++} ++ ++char *mpd_getNextTagType(mpd_Connection * connection) ++{ ++ return mpd_getNextReturnElementNamed(connection, "tagtype"); ++} ++ ++void mpd_startSearch(mpd_Connection * connection, int exact) ++{ ++ if (connection->request) { ++ strcpy(connection->errorStr, "search already in progress"); ++ connection->error = 1; ++ return; ++ } ++ ++ if (exact) ++ connection->request = strdup("find"); ++ else ++ connection->request = strdup("search"); ++} ++ ++void mpd_startStatsSearch(mpd_Connection * connection) ++{ ++ if (connection->request) { ++ strcpy(connection->errorStr, "search already in progress"); ++ connection->error = 1; ++ return; ++ } ++ ++ connection->request = strdup("count"); ++} ++ ++void mpd_startPlaylistSearch(mpd_Connection * connection, int exact) ++{ ++ if (connection->request) { ++ strcpy(connection->errorStr, "search already in progress"); ++ connection->error = 1; ++ return; ++ } ++ ++ if (exact) ++ connection->request = strdup("playlistfind"); ++ else ++ connection->request = strdup("playlistsearch"); ++} ++ ++void mpd_startFieldSearch(mpd_Connection * connection, int type) ++{ ++ char *strtype; ++ int len; ++ ++ if (connection->request) { ++ strcpy(connection->errorStr, "search already in progress"); ++ connection->error = 1; ++ return; ++ } ++ ++ if (type < 0 || type >= MPD_TAG_NUM_OF_ITEM_TYPES) { ++ strcpy(connection->errorStr, "invalid type specified"); ++ connection->error = 1; ++ return; ++ } ++ ++ strtype = mpdTagItemKeys[type]; ++ ++ len = 5 + strlen(strtype) + 1; ++ connection->request = malloc(len); ++ ++ snprintf(connection->request, len, "list %c%s", tolower(strtype[0]), 
strtype + 1); ++} ++ ++void mpd_addConstraintSearch(mpd_Connection * connection, int type, const char *name) ++{ ++ char *strtype; ++ char *arg; ++ int len; ++ char *string; ++ ++ if (!connection->request) { ++ strcpy(connection->errorStr, "no search in progress"); ++ connection->error = 1; ++ return; ++ } ++ ++ if (type < 0 || type >= MPD_TAG_NUM_OF_ITEM_TYPES) { ++ strcpy(connection->errorStr, "invalid type specified"); ++ connection->error = 1; ++ return; ++ } ++ ++ if (name == NULL) { ++ strcpy(connection->errorStr, "no name specified"); ++ connection->error = 1; ++ return; ++ } ++ ++ string = strdup(connection->request); ++ strtype = mpdTagItemKeys[type]; ++ arg = mpd_sanitizeArg(name); ++ ++ len = strlen(string) + 1 + strlen(strtype) + 2 + strlen(arg) + 2; ++ connection->request = realloc(connection->request, len); ++ snprintf(connection->request, len, "%s %c%s \"%s\"", string, tolower(strtype[0]), strtype + 1, arg); ++ ++ free(string); ++ free(arg); ++} ++ ++void mpd_commitSearch(mpd_Connection * connection) ++{ ++ int len; ++ ++ if (!connection->request) { ++ strcpy(connection->errorStr, "no search in progress"); ++ connection->error = 1; ++ return; ++ } ++ ++ len = strlen(connection->request) + 2; ++ connection->request = realloc(connection->request, len); ++ connection->request[len - 2] = '\n'; ++ connection->request[len - 1] = '\0'; ++ mpd_sendInfoCommand(connection, connection->request); ++ ++ free(connection->request); ++ connection->request = NULL; ++} ++ ++/** ++ * @param connection a MpdConnection ++ * @param path the path to the playlist. ++ * ++ * List the content, with full metadata, of a stored playlist. ++ * ++ */ ++void mpd_sendListPlaylistInfoCommand(mpd_Connection * connection, char *path) ++{ ++ char *arg = mpd_sanitizeArg(path); ++ int len = strlen("listplaylistinfo") + 2 + strlen(arg) + 3; ++ char *query = malloc(len); ++ snprintf(query, len, "listplaylistinfo \"%s\"\n", arg); ++ mpd_sendInfoCommand(connection, query); ++ free(arg); ++ free(query); ++} ++ ++/** ++ * @param connection a MpdConnection ++ * @param path the path to the playlist. ++ * ++ * List the content of a stored playlist. 
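The search helpers above form a build-and-commit sequence, which is exactly how mpd_sendSearchCommand and mpd_sendFindCommand use them internally; a minimal sketch with a hypothetical constraint (conn is an established mpd_Connection):

    mpd_startSearch(conn, 0);  /* 0 = "search" (substring), 1 = "find" (exact) */
    mpd_addConstraintSearch(conn, MPD_TAG_ITEM_ARTIST, "Miles Davis");
    mpd_commitSearch(conn);    /* sends: search artist "Miles Davis" */

    mpd_InfoEntity *ent;
    while ((ent = mpd_getNextInfoEntity(conn)) != NULL)
        mpd_freeInfoEntity(ent);  /* inspect ent->info.song before freeing */
    mpd_finishCommand(conn);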
++ * ++ */ ++void mpd_sendListPlaylistCommand(mpd_Connection * connection, char *path) ++{ ++ char *arg = mpd_sanitizeArg(path); ++ int len = strlen("listplaylist") + 2 + strlen(arg) + 3; ++ char *query = malloc(len); ++ snprintf(query, len, "listplaylist \"%s\"\n", arg); ++ mpd_sendInfoCommand(connection, query); ++ free(arg); ++ free(query); ++} ++ ++void mpd_sendPlaylistClearCommand(mpd_Connection * connection, char *path) ++{ ++ char *sPath = mpd_sanitizeArg(path); ++ int len = strlen("playlistclear") + 2 + strlen(sPath) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "playlistclear \"%s\"\n", sPath); ++ mpd_executeCommand(connection, string); ++ free(sPath); ++ free(string); ++} ++ ++void mpd_sendPlaylistAddCommand(mpd_Connection * connection, char *playlist, char *path) ++{ ++ char *sPlaylist = mpd_sanitizeArg(playlist); ++ char *sPath = mpd_sanitizeArg(path); ++ int len = strlen("playlistadd") + 2 + strlen(sPlaylist) + 3 + strlen(sPath) + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "playlistadd \"%s\" \"%s\"\n", sPlaylist, sPath); ++ mpd_executeCommand(connection, string); ++ free(sPlaylist); ++ free(sPath); ++ free(string); ++} ++ ++void mpd_sendPlaylistMoveCommand(mpd_Connection * connection, char *playlist, int from, int to) ++{ ++ char *sPlaylist = mpd_sanitizeArg(playlist); ++ int len = strlen("playlistmove") + 2 + strlen(sPlaylist) + 3 + INTLEN + 3 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "playlistmove \"%s\" \"%i\" \"%i\"\n", sPlaylist, from, to); ++ mpd_executeCommand(connection, string); ++ free(sPlaylist); ++ free(string); ++} ++ ++void mpd_sendPlaylistDeleteCommand(mpd_Connection * connection, char *playlist, int pos) ++{ ++ char *sPlaylist = mpd_sanitizeArg(playlist); ++ int len = strlen("playlistdelete") + 2 + strlen(sPlaylist) + 3 + INTLEN + 3; ++ char *string = malloc(len); ++ snprintf(string, len, "playlistdelete \"%s\" \"%i\"\n", sPlaylist, pos); ++ mpd_executeCommand(connection, string); ++ free(sPlaylist); ++ free(string); ++} +--- /dev/null ++++ b/libmpdclient.h +@@ -0,0 +1,661 @@ ++/* libmpdclient ++ (c)2003-2006 by Warren Dukes (warren.dukes@gmail.com) ++ This project's homepage is: http://www.musicpd.org ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ ++ - Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ ++ - Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ ++ - Neither the name of the Music Player Daemon nor the names of its ++ contributors may be used to endorse or promote products derived from ++ this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR ++ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++#ifndef LIBMPDCLIENT_H ++#define LIBMPDCLIENT_H ++ ++#ifdef WIN32 ++# define __W32API_USE_DLLIMPORT__ 1 ++#endif ++ ++#include <sys/time.h> ++#include <stdarg.h> ++#define MPD_BUFFER_MAX_LENGTH 50000 ++#define MPD_ERRORSTR_MAX_LENGTH 1000 ++#define MPD_WELCOME_MESSAGE "OK MPD " ++ ++#define MPD_ERROR_TIMEOUT 10 /* timeout trying to talk to mpd */ ++#define MPD_ERROR_SYSTEM 11 /* system error */ ++#define MPD_ERROR_UNKHOST 12 /* unknown host */ ++#define MPD_ERROR_CONNPORT 13 /* problems connecting to port on host */ ++#define MPD_ERROR_NOTMPD 14 /* mpd not running on port at host */ ++#define MPD_ERROR_NORESPONSE 15 /* no response on attempting to connect */ ++#define MPD_ERROR_SENDING 16 /* error sending command */ ++#define MPD_ERROR_CONNCLOSED 17 /* connection closed by mpd */ ++#define MPD_ERROR_ACK 18 /* ACK returned! */ ++#define MPD_ERROR_BUFFEROVERRUN 19 /* Buffer was overrun! */ ++ ++#define MPD_ACK_ERROR_UNK -1 ++#define MPD_ERROR_AT_UNK -1 ++ ++#define MPD_ACK_ERROR_NOT_LIST 1 ++#define MPD_ACK_ERROR_ARG 2 ++#define MPD_ACK_ERROR_PASSWORD 3 ++#define MPD_ACK_ERROR_PERMISSION 4 ++#define MPD_ACK_ERROR_UNKNOWN_CMD 5 ++ ++#define MPD_ACK_ERROR_NO_EXIST 50 ++#define MPD_ACK_ERROR_PLAYLIST_MAX 51 ++#define MPD_ACK_ERROR_SYSTEM 52 ++#define MPD_ACK_ERROR_PLAYLIST_LOAD 53 ++#define MPD_ACK_ERROR_UPDATE_ALREADY 54 ++#define MPD_ACK_ERROR_PLAYER_SYNC 55 ++#define MPD_ACK_ERROR_EXIST 56 ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++ typedef enum mpd_TagItems { ++ MPD_TAG_ITEM_ARTIST, ++ MPD_TAG_ITEM_ALBUM, ++ MPD_TAG_ITEM_TITLE, ++ MPD_TAG_ITEM_TRACK, ++ MPD_TAG_ITEM_NAME, ++ MPD_TAG_ITEM_GENRE, ++ MPD_TAG_ITEM_DATE, ++ MPD_TAG_ITEM_COMPOSER, ++ MPD_TAG_ITEM_PERFORMER, ++ MPD_TAG_ITEM_COMMENT, ++ MPD_TAG_ITEM_DISC, ++ MPD_TAG_ITEM_FILENAME, ++ MPD_TAG_ITEM_ANY, ++ MPD_TAG_NUM_OF_ITEM_TYPES ++ } mpd_TagItems; ++ ++ extern char *mpdTagItemKeys[MPD_TAG_NUM_OF_ITEM_TYPES]; ++ ++/* internal stuff don't touch this struct */ ++ typedef struct _mpd_ReturnElement { ++ char *name; ++ char *value; ++ } mpd_ReturnElement; ++ ++/* mpd_Connection ++ * holds info about connection to mpd ++ * use error, and errorStr to detect errors ++ */ ++ typedef struct _mpd_Connection { ++ /* use this to check the version of mpd */ ++ int version[3]; ++ /* IMPORTANT, you want to get the error messages from here */ ++ char errorStr[MPD_ERRORSTR_MAX_LENGTH + 1]; ++ int errorCode; ++ int errorAt; ++ /* this will be set to MPD_ERROR_* if there is an error, 0 if not */ ++ int error; ++ /* DON'T TOUCH any of the rest of this stuff */ ++ int sock; ++ char buffer[MPD_BUFFER_MAX_LENGTH + 1]; ++ int buflen; ++ int bufstart; ++ int doneProcessing; ++ int listOks; ++ int doneListOk; ++ int commandList; ++ mpd_ReturnElement *returnElement; ++ struct timeval timeout; ++ char *request; ++ } mpd_Connection; ++ ++/* mpd_newConnection ++ * use this to open a new connection ++ * you should use mpd_closeConnection, when your done with the connection, ++ * even if an error has occurred ++ * _timeout_ is the connection
timeout period in seconds ++ */ ++ mpd_Connection *mpd_newConnection(const char *host, int port, float timeout); ++ ++ void mpd_setConnectionTimeout(mpd_Connection * connection, float timeout); ++ ++/* mpd_closeConnection ++ * use this to close a connection and free'ing subsequent memory ++ */ ++ void mpd_closeConnection(mpd_Connection * connection); ++ ++/* mpd_clearError ++ * clears error ++ */ ++ void mpd_clearError(mpd_Connection * connection); ++ ++/* STATUS STUFF */ ++ ++/* use these with status.state to determine what state the player is in */ ++#define MPD_STATUS_STATE_UNKNOWN 0 ++#define MPD_STATUS_STATE_STOP 1 ++#define MPD_STATUS_STATE_PLAY 2 ++#define MPD_STATUS_STATE_PAUSE 3 ++ ++/* us this with status.volume to determine if mpd has volume support */ ++#define MPD_STATUS_NO_VOLUME -1 ++ ++/* mpd_Status ++ * holds info return from status command ++ */ ++ typedef struct mpd_Status { ++ /* 0-100, or MPD_STATUS_NO_VOLUME when there is no volume support */ ++ int volume; ++ /* 1 if repeat is on, 0 otherwise */ ++ int repeat; ++ /* 1 if random is on, 0 otherwise */ ++ int random; ++ /* playlist length */ ++ int playlistLength; ++ /* playlist, use this to determine when the playlist has changed */ ++ long long playlist; ++ /* use with MPD_STATUS_STATE_* to determine state of player */ ++ int state; ++ /* crossfade setting in seconds */ ++ int crossfade; ++ /* if a song is currently selected (always the case when state is ++ * PLAY or PAUSE), this is the position of the currently ++ * playing song in the playlist, beginning with 0 ++ */ ++ int song; ++ /* Song ID of the currently selected song */ ++ int songid; ++ /* time in seconds that have elapsed in the currently playing/paused ++ * song ++ */ ++ int elapsedTime; ++ /* length in seconds of the currently playing/paused song */ ++ int totalTime; ++ /* current bit rate in kbs */ ++ int bitRate; ++ /* audio sample rate */ ++ unsigned int sampleRate; ++ /* audio bits */ ++ int bits; ++ /* audio channels */ ++ int channels; ++ /* 1 if mpd is updating, 0 otherwise */ ++ int updatingDb; ++ /* error */ ++ char *error; ++ } mpd_Status; ++ ++ void mpd_sendStatusCommand(mpd_Connection * connection); ++ ++/* mpd_getStatus ++ * returns status info, be sure to free it with mpd_freeStatus() ++ * call this after mpd_sendStatusCommand() ++ */ ++ mpd_Status *mpd_getStatus(mpd_Connection * connection); ++ ++/* mpd_freeStatus ++ * free's status info malloc'd and returned by mpd_getStatus ++ */ ++ void mpd_freeStatus(mpd_Status * status); ++ ++ typedef struct _mpd_Stats { ++ int numberOfArtists; ++ int numberOfAlbums; ++ int numberOfSongs; ++ unsigned long uptime; ++ unsigned long dbUpdateTime; ++ unsigned long playTime; ++ unsigned long dbPlayTime; ++ } mpd_Stats; ++ ++ typedef struct _mpd_SearchStats { ++ int numberOfSongs; ++ unsigned long playTime; ++ } mpd_SearchStats; ++ ++ void mpd_sendStatsCommand(mpd_Connection * connection); ++ ++ mpd_Stats *mpd_getStats(mpd_Connection * connection); ++ ++ void mpd_freeStats(mpd_Stats * stats); ++ ++ mpd_SearchStats *mpd_getSearchStats(mpd_Connection * connection); ++ ++ void mpd_freeSearchStats(mpd_SearchStats * stats); ++ ++/* SONG STUFF */ ++ ++#define MPD_SONG_NO_TIME -1 ++#define MPD_SONG_NO_NUM -1 ++#define MPD_SONG_NO_ID -1 ++ ++/* mpd_Song ++ * for storing song info returned by mpd ++ */ ++ typedef struct _mpd_Song { ++ /* filename of song */ ++ char *file; ++ /* artist, maybe NULL if there is no tag */ ++ char *artist; ++ /* title, maybe NULL if there is no tag */ ++ char *title; ++ /* album, maybe 
++
++/* SONG STUFF */
++
++#define MPD_SONG_NO_TIME -1
++#define MPD_SONG_NO_NUM -1
++#define MPD_SONG_NO_ID -1
++
++/* mpd_Song
++ * for storing song info returned by mpd
++ */
++	typedef struct _mpd_Song {
++		/* filename of song */
++		char *file;
++		/* artist, maybe NULL if there is no tag */
++		char *artist;
++		/* title, maybe NULL if there is no tag */
++		char *title;
++		/* album, maybe NULL if there is no tag */
++		char *album;
++		/* track, maybe NULL if there is no tag */
++		char *track;
++		/* name, maybe NULL if there is no tag; it's the name of the current
++		 * song, f.e. the icyName of the stream */
++		char *name;
++		/* date */
++		char *date;
++
++		/* added by qball */
++		/* Genre */
++		char *genre;
++		/* Composer */
++		char *composer;
++		/* Performer */
++		char *performer;
++		/* Disc */
++		char *disc;
++		/* Comment */
++		char *comment;
++
++		/* length of song in seconds, check that it is not MPD_SONG_NO_TIME */
++		int time;
++		/* if plchanges/playlistinfo/playlistid used, is the position of the
++		 * song in the playlist */
++		int pos;
++		/* song id for a song in the playlist */
++		int id;
++	} mpd_Song;
++
++/* mpd_newSong
++ * use to allocate memory for a new mpd_Song
++ * file, artist, etc all initialized to NULL
++ * if you're going to assign values to file, artist, etc
++ * be sure to malloc or strdup the memory
++ * use mpd_freeSong to free the memory for the mpd_Song, it will also
++ * free memory for file, artist, etc, so don't do it yourself
++ */
++	mpd_Song *mpd_newSong(void);
++
++/* mpd_freeSong
++ * use to free memory allocated by mpd_newSong
++ * also it will free memory pointed to by file, artist, etc, so be careful
++ */
++	void mpd_freeSong(mpd_Song * song);
++
++/* mpd_songDup
++ * works like strdup, but for a mpd_Song
++ */
++	mpd_Song *mpd_songDup(mpd_Song * song);
++
++/* DIRECTORY STUFF */
++
++/* mpd_Directory
++ * used to store info for a directory (right now just the path)
++ */
++	typedef struct _mpd_Directory {
++		char *path;
++	} mpd_Directory;
++
++/* mpd_newDirectory
++ * allocates memory for a new directory
++ * use mpd_freeDirectory to free this memory
++ */
++	mpd_Directory *mpd_newDirectory(void);
++
++/* mpd_freeDirectory
++ * used to free memory allocated with mpd_newDirectory, and it frees
++ * path of mpd_Directory, so be careful
++ */
++	void mpd_freeDirectory(mpd_Directory * directory);
++
++/* mpd_directoryDup
++ * works like strdup, but for mpd_Directory
++ */
++	mpd_Directory *mpd_directoryDup(mpd_Directory * directory);
++
++/* PLAYLISTFILE STUFF */
++
++/* mpd_PlaylistFile
++ * stores info about playlist file returned by lsinfo
++ */
++	typedef struct _mpd_PlaylistFile {
++		char *path;
++	} mpd_PlaylistFile;
++
++/* mpd_newPlaylistFile
++ * allocates memory for new mpd_PlaylistFile, path is set to NULL
++ * free this memory with mpd_freePlaylistFile
++ */
++	mpd_PlaylistFile *mpd_newPlaylistFile(void);
++
++/* mpd_freePlaylistFile
++ * frees memory allocated for the mpd_PlaylistFile, will also free
++ * path, so be careful
++ */
++	void mpd_freePlaylistFile(mpd_PlaylistFile * playlist);
++
++/* mpd_playlistFileDup
++ * works like strdup, but for mpd_PlaylistFile
++ */
++	mpd_PlaylistFile *mpd_playlistFileDup(mpd_PlaylistFile * playlist);
++
++/* INFO ENTITY STUFF */
++
++/* the type of entity returned from one of the commands that generates info
++ * use in conjunction with mpd_InfoEntity.type
++ */
++#define MPD_INFO_ENTITY_TYPE_DIRECTORY 0
++#define MPD_INFO_ENTITY_TYPE_SONG 1
++#define MPD_INFO_ENTITY_TYPE_PLAYLISTFILE 2
++
++/* mpd_InfoEntity
++ * stores info on stuff returned by info commands
++ */
++	typedef struct mpd_InfoEntity {
++		/* the type of entity, use with MPD_INFO_ENTITY_TYPE_* to determine
++		 * what this entity is (song, directory, etc...)
++		 */
++		int type;
++		/* the actual data you want, mpd_Song, mpd_Directory, etc */
++		union {
++			mpd_Directory *directory;
++			mpd_Song *song;
++			mpd_PlaylistFile *playlistFile;
++		} info;
++	} mpd_InfoEntity;
++
++	mpd_InfoEntity *mpd_newInfoEntity(void);
++
++	void mpd_freeInfoEntity(mpd_InfoEntity * entity);
++
++/* INFO COMMANDS AND STUFF */
++
++/* use this function to loop over after calling Info/Listall functions */
++	mpd_InfoEntity *mpd_getNextInfoEntity(mpd_Connection * connection);
++
++/* fetches the currently selected song (the song referenced by status->song
++ * and status->songid) */
++	void mpd_sendCurrentSongCommand(mpd_Connection * connection);
++
++/* songNum of -1, means to display the whole list */
++	void mpd_sendPlaylistInfoCommand(mpd_Connection * connection, int songNum);
++
++/* songId of -1, means to display the whole list */
++	void mpd_sendPlaylistIdCommand(mpd_Connection * connection, int songId);
++
++/* use this to get the changes in the playlist since version _playlist_ */
++	void mpd_sendPlChangesCommand(mpd_Connection * connection, long long playlist);
++
++/**
++ * @param connection: A valid and connected mpd_Connection.
++ * @param playlist: The playlist version you want the diff with.
++ * A more bandwidth efficient version of the mpd_sendPlChangesCommand.
++ * It only returns the pos+id of the changed songs.
++ */
++	void mpd_sendPlChangesPosIdCommand(mpd_Connection * connection, long long playlist);
++
++/* recursively fetches all songs/dirs/playlists in "dir" (no metadata is
++ * returned) */
++	void mpd_sendListallCommand(mpd_Connection * connection, const char *dir);
++
++/* same as sendListallCommand, but also metadata is returned */
++	void mpd_sendListallInfoCommand(mpd_Connection * connection, const char *dir);
++
++/* non-recursive version of ListallInfo */
++	void mpd_sendLsInfoCommand(mpd_Connection * connection, const char *dir);
++
++#define MPD_TABLE_ARTIST MPD_TAG_ITEM_ARTIST
++#define MPD_TABLE_ALBUM MPD_TAG_ITEM_ALBUM
++#define MPD_TABLE_TITLE MPD_TAG_ITEM_TITLE
++#define MPD_TABLE_FILENAME MPD_TAG_ITEM_FILENAME
++
++	void mpd_sendSearchCommand(mpd_Connection * connection, int table, const char *str);
++
++	void mpd_sendFindCommand(mpd_Connection * connection, int table, const char *str);
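Editor's aside: the entity API above is consumed with a fetch loop; a sketch (illustration only, assumes conn as before):

	mpd_InfoEntity *entity;

	mpd_sendLsInfoCommand(conn, "/");	/* "/" = root of the music directory */
	while ((entity = mpd_getNextInfoEntity(conn)) != NULL) {
		if (entity->type == MPD_INFO_ENTITY_TYPE_SONG)
			printf("song: %s\n", entity->info.song->file);
		else if (entity->type == MPD_INFO_ENTITY_TYPE_DIRECTORY)
			printf("dir:  %s\n", entity->info.directory->path);
		mpd_freeInfoEntity(entity);
	}
	mpd_finishCommand(conn);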
++
++/* LIST TAG COMMANDS */
++
++/* use this function to fetch the next artist entry, be sure to free the returned
++ * string. NULL means there are no more. Best used with sendListArtists
++ */
++	char *mpd_getNextArtist(mpd_Connection * connection);
++
++	char *mpd_getNextAlbum(mpd_Connection * connection);
++
++	char *mpd_getNextTag(mpd_Connection * connection, int type);
++
++/* list artists or albums by artist, arg1 should be set to the artist if
++ * listing albums by an artist, otherwise NULL for listing all artists or albums
++ */
++	void mpd_sendListCommand(mpd_Connection * connection, int table, const char *arg1);
++
++/* SIMPLE COMMANDS */
++
++	void mpd_sendAddCommand(mpd_Connection * connection, const char *file);
++
++	int mpd_sendAddIdCommand(mpd_Connection * connection, const char *file);
++
++	void mpd_sendDeleteCommand(mpd_Connection * connection, int songNum);
++
++	void mpd_sendDeleteIdCommand(mpd_Connection * connection, int songNum);
++
++	void mpd_sendSaveCommand(mpd_Connection * connection, const char *name);
++
++	void mpd_sendLoadCommand(mpd_Connection * connection, const char *name);
++
++	void mpd_sendRmCommand(mpd_Connection * connection, const char *name);
++
++	void mpd_sendRenameCommand(mpd_Connection * connection, const char *from, const char *to);
++
++	void mpd_sendShuffleCommand(mpd_Connection * connection);
++
++	void mpd_sendClearCommand(mpd_Connection * connection);
++
++/* use this to start playing at the beginning, useful when in random mode */
++#define MPD_PLAY_AT_BEGINNING -1
++
++	void mpd_sendPlayCommand(mpd_Connection * connection, int songNum);
++
++	void mpd_sendPlayIdCommand(mpd_Connection * connection, int songNum);
++
++	void mpd_sendStopCommand(mpd_Connection * connection);
++
++	void mpd_sendPauseCommand(mpd_Connection * connection, int pauseMode);
++
++	void mpd_sendNextCommand(mpd_Connection * connection);
++
++	void mpd_sendPrevCommand(mpd_Connection * connection);
++
++	void mpd_sendMoveCommand(mpd_Connection * connection, int from, int to);
++
++	void mpd_sendMoveIdCommand(mpd_Connection * connection, int from, int to);
++
++	void mpd_sendSwapCommand(mpd_Connection * connection, int song1, int song2);
++
++	void mpd_sendSwapIdCommand(mpd_Connection * connection, int song1, int song2);
++
++	void mpd_sendSeekCommand(mpd_Connection * connection, int song, int time);
++
++	void mpd_sendSeekIdCommand(mpd_Connection * connection, int song, int time);
++
++	void mpd_sendRepeatCommand(mpd_Connection * connection, int repeatMode);
++
++	void mpd_sendRandomCommand(mpd_Connection * connection, int randomMode);
++
++	void mpd_sendSetvolCommand(mpd_Connection * connection, int volumeChange);
++
++/* WARNING: don't use the volume command, it's deprecated */
++	void mpd_sendVolumeCommand(mpd_Connection * connection, int volumeChange);
++
++	void mpd_sendCrossfadeCommand(mpd_Connection * connection, int seconds);
++
++	void mpd_sendUpdateCommand(mpd_Connection * connection, char *path);
++
++/* returns the update job id, call this after an update command */
++	int mpd_getUpdateId(mpd_Connection * connection);
++
++	void mpd_sendPasswordCommand(mpd_Connection * connection, const char *pass);
++
++/* call this after executing a command, when you're done with it, to get its status
++ * (you want to check connection->error for an error)
++ */
++	void mpd_finishCommand(mpd_Connection * connection);
++
++/* command list stuff, use this to do things like add files very quickly */
++	void mpd_sendCommandListBegin(mpd_Connection * connection);
++
++	void mpd_sendCommandListOkBegin(mpd_Connection * connection);
++
++	void mpd_sendCommandListEnd(mpd_Connection * connection);
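Editor's aside: the command-list calls above batch several commands into one round trip; a sketch (illustration only, assumes conn as before; the file paths are hypothetical):

	mpd_sendCommandListBegin(conn);
	mpd_sendAddCommand(conn, "music/a.ogg");
	mpd_sendAddCommand(conn, "music/b.ogg");
	mpd_sendCommandListEnd(conn);
	mpd_finishCommand(conn);
	if (conn->error)
		fprintf(stderr, "command list failed: %s\n", conn->errorStr);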
++
++/* advance to the next listOk
++ * returns 0 if advanced to the next list_OK,
++ * returns -1 if it advanced to an OK or ACK */
++	int mpd_nextListOkCommand(mpd_Connection * connection);
++
++	typedef struct _mpd_OutputEntity {
++		int id;
++		char *name;
++		int enabled;
++	} mpd_OutputEntity;
++
++	void mpd_sendOutputsCommand(mpd_Connection * connection);
++
++	mpd_OutputEntity *mpd_getNextOutput(mpd_Connection * connection);
++
++	void mpd_sendEnableOutputCommand(mpd_Connection * connection, int outputId);
++
++	void mpd_sendDisableOutputCommand(mpd_Connection * connection, int outputId);
++
++	void mpd_freeOutputElement(mpd_OutputEntity * output);
++
++/**
++ * @param connection a #mpd_Connection
++ *
++ * Queries mpd for the allowed commands
++ */
++	void mpd_sendCommandsCommand(mpd_Connection * connection);
++
++/**
++ * @param connection a #mpd_Connection
++ *
++ * Queries mpd for the not allowed commands
++ */
++	void mpd_sendNotCommandsCommand(mpd_Connection * connection);
++
++/**
++ * @param connection a #mpd_Connection
++ *
++ * returns the next supported command.
++ *
++ * @returns a string, needs to be free'd
++ */
++	char *mpd_getNextCommand(mpd_Connection * connection);
++
++	void mpd_sendUrlHandlersCommand(mpd_Connection * connection);
++
++	char *mpd_getNextHandler(mpd_Connection * connection);
++
++	void mpd_sendTagTypesCommand(mpd_Connection * connection);
++
++	char *mpd_getNextTagType(mpd_Connection * connection);
++
++/**
++ * @param connection a MpdConnection
++ * @param path the path to the playlist.
++ *
++ * List the content, with full metadata, of a stored playlist.
++ *
++ */
++	void mpd_sendListPlaylistInfoCommand(mpd_Connection * connection, char *path);
++
++/**
++ * @param connection a MpdConnection
++ * @param path the path to the playlist.
++ *
++ * List the content of a stored playlist.
++ *
++ */
++	void mpd_sendListPlaylistCommand(mpd_Connection * connection, char *path);
++
++/**
++ * @param connection a #mpd_Connection
++ * @param exact if to match exact
++ *
++ * starts a search, use mpd_addConstraintSearch to add
++ * a constraint to the search, and mpd_commitSearch to do the actual search
++ */
++	void mpd_startSearch(mpd_Connection * connection, int exact);
++
++/**
++ * @param connection a #mpd_Connection
++ * @param type
++ * @param name
++ */
++	void mpd_addConstraintSearch(mpd_Connection * connection, int type, const char *name);
++
++/**
++ * @param connection a #mpd_Connection
++ */
++	void mpd_commitSearch(mpd_Connection * connection);
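Editor's aside: putting the search API together (illustration only, assumes conn as before; the artist name is hypothetical):

	mpd_InfoEntity *entity;

	mpd_startSearch(conn, 0);	/* 0 = non-exact (fuzzy) matching */
	mpd_addConstraintSearch(conn, MPD_TAG_ITEM_ARTIST, "Miles Davis");
	mpd_commitSearch(conn);
	while ((entity = mpd_getNextInfoEntity(conn)) != NULL) {
		if (entity->type == MPD_INFO_ENTITY_TYPE_SONG)
			printf("%s\n", entity->info.song->file);
		mpd_freeInfoEntity(entity);
	}
	mpd_finishCommand(conn);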
++
++/**
++ * @param connection a #mpd_Connection
++ * @param type The type to search for
++ *
++ * starts a search for fields... f.e. getting a list of artists would be:
++ * @code
++ * mpd_startFieldSearch(connection, MPD_TAG_ITEM_ARTIST);
++ * mpd_commitSearch(connection);
++ * @endcode
++ *
++ * or getting a list of artists in genre "jazz" would be:
++ * @code
++ * mpd_startFieldSearch(connection, MPD_TAG_ITEM_ARTIST);
++ * mpd_addConstraintSearch(connection, MPD_TAG_ITEM_GENRE, "jazz")
++ * mpd_commitSearch(connection);
++ * @endcode
++ *
++ * mpd_startSearch will return a list of songs (and you need mpd_getNextInfoEntity)
++ * this one will return a list of only one field (the one specified with type) and you need
++ * mpd_getNextTag to get the results
++ */
++	void mpd_startFieldSearch(mpd_Connection * connection, int type);
++
++	void mpd_startPlaylistSearch(mpd_Connection * connection, int exact);
++
++	void mpd_startStatsSearch(mpd_Connection * connection);
++
++	void mpd_sendPlaylistClearCommand(mpd_Connection * connection, char *path);
++
++	void mpd_sendPlaylistAddCommand(mpd_Connection * connection, char *playlist, char *path);
++
++	void mpd_sendPlaylistMoveCommand(mpd_Connection * connection, char *playlist, int from, int to);
++
++	void mpd_sendPlaylistDeleteCommand(mpd_Connection * connection, char *playlist, int pos);
++#ifdef __cplusplus
++}
++#endif
++#endif
diff --git a/lcd4linux/patches/160-uinput_defs.patch b/lcd4linux/patches/160-uinput_defs.patch
new file mode 100755
index 000000000..44b24b3c8
--- /dev/null
+++ b/lcd4linux/patches/160-uinput_defs.patch
@@ -0,0 +1,24 @@
+--- a/drv_G15.c
++++ b/drv_G15.c
+@@ -42,6 +42,7 @@
+ 
+ #include 
+ #include 
++#include <linux/version.h>
+ #include 
+ #include 
+ 
+@@ -269,8 +270,13 @@ void drv_G15_initKeyHandling(char *devic
+ 	}
+ 	memset(&device, 0, sizeof(device));
+ 	strncpy(device.name, "G15 Keys", UINPUT_MAX_NAME_SIZE);
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ 	device.id.bustype = BUS_USB;
+ 	device.id.version = 4;
++#else
+ 	device.idbus = BUS_USB;
+ 	device.idversion = 4;
++#endif
+ 
+ 	ioctl(uinput_fd, UI_SET_EVBIT, EV_KEY);
+ 
diff --git a/lcd4linux/patches/170-add-generic-spidev-driver.patch b/lcd4linux/patches/170-add-generic-spidev-driver.patch
new file mode 100755
index 000000000..4cd53d6a5
--- /dev/null
+++ b/lcd4linux/patches/170-add-generic-spidev-driver.patch
@@ -0,0 +1,195 @@
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -71,6 +71,8 @@ drv_generic_i2c.c \
+ drv_generic_i2c.h \
+ drv_generic_keypad.c \
+ drv_generic_keypad.h \
++drv_generic_spidev.c \
++drv_generic_spidev.h \
+ drv_ASTUSB.c \
+ drv_BeckmannEgle.c \
+ drv_BWCT.c \
+--- /dev/null
++++ b/drv_generic_spidev.c
+@@ -0,0 +1,89 @@
++/* $Id$
++ * $URL$
++ *
++ * generic driver helper for displays connected via SPI bus
++ *
++ * Copyright (C) 2012 Gabor Juhos
++ *
++ * This file is part of LCD4Linux.
++ *
++ * LCD4Linux is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * LCD4Linux is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ */
++
++#include "config.h"
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <errno.h>
++#include <unistd.h>
++#include <fcntl.h>
++#include <sys/ioctl.h>
++#include <linux/spi/spidev.h>
++
++#include "debug.h"
++#include "qprintf.h"
++#include "cfg.h"
++#include "drv_generic_spidev.h"
++
++static char *generic_spidev_section = "";
++static char *generic_spidev_driver = "";
++static int generic_spidev_fd;
++
++int drv_generic_spidev_open(const char *section, const char *driver)
++{
++    char *spidev;
++
++    udelay_init();
++
++    generic_spidev_section = (char *) section;
++    generic_spidev_driver = (char *) driver;
++
++    spidev = cfg_get(generic_spidev_section, "Port", NULL);
++
++    info("%s: initializing SPI device %s", generic_spidev_driver, spidev);
++    generic_spidev_fd = open(spidev, O_WRONLY);
++    if (generic_spidev_fd < 0) {
++	error("%s: unable to open SPI device %s!\n", generic_spidev_driver, spidev);
++	goto exit_error;
++    }
++
++    return 0;
++
++  exit_error:
++    free(spidev);
++    return -1;
++}
++
++int drv_generic_spidev_close(void)
++{
++    close(generic_spidev_fd);
++    return 0;
++}
++
++int drv_generic_spidev_transfer(const int count, struct spi_ioc_transfer *tr)
++{
++    int ret;
++
++    ret = ioctl(generic_spidev_fd, SPI_IOC_MESSAGE(count), tr);
++    if (ret < count) {
++	error("%s: can't send SPI message! (%s)\n",
++	      generic_spidev_driver, strerror(errno));
++	return -1;
++    }
++
++    return 0;
++}
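Editor's aside: a sketch of how a display driver would call this helper (illustration only; the "MyDisplay" section name, driver string, and command bytes are hypothetical, and the lcd4linux config would carry e.g. Port '/dev/spidev0.0'):

	struct spi_ioc_transfer tr;
	unsigned char cmd[2] = { 0xae, 0x00 };	/* hypothetical controller command */

	memset(&tr, 0, sizeof(tr));
	tr.tx_buf = (unsigned long) cmd;	/* kernel expects a user-space pointer here */
	tr.len = sizeof(cmd);

	if (drv_generic_spidev_open("MyDisplay", "spidev-demo") < 0)
		return -1;
	drv_generic_spidev_transfer(1, &tr);	/* one transfer in the SPI message */
	drv_generic_spidev_close();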
+--- /dev/null
++++ b/drv_generic_spidev.h
+@@ -0,0 +1,54 @@
++/* $Id$
++ * $URL$
++ *
++ * generic driver helper for displays connected via SPI bus
++ *
++ * Copyright (C) 2012 Gabor Juhos
++ * Copyright (C) 2012 The LCD4Linux Team
++ *
++ * This file is part of LCD4Linux.
++ *
++ * LCD4Linux is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * LCD4Linux is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ */
++
++/*
++ *
++ * exported functions:
++ *
++ * int drv_generic_spidev_open (const char *section, const char *driver)
++ *   reads 'Port' entry from config and opens
++ *   the SPI device
++ *   returns 0 if ok, -1 on failure
++ *
++ * int drv_generic_spidev_close (void)
++ *   closes SPI device
++ *   returns 0 if ok, -1 on failure
++ *
++ * void drv_generic_spidev_transfer (int count, struct spi_ioc_transfer *tr)
++ *   transfer data to/from the SPI device
++ *
++ */
++
++#ifndef _DRV_GENERIC_SPIDEV_H_
++#define _DRV_GENERIC_SPIDEV_H_
++
++#include <linux/spi/spidev.h>
++
++int drv_generic_spidev_open(const char *section, const char *driver);
++int drv_generic_spidev_close(void);
++int drv_generic_spidev_transfer(const int count, struct spi_ioc_transfer *tr);
++
++#endif /* _DRV_GENERIC_SPIDEV_H_ */
+--- a/drivers.m4
++++ b/drivers.m4
+@@ -301,6 +301,7 @@ PARPORT="no"
+ SERIAL="no"
+ I2C="no"
+ KEYPAD="no"
++SPIDEV="no"
+ 
+ # generic libraries
+ LIBUSB="no"
+@@ -936,6 +937,12 @@ if test "$LIBJPEG" = "yes"; then
+    DRVLIBS="$DRVLIBS -ljpeg"
+ fi
+ 
++# generic spidev driver
++if test "$SPIDEV" = "yes"; then
++   DRIVERS="$DRIVERS drv_generic_spidev.o"
++   AC_DEFINE(WITH_SPIDEV, 1, [SPIDEV driver])
++fi
++
+ # libusb
+ if test "$LIBUSB" = "yes"; then
+    DRVLIBS="$DRVLIBS -lusb"
+--- a/configure.ac
++++ b/configure.ac
+@@ -115,6 +115,9 @@ AC_ARG_WITH(outb,
+ 
+ AC_CHECK_HEADERS([asm/io.h] [linux/parport.h linux/ppdev.h], [has_parport="true"], [has_parport="false"])
+ 
++# check for spidev
++AC_CHECK_HEADERS([linux/spi/spidev.h], [has_spidev="true"], [has_spidev="false"])
++
+ # drivers
+ sinclude(drivers.m4)
+ 
diff --git a/lcd4linux/patches/173-glcd2usb-bigendian-fix.patch b/lcd4linux/patches/173-glcd2usb-bigendian-fix.patch
new file mode 100755
index 000000000..97e03c748
--- /dev/null
+++ b/lcd4linux/patches/173-glcd2usb-bigendian-fix.patch
@@ -0,0 +1,20 @@
+--- a/drv_GLCD2USB.c
++++ b/drv_GLCD2USB.c
+@@ -48,6 +48,7 @@
+ #include 
+ #include 
+ #include 
++#include <endian.h>
+ 
+ #include "debug.h"
+ #include "cfg.h"
+@@ -487,6 +488,9 @@ static int drv_GLCD2USB_start(const char
+ 	return -1;
+     }
+ 
++    buffer.display_info.width = le16toh(buffer.display_info.width);
++    buffer.display_info.height = le16toh(buffer.display_info.height);
++
+     info("%s: display name = %s", Name, buffer.display_info.name);
+     info("%s: display resolution = %d * %d", Name, buffer.display_info.width, buffer.display_info.height);
+     info("%s: display flags: %x", Name, buffer.display_info.flags);
diff --git a/luci-app-openmptcprouter/root/etc/init.d/openmptcprouter b/luci-app-openmptcprouter/root/etc/init.d/openmptcprouter
index f55513bf2..928feb60f 100755
--- a/luci-app-openmptcprouter/root/etc/init.d/openmptcprouter
+++ b/luci-app-openmptcprouter/root/etc/init.d/openmptcprouter
@@ -181,9 +181,9 @@ start_service() {
 		rmmod fast_classifier 2>&1 >/dev/null
 	fi
 
-	if [ "$(uci -q get rpcd.@rpcd[0].socket)" != "/var/run/ubus.sock" ]; then
+	if [ "$(uci -q get rpcd.@rpcd[0].socket)" != "/var/run/ubus/ubus.sock" ]; then
 		uci -q batch <<-EOF >/dev/null
-			set rpcd.@rpcd[0].socket='/var/run/ubus.sock'
+			set rpcd.@rpcd[0].socket='/var/run/ubus/ubus.sock'
 			commit rpcd
 		EOF
 		/etc/init.d/rpcd restart 2>&1 >/dev/null
diff --git a/modemmanager/Makefile b/modemmanager/Makefile
index 6d11f1e2e..3eb780398 100644
--- a/modemmanager/Makefile
+++ b/modemmanager/Makefile
@@ -9,14 +9,14 @@ include $(TOPDIR)/rules.mk
 
 PKG_NAME:=modemmanager
 PKG_VERSION:=1.18.6
-PKG_RELEASE:=$(AUTORELEASE)
+PKG_RELEASE:=1
 
 PKG_SOURCE:=ModemManager-$(PKG_VERSION).tar.xz
PKG_SOURCE_URL:=https://www.freedesktop.org/software/ModemManager PKG_HASH:=d4f804b31cf504239c5f1d4973c62095c00cba1ee9abb503718dac6d146a470a PKG_BUILD_DIR:=$(BUILD_DIR)/ModemManager-$(PKG_VERSION) -PKG_MAINTAINER:=Nicholas Smith +PKG_MAINTAINER:=Nicholas Smith PKG_LICENSE:=GPL-2.0-or-later PKG_LICENSE_FILES:=COPYING diff --git a/modemmanager/files/modemmanager.common b/modemmanager/files/modemmanager.common index 6367eb32b..a439179de 100644 --- a/modemmanager/files/modemmanager.common +++ b/modemmanager/files/modemmanager.common @@ -20,8 +20,7 @@ MODEMMANAGER_EVENTS_CACHE="${MODEMMANAGER_RUNDIR}/events.cache" # Common logging mm_log() { - local level="$1"; shift - logger -p "daemon.${level}" -t "ModemManager[$$]" "hotplug: $*" + [ "$(uci -q get openmptcprouter.settings.debug)" = "true" ] && logger -t "ModemManager" "hotplug: $*" } ################################################################################ @@ -40,14 +39,14 @@ mm_find_physdev_sysfs_path() { # avoid infinite loops iterating [ -z "${tmp_path}" ] || [ "${tmp_path}" = "/" ] && return - # For USB devices, the physical device will be that with a idVendor - # and idProduct pair of files + # for USB devices, the physical device will be that with a idVendor + # and idProduct pair of files [ -f "${tmp_path}"/idVendor ] && [ -f "${tmp_path}"/idProduct ] && { tmp_path=$(readlink -f "$tmp_path") echo "${tmp_path}" return } - + # For PCI devices, the physical device will be that with a vendor # and device pair of files [ -f "${tmp_path}"/vendor ] && [ -f "${tmp_path}"/device ] && { @@ -170,7 +169,7 @@ mm_wait_for_modem() { while [ $n -ge 0 ]; do [ -d "${sysfspath}" ] || { - mm_log "error" "ignoring modem detection request: no device at ${sysfspath}" + mm_log "error: ignoring modem detection request: no device at ${sysfspath}" proto_set_available "${cfg}" 0 return 1 } @@ -178,10 +177,10 @@ mm_wait_for_modem() { # Check if the modem exists at the given sysfs path if ! 
mmcli -m "${sysfspath}" > /dev/null 2>&1 then - mm_log "error" "modem not detected at sysfs path" + mm_log "error: modem not detected at sysfs path" else - mm_log "info" "modem exported successfully at ${sysfspath}" - mm_log "info" "setting interface '${cfg}' as available" + mm_log "modem exported successfully at ${sysfspath}" + mm_log "setting interface '${cfg}' as available" proto_set_available "${cfg}" 1 return 0 fi @@ -190,7 +189,7 @@ mm_wait_for_modem() { n=$((n-step)) done - mm_log "error" "timed out waiting for the modem to get exported at ${sysfspath}" + mm_log "error: timed out waiting for the modem to get exported at ${sysfspath}" proto_set_available "${cfg}" 0 return 2 } @@ -202,7 +201,7 @@ mm_report_modem_wait() { parent_sysfspath=$(mm_find_physdev_sysfs_path "$sysfspath") [ -n "${parent_sysfspath}" ] || { - mm_log "error" "parent device sysfspath not found" + mm_log "error: parent device sysfspath not found" return } @@ -213,23 +212,23 @@ mm_report_modem_wait() { cfg=$(mm_get_modem_config "${parent_sysfspath}") if [ -n "${cfg}" ]; then - mm_log "info" "interface '${cfg}' is set to configure device '${parent_sysfspath}'" - mm_log "info" "now waiting for modem at sysfs path ${parent_sysfspath}" + mm_log "interface '${cfg}' is set to configure device '${parent_sysfspath}'" + mm_log "now waiting for modem at sysfs path ${parent_sysfspath}" mm_set_modem_wait_status "${parent_sysfspath}" "processed" # Launch subshell for the explicit wait ( mm_wait_for_modem "${cfg}" "${parent_sysfspath}" ) > /dev/null 2>&1 & else - mm_log "info" "no need to wait for modem at sysfs path ${parent_sysfspath}" + mm_log "no need to wait for modem at sysfs path ${parent_sysfspath}" mm_set_modem_wait_status "${parent_sysfspath}" "ignored" fi ;; "processed") - mm_log "info" "already waiting for modem at sysfs path ${parent_sysfspath}" + mm_log "already waiting for modem at sysfs path ${parent_sysfspath}" ;; "ignored") ;; *) - mm_log "error" "unknown status read for device at sysfs path ${parent_sysfspath}" + mm_log "error: unknown status read for device at sysfs path ${parent_sysfspath}" ;; esac } @@ -259,7 +258,7 @@ mm_cleanup_interface_by_sysfspath() { cfg=$(mm_get_modem_config "$dev") [ -n "${cfg}" ] || return - mm_log "info" "setting interface '$cfg' as unavailable" + mm_log "setting interface '$cfg' as unavailable" proto_set_available "${cfg}" 0 } @@ -287,7 +286,7 @@ mm_report_event() { esac # Report the event - mm_log "debug" "event reported: action=${action}, name=${name}, subsystem=${subsystem}" + mm_log "event reported: action=${action}, name=${name}, subsystem=${subsystem}" mmcli --report-kernel-event="action=${action},name=${name},subsystem=${subsystem}" 1>/dev/null 2>&1 & # Wait for added modem if a sysfspath is given @@ -303,7 +302,7 @@ mm_report_event_from_cache_line() { subsystem=$(echo "${event_line}" | awk -F ',' '{ print $3 }') sysfspath=$(echo "${event_line}" | awk -F ',' '{ print $4 }') - mm_log "debug" "cached event found: action=${action}, name=${name}, subsystem=${subsystem}, sysfspath=${sysfspath}" + mm_log "cached event found: action=${action}, name=${name}, subsystem=${subsystem}, sysfspath=${sysfspath}" mm_report_event "${action}" "${name}" "${subsystem}" "${sysfspath}" } @@ -318,11 +317,11 @@ mm_report_events_from_cache() { # Wait for ModemManager to be available in the bus while [ $n -ge 0 ]; do sleep $step - mm_log "info" "checking if ModemManager is available..." + mm_log "checking if ModemManager is available..." if ! 
mmcli -L >/dev/null 2>&1 then - mm_log "info" "ModemManager not yet available" + mm_log "ModemManager not yet available" else mmrunning=1 break @@ -331,7 +330,7 @@ mm_report_events_from_cache() { done [ ${mmrunning} -eq 1 ] || { - mm_log "error" "couldn't report initial kernel events: ModemManager not running" + mm_log "error: couldn't report initial kernel events: ModemManager not running" return } diff --git a/modemmanager/files/modemmanager.init b/modemmanager/files/modemmanager.init old mode 100755 new mode 100644 index 7f014dc56..a3f6c1b12 --- a/modemmanager/files/modemmanager.init +++ b/modemmanager/files/modemmanager.init @@ -4,8 +4,6 @@ USE_PROCD=1 START=70 -LOG_LEVEL="INFO" - stop_service() { # Load common utils . /usr/share/ModemManager/modemmanager.common @@ -30,8 +28,6 @@ start_service() { . /usr/share/ModemManager/modemmanager.common procd_open_instance procd_set_param command /usr/sbin/ModemManager-wrapper - procd_append_param command --log-level="$LOG_LEVEL" - [ "$LOG_LEVEL" = "DEBUG" ] && procd_append_param command --debug procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}" procd_set_param pidfile "${MODEMMANAGER_PID_FILE}" procd_close_instance diff --git a/modemmanager/files/modemmanager.proto b/modemmanager/files/modemmanager.proto index d24910b98..0e050e7b9 100755 --- a/modemmanager/files/modemmanager.proto +++ b/modemmanager/files/modemmanager.proto @@ -197,6 +197,7 @@ modemmanager_connected_method_dhcp_ipv4() { local interface="$1" local wwan="$2" local metric="$3" + local defaultroute="$4" proto_init_update "${wwan}" 1 proto_set_keep 1 @@ -222,6 +223,7 @@ modemmanager_connected_method_static_ipv4() { local dns1="$7" local dns2="$8" local metric="$9" + local defaultroute="$10" local mask="" @@ -242,8 +244,9 @@ modemmanager_connected_method_static_ipv4() { proto_set_keep 1 echo "adding IPv4 address ${address}, netmask ${mask}" proto_add_ipv4_address "${address}" "${mask}" - [ -n "${gateway}" ] && { + [ -n "${gateway}" ] && [ "${defaultroute}" != 0 ] && { echo "adding default IPv4 route via ${gateway}" + logger -t "modemmanager.proto" "adding default IPv4 route via ${gateway} ${address}" proto_add_ipv4_route "0.0.0.0" "0" "${gateway}" "${address}" } [ -n "${dns1}" ] && { @@ -262,6 +265,7 @@ modemmanager_connected_method_dhcp_ipv6() { local interface="$1" local wwan="$2" local metric="$3" + local defaultroute="$4" proto_init_update "${wwan}" 1 proto_set_keep 1 @@ -288,6 +292,7 @@ modemmanager_connected_method_static_ipv6() { local dns1="$7" local dns2="$8" local metric="$9" + local defaultroute="$10" [ -n "${address}" ] || { proto_notify_error "${interface}" ADDRESS_MISSING @@ -306,7 +311,7 @@ modemmanager_connected_method_static_ipv6() { echo "adding IPv6 address ${address}, prefix ${prefix}" proto_add_ipv6_address "${address}" "128" proto_add_ipv6_prefix "${address}/${prefix}" - [ -n "${gateway}" ] && { + [ -n "${gateway}" ] && [ "$defaultroute" != 0 ] && { echo "adding default IPv6 route via ${gateway}" proto_add_ipv6_route "${gateway}" "128" proto_add_ipv6_route "::0" "0" "${gateway}" "" "" "${address}/${prefix}" @@ -357,9 +362,9 @@ proto_modemmanager_setup() { local device apn allowedauth username password pincode iptype metric signalrate - local address prefix gateway mtu dns1 dns2 + local address prefix gateway mtu dns1 dns2 defaultroute - json_get_vars device apn allowedauth username password pincode iptype metric signalrate + json_get_vars device apn allowedauth username password pincode iptype metric signalrate 
defaultroute # validate sysfs path given in config [ -n "${device}" ] || { @@ -447,7 +452,7 @@ proto_modemmanager_setup() { echo "IPv4 connection setup required in interface ${interface}: ${bearermethod_ipv4}" case "${bearermethod_ipv4}" in "dhcp") - modemmanager_connected_method_dhcp_ipv4 "${interface}" "${beareriface}" "${metric}" + modemmanager_connected_method_dhcp_ipv4 "${interface}" "${beareriface}" "${metric}" "${defaultroute}" ;; "static") address=$(modemmanager_get_field "${bearerstatus}" "bearer.ipv4-config.address") @@ -456,7 +461,7 @@ proto_modemmanager_setup() { mtu=$(modemmanager_get_field "${bearerstatus}" "bearer.ipv4-config.mtu") dns1=$(modemmanager_get_field "${bearerstatus}" "bearer.ipv4-config.dns.value\[1\]") dns2=$(modemmanager_get_field "${bearerstatus}" "bearer.ipv4-config.dns.value\[2\]") - modemmanager_connected_method_static_ipv4 "${interface}" "${beareriface}" "${address}" "${prefix}" "${gateway}" "${mtu}" "${dns1}" "${dns2}" "${metric}" + modemmanager_connected_method_static_ipv4 "${interface}" "${beareriface}" "${address}" "${prefix}" "${gateway}" "${mtu}" "${dns1}" "${dns2}" "${metric}" "${defaultroute}" ;; "ppp") modemmanager_connected_method_ppp_ipv4 "${interface}" "${beareriface}" "${username}" "${password}" "${allowedauth}" @@ -474,7 +479,7 @@ proto_modemmanager_setup() { echo "IPv6 connection setup required in interface ${interface}: ${bearermethod_ipv6}" case "${bearermethod_ipv6}" in "dhcp") - modemmanager_connected_method_dhcp_ipv6 "${interface}" "${beareriface}" "${metric}" + modemmanager_connected_method_dhcp_ipv6 "${interface}" "${beareriface}" "${metric}" "${defaultroute}" ;; "static") address=$(modemmanager_get_field "${bearerstatus}" "bearer.ipv6-config.address") @@ -483,7 +488,7 @@ proto_modemmanager_setup() { mtu=$(modemmanager_get_field "${bearerstatus}" "bearer.ipv6-config.mtu") dns1=$(modemmanager_get_field "${bearerstatus}" "bearer.ipv6-config.dns.value\[1\]") dns2=$(modemmanager_get_field "${bearerstatus}" "bearer.ipv6-config.dns.value\[2\]") - modemmanager_connected_method_static_ipv6 "${interface}" "${beareriface}" "${address}" "${prefix}" "${gateway}" "${mtu}" "${dns1}" "${dns2}" "${metric}" + modemmanager_connected_method_static_ipv6 "${interface}" "${beareriface}" "${address}" "${prefix}" "${gateway}" "${mtu}" "${dns1}" "${dns2}" "${metric}" "${defaultroute}" ;; "ppp") proto_notify_error "${interface}" "unsupported method" diff --git a/modemmanager/files/usr/sbin/ModemManager-wrapper b/modemmanager/files/usr/sbin/ModemManager-wrapper old mode 100644 new mode 100755 diff --git a/netifd/Makefile b/netifd/Makefile index c73e4fc81..4b5f110da 100644 --- a/netifd/Makefile +++ b/netifd/Makefile @@ -5,9 +5,9 @@ PKG_RELEASE:=1 PKG_SOURCE_PROTO:=git PKG_SOURCE_URL=$(PROJECT_GIT)/project/netifd.git -PKG_SOURCE_DATE:=2021-01-09 -PKG_SOURCE_VERSION:=753c351bc729967a691d99e27693be5aec334028 -PKG_MIRROR_HASH:=e7d95bde520fc660d0a49e28c5bb50fff3071d7f48fe7fc3fc610f38edfc7df1 +PKG_SOURCE_DATE:=2021-07-26 +PKG_SOURCE_VERSION:=440eb0647708274cc8d7d9e7c2bb0cfdfba90023 +PKG_MIRROR_HASH:=eed957036ab608fdc49bdf801fc5b4405fcd2a3a5e5d3343ec39898e156c10e9 PKG_MAINTAINER:=Felix Fietkau PKG_LICENSE:=GPL-2.0 diff --git a/openmptcprouter-full/Makefile b/openmptcprouter-full/Makefile index 3989acfb7..e759dc5ee 100755 --- a/openmptcprouter-full/Makefile +++ b/openmptcprouter-full/Makefile @@ -25,6 +25,7 @@ MY_DEPENDS := \ uhttpd \ uhttpd-mod-ubus \ curl \ + iperf3-ssl luci-app-iperf \ arptables \ bind-dig \ libnetfilter-conntrack ebtables ebtables-utils ip-full 
nstat \ @@ -44,6 +45,8 @@ MY_DEPENDS := \ luci-mod-rpc rpcd-mod-rpcsys rpcd-mod-file rpcd-mod-iwinfo \ luci-app-openvpn \ shadowsocks-libev-ss-server shadowsocks-libev-ss-tunnel \ + omr-6in4 ip6tables-mod-nat luci-proto-ipv6 6to4 6in4 6rd ip6tables \ + speedtestcpp \ iftop \ htop \ nano \ @@ -75,7 +78,7 @@ MY_DEPENDS := \ !TARGET_mvebu:kmod-usb-net-huawei-cdc-ncm !TARGET_mvebu:kmod-usb-net-rndis !TARGET_mvebu:kmod-usb-net-cdc-ether !TARGET_mvebu:kmod-usb-net-ipheth !TARGET_mvebu:usbmuxd \ kmod-rt2800-usb kmod-rtl8xxxu kmod-rtl8192cu kmod-net-rtl8192su kmod-rtl8812au-ct \ !TARGET_mvebu:luci-proto-qmi wpad-basic kmod-mt7601u kmod-rtl8187 TARGET_r4s:kmod-r8168 (TARGET_x86||TARGET_x86_64):kmod-usb-net-rtl8152 \ - luci-app-mlvpn mlvpn 464xlat !TARGET_mvebu:kmod-usb-net-smsc75xx kmod-zram kmod-swconfig swconfig kmod-ipt-nat kmod-tcp-nanqinlang (TARGET_x86_64||aarch64):kmod-tcp-bbr2 iptables-mod-ipopt igmpproxy ss iptraf-ng \ + luci-app-mlvpn mlvpn 464xlat !TARGET_mvebu:kmod-usb-net-smsc75xx kmod-zram kmod-swconfig swconfig kmod-ipt-nat kmod-ipt-nat6 luci-app-https-dns-proxy kmod-tcp-nanqinlang (TARGET_x86_64||aarch64):kmod-tcp-bbr2 iptables-mod-ipopt igmpproxy ss iptraf-ng \ luci-app-acl block-mount blockd fstools luci-app-shutdown libwebp luci-proto-gre tcptraceroute luci-proto-mbim kmod-rtl8xxxu kmod-ath9k-htc luci-app-ttyd luci-mod-dashboard (TARGET_x86||TARGET_x86_64):rtl8192eu-firmware kmod-usb2 libustream-openssl (TARGET_x86||TARGET_x86_64):kmod-ixgbevf (TARGET_x86||TARGET_x86_64):kmod-igbvf \ hwinfo (TARGET_x86||TARGET_x86_64):dmidecode luci-app-packet-capture kmod-bonding luci-proto-bonding luci-app-sysupgrade \ luci-theme-openwrt-2020 luci-proto-wireguard luci-app-wireguard (TARGET_x86||TARGET_x86_64):kmod-r8125 TARGET_x86_64:kmod-atlantic \ diff --git a/openmptcprouter/files/etc/uci-defaults/2060-omr-system b/openmptcprouter/files/etc/uci-defaults/2060-omr-system index 2ba0b41ae..4a4a37962 100755 --- a/openmptcprouter/files/etc/uci-defaults/2060-omr-system +++ b/openmptcprouter/files/etc/uci-defaults/2060-omr-system @@ -17,9 +17,9 @@ uci -q batch <<-EOF >/dev/null commit luci EOF -if [ "$(uci -q get rpcd.@rpcd[0].socket)" != "/var/run/ubus.sock" ]; then +if [ "$(uci -q get rpcd.@rpcd[0].socket)" != "/var/run/ubus/ubus.sock" ]; then uci -q batch <<-EOF >/dev/null - set rpcd.@rpcd[0].socket='/var/run/ubus.sock' + set rpcd.@rpcd[0].socket='/var/run/ubus/ubus.sock' commit rpcd EOF fi diff --git a/shadowsocks-v2ray-plugin/Makefile b/shadowsocks-v2ray-plugin/Makefile index e50a54e7c..5cc81054e 100755 --- a/shadowsocks-v2ray-plugin/Makefile +++ b/shadowsocks-v2ray-plugin/Makefile @@ -25,7 +25,7 @@ GO_PKG:=github.com/shadowsocks/v2ray-plugin PKG_USE_MIPS16:=0 include $(INCLUDE_DIR)/package.mk -include $(TOPDIR)/feeds/packages/lang/golang/golang-package.mk +include ../golang/golang-package.mk define Package/v2ray-plugin SECTION:=net diff --git a/shortcut-fe/Makefile b/shortcut-fe/Makefile new file mode 100755 index 000000000..54711c46a --- /dev/null +++ b/shortcut-fe/Makefile @@ -0,0 +1,77 @@ +# +# Copyright (c) 2014 The Linux Foundation. All rights reserved. +# Permission to use, copy, modify, and/or distribute this software for +# any purpose with or without fee is hereby granted, provided that the +# above copyright notice and this permission notice appear in all copies. +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# +include $(TOPDIR)/rules.mk +include $(INCLUDE_DIR)/kernel.mk + +PKG_NAME:=shortcut-fe +PKG_RELEASE:=2 +PKG_CONFIG_DEPENDS := CONFIG_IPV6 + +include $(INCLUDE_DIR)/package.mk + +define KernelPackage/shortcut-fe + SECTION:=kernel + CATEGORY:=Kernel modules + SUBMENU:=Network Support + DEPENDS:= + TITLE:=Kernel driver for SFE + FILES:=$(PKG_BUILD_DIR)/shortcut-fe.ko $(if $(CONFIG_IPV6),$(PKG_BUILD_DIR)/shortcut-fe-ipv6.ko,) + KCONFIG:=CONFIG_NF_CONNTRACK_EVENTS=y \ + CONFIG_NF_CONNTRACK_TIMEOUT=y \ + CONFIG_SHORTCUT_FE=y \ + CONFIG_XFRM=y + AUTOLOAD:=$(call AutoLoad,09,shortcut-fe shortcut-fe-ipv6) +endef + +define KernelPackage/shortcut-fe/Description +Shortcut is an in-Linux-kernel IP packet forwarding engine. +endef + +define KernelPackage/shortcut-fe/install + $(INSTALL_DIR) $(1)/usr/bin + $(INSTALL_BIN) ./files/usr/bin/sfe_dump $(1)/usr/bin +endef + +define KernelPackage/shortcut-fe-cm + SECTION:=kernel + CATEGORY:=Kernel modules + SUBMENU:=Network Support + DEPENDS:=+kmod-ipt-conntrack +kmod-shortcut-fe + TITLE:=Kernel driver for SFE + FILES:=$(PKG_BUILD_DIR)/shortcut-fe-cm.ko + KCONFIG:=CONFIG_NF_CONNTRACK_CHAIN_EVENTS=y +endef + +define KernelPackage/shortcut-fe-cm/Description +Simple connection manager for the Shortcut forwarding engine. +endef + +define Build/Compile + +$(MAKE) $(PKG_JOBS) -C "$(LINUX_DIR)" \ + $(KERNEL_MAKE_FLAGS) \ + $(PKG_MAKE_FLAGS) \ + M="$(PKG_BUILD_DIR)" \ + modules \ + $(if $(CONFIG_IPV6),EXTRA_CFLAGS="-DSFE_SUPPORT_IPV6" SFE_SUPPORT_IPV6=y,) +endef + +#ifneq ($(CONFIG_PACKAGE_kmod-shortcut-fe)$(CONFIG_PACKAGE_kmod-shortcut-fe-cm),) +define Build/InstallDev + $(INSTALL_DIR) $(1)/usr/include/shortcut-fe + $(CP) -rf $(PKG_BUILD_DIR)/sfe.h $(1)/usr/include/shortcut-fe +endef +#endif + +$(eval $(call KernelPackage,shortcut-fe)) +$(eval $(call KernelPackage,shortcut-fe-cm)) diff --git a/shortcut-fe/files/usr/bin/sfe_dump b/shortcut-fe/files/usr/bin/sfe_dump new file mode 100755 index 000000000..2a224e0ca --- /dev/null +++ b/shortcut-fe/files/usr/bin/sfe_dump @@ -0,0 +1,35 @@ +#!/bin/sh +# +# Copyright (c) 2015 The Linux Foundation. All rights reserved. +# Permission to use, copy, modify, and/or distribute this software for +# any purpose with or without fee is hereby granted, provided that the +# above copyright notice and this permission notice appear in all copies. +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+# + +#@sfe_dump +#@example : sfe_dump (ipv4|ipv6) +sfe_dump(){ + [ -e "/dev/sfe_ipv4" ] || { + dev_num=$(cat /sys/sfe_ipv4/debug_dev) + mknod /dev/sfe_ipv4 c $dev_num 0 + } + [ -e "/dev/sfe_ipv6" ] || { + dev_num=$(cat /sys/sfe_ipv6/debug_dev) + mknod /dev/sfe_ipv6 c $dev_num 0 + } + cat /dev/sfe_$1 +} + +if [ -z "$1" ]; then + sfe_dump ipv4 + sfe_dump ipv6 +else + sfe_dump $1 +fi diff --git a/shortcut-fe/src/Kconfig b/shortcut-fe/src/Kconfig new file mode 100755 index 000000000..f45e56b47 --- /dev/null +++ b/shortcut-fe/src/Kconfig @@ -0,0 +1,15 @@ +# +# Shortcut forwarding engine +# + +config SHORTCUT_FE + tristate "Shortcut Forwarding Engine" + depends on NF_CONNTRACK + default n + help + Shortcut is a fast in-kernel packet forwarding engine. + + To compile this code as a module, choose M here: the module will be + called shortcut-fe. + + If unsure, say N. diff --git a/shortcut-fe/src/Makefile b/shortcut-fe/src/Makefile new file mode 100755 index 000000000..3b1ceaa44 --- /dev/null +++ b/shortcut-fe/src/Makefile @@ -0,0 +1,23 @@ +# +# Makefile for Shortcut FE. +# + +obj-m += shortcut-fe.o + +ifdef SFE_SUPPORT_IPV6 +obj-m += shortcut-fe-ipv6.o +endif + +obj-m += shortcut-fe-cm.o + +shortcut-fe-objs := \ + sfe_ipv4.o + +ifdef SFE_SUPPORT_IPV6 +shortcut-fe-ipv6-objs := \ + sfe_ipv6.o +endif + +shortcut-fe-cm-objs := \ + sfe_cm.o + diff --git a/shortcut-fe/src/sfe.h b/shortcut-fe/src/sfe.h new file mode 100755 index 000000000..279e7b3dc --- /dev/null +++ b/shortcut-fe/src/sfe.h @@ -0,0 +1,114 @@ +/* + * sfe.h + * Shortcut forwarding engine. + * + * Copyright (c) 2013-2017 The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + + +/* + * The following are debug macros used throughout the SFE. + * + * The DEBUG_LEVEL enables the followings based on its value, + * when dynamic debug option is disabled. + * + * 0 = OFF + * 1 = ASSERTS / ERRORS + * 2 = 1 + WARN + * 3 = 2 + INFO + * 4 = 3 + TRACE + */ +#define DEBUG_LEVEL 2 + +#if (DEBUG_LEVEL < 1) +#define DEBUG_ASSERT(s, ...) +#define DEBUG_ERROR(s, ...) +#else +#define DEBUG_ASSERT(c, s, ...) if (!(c)) { pr_emerg("ASSERT: %s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__); BUG(); } +#define DEBUG_ERROR(s, ...) pr_err("%s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#endif + +#if defined(CONFIG_DYNAMIC_DEBUG) +/* + * Compile messages for dynamic enable/disable + */ +#define DEBUG_WARN(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#define DEBUG_INFO(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#define DEBUG_TRACE(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#else + +/* + * Statically compile messages at different levels + */ +#if (DEBUG_LEVEL < 2) +#define DEBUG_WARN(s, ...) +#else +#define DEBUG_WARN(s, ...) 
pr_warn("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#endif
+
+#if (DEBUG_LEVEL < 3)
+#define DEBUG_INFO(s, ...)
+#else
+#define DEBUG_INFO(s, ...) pr_notice("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#endif
+
+#if (DEBUG_LEVEL < 4)
+#define DEBUG_TRACE(s, ...)
+#else
+#define DEBUG_TRACE(s, ...) pr_info("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#endif
+#endif
+
+#ifdef CONFIG_NF_FLOW_COOKIE
+typedef int (*flow_cookie_set_func_t)(u32 protocol, __be32 src_ip, __be16 src_port,
+				      __be32 dst_ip, __be16 dst_port, u16 flow_cookie);
+/*
+ * sfe_register_flow_cookie_cb
+ *	register a function in SFE to let SFE use this function to configure flow cookie for a flow
+ *
+ *	A hardware driver which supports flow cookies should register a callback function in SFE. Then SFE
+ *	can use this function to configure flow cookie for a flow.
+ *	return: 0, success; !=0, fail
+ */
+int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb);
+
+/*
+ * sfe_unregister_flow_cookie_cb
+ *	unregister function which is used to configure flow cookie for a flow
+ *
+ *	return: 0, success; !=0, fail
+ */
+int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb);
+
+typedef int (*sfe_ipv6_flow_cookie_set_func_t)(u32 protocol, __be32 src_ip[4], __be16 src_port,
+					       __be32 dst_ip[4], __be16 dst_port, u16 flow_cookie);
+
+/*
+ * sfe_ipv6_register_flow_cookie_cb
+ *	register a function in SFE to let SFE use this function to configure flow cookie for a flow
+ *
+ *	A hardware driver which supports flow cookies should register a callback function in SFE. Then SFE
+ *	can use this function to configure flow cookie for a flow.
+ *	return: 0, success; !=0, fail
+ */
+int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb);
+
+/*
+ * sfe_ipv6_unregister_flow_cookie_cb
+ *	unregister function which is used to configure flow cookie for a flow
+ *
+ *	return: 0, success; !=0, fail
+ */
+int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb);
+
+#endif /*CONFIG_NF_FLOW_COOKIE*/
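Editor's aside: a sketch of how a NIC driver might hook the flow-cookie callback above (illustration only; my_set_flow_cookie and the init/exit wrappers are hypothetical, and the callback signature is taken verbatim from the typedef in sfe.h):

	#ifdef CONFIG_NF_FLOW_COOKIE
	/* program the hardware classifier so matching packets carry flow_cookie;
	 * return 0 on success, non-zero on failure, per the comments above */
	static int my_set_flow_cookie(u32 protocol, __be32 src_ip, __be16 src_port,
				      __be32 dst_ip, __be16 dst_port, u16 flow_cookie)
	{
		/* write the 5-tuple -> cookie mapping into hardware here */
		return 0;
	}

	static int __init my_driver_init(void)
	{
		return sfe_register_flow_cookie_cb(my_set_flow_cookie);
	}

	static void __exit my_driver_exit(void)
	{
		sfe_unregister_flow_cookie_cb(my_set_flow_cookie);
	}
	#endif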
diff --git a/shortcut-fe/src/sfe_backport.h b/shortcut-fe/src/sfe_backport.h
new file mode 100755
index 000000000..2f8c8ca3c
--- /dev/null
+++ b/shortcut-fe/src/sfe_backport.h
@@ -0,0 +1,195 @@
+/*
+ * sfe_backport.h
+ *	Shortcut forwarding engine compatible header file.
+ *
+ * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved.
+ * Permission to use, copy, modify, and/or distribute this software for
+ * any purpose with or without fee is hereby granted, provided that the
+ * above copyright notice and this permission notice appear in all copies.
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+#include <net/netfilter/nf_conntrack_timeout.h>
+#else
+enum udp_conntrack {
+	UDP_CT_UNREPLIED,
+	UDP_CT_REPLIED,
+	UDP_CT_MAX
+};
+
+static inline unsigned int *
+nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
+		     struct nf_conntrack_l4proto *l4proto)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+	struct nf_conn_timeout *timeout_ext;
+	unsigned int *timeouts;
+
+	timeout_ext = nf_ct_timeout_find(ct);
+	if (timeout_ext)
+		timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
+	else
+		timeouts = l4proto->get_timeouts(net);
+
+	return timeouts;
+#else
+	return l4proto->get_timeouts(net);
+#endif /*CONFIG_NF_CONNTRACK_TIMEOUT*/
+}
+#endif /*KERNEL_VERSION(3, 7, 0)*/
+#endif /*KERNEL_VERSION(3, 4, 0)*/
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \
+static unsigned int FN_NAME(void *priv, \
+			    struct sk_buff *SKB, \
+			    const struct nf_hook_state *state)
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \
+static unsigned int FN_NAME(const struct nf_hook_ops *OPS, \
+			    struct sk_buff *SKB, \
+			    const struct net_device *UNUSED, \
+			    const struct net_device *OUT, \
+			    int (*OKFN)(struct sk_buff *))
+#else
+#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \
+static unsigned int FN_NAME(unsigned int HOOKNUM, \
+			    struct sk_buff *SKB, \
+			    const struct net_device *UNUSED, \
+			    const struct net_device *OUT, \
+			    int (*OKFN)(struct sk_buff *))
+#endif
+
+#define sfe_cm_ipv4_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \
+	sfe_define_post_routing_hook(__sfe_cm_ipv4_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN)
+#define sfe_cm_ipv6_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \
+	sfe_define_post_routing_hook(__sfe_cm_ipv6_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN)
+#define fast_classifier_ipv4_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \
+	sfe_define_post_routing_hook(__fast_classifier_ipv4_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN)
+#define fast_classifier_ipv6_post_routing_hook(HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \
+	sfe_define_post_routing_hook(__fast_classifier_ipv6_post_routing_hook, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN)
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+#define SFE_IPV4_NF_POST_ROUTING_HOOK(fn) \
+	{ \
+		.hook = fn, \
+		.pf = NFPROTO_IPV4, \
+		.hooknum = NF_INET_POST_ROUTING, \
+		.priority = NF_IP_PRI_NAT_SRC + 1, \
+	}
+#else
+#define SFE_IPV4_NF_POST_ROUTING_HOOK(fn) \
+	{ \
+		.hook = fn, \
+		.owner = THIS_MODULE, \
+		.pf = NFPROTO_IPV4, \
+		.hooknum = NF_INET_POST_ROUTING, \
+		.priority = NF_IP_PRI_NAT_SRC + 1, \
+	}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+#define SFE_IPV6_NF_POST_ROUTING_HOOK(fn) \
+	{ \
+		.hook = fn, \
+		.pf = NFPROTO_IPV6, \
+		.hooknum = NF_INET_POST_ROUTING, \
+		.priority = NF_IP_PRI_NAT_SRC + 1, \
+	}
+#else
+#define SFE_IPV6_NF_POST_ROUTING_HOOK(fn) \
+	{ \
+		.hook = fn, \
+		.owner = THIS_MODULE, \
+		.pf = NFPROTO_IPV6, \
+		.hooknum = NF_INET_POST_ROUTING, \
+		.priority = NF_IP6_PRI_NAT_SRC + 1, \
+	}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0))
+#define SFE_NF_CT_DEFAULT_ZONE (&nf_ct_zone_dflt)
+#else
+#define SFE_NF_CT_DEFAULT_ZONE NF_CT_DEFAULT_ZONE
+#endif
+
+/*
+ * sfe_dev_get_master
+ *	get master of bridge port, and hold it
+ */
+static
inline struct net_device *sfe_dev_get_master(struct net_device *dev) +{ + struct net_device *master; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) + rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); + if (master) + dev_hold(master); + + rcu_read_unlock(); +#else + master = dev->master; + if (master) + dev_hold(master); +#endif + return master; +} + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) +#define SFE_DEV_EVENT_PTR(PTR) netdev_notifier_info_to_dev(PTR) +#else +#define SFE_DEV_EVENT_PTR(PTR) (struct net_device *)(PTR) +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#define SFE_NF_CONN_ACCT(NM) struct nf_conn_acct *NM +#else +#define SFE_NF_CONN_ACCT(NM) struct nf_conn_counter *NM +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#define SFE_ACCT_COUNTER(NM) ((NM)->counter) +#else +#define SFE_ACCT_COUNTER(NM) (NM) +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +#define sfe_hash_for_each_possible(name, obj, node, member, key) \ + hash_for_each_possible(name, obj, member, key) +#else +#define sfe_hash_for_each_possible(name, obj, node, member, key) \ + hash_for_each_possible(name, obj, node, member, key) +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +#define sfe_hash_for_each(name, bkt, node, obj, member) \ + hash_for_each(name, bkt, obj, member) +#else +#define sfe_hash_for_each(name, bkt, node, obj, member) \ + hash_for_each(name, bkt, node, obj, member) +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) +#define sfe_dst_get_neighbour(dst, daddr) dst_neigh_lookup(dst, daddr) +#else +static inline struct neighbour * +sfe_dst_get_neighbour(struct dst_entry *dst, void *daddr) +{ + struct neighbour *neigh = dst_get_neighbour_noref(dst); + + if (neigh) + neigh_hold(neigh); + + return neigh; +} +#endif diff --git a/shortcut-fe/src/sfe_cm.c b/shortcut-fe/src/sfe_cm.c new file mode 100755 index 000000000..18f3475e5 --- /dev/null +++ b/shortcut-fe/src/sfe_cm.c @@ -0,0 +1,1146 @@ +/* + * sfe-cm.c + * Shortcut forwarding engine connection manager. + * + * Copyright (c) 2013-2018 The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sfe.h" +#include "sfe_cm.h" +#include "sfe_backport.h" + +typedef enum sfe_cm_exception { + SFE_CM_EXCEPTION_PACKET_BROADCAST, + SFE_CM_EXCEPTION_PACKET_MULTICAST, + SFE_CM_EXCEPTION_NO_IIF, + SFE_CM_EXCEPTION_NO_CT, + SFE_CM_EXCEPTION_CT_NO_TRACK, + SFE_CM_EXCEPTION_CT_NO_CONFIRM, + SFE_CM_EXCEPTION_CT_IS_ALG, + SFE_CM_EXCEPTION_IS_IPV4_MCAST, + SFE_CM_EXCEPTION_IS_IPV6_MCAST, + SFE_CM_EXCEPTION_TCP_NOT_ASSURED, + SFE_CM_EXCEPTION_TCP_NOT_ESTABLISHED, + SFE_CM_EXCEPTION_UNKNOW_PROTOCOL, + SFE_CM_EXCEPTION_NO_SRC_DEV, + SFE_CM_EXCEPTION_NO_SRC_XLATE_DEV, + SFE_CM_EXCEPTION_NO_DEST_DEV, + SFE_CM_EXCEPTION_NO_DEST_XLATE_DEV, + SFE_CM_EXCEPTION_NO_BRIDGE, + SFE_CM_EXCEPTION_LOCAL_OUT, + SFE_CM_EXCEPTION_MAX +} sfe_cm_exception_t; + +static char *sfe_cm_exception_events_string[SFE_CM_EXCEPTION_MAX] = { + "PACKET_BROADCAST", + "PACKET_MULTICAST", + "NO_IIF", + "NO_CT", + "CT_NO_TRACK", + "CT_NO_CONFIRM", + "CT_IS_ALG", + "IS_IPV4_MCAST", + "IS_IPV6_MCAST", + "TCP_NOT_ASSURED", + "TCP_NOT_ESTABLISHED", + "UNKNOW_PROTOCOL", + "NO_SRC_DEV", + "NO_SRC_XLATE_DEV", + "NO_DEST_DEV", + "NO_DEST_XLATE_DEV", + "NO_BRIDGE", + "LOCAL_OUT" +}; + +/* + * Per-module structure. + */ +struct sfe_cm { + spinlock_t lock; /* Lock for SMP correctness */ + + /* + * Control state. + */ + struct kobject *sys_sfe_cm; /* sysfs linkage */ + + /* + * Callback notifiers. + */ + struct notifier_block dev_notifier; /* Device notifier */ + struct notifier_block inet_notifier; /* IPv4 notifier */ + struct notifier_block inet6_notifier; /* IPv6 notifier */ + u32 exceptions[SFE_CM_EXCEPTION_MAX]; +}; + +static struct sfe_cm __sc; + +/* + * sfe_cm_incr_exceptions() + * increase an exception counter. + */ +static inline void sfe_cm_incr_exceptions(sfe_cm_exception_t except) +{ + struct sfe_cm *sc = &__sc; + + spin_lock_bh(&sc->lock); + sc->exceptions[except]++; + spin_unlock_bh(&sc->lock); +} + +/* + * sfe_cm_recv() + * Handle packet receives. + * + * Returns 1 if the packet is forwarded or 0 if it isn't. + */ +int sfe_cm_recv(struct sk_buff *skb) +{ + struct net_device *dev; + + /* + * We know that for the vast majority of packets we need the transport + * layer header so we may as well start to fetch it now! + */ + prefetch(skb->data + 32); + barrier(); + + dev = skb->dev; + + /* + * We're only interested in IPv4 and IPv6 packets. + */ + if (likely(htons(ETH_P_IP) == skb->protocol)) { + struct in_device *in_dev; + + /* + * Does our input device support IP processing? + */ + in_dev = (struct in_device *)dev->ip_ptr; + if (unlikely(!in_dev)) { + DEBUG_TRACE("no IP processing for device: %s\n", dev->name); + return 0; + } + + /* + * Does it have an IP address? If it doesn't then we can't do anything + * interesting here! + */ + if (unlikely(!in_dev->ifa_list)) { + DEBUG_TRACE("no IP address for device: %s\n", dev->name); + return 0; + } + + return sfe_ipv4_recv(dev, skb); + } + + if (likely(htons(ETH_P_IPV6) == skb->protocol)) { + struct inet6_dev *in_dev; + + /* + * Does our input device support IPv6 processing? + */ + in_dev = (struct inet6_dev *)dev->ip6_ptr; + if (unlikely(!in_dev)) { + DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name); + return 0; + } + + /* + * Does it have an IPv6 address? If it doesn't then we can't do anything + * interesting here! 
+ */ + if (unlikely(list_empty(&in_dev->addr_list))) { + DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name); + return 0; + } + + return sfe_ipv6_recv(dev, skb); + } + + DEBUG_TRACE("not IP packet\n"); + return 0; +} + +/* + * sfe_cm_find_dev_and_mac_addr() + * Find the device and MAC address for a given IPv4/IPv6 address. + * + * Returns true if we find the device and MAC address, otherwise false. + * + * We look up the rtable entry for the address and, from its neighbour + * structure, obtain the hardware address. This means this function also + * works if the neighbours are routers too. + */ +static bool sfe_cm_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, int is_v4) +{ + struct neighbour *neigh; + struct rtable *rt; + struct rt6_info *rt6; + struct dst_entry *dst; + struct net_device *mac_dev; + + /* + * Look up the rtable entry for the IP address then get the hardware + * address from its neighbour structure. This means this work when the + * neighbours are routers too. + */ + if (likely(is_v4)) { + rt = ip_route_output(&init_net, addr->ip, 0, 0, 0); + if (unlikely(IS_ERR(rt))) { + goto ret_fail; + } + + dst = (struct dst_entry *)rt; + } else { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)) + rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, NULL, 0); +#else + rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0); +#endif /*KERNEL_VERSION(4, 17, 0)*/ + if (!rt6) { + goto ret_fail; + } + + dst = (struct dst_entry *)rt6; + } + + rcu_read_lock(); + neigh = sfe_dst_get_neighbour(dst, addr); + if (unlikely(!neigh)) { + rcu_read_unlock(); + dst_release(dst); + goto ret_fail; + } + + if (unlikely(!(neigh->nud_state & NUD_VALID))) { + rcu_read_unlock(); + neigh_release(neigh); + dst_release(dst); + goto ret_fail; + } + + mac_dev = neigh->dev; + if (!mac_dev) { + rcu_read_unlock(); + neigh_release(neigh); + dst_release(dst); + goto ret_fail; + } + + memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len); + + dev_hold(mac_dev); + *dev = mac_dev; + rcu_read_unlock(); + neigh_release(neigh); + dst_release(dst); + + return true; + +ret_fail: + if (is_v4) { + DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", &addr->ip); + + } else { + DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr->ip6); + } + + return false; +} + +/* + * sfe_cm_post_routing() + * Called for packets about to leave the box - either locally generated or forwarded from another interface + */ +static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4) +{ + struct sfe_connection_create sic; + struct net_device *in; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct net_device *dev; + struct net_device *src_dev; + struct net_device *dest_dev; + struct net_device *src_dev_tmp; + struct net_device *dest_dev_tmp; + struct net_device *src_br_dev = NULL; + struct net_device *dest_br_dev = NULL; + struct nf_conntrack_tuple orig_tuple; + struct nf_conntrack_tuple reply_tuple; + SFE_NF_CONN_ACCT(acct); + + /* + * Don't process broadcast or multicast packets. 
+ */ + if (unlikely(skb->pkt_type == PACKET_BROADCAST)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_PACKET_BROADCAST); + DEBUG_TRACE("broadcast, ignoring\n"); + return NF_ACCEPT; + } + if (unlikely(skb->pkt_type == PACKET_MULTICAST)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_PACKET_MULTICAST); + DEBUG_TRACE("multicast, ignoring\n"); + return NF_ACCEPT; + } + +#ifdef CONFIG_XFRM + /* + * Packet to xfrm for encapsulation, we can't process it + */ + if (unlikely(skb_dst(skb)->xfrm)) { + DEBUG_TRACE("packet to xfrm, ignoring\n"); + return NF_ACCEPT; + } +#endif + + /* + * Don't process locally generated packets. + */ + if (skb->sk) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_LOCAL_OUT); + DEBUG_TRACE("skip local out packet\n"); + return NF_ACCEPT; + } + + /* + * Don't process packets that are not being forwarded. + */ + in = dev_get_by_index(&init_net, skb->skb_iif); + if (!in) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_IIF); + DEBUG_TRACE("packet not forwarding\n"); + return NF_ACCEPT; + } + + dev_put(in); + + /* + * Don't process packets that aren't being tracked by conntrack. + */ + ct = nf_ct_get(skb, &ctinfo); + if (unlikely(!ct)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_CT); + DEBUG_TRACE("no conntrack connection, ignoring\n"); + return NF_ACCEPT; + } + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) + /* + * Don't process untracked connections. + */ + if (unlikely(nf_ct_is_untracked(ct))) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_NO_TRACK); + DEBUG_TRACE("untracked connection\n"); + return NF_ACCEPT; + } +#endif /*KERNEL_VERSION(4, 12, 0)*/ + + /* + * Unconfirmed connection may be dropped by Linux at the final step, + * So we don't process unconfirmed connections. + */ + if (!nf_ct_is_confirmed(ct)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_NO_CONFIRM); + DEBUG_TRACE("unconfirmed connection\n"); + return NF_ACCEPT; + } + + /* + * Don't process connections that require support from a 'helper' (typically a NAT ALG). + */ + if (unlikely(nfct_help(ct))) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_CT_IS_ALG); + DEBUG_TRACE("connection has helper\n"); + return NF_ACCEPT; + } + + /* + * Check if the acceleration of a flow could be rejected quickly. + */ + acct = nf_conn_acct_find(ct); + if (acct) { + long long packets = atomic64_read(&SFE_ACCT_COUNTER(acct)[CTINFO2DIR(ctinfo)].packets); + if ((packets > 0xff) && (packets & 0xff)) { + /* + * Connection hits slow path at least 256 times, so it must be not able to accelerate. + * But we also give it a chance to walk through ECM every 256 packets + */ + return NF_ACCEPT; + } + } + + /* + * Look up the details of our connection in conntrack. + * + * Note that the data we get from conntrack is for the "ORIGINAL" direction + * but our packet may actually be in the "REPLY" direction. + */ + orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + sic.protocol = (s32)orig_tuple.dst.protonum; + + sic.flags = 0; + + /* + * Get addressing information, non-NAT first + */ + if (likely(is_v4)) { + u32 dscp; + + sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip; + sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; + + if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_IS_IPV4_MCAST); + DEBUG_TRACE("multicast address\n"); + return NF_ACCEPT; + } + + /* + * NAT'ed addresses - note these are as seen from the 'reply' direction + * When NAT does not apply to this connection these will be identical to the above. 
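+	 *
+	 * Worked example (hypothetical addresses) for a flow SNAT'ed from
+	 * LAN host 192.168.1.10 to WAN address 203.0.113.5:
+	 *
+	 *	orig_tuple:  src 192.168.1.10 -> dst 198.51.100.7
+	 *	reply_tuple: src 198.51.100.7 -> dst 203.0.113.5
+	 *
+	 * giving src_ip_xlate = reply_tuple.dst = 203.0.113.5 (the post-NAT
+	 * source) and dest_ip_xlate = reply_tuple.src = 198.51.100.7
+	 * (unchanged in this case).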
+ */ + sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip; + sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip; + + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; + if (dscp) { + sic.dest_dscp = dscp; + sic.src_dscp = sic.dest_dscp; + sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; + } + } else { + u32 dscp; + + sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); + sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); + + if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) || + ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_IS_IPV6_MCAST); + DEBUG_TRACE("multicast address\n"); + return NF_ACCEPT; + } + + /* + * NAT'ed addresses - note these are as seen from the 'reply' direction + * When NAT does not apply to this connection these will be identical to the above. + */ + sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6); + sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6); + + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; + if (dscp) { + sic.dest_dscp = dscp; + sic.src_dscp = sic.dest_dscp; + sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP; + } + } + + switch (sic.protocol) { + case IPPROTO_TCP: + sic.src_port = orig_tuple.src.u.tcp.port; + sic.dest_port = orig_tuple.dst.u.tcp.port; + sic.src_port_xlate = reply_tuple.dst.u.tcp.port; + sic.dest_port_xlate = reply_tuple.src.u.tcp.port; + sic.src_td_window_scale = ct->proto.tcp.seen[0].td_scale; + sic.src_td_max_window = ct->proto.tcp.seen[0].td_maxwin; + sic.src_td_end = ct->proto.tcp.seen[0].td_end; + sic.src_td_max_end = ct->proto.tcp.seen[0].td_maxend; + sic.dest_td_window_scale = ct->proto.tcp.seen[1].td_scale; + sic.dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin; + sic.dest_td_end = ct->proto.tcp.seen[1].td_end; + sic.dest_td_max_end = ct->proto.tcp.seen[1].td_maxend; + + if (nf_ct_tcp_no_window_check + || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL) + || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) { + sic.flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK; + } + + /* + * Don't try to manage a non-established connection. + */ + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_TCP_NOT_ASSURED); + DEBUG_TRACE("non-established connection\n"); + return NF_ACCEPT; + } + + /* + * If the connection is shutting down do not manage it. + * state can not be SYN_SENT, SYN_RECV because connection is assured + * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE. 
+ */ + spin_lock_bh(&ct->lock); + if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) { + spin_unlock_bh(&ct->lock); + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_TCP_NOT_ESTABLISHED); + DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n", + ct->proto.tcp.state, &sic.src_ip, ntohs(sic.src_port), + &sic.dest_ip, ntohs(sic.dest_port)); + return NF_ACCEPT; + } + spin_unlock_bh(&ct->lock); + break; + + case IPPROTO_UDP: + sic.src_port = orig_tuple.src.u.udp.port; + sic.dest_port = orig_tuple.dst.u.udp.port; + sic.src_port_xlate = reply_tuple.dst.u.udp.port; + sic.dest_port_xlate = reply_tuple.src.u.udp.port; + break; + + default: + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_UNKNOW_PROTOCOL); + DEBUG_TRACE("unhandled protocol %d\n", sic.protocol); + return NF_ACCEPT; + } + +#ifdef CONFIG_XFRM + sic.original_accel = 1; + sic.reply_accel = 1; + + /* + * For packets de-capsulated from xfrm, we still can accelerate it + * on the direction we just received the packet. + */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)) + if (unlikely(skb_ext_exist(skb, SKB_EXT_SEC_PATH))) { +#else + if (unlikely(skb->sp)) { +#endif + if (sic.protocol == IPPROTO_TCP && + !(sic.flags & SFE_CREATE_FLAG_NO_SEQ_CHECK)) { + return NF_ACCEPT; + } + + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + sic.reply_accel = 0; + } else { + sic.original_accel = 0; + } + } +#endif + + /* + * Get QoS information + */ + if (skb->priority) { + sic.dest_priority = skb->priority; + sic.src_priority = sic.dest_priority; + sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY; + } + + /* + * Get the net device and MAC addresses that correspond to the various source and + * destination host addresses. + */ + if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_SRC_DEV); + return NF_ACCEPT; + } + src_dev = src_dev_tmp; + + if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_SRC_XLATE_DEV); + goto done1; + } + dev_put(dev); + + if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_DEST_DEV); + goto done1; + } + dev_put(dev); + + if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_DEST_XLATE_DEV); + goto done1; + } + dest_dev = dest_dev_tmp; + + /* + * Our devices may actually be part of a bridge interface. If that's + * the case then find the bridge interface instead. + */ + if (src_dev->priv_flags & IFF_BRIDGE_PORT) { + src_br_dev = sfe_dev_get_master(src_dev); + if (!src_br_dev) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_BRIDGE); + DEBUG_TRACE("no bridge found for: %s\n", src_dev->name); + goto done2; + } + src_dev = src_br_dev; + } + + if (dest_dev->priv_flags & IFF_BRIDGE_PORT) { + dest_br_dev = sfe_dev_get_master(dest_dev); + if (!dest_br_dev) { + sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_BRIDGE); + DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name); + goto done3; + } + dest_dev = dest_br_dev; + } + + sic.src_dev = src_dev; + sic.dest_dev = dest_dev; + + sic.src_mtu = src_dev->mtu; + sic.dest_mtu = dest_dev->mtu; + + if (likely(is_v4)) { + sfe_ipv4_create_rule(&sic); + } else { + sfe_ipv6_create_rule(&sic); + } + + /* + * If we had bridge ports then release them too. 
+ */ + if (dest_br_dev) { + dev_put(dest_br_dev); + } +done3: + if (src_br_dev) { + dev_put(src_br_dev); + } +done2: + dev_put(dest_dev_tmp); +done1: + dev_put(src_dev_tmp); + + return NF_ACCEPT; +} + +/* + * sfe_cm_ipv4_post_routing_hook() + * Called for packets about to leave the box - either locally generated or forwarded from another interface + */ +sfe_cm_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) +{ + return sfe_cm_post_routing(skb, true); +} + +/* + * sfe_cm_ipv6_post_routing_hook() + * Called for packets about to leave the box - either locally generated or forwarded from another interface + */ +sfe_cm_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn) +{ + return sfe_cm_post_routing(skb, false); +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +/* + * sfe_cm_conntrack_event() + * Callback event invoked when a conntrack connection's state changes. + */ +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS +static int sfe_cm_conntrack_event(struct notifier_block *this, + unsigned long events, void *ptr) +#else +static int sfe_cm_conntrack_event(unsigned int events, struct nf_ct_event *item) +#endif +{ +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + struct nf_ct_event *item = ptr; +#endif + struct sfe_connection_destroy sid; + struct nf_conn *ct = item->ct; + struct nf_conntrack_tuple orig_tuple; + + /* + * If we don't have a conntrack entry then we're done. + */ + if (unlikely(!ct)) { + DEBUG_WARN("no ct in conntrack event callback\n"); + return NOTIFY_DONE; + } + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)) + /* + * If this is an untracked connection then we can't have any state either. + */ + if (unlikely(nf_ct_is_untracked(ct))) { + DEBUG_TRACE("ignoring untracked conn\n"); + return NOTIFY_DONE; + } +#endif /*KERNEL_VERSION(4, 12, 0)*/ + + /* + * We're only interested in destroy events. + */ + if (unlikely(!(events & (1 << IPCT_DESTROY)))) { + DEBUG_TRACE("ignoring non-destroy event\n"); + return NOTIFY_DONE; + } + + orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + sid.protocol = (s32)orig_tuple.dst.protonum; + + /* + * Extract information from the conntrack connection. We're only interested + * in nominal connection information (i.e. we're ignoring any NAT information). 
+ */ + switch (sid.protocol) { + case IPPROTO_TCP: + sid.src_port = orig_tuple.src.u.tcp.port; + sid.dest_port = orig_tuple.dst.u.tcp.port; + break; + + case IPPROTO_UDP: + sid.src_port = orig_tuple.src.u.udp.port; + sid.dest_port = orig_tuple.dst.u.udp.port; + break; + + default: + DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol); + return NOTIFY_DONE; + } + + if (likely(nf_ct_l3num(ct) == AF_INET)) { + sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip; + sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip; + + sfe_ipv4_destroy_rule(&sid); + } else if (likely(nf_ct_l3num(ct) == AF_INET6)) { + sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6); + sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6); + + sfe_ipv6_destroy_rule(&sid); + } else { + DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n"); + } + + return NOTIFY_DONE; +} + +/* + * Netfilter conntrack event system to monitor connection tracking changes + */ +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS +static struct notifier_block sfe_cm_conntrack_notifier = { + .notifier_call = sfe_cm_conntrack_event, +}; +#else +static struct nf_ct_event_notifier sfe_cm_conntrack_notifier = { + .fcn = sfe_cm_conntrack_event, +}; +#endif +#endif + +/* + * Structure to establish a hook into the post routing netfilter point - this + * will pick up local outbound and packets going from one interface to another. + * + * Note: see include/linux/netfilter_ipv4.h for info related to priority levels. + * We want to examine packets after NAT translation and any ALG processing. + */ +static struct nf_hook_ops sfe_cm_ops_post_routing[] __read_mostly = { + SFE_IPV4_NF_POST_ROUTING_HOOK(__sfe_cm_ipv4_post_routing_hook), +#ifdef SFE_SUPPORT_IPV6 + SFE_IPV6_NF_POST_ROUTING_HOOK(__sfe_cm_ipv6_post_routing_hook), +#endif +}; + +/* + * sfe_cm_sync_rule() + * Synchronize a connection's state. 
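+ *
+ * This is the sfe_sync_rule_callback_t that sfe_cm_init() registers with
+ * both the IPv4 and IPv6 engines; per sfe_cm.h it is invoked from a timer
+ * roughly 100 times per second.  Each invocation looks the flow up in
+ * conntrack and folds the engine's packet/byte deltas and timeout
+ * extension back in, so that accelerated flows do not appear idle to Linux.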
+ */ +static void sfe_cm_sync_rule(struct sfe_connection_sync *sis) +{ + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + struct nf_conn *ct; + SFE_NF_CONN_ACCT(acct); + + /* + * Create a tuple so as to be able to look up a connection + */ + memset(&tuple, 0, sizeof(tuple)); + tuple.src.u.all = (__be16)sis->src_port; + tuple.dst.dir = IP_CT_DIR_ORIGINAL; + tuple.dst.protonum = (u8)sis->protocol; + tuple.dst.u.all = (__be16)sis->dest_port; + + if (sis->is_v6) { + tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6); + tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6); + tuple.src.l3num = AF_INET6; + + DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n", + (int)tuple.dst.protonum, + &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all), + &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all)); + } else { + tuple.src.u3.ip = sis->src_ip.ip; + tuple.dst.u3.ip = sis->dest_ip.ip; + tuple.src.l3num = AF_INET; + + DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n", + (int)tuple.dst.protonum, + &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all), + &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all)); + } + + /* + * Look up conntrack connection + */ + h = nf_conntrack_find_get(&init_net, SFE_NF_CT_DEFAULT_ZONE, &tuple); + if (unlikely(!h)) { + DEBUG_TRACE("no connection found\n"); + return; + } + + ct = nf_ct_tuplehash_to_ctrack(h); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)) + NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); +#endif /*KERNEL_VERSION(4, 9, 0)*/ + + /* + * Only update if this is not a fixed timeout + */ + if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { + spin_lock_bh(&ct->lock); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) + ct->timeout += sis->delta_jiffies; +#else + ct->timeout.expires += sis->delta_jiffies; +#endif /*KERNEL_VERSION(4, 9, 0)*/ + spin_unlock_bh(&ct->lock); + } + + acct = nf_conn_acct_find(ct); + if (acct) { + spin_lock_bh(&ct->lock); + atomic64_add(sis->src_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets); + atomic64_add(sis->src_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes); + atomic64_add(sis->dest_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets); + atomic64_add(sis->dest_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes); + spin_unlock_bh(&ct->lock); + } + + switch (sis->protocol) { + case IPPROTO_TCP: + spin_lock_bh(&ct->lock); + if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) { + ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window; + } + if ((s32)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) { + ct->proto.tcp.seen[0].td_end = sis->src_td_end; + } + if ((s32)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) { + ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end; + } + if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) { + ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window; + } + if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) { + ct->proto.tcp.seen[1].td_end = sis->dest_td_end; + } + if ((s32)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) { + ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end; + } + spin_unlock_bh(&ct->lock); + break; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) + case IPPROTO_UDP: + /* + * In Linux connection track, UDP flow has two timeout values: + * /proc/sys/net/netfilter/nf_conntrack_udp_timeout: + * this is for uni-direction UDP flow, normally its value is 60 
seconds + * /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream: + * this is for bi-direction UDP flow, normally its value is 180 seconds + * + * Linux will update timer of UDP flow to stream timeout once it seen packets + * in reply direction. But if flow is accelerated by NSS or SFE, Linux won't + * see any packets. So we have to do the same thing in our stats sync message. + */ + if (!test_bit(IPS_ASSURED_BIT, &ct->status) && acct) { + u_int64_t reply_pkts = atomic64_read(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets); + + if (reply_pkts != 0) { + unsigned int *timeouts; + + set_bit(IPS_SEEN_REPLY_BIT, &ct->status); + set_bit(IPS_ASSURED_BIT, &ct->status); + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)) + timeouts = nf_ct_timeout_lookup(ct); +#else + struct nf_conntrack_l4proto *l4proto; + + l4proto = __nf_ct_l4proto_find((sis->is_v6 ? AF_INET6 : AF_INET), IPPROTO_UDP); + timeouts = nf_ct_timeout_lookup(&init_net, ct, l4proto); +#endif /*KERNEL_VERSION(4, 19, 0)*/ + + spin_lock_bh(&ct->lock); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) + ct->timeout = jiffies + timeouts[UDP_CT_REPLIED]; +#else + ct->timeout.expires = jiffies + timeouts[UDP_CT_REPLIED]; +#endif /*KERNEL_VERSION(4, 9, 0)*/ + spin_unlock_bh(&ct->lock); + } + } + break; +#endif /*KERNEL_VERSION(3, 4, 0)*/ + } + + /* + * Release connection + */ + nf_ct_put(ct); +} + +/* + * sfe_cm_device_event() + */ +int sfe_cm_device_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = SFE_DEV_EVENT_PTR(ptr); + + if (dev && (event == NETDEV_DOWN)) { + sfe_ipv4_destroy_all_rules_for_dev(dev); + sfe_ipv6_destroy_all_rules_for_dev(dev); + } + + return NOTIFY_DONE; +} + +/* + * sfe_cm_inet_event() + */ +static int sfe_cm_inet_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + + if (dev && (event == NETDEV_DOWN)) { + sfe_ipv4_destroy_all_rules_for_dev(dev); + } + + return NOTIFY_DONE; +} + +/* + * sfe_cm_inet6_event() + */ +static int sfe_cm_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev; + + if (dev && (event == NETDEV_DOWN)) { + sfe_ipv6_destroy_all_rules_for_dev(dev); + } + + return NOTIFY_DONE; +} + +/* + * sfe_cm_get_exceptions + * dump exception counters + */ +static ssize_t sfe_cm_get_exceptions(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int idx, len; + struct sfe_cm *sc = &__sc; + + spin_lock_bh(&sc->lock); + for (len = 0, idx = 0; idx < SFE_CM_EXCEPTION_MAX; idx++) { + if (sc->exceptions[idx]) { + len += snprintf(buf + len, (ssize_t)(PAGE_SIZE - len), "%s = %d\n", sfe_cm_exception_events_string[idx], sc->exceptions[idx]); + } + } + spin_unlock_bh(&sc->lock); + + return len; +} + +/* + * sysfs attributes. 
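+ *
+ * sfe_cm_init() creates the "sfe_cm" kobject at the sysfs root, so the
+ * counters formatted by sfe_cm_get_exceptions() can be read from user
+ * space with e.g. "cat /sys/sfe_cm/exceptions".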
+ */ +static const struct device_attribute sfe_cm_exceptions_attr = + __ATTR(exceptions, S_IRUGO, sfe_cm_get_exceptions, NULL); + +/* + * sfe_cm_init() + */ +static int __init sfe_cm_init(void) +{ + struct sfe_cm *sc = &__sc; + int result = -1; + + DEBUG_INFO("SFE CM init\n"); + + /* + * Create sys/sfe_cm + */ + sc->sys_sfe_cm = kobject_create_and_add("sfe_cm", NULL); + if (!sc->sys_sfe_cm) { + DEBUG_ERROR("failed to register sfe_cm\n"); + goto exit1; + } + + /* + * Create sys/sfe_cm/exceptions + */ + result = sysfs_create_file(sc->sys_sfe_cm, &sfe_cm_exceptions_attr.attr); + if (result) { + DEBUG_ERROR("failed to register exceptions file: %d\n", result); + goto exit2; + } + + sc->dev_notifier.notifier_call = sfe_cm_device_event; + sc->dev_notifier.priority = 1; + register_netdevice_notifier(&sc->dev_notifier); + + sc->inet_notifier.notifier_call = sfe_cm_inet_event; + sc->inet_notifier.priority = 1; + register_inetaddr_notifier(&sc->inet_notifier); + + sc->inet6_notifier.notifier_call = sfe_cm_inet6_event; + sc->inet6_notifier.priority = 1; + register_inet6addr_notifier(&sc->inet6_notifier); + /* + * Register our netfilter hooks. + */ + result = nf_register_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); + if (result < 0) { + DEBUG_ERROR("can't register nf post routing hook: %d\n", result); + goto exit3; + } + + /* + * Register a notifier hook to get fast notifications of expired connections. + * Note: In CONFIG_NF_CONNTRACK_CHAIN_EVENTS enabled case, nf_conntrack_register_notifier() + * function always returns 0. + */ +#ifdef CONFIG_NF_CONNTRACK_EVENTS +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + (void)nf_conntrack_register_chain_notifier(&init_net, &sfe_cm_conntrack_notifier); +#else + result = nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier); + if (result < 0) { + DEBUG_ERROR("can't register nf notifier hook: %d\n", result); + goto exit4; + } +#endif +#endif + + spin_lock_init(&sc->lock); + + /* + * Hook the receive path in the network stack. + */ + BUG_ON(athrs_fast_nat_recv); + RCU_INIT_POINTER(athrs_fast_nat_recv, sfe_cm_recv); + + /* + * Hook the shortcut sync callback. + */ + sfe_ipv4_register_sync_rule_callback(sfe_cm_sync_rule); + sfe_ipv6_register_sync_rule_callback(sfe_cm_sync_rule); + return 0; + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS +exit4: + nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); +#endif +#endif +exit3: + unregister_inet6addr_notifier(&sc->inet6_notifier); + unregister_inetaddr_notifier(&sc->inet_notifier); + unregister_netdevice_notifier(&sc->dev_notifier); +exit2: + kobject_put(sc->sys_sfe_cm); + +exit1: + return result; +} + +/* + * sfe_cm_exit() + */ +static void __exit sfe_cm_exit(void) +{ + struct sfe_cm *sc = &__sc; + + DEBUG_INFO("SFE CM exit\n"); + + /* + * Unregister our sync callback. + */ + sfe_ipv4_register_sync_rule_callback(NULL); + sfe_ipv6_register_sync_rule_callback(NULL); + + /* + * Unregister our receive callback. + */ + RCU_INIT_POINTER(athrs_fast_nat_recv, NULL); + + /* + * Wait for all callbacks to complete. + */ + rcu_barrier(); + + /* + * Destroy all connections. 
+ */ + sfe_ipv4_destroy_all_rules_for_dev(NULL); + sfe_ipv6_destroy_all_rules_for_dev(NULL); + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS + nf_conntrack_unregister_chain_notifier(&init_net, &sfe_cm_conntrack_notifier); +#else + nf_conntrack_unregister_notifier(&init_net, &sfe_cm_conntrack_notifier); +#endif +#endif + nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing)); + + unregister_inet6addr_notifier(&sc->inet6_notifier); + unregister_inetaddr_notifier(&sc->inet_notifier); + unregister_netdevice_notifier(&sc->dev_notifier); + + kobject_put(sc->sys_sfe_cm); +} + +module_init(sfe_cm_init) +module_exit(sfe_cm_exit) + +MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager"); +MODULE_LICENSE("Dual BSD/GPL"); + diff --git a/shortcut-fe/src/sfe_cm.h b/shortcut-fe/src/sfe_cm.h new file mode 100755 index 000000000..23cbde859 --- /dev/null +++ b/shortcut-fe/src/sfe_cm.h @@ -0,0 +1,259 @@ +/* + * sfe_cm.h + * Shortcut forwarding engine. + * + * Copyright (c) 2013-2016 The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * connection flags. + */ +#define SFE_CREATE_FLAG_NO_SEQ_CHECK BIT(0) + /* Indicates that we should not check sequence numbers */ +#define SFE_CREATE_FLAG_REMARK_PRIORITY BIT(1) + /* Indicates that we should remark priority of skb */ +#define SFE_CREATE_FLAG_REMARK_DSCP BIT(2) + /* Indicates that we should remark DSCP of packet */ + +/* + * IPv6 address structure + */ +struct sfe_ipv6_addr { + __be32 addr[4]; +}; + +typedef union { + __be32 ip; + struct sfe_ipv6_addr ip6[1]; +} sfe_ip_addr_t; + +/* + * connection creation structure. + */ +struct sfe_connection_create { + int protocol; + struct net_device *src_dev; + struct net_device *dest_dev; + u32 flags; + u32 src_mtu; + u32 dest_mtu; + sfe_ip_addr_t src_ip; + sfe_ip_addr_t src_ip_xlate; + sfe_ip_addr_t dest_ip; + sfe_ip_addr_t dest_ip_xlate; + __be16 src_port; + __be16 src_port_xlate; + __be16 dest_port; + __be16 dest_port_xlate; + u8 src_mac[ETH_ALEN]; + u8 src_mac_xlate[ETH_ALEN]; + u8 dest_mac[ETH_ALEN]; + u8 dest_mac_xlate[ETH_ALEN]; + u8 src_td_window_scale; + u32 src_td_max_window; + u32 src_td_end; + u32 src_td_max_end; + u8 dest_td_window_scale; + u32 dest_td_max_window; + u32 dest_td_end; + u32 dest_td_max_end; + u32 mark; +#ifdef CONFIG_XFRM + u32 original_accel; + u32 reply_accel; +#endif + u32 src_priority; + u32 dest_priority; + u32 src_dscp; + u32 dest_dscp; +}; + +/* + * connection destruction structure. 
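+ *
+ * Minimal usage sketch (hypothetical values; this mirrors what
+ * sfe_cm_conntrack_event() does on an IPCT_DESTROY event):
+ *
+ *	struct sfe_connection_destroy sid;
+ *
+ *	sid.protocol = IPPROTO_TCP;
+ *	sid.src_ip.ip = src_ip;		(__be32, pre-NAT tuple)
+ *	sid.dest_ip.ip = dest_ip;
+ *	sid.src_port = src_port;	(__be16, network byte order)
+ *	sid.dest_port = dest_port;
+ *	sfe_ipv4_destroy_rule(&sid);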
+ */ +struct sfe_connection_destroy { + int protocol; + sfe_ip_addr_t src_ip; + sfe_ip_addr_t dest_ip; + __be16 src_port; + __be16 dest_port; +}; + +typedef enum sfe_sync_reason { + SFE_SYNC_REASON_STATS, /* Sync is to synchronize stats */ + SFE_SYNC_REASON_FLUSH, /* Sync is to flush a entry */ + SFE_SYNC_REASON_DESTROY /* Sync is to destroy a entry(requested by connection manager) */ +} sfe_sync_reason_t; + +/* + * Structure used to sync connection stats/state back within the system. + * + * NOTE: The addresses here are NON-NAT addresses, i.e. the true endpoint addressing. + * 'src' is the creator of the connection. + */ +struct sfe_connection_sync { + struct net_device *src_dev; + struct net_device *dest_dev; + int is_v6; /* Is it for ipv6? */ + int protocol; /* IP protocol number (IPPROTO_...) */ + sfe_ip_addr_t src_ip; /* Non-NAT source address, i.e. the creator of the connection */ + sfe_ip_addr_t src_ip_xlate; /* NATed source address */ + __be16 src_port; /* Non-NAT source port */ + __be16 src_port_xlate; /* NATed source port */ + sfe_ip_addr_t dest_ip; /* Non-NAT destination address, i.e. to whom the connection was created */ + sfe_ip_addr_t dest_ip_xlate; /* NATed destination address */ + __be16 dest_port; /* Non-NAT destination port */ + __be16 dest_port_xlate; /* NATed destination port */ + u32 src_td_max_window; + u32 src_td_end; + u32 src_td_max_end; + u64 src_packet_count; + u64 src_byte_count; + u32 src_new_packet_count; + u32 src_new_byte_count; + u32 dest_td_max_window; + u32 dest_td_end; + u32 dest_td_max_end; + u64 dest_packet_count; + u64 dest_byte_count; + u32 dest_new_packet_count; + u32 dest_new_byte_count; + u32 reason; /* reason for stats sync message, i.e. destroy, flush, period sync */ + u64 delta_jiffies; /* Time to be added to the current timeout to keep the connection alive */ +}; + +/* + * connection mark structure + */ +struct sfe_connection_mark { + int protocol; + sfe_ip_addr_t src_ip; + sfe_ip_addr_t dest_ip; + __be16 src_port; + __be16 dest_port; + u32 mark; +}; + +/* + * Expose the hook for the receive processing. + */ +extern int (*athrs_fast_nat_recv)(struct sk_buff *skb); + +/* + * Expose what should be a static flag in the TCP connection tracker. + */ +extern int nf_ct_tcp_no_window_check; + +/* + * This callback will be called in a timer + * at 100 times per second to sync stats back to + * Linux connection track. + * + * A RCU lock is taken to prevent this callback + * from unregistering. 
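+ *
+ * Registration sketch for a connection manager ("my_sync" is a
+ * hypothetical handler; sfe_cm.c does exactly this in sfe_cm_init()
+ * and sfe_cm_exit()):
+ *
+ *	static void my_sync(struct sfe_connection_sync *sis)
+ *	{
+ *		... fold *sis stats/timeout back into conntrack ...
+ *	}
+ *
+ *	sfe_ipv4_register_sync_rule_callback(my_sync);
+ *	...
+ *	sfe_ipv4_register_sync_rule_callback(NULL);	(unregister)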
+ */ +typedef void (*sfe_sync_rule_callback_t)(struct sfe_connection_sync *); + +/* + * IPv4 APIs used by connection manager + */ +int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb); +int sfe_ipv4_create_rule(struct sfe_connection_create *sic); +void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid); +void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev); +void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t callback); +void sfe_ipv4_update_rule(struct sfe_connection_create *sic); +void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark); + +#ifdef SFE_SUPPORT_IPV6 +/* + * IPv6 APIs used by connection manager + */ +int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb); +int sfe_ipv6_create_rule(struct sfe_connection_create *sic); +void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid); +void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev); +void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback); +void sfe_ipv6_update_rule(struct sfe_connection_create *sic); +void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark); +#else +static inline int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb) +{ + return 0; +} + +static inline int sfe_ipv6_create_rule(struct sfe_connection_create *sic) +{ + return 0; +} + +static inline void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid) +{ + return; +} + +static inline void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev) +{ + return; +} + +static inline void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback) +{ + return; +} + +static inline void sfe_ipv6_update_rule(struct sfe_connection_create *sic) +{ + return; +} + +static inline void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark) +{ + return; +} +#endif + +/* + * sfe_ipv6_addr_equal() + * compare ipv6 address + * + * return: 1, equal; 0, no equal + */ +static inline int sfe_ipv6_addr_equal(struct sfe_ipv6_addr *a, + struct sfe_ipv6_addr *b) +{ + return a->addr[0] == b->addr[0] && + a->addr[1] == b->addr[1] && + a->addr[2] == b->addr[2] && + a->addr[3] == b->addr[3]; +} + +/* + * sfe_ipv4_addr_equal() + * compare ipv4 address + * + * return: 1, equal; 0, no equal + */ +#define sfe_ipv4_addr_equal(a, b) ((u32)(a) == (u32)(b)) + +/* + * sfe_addr_equal() + * compare ipv4 or ipv6 address + * + * return: 1, equal; 0, no equal + */ +static inline int sfe_addr_equal(sfe_ip_addr_t *a, + sfe_ip_addr_t *b, int is_v4) +{ + return is_v4 ? sfe_ipv4_addr_equal(a->ip, b->ip) : sfe_ipv6_addr_equal(a->ip6, b->ip6); +} diff --git a/shortcut-fe/src/sfe_ipv4.c b/shortcut-fe/src/sfe_ipv4.c new file mode 100755 index 000000000..531456c05 --- /dev/null +++ b/shortcut-fe/src/sfe_ipv4.c @@ -0,0 +1,3621 @@ +/* + * sfe_ipv4.c + * Shortcut forwarding engine - IPv4 edition. + * + * Copyright (c) 2013-2016, 2019, The Linux Foundation. All rights reserved. + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that the + * above copyright notice and this permission notice appear in all copies. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/sysfs.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <net/tcp.h>
+#include <linux/etherdevice.h>
+#include <linux/version.h>
+
+#include "sfe.h"
+#include "sfe_cm.h"
+
+/*
+ * By default Linux IP header and transport layer header structures are
+ * unpacked, assuming that such headers should be 32-bit aligned.
+ * Unfortunately some wireless adaptors can't cope with this requirement and
+ * some CPUs can't handle misaligned accesses.  For those platforms we
+ * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
+ * When we do this the compiler will generate slightly worse code than for the
+ * aligned case (on most platforms) but will be much quicker than fixing
+ * things up in an unaligned trap handler.
+ */
+#define SFE_IPV4_UNALIGNED_IP_HEADER 1
+#if SFE_IPV4_UNALIGNED_IP_HEADER
+#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
+#else
+#define SFE_IPV4_UNALIGNED_STRUCT
+#endif
+
+/*
+ * An Ethernet header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV4_UNALIGNED_STRUCT)
+ */
+struct sfe_ipv4_eth_hdr {
+	__be16 h_dest[ETH_ALEN / 2];
+	__be16 h_source[ETH_ALEN / 2];
+	__be16 h_proto;
+} SFE_IPV4_UNALIGNED_STRUCT;
+
+#define SFE_IPV4_DSCP_MASK 0x3
+#define SFE_IPV4_DSCP_SHIFT 2
+
+/*
+ * An IPv4 header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV4_UNALIGNED_STRUCT)
+ */
+struct sfe_ipv4_ip_hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 ihl:4,
+	     version:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	__u8 version:4,
+	     ihl:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+	__u8 tos;
+	__be16 tot_len;
+	__be16 id;
+	__be16 frag_off;
+	__u8 ttl;
+	__u8 protocol;
+	__sum16 check;
+	__be32 saddr;
+	__be32 daddr;
+
+	/*
+	 * The options start here.
+	 */
+} SFE_IPV4_UNALIGNED_STRUCT;
+
+/*
+ * A UDP header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV4_UNALIGNED_STRUCT)
+ */
+struct sfe_ipv4_udp_hdr {
+	__be16 source;
+	__be16 dest;
+	__be16 len;
+	__sum16 check;
+} SFE_IPV4_UNALIGNED_STRUCT;
+
+/*
+ * A TCP header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV4_UNALIGNED_STRUCT)
+ */
+struct sfe_ipv4_tcp_hdr {
+	__be16 source;
+	__be16 dest;
+	__be32 seq;
+	__be32 ack_seq;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u16 res1:4,
+	      doff:4,
+	      fin:1,
+	      syn:1,
+	      rst:1,
+	      psh:1,
+	      ack:1,
+	      urg:1,
+	      ece:1,
+	      cwr:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u16 doff:4,
+	      res1:4,
+	      cwr:1,
+	      ece:1,
+	      urg:1,
+	      ack:1,
+	      psh:1,
+	      rst:1,
+	      syn:1,
+	      fin:1;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+	__be16 window;
+	__sum16 check;
+	__be16 urg_ptr;
+} SFE_IPV4_UNALIGNED_STRUCT;
+
+/*
+ * Specifies the lower bound on ACK numbers carried in the TCP header
+ */
+#define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
+
+/*
+ * IPv4 TCP connection match additional data.
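+ *
+ * These mirror the per-direction TCP window state conntrack keeps in
+ * ct->proto.tcp.seen[] (td_scale, td_maxwin, td_end, td_maxend); the
+ * connection manager copies that state into sfe_connection_create at
+ * rule-creation time and the engine continues the tracking from there.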
+ */ +struct sfe_ipv4_tcp_connection_match { + u8 win_scale; /* Window scale */ + u32 max_win; /* Maximum window size seen */ + u32 end; /* Sequence number of the next byte to send (seq + segment length) */ + u32 max_end; /* Sequence number of the last byte to ack */ +}; + +/* + * Bit flags for IPv4 connection matching entry. + */ +#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0) + /* Perform source translation */ +#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1) + /* Perform destination translation */ +#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2) + /* Ignore TCP sequence numbers */ +#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3) + /* Fast Ethernet header write */ +#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4) + /* Fast Ethernet header write */ +#define SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5) + /* remark priority of SKB */ +#define SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6) + /* remark DSCP of packet */ + +/* + * IPv4 connection matching structure. + */ +struct sfe_ipv4_connection_match { + /* + * References to other objects. + */ + struct sfe_ipv4_connection_match *next; + struct sfe_ipv4_connection_match *prev; + struct sfe_ipv4_connection *connection; + struct sfe_ipv4_connection_match *counter_match; + /* Matches the flow in the opposite direction as the one in *connection */ + struct sfe_ipv4_connection_match *active_next; + struct sfe_ipv4_connection_match *active_prev; + bool active; /* Flag to indicate if we're on the active list */ + + /* + * Characteristics that identify flows that match this rule. + */ + struct net_device *match_dev; /* Network device */ + u8 match_protocol; /* Protocol */ + __be32 match_src_ip; /* Source IP address */ + __be32 match_dest_ip; /* Destination IP address */ + __be16 match_src_port; /* Source port/connection ident */ + __be16 match_dest_port; /* Destination port/connection ident */ + + /* + * Control the operations of the match. + */ + u32 flags; /* Bit flags */ +#ifdef CONFIG_NF_FLOW_COOKIE + u32 flow_cookie; /* used flow cookie, for debug */ +#endif +#ifdef CONFIG_XFRM + u32 flow_accel; /* The flow accelerated or not */ +#endif + + /* + * Connection state that we track once we match. + */ + union { /* Protocol-specific state */ + struct sfe_ipv4_tcp_connection_match tcp; + } protocol_state; + /* + * Stats recorded in a sync period. These stats will be added to + * rx_packet_count64/rx_byte_count64 after a sync period. + */ + u32 rx_packet_count; + u32 rx_byte_count; + + /* + * Packet translation information. + */ + __be32 xlate_src_ip; /* Address after source translation */ + __be16 xlate_src_port; /* Port/connection ident after source translation */ + u16 xlate_src_csum_adjustment; + /* Transport layer checksum adjustment after source translation */ + u16 xlate_src_partial_csum_adjustment; + /* Transport layer pseudo header checksum adjustment after source translation */ + + __be32 xlate_dest_ip; /* Address after destination translation */ + __be16 xlate_dest_port; /* Port/connection ident after destination translation */ + u16 xlate_dest_csum_adjustment; + /* Transport layer checksum adjustment after destination translation */ + u16 xlate_dest_partial_csum_adjustment; + /* Transport layer pseudo header checksum adjustment after destination translation */ + + /* + * QoS information + */ + u32 priority; + u32 dscp; + + /* + * Packet transmit information. 
+ */ + struct net_device *xmit_dev; /* Network device on which to transmit */ + unsigned short int xmit_dev_mtu; + /* Interface MTU */ + u16 xmit_dest_mac[ETH_ALEN / 2]; + /* Destination MAC address to use when forwarding */ + u16 xmit_src_mac[ETH_ALEN / 2]; + /* Source MAC address to use when forwarding */ + + /* + * Summary stats. + */ + u64 rx_packet_count64; + u64 rx_byte_count64; +}; + +/* + * Per-connection data structure. + */ +struct sfe_ipv4_connection { + struct sfe_ipv4_connection *next; + /* Pointer to the next entry in a hash chain */ + struct sfe_ipv4_connection *prev; + /* Pointer to the previous entry in a hash chain */ + int protocol; /* IP protocol number */ + __be32 src_ip; /* Src IP addr pre-translation */ + __be32 src_ip_xlate; /* Src IP addr post-translation */ + __be32 dest_ip; /* Dest IP addr pre-translation */ + __be32 dest_ip_xlate; /* Dest IP addr post-translation */ + __be16 src_port; /* Src port pre-translation */ + __be16 src_port_xlate; /* Src port post-translation */ + __be16 dest_port; /* Dest port pre-translation */ + __be16 dest_port_xlate; /* Dest port post-translation */ + struct sfe_ipv4_connection_match *original_match; + /* Original direction matching structure */ + struct net_device *original_dev; + /* Original direction source device */ + struct sfe_ipv4_connection_match *reply_match; + /* Reply direction matching structure */ + struct net_device *reply_dev; /* Reply direction source device */ + u64 last_sync_jiffies; /* Jiffies count for the last sync */ + struct sfe_ipv4_connection *all_connections_next; + /* Pointer to the next entry in the list of all connections */ + struct sfe_ipv4_connection *all_connections_prev; + /* Pointer to the previous entry in the list of all connections */ + u32 mark; /* mark for outgoing packet */ + u32 debug_read_seq; /* sequence number for debug dump */ +}; + +/* + * IPv4 connections and hash table size information. 
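+ *
+ * With a shift of 12 this gives 1 << 12 = 4096 buckets and a bucket
+ * mask of 0xfff; both the connection and connection-match hash tables
+ * below are sized from these constants.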
+ */ +#define SFE_IPV4_CONNECTION_HASH_SHIFT 12 +#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT) +#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1) + +#ifdef CONFIG_NF_FLOW_COOKIE +#define SFE_FLOW_COOKIE_SIZE 2048 +#define SFE_FLOW_COOKIE_MASK 0x7ff + +struct sfe_flow_cookie_entry { + struct sfe_ipv4_connection_match *match; + unsigned long last_clean_time; +}; +#endif + +enum sfe_ipv4_exception_events { + SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION, + SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT, + SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL, + SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION, + SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS, + SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS, + SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT, + SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL, + SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION, + SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS, + SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE, + SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS, + SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK, + SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS, + SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE, + SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE, + SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE, + SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE, + SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4, + SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL, + SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION, + SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION, + SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH, + SFE_IPV4_EXCEPTION_EVENT_NON_V4, + SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT, + SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE, + SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL, + SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR, + SFE_IPV4_EXCEPTION_EVENT_LAST +}; + +static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = { + "UDP_HEADER_INCOMPLETE", + "UDP_NO_CONNECTION", + "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT", + "UDP_SMALL_TTL", + "UDP_NEEDS_FRAGMENTATION", + "TCP_HEADER_INCOMPLETE", + "TCP_NO_CONNECTION_SLOW_FLAGS", + "TCP_NO_CONNECTION_FAST_FLAGS", + "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT", + "TCP_SMALL_TTL", + "TCP_NEEDS_FRAGMENTATION", + "TCP_FLAGS", + "TCP_SEQ_EXCEEDS_RIGHT_EDGE", + "TCP_SMALL_DATA_OFFS", + "TCP_BAD_SACK", + "TCP_BIG_DATA_OFFS", + "TCP_SEQ_BEFORE_LEFT_EDGE", + "TCP_ACK_EXCEEDS_RIGHT_EDGE", + "TCP_ACK_BEFORE_LEFT_EDGE", + "ICMP_HEADER_INCOMPLETE", + "ICMP_UNHANDLED_TYPE", + "ICMP_IPV4_HEADER_INCOMPLETE", + "ICMP_IPV4_NON_V4", + "ICMP_IPV4_IP_OPTIONS_INCOMPLETE", + "ICMP_IPV4_UDP_HEADER_INCOMPLETE", + "ICMP_IPV4_TCP_HEADER_INCOMPLETE", + "ICMP_IPV4_UNHANDLED_PROTOCOL", + "ICMP_NO_CONNECTION", + "ICMP_FLUSHED_CONNECTION", + "HEADER_INCOMPLETE", + "BAD_TOTAL_LENGTH", + "NON_V4", + "NON_INITIAL_FRAGMENT", + "DATAGRAM_INCOMPLETE", + "IP_OPTIONS_INCOMPLETE", + "UNHANDLED_PROTOCOL", + "CLONED_SKB_UNSHARE_ERROR" +}; + +/* + * Per-module structure. 
+ */ +struct sfe_ipv4 { + spinlock_t lock; /* Lock for SMP correctness */ + struct sfe_ipv4_connection_match *active_head; + /* Head of the list of recently active connections */ + struct sfe_ipv4_connection_match *active_tail; + /* Tail of the list of recently active connections */ + struct sfe_ipv4_connection *all_connections_head; + /* Head of the list of all connections */ + struct sfe_ipv4_connection *all_connections_tail; + /* Tail of the list of all connections */ + unsigned int num_connections; /* Number of connections */ + struct timer_list timer; /* Timer used for periodic sync ops */ + sfe_sync_rule_callback_t __rcu sync_rule_callback; + /* Callback function registered by a connection manager for stats syncing */ + struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE]; + /* Connection hash table */ + struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE]; + /* Connection match hash table */ +#ifdef CONFIG_NF_FLOW_COOKIE + struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE]; + /* flow cookie table*/ + flow_cookie_set_func_t flow_cookie_set_func; + /* function used to configure flow cookie in hardware*/ + int flow_cookie_enable; + /* Enable/disable flow cookie at runtime */ +#endif + + /* + * Stats recorded in a sync period. These stats will be added to + * connection_xxx64 after a sync period. + */ + u32 connection_create_requests; + /* Number of IPv4 connection create requests */ + u32 connection_create_collisions; + /* Number of IPv4 connection create requests that collided with existing hash table entries */ + u32 connection_destroy_requests; + /* Number of IPv4 connection destroy requests */ + u32 connection_destroy_misses; + /* Number of IPv4 connection destroy requests that missed our hash table */ + u32 connection_match_hash_hits; + /* Number of IPv4 connection match hash hits */ + u32 connection_match_hash_reorders; + /* Number of IPv4 connection match hash reorders */ + u32 connection_flushes; /* Number of IPv4 connection flushes */ + u32 packets_forwarded; /* Number of IPv4 packets forwarded */ + u32 packets_not_forwarded; /* Number of IPv4 packets not forwarded */ + u32 exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST]; + + /* + * Summary statistics. + */ + u64 connection_create_requests64; + /* Number of IPv4 connection create requests */ + u64 connection_create_collisions64; + /* Number of IPv4 connection create requests that collided with existing hash table entries */ + u64 connection_destroy_requests64; + /* Number of IPv4 connection destroy requests */ + u64 connection_destroy_misses64; + /* Number of IPv4 connection destroy requests that missed our hash table */ + u64 connection_match_hash_hits64; + /* Number of IPv4 connection match hash hits */ + u64 connection_match_hash_reorders64; + /* Number of IPv4 connection match hash reorders */ + u64 connection_flushes64; /* Number of IPv4 connection flushes */ + u64 packets_forwarded64; /* Number of IPv4 packets forwarded */ + u64 packets_not_forwarded64; + /* Number of IPv4 packets not forwarded */ + u64 exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST]; + + /* + * Control state. + */ + struct kobject *sys_sfe_ipv4; /* sysfs linkage */ + int debug_dev; /* Major number of the debug char device */ + u32 debug_read_seq; /* sequence number for debug dump */ +}; + +/* + * Enumeration of the XML output. 
+ */ +enum sfe_ipv4_debug_xml_states { + SFE_IPV4_DEBUG_XML_STATE_START, + SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START, + SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION, + SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END, + SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START, + SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION, + SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END, + SFE_IPV4_DEBUG_XML_STATE_STATS, + SFE_IPV4_DEBUG_XML_STATE_END, + SFE_IPV4_DEBUG_XML_STATE_DONE +}; + +/* + * XML write state. + */ +struct sfe_ipv4_debug_xml_write_state { + enum sfe_ipv4_debug_xml_states state; + /* XML output file state machine state */ + int iter_exception; /* Next exception iterator */ +}; + +typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length, + int *total_read, struct sfe_ipv4_debug_xml_write_state *ws); + +static struct sfe_ipv4 __si; + +/* + * sfe_ipv4_gen_ip_csum() + * Generate the IP checksum for an IPv4 header. + * + * Note that this function assumes that we have only 20 bytes of IP header. + */ +static inline u16 sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph) +{ + u32 sum; + u16 *i = (u16 *)iph; + + iph->check = 0; + + /* + * Generate the sum. + */ + sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9]; + + /* + * Fold it to ones-complement form. + */ + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + + return (u16)sum ^ 0xffff; +} + +/* + * sfe_ipv4_get_connection_match_hash() + * Generate the hash used in connection match lookups. + */ +static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol, + __be32 src_ip, __be16 src_port, + __be32 dest_ip, __be16 dest_port) +{ + size_t dev_addr = (size_t)dev; + u32 hash = ((u32)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port); + return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK; +} + +/* + * sfe_ipv4_find_sfe_ipv4_connection_match() + * Get the IPv4 flow match info that corresponds to a particular 5-tuple. + * + * On entry we must be holding the lock that protects the hash table. + */ +static struct sfe_ipv4_connection_match * +sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol, + __be32 src_ip, __be16 src_port, + __be32 dest_ip, __be16 dest_port) +{ + struct sfe_ipv4_connection_match *cm; + struct sfe_ipv4_connection_match *head; + unsigned int conn_match_idx; + + conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port); + cm = si->conn_match_hash[conn_match_idx]; + + /* + * If we don't have anything in this chain then bail. + */ + if (unlikely(!cm)) { + return NULL; + } + + /* + * Hopefully the first entry is the one we want. + */ + if ((cm->match_src_port == src_port) + && (cm->match_dest_port == dest_port) + && (cm->match_src_ip == src_ip) + && (cm->match_dest_ip == dest_ip) + && (cm->match_protocol == protocol) + && (cm->match_dev == dev)) { + si->connection_match_hash_hits++; + return cm; + } + + /* + * Unfortunately we didn't find it at head, so we search it in chain and + * move matching entry to the top of the hash chain. We presume that this + * will be reused again very quickly. 
+ */ + head = cm; + do { + cm = cm->next; + } while (cm && (cm->match_src_port != src_port + || cm->match_dest_port != dest_port + || cm->match_src_ip != src_ip + || cm->match_dest_ip != dest_ip + || cm->match_protocol != protocol + || cm->match_dev != dev)); + + /* + * Not found then we're done. + */ + if (unlikely(!cm)) { + return NULL; + } + + /* + * We found a match so move it. + */ + if (cm->next) { + cm->next->prev = cm->prev; + } + cm->prev->next = cm->next; + cm->prev = NULL; + cm->next = head; + head->prev = cm; + si->conn_match_hash[conn_match_idx] = cm; + si->connection_match_hash_reorders++; + + return cm; +} + +/* + * sfe_ipv4_connection_match_update_summary_stats() + * Update the summary stats for a connection match entry. + */ +static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm) +{ + cm->rx_packet_count64 += cm->rx_packet_count; + cm->rx_packet_count = 0; + cm->rx_byte_count64 += cm->rx_byte_count; + cm->rx_byte_count = 0; +} + +/* + * sfe_ipv4_connection_match_compute_translations() + * Compute port and address translations for a connection match entry. + */ +static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm) +{ + /* + * Before we insert the entry look to see if this is tagged as doing address + * translations. If it is then work out the adjustment that we need to apply + * to the transport checksum. + */ + if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { + /* + * Precompute an incremental checksum adjustment so we can + * edit packets in this stream very quickly. The algorithm is from RFC1624. + */ + u16 src_ip_hi = cm->match_src_ip >> 16; + u16 src_ip_lo = cm->match_src_ip & 0xffff; + u32 xlate_src_ip = ~cm->xlate_src_ip; + u16 xlate_src_ip_hi = xlate_src_ip >> 16; + u16 xlate_src_ip_lo = xlate_src_ip & 0xffff; + u16 xlate_src_port = ~cm->xlate_src_port; + u32 adj; + + /* + * When we compute this fold it down to a 16-bit offset + * as that way we can avoid having to do a double + * folding of the twos-complement result because the + * addition of 2 16-bit values cannot cause a double + * wrap-around! + */ + adj = src_ip_hi + src_ip_lo + cm->match_src_port + + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port; + adj = (adj & 0xffff) + (adj >> 16); + adj = (adj & 0xffff) + (adj >> 16); + cm->xlate_src_csum_adjustment = (u16)adj; + + } + + if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { + /* + * Precompute an incremental checksum adjustment so we can + * edit packets in this stream very quickly. The algorithm is from RFC1624. + */ + u16 dest_ip_hi = cm->match_dest_ip >> 16; + u16 dest_ip_lo = cm->match_dest_ip & 0xffff; + u32 xlate_dest_ip = ~cm->xlate_dest_ip; + u16 xlate_dest_ip_hi = xlate_dest_ip >> 16; + u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff; + u16 xlate_dest_port = ~cm->xlate_dest_port; + u32 adj; + + /* + * When we compute this fold it down to a 16-bit offset + * as that way we can avoid having to do a double + * folding of the twos-complement result because the + * addition of 2 16-bit values cannot cause a double + * wrap-around! 
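+		 *
+		 * Worked fold, e.g. for adj = 0x2fffe:
+		 *	(0x2fffe & 0xffff) + (0x2fffe >> 16) = 0xfffe + 2 = 0x10000
+		 *	(0x10000 & 0xffff) + (0x10000 >> 16) = 0x0000 + 1 = 0x0001
+		 * i.e. the second pass absorbs the carry produced by the first.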
+ */ + adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port + + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port; + adj = (adj & 0xffff) + (adj >> 16); + adj = (adj & 0xffff) + (adj >> 16); + cm->xlate_dest_csum_adjustment = (u16)adj; + } + + if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) { + u32 adj = ~cm->match_src_ip + cm->xlate_src_ip; + if (adj < cm->xlate_src_ip) { + adj++; + } + + adj = (adj & 0xffff) + (adj >> 16); + adj = (adj & 0xffff) + (adj >> 16); + cm->xlate_src_partial_csum_adjustment = (u16)adj; + } + + if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) { + u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip; + if (adj < cm->xlate_dest_ip) { + adj++; + } + + adj = (adj & 0xffff) + (adj >> 16); + adj = (adj & 0xffff) + (adj >> 16); + cm->xlate_dest_partial_csum_adjustment = (u16)adj; + } + +} + +/* + * sfe_ipv4_update_summary_stats() + * Update the summary stats. + */ +static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si) +{ + int i; + + si->connection_create_requests64 += si->connection_create_requests; + si->connection_create_requests = 0; + si->connection_create_collisions64 += si->connection_create_collisions; + si->connection_create_collisions = 0; + si->connection_destroy_requests64 += si->connection_destroy_requests; + si->connection_destroy_requests = 0; + si->connection_destroy_misses64 += si->connection_destroy_misses; + si->connection_destroy_misses = 0; + si->connection_match_hash_hits64 += si->connection_match_hash_hits; + si->connection_match_hash_hits = 0; + si->connection_match_hash_reorders64 += si->connection_match_hash_reorders; + si->connection_match_hash_reorders = 0; + si->connection_flushes64 += si->connection_flushes; + si->connection_flushes = 0; + si->packets_forwarded64 += si->packets_forwarded; + si->packets_forwarded = 0; + si->packets_not_forwarded64 += si->packets_not_forwarded; + si->packets_not_forwarded = 0; + + for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) { + si->exception_events64[i] += si->exception_events[i]; + si->exception_events[i] = 0; + } +} + +/* + * sfe_ipv4_insert_sfe_ipv4_connection_match() + * Insert a connection match into the hash. + * + * On entry we must be holding the lock that protects the hash table. + */ +static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si, + struct sfe_ipv4_connection_match *cm) +{ + struct sfe_ipv4_connection_match **hash_head; + struct sfe_ipv4_connection_match *prev_head; + unsigned int conn_match_idx + = sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol, + cm->match_src_ip, cm->match_src_port, + cm->match_dest_ip, cm->match_dest_port); + + hash_head = &si->conn_match_hash[conn_match_idx]; + prev_head = *hash_head; + cm->prev = NULL; + if (prev_head) { + prev_head->prev = cm; + } + + cm->next = prev_head; + *hash_head = cm; + +#ifdef CONFIG_NF_FLOW_COOKIE + if (!si->flow_cookie_enable) + return; + + /* + * Configure hardware to put a flow cookie in packet of this flow, + * then we can accelerate the lookup process when we received this packet. 
+	 */
+	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
+		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
+
+		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
+			flow_cookie_set_func_t func;
+
+			rcu_read_lock();
+			func = rcu_dereference(si->flow_cookie_set_func);
+			if (func) {
+				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
+					  cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
+					entry->match = cm;
+					cm->flow_cookie = conn_match_idx;
+				}
+			}
+			rcu_read_unlock();
+
+			break;
+		}
+	}
+#endif
+}
+
+/*
+ * sfe_ipv4_remove_sfe_ipv4_connection_match()
+ *	Remove a connection match object from the hash.
+ *
+ * On entry we must be holding the lock that protects the hash table.
+ */
+static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
+{
+#ifdef CONFIG_NF_FLOW_COOKIE
+	if (si->flow_cookie_enable) {
+		/*
+		 * Tell hardware that we no longer need a flow cookie in packets of this flow.
+		 */
+		unsigned int conn_match_idx;
+
+		for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
+			struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
+
+			if (cm == entry->match) {
+				flow_cookie_set_func_t func;
+
+				rcu_read_lock();
+				func = rcu_dereference(si->flow_cookie_set_func);
+				if (func) {
+					func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
+					     cm->match_dest_ip, cm->match_dest_port, 0);
+				}
+				rcu_read_unlock();
+
+				cm->flow_cookie = 0;
+				entry->match = NULL;
+				entry->last_clean_time = jiffies;
+				break;
+			}
+		}
+	}
+#endif
+
+	/*
+	 * Unlink the connection match entry from the hash.
+	 */
+	if (cm->prev) {
+		cm->prev->next = cm->next;
+	} else {
+		unsigned int conn_match_idx
+			= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
+							     cm->match_src_ip, cm->match_src_port,
+							     cm->match_dest_ip, cm->match_dest_port);
+		si->conn_match_hash[conn_match_idx] = cm->next;
+	}
+
+	if (cm->next) {
+		cm->next->prev = cm->prev;
+	}
+
+	/*
+	 * If the connection match entry is in the active list remove it.
+	 */
+	if (cm->active) {
+		if (likely(cm->active_prev)) {
+			cm->active_prev->active_next = cm->active_next;
+		} else {
+			si->active_head = cm->active_next;
+		}
+
+		if (likely(cm->active_next)) {
+			cm->active_next->active_prev = cm->active_prev;
+		} else {
+			si->active_tail = cm->active_prev;
+		}
+	}
+}
+
+/*
+ * sfe_ipv4_get_connection_hash()
+ *	Generate the hash used in connection lookups.
+ */
+static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port,
+							__be32 dest_ip, __be16 dest_port)
+{
+	u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
+	return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
+}
+
+/*
+ * sfe_ipv4_find_sfe_ipv4_connection()
+ *	Get the IPv4 connection info that corresponds to a particular 5-tuple.
+ *
+ * On entry we must be holding the lock that protects the hash table.
+ */
+static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, u32 protocol,
+									     __be32 src_ip, __be16 src_port,
+									     __be32 dest_ip, __be16 dest_port)
+{
+	struct sfe_ipv4_connection *c;
+	unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
+	c = si->conn_hash[conn_idx];
+
+	/*
+	 * If we don't have anything in this chain then bail.
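+	 *
+	 * (Illustrative note: because the hash XORs saddr with daddr and
+	 * sport with dport, a flow such as (TCP, 10.0.0.2:40000 -> 8.8.8.8:53)
+	 * and its reverse direction land in the same bucket; only the
+	 * per-field comparison below distinguishes the two.)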
+ */ + if (unlikely(!c)) { + return NULL; + } + + /* + * Hopefully the first entry is the one we want. + */ + if ((c->src_port == src_port) + && (c->dest_port == dest_port) + && (c->src_ip == src_ip) + && (c->dest_ip == dest_ip) + && (c->protocol == protocol)) { + return c; + } + + /* + * Unfortunately we didn't find it at head, so we search it in chain. + */ + do { + c = c->next; + } while (c && (c->src_port != src_port + || c->dest_port != dest_port + || c->src_ip != src_ip + || c->dest_ip != dest_ip + || c->protocol != protocol)); + + /* + * Will need connection entry for next create/destroy metadata, + * So no need to re-order entry for these requests + */ + return c; +} + +/* + * sfe_ipv4_mark_rule() + * Updates the mark for a current offloaded connection + * + * Will take hash lock upon entry + */ +void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark) +{ + struct sfe_ipv4 *si = &__si; + struct sfe_ipv4_connection *c; + + spin_lock_bh(&si->lock); + c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol, + mark->src_ip.ip, mark->src_port, + mark->dest_ip.ip, mark->dest_port); + if (c) { + WARN_ON((0 != c->mark) && (0 == mark->mark)); + c->mark = mark->mark; + } + spin_unlock_bh(&si->lock); + + if (c) { + DEBUG_TRACE("Matching connection found for mark, " + "setting from %08x to %08x\n", + c->mark, mark->mark); + } +} + +/* + * sfe_ipv4_insert_sfe_ipv4_connection() + * Insert a connection into the hash. + * + * On entry we must be holding the lock that protects the hash table. + */ +static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) +{ + struct sfe_ipv4_connection **hash_head; + struct sfe_ipv4_connection *prev_head; + unsigned int conn_idx; + + /* + * Insert entry into the connection hash. + */ + conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, + c->dest_ip, c->dest_port); + hash_head = &si->conn_hash[conn_idx]; + prev_head = *hash_head; + c->prev = NULL; + if (prev_head) { + prev_head->prev = c; + } + + c->next = prev_head; + *hash_head = c; + + /* + * Insert entry into the "all connections" list. + */ + if (si->all_connections_tail) { + c->all_connections_prev = si->all_connections_tail; + si->all_connections_tail->all_connections_next = c; + } else { + c->all_connections_prev = NULL; + si->all_connections_head = c; + } + + si->all_connections_tail = c; + c->all_connections_next = NULL; + si->num_connections++; + + /* + * Insert the connection match objects too. + */ + sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match); + sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match); +} + +/* + * sfe_ipv4_remove_sfe_ipv4_connection() + * Remove a sfe_ipv4_connection object from the hash. + * + * On entry we must be holding the lock that protects the hash table. + */ +static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c) +{ + /* + * Remove the connection match objects. + */ + sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match); + sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match); + + /* + * Unlink the connection. 
+ */ + if (c->prev) { + c->prev->next = c->next; + } else { + unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port, + c->dest_ip, c->dest_port); + si->conn_hash[conn_idx] = c->next; + } + + if (c->next) { + c->next->prev = c->prev; + } + + /* + * Unlink connection from all_connections list + */ + if (c->all_connections_prev) { + c->all_connections_prev->all_connections_next = c->all_connections_next; + } else { + si->all_connections_head = c->all_connections_next; + } + + if (c->all_connections_next) { + c->all_connections_next->all_connections_prev = c->all_connections_prev; + } else { + si->all_connections_tail = c->all_connections_prev; + } + + si->num_connections--; +} + +/* + * sfe_ipv4_sync_sfe_ipv4_connection() + * Sync a connection. + * + * On entry to this function we expect that the lock for the connection is either + * already held or isn't required. + */ +static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c, + struct sfe_connection_sync *sis, sfe_sync_reason_t reason, + u64 now_jiffies) +{ + struct sfe_ipv4_connection_match *original_cm; + struct sfe_ipv4_connection_match *reply_cm; + + /* + * Fill in the update message. + */ + sis->is_v6 = 0; + sis->protocol = c->protocol; + sis->src_ip.ip = c->src_ip; + sis->src_ip_xlate.ip = c->src_ip_xlate; + sis->dest_ip.ip = c->dest_ip; + sis->dest_ip_xlate.ip = c->dest_ip_xlate; + sis->src_port = c->src_port; + sis->src_port_xlate = c->src_port_xlate; + sis->dest_port = c->dest_port; + sis->dest_port_xlate = c->dest_port_xlate; + + original_cm = c->original_match; + reply_cm = c->reply_match; + sis->src_td_max_window = original_cm->protocol_state.tcp.max_win; + sis->src_td_end = original_cm->protocol_state.tcp.end; + sis->src_td_max_end = original_cm->protocol_state.tcp.max_end; + sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win; + sis->dest_td_end = reply_cm->protocol_state.tcp.end; + sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end; + + sis->src_new_packet_count = original_cm->rx_packet_count; + sis->src_new_byte_count = original_cm->rx_byte_count; + sis->dest_new_packet_count = reply_cm->rx_packet_count; + sis->dest_new_byte_count = reply_cm->rx_byte_count; + + sfe_ipv4_connection_match_update_summary_stats(original_cm); + sfe_ipv4_connection_match_update_summary_stats(reply_cm); + + sis->src_dev = original_cm->match_dev; + sis->src_packet_count = original_cm->rx_packet_count64; + sis->src_byte_count = original_cm->rx_byte_count64; + + sis->dest_dev = reply_cm->match_dev; + sis->dest_packet_count = reply_cm->rx_packet_count64; + sis->dest_byte_count = reply_cm->rx_byte_count64; + + sis->reason = reason; + + /* + * Get the time increment since our last sync. + */ + sis->delta_jiffies = now_jiffies - c->last_sync_jiffies; + c->last_sync_jiffies = now_jiffies; +} + +/* + * sfe_ipv4_flush_sfe_ipv4_connection() + * Flush a connection and free all associated resources. + * + * We need to be called with bottom halves disabled locally as we need to acquire + * the connection hash lock and release it again. In general we're actually called + * from within a BH and so we're fine, but we're also called when connections are + * torn down. 
+ */
+static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si,
+					       struct sfe_ipv4_connection *c,
+					       sfe_sync_reason_t reason)
+{
+	struct sfe_connection_sync sis;
+	u64 now_jiffies;
+	sfe_sync_rule_callback_t sync_rule_callback;
+
+	rcu_read_lock();
+	spin_lock_bh(&si->lock);
+	si->connection_flushes++;
+	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
+	spin_unlock_bh(&si->lock);
+
+	if (sync_rule_callback) {
+		/*
+		 * Generate a sync message and then sync.
+		 */
+		now_jiffies = get_jiffies_64();
+		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies);
+		sync_rule_callback(&sis);
+	}
+
+	rcu_read_unlock();
+
+	/*
+	 * Release our hold of the source and dest devices and free the memory
+	 * for our connection objects.
+	 */
+	dev_put(c->original_dev);
+	dev_put(c->reply_dev);
+	kfree(c->original_match);
+	kfree(c->reply_match);
+	kfree(c);
+}
+
+/*
+ * sfe_ipv4_recv_udp()
+ *	Handle UDP packet receives and forwarding.
+ */
+static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
+			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
+{
+	struct sfe_ipv4_udp_hdr *udph;
+	__be32 src_ip;
+	__be32 dest_ip;
+	__be16 src_port;
+	__be16 dest_port;
+	struct sfe_ipv4_connection_match *cm;
+	u8 ttl;
+	struct net_device *xmit_dev;
+
+	/*
+	 * Is our packet too short to contain a valid UDP header?
+	 */
+	if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("packet too short for UDP header\n");
+		return 0;
+	}
+
+	/*
+	 * Read the IP address and port information. Read the IP header data first
+	 * because we've almost certainly got that in the cache. We may not yet have
+	 * the UDP header cached though so allow more time for any prefetching.
+	 */
+	src_ip = iph->saddr;
+	dest_ip = iph->daddr;
+
+	udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
+	src_port = udph->source;
+	dest_port = udph->dest;
+
+	spin_lock_bh(&si->lock);
+
+	/*
+	 * Look for a connection match.
+	 */
+#ifdef CONFIG_NF_FLOW_COOKIE
+	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
+	if (unlikely(!cm)) {
+		cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
+	}
+#else
+	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
+#endif
+	if (unlikely(!cm)) {
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("no connection found\n");
+		return 0;
+	}
+
+	/*
+	 * If our packet has been marked as "flush on find" we can't actually
+	 * forward it in the fast path, but now that we've found an associated
+	 * connection we can flush that out before we process the packet.
+	 */
+	if (unlikely(flush_on_find)) {
+		struct sfe_ipv4_connection *c = cm->connection;
+		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("flush on find\n");
+		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+#ifdef CONFIG_XFRM
+	/*
+	 * We can't accelerate the flow in this direction, so just let it go
+	 * through the slow path.
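+	 *
+	 * (Editorial note, based on how flow_accel is seeded at rule-creation
+	 * time from sic->original_accel / sic->reply_accel: a connection
+	 * manager that sees an XFRM/IPsec policy on one direction can leave
+	 * that direction to the normal stack while still accelerating the
+	 * other.)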
+ */ + if (unlikely(!cm->flow_accel)) { + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + return 0; + } +#endif + + /* + * Does our TTL allow forwarding? + */ + ttl = iph->ttl; + if (unlikely(ttl < 2)) { + struct sfe_ipv4_connection *c = cm->connection; + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("ttl too low\n"); + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * If our packet is larger than the MTU of the transmit interface then + * we can't forward it easily. + */ + if (unlikely(len > cm->xmit_dev_mtu)) { + struct sfe_ipv4_connection *c = cm->connection; + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("larger than mtu\n"); + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * From this point on we're good to modify the packet. + */ + + /* + * Check if skb was cloned. If it was, unshare it. Because + * the data area is going to be written in this path and we don't want to + * change the cloned skb's data section. + */ + if (unlikely(skb_cloned(skb))) { + DEBUG_TRACE("%p: skb is a cloned skb\n", skb); + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) { + DEBUG_WARN("Failed to unshare the cloned skb\n"); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + return 0; + } + + /* + * Update the iph and udph pointers with the unshared skb's data area. + */ + iph = (struct sfe_ipv4_ip_hdr *)skb->data; + udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl); + } + + /* + * Update DSCP + */ + if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { + iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp; + } + + /* + * Decrement our TTL. + */ + iph->ttl = ttl - 1; + + /* + * Do we have to perform translations of the source address/port? + */ + if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) { + u16 udp_csum; + + iph->saddr = cm->xlate_src_ip; + udph->source = cm->xlate_src_port; + + /* + * Do we have a non-zero UDP checksum? If we do then we need + * to update it. + */ + udp_csum = udph->check; + if (likely(udp_csum)) { + u32 sum; + + if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { + sum = udp_csum + cm->xlate_src_partial_csum_adjustment; + } else { + sum = udp_csum + cm->xlate_src_csum_adjustment; + } + + sum = (sum & 0xffff) + (sum >> 16); + udph->check = (u16)sum; + } + } + + /* + * Do we have to perform translations of the destination address/port? + */ + if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) { + u16 udp_csum; + + iph->daddr = cm->xlate_dest_ip; + udph->dest = cm->xlate_dest_port; + + /* + * Do we have a non-zero UDP checksum? If we do then we need + * to update it. + */ + udp_csum = udph->check; + if (likely(udp_csum)) { + u32 sum; + + if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) { + sum = udp_csum + cm->xlate_dest_partial_csum_adjustment; + } else { + sum = udp_csum + cm->xlate_dest_csum_adjustment; + } + + sum = (sum & 0xffff) + (sum >> 16); + udph->check = (u16)sum; + } + } + + /* + * Replace the IP checksum. + */ + iph->check = sfe_ipv4_gen_ip_csum(iph); + + /* + * Update traffic stats. 
+	 */
+	cm->rx_packet_count++;
+	cm->rx_byte_count += len;
+
+	/*
+	 * If we're not already on the active list then insert ourselves at the tail
+	 * of the current list.
+	 */
+	if (unlikely(!cm->active)) {
+		cm->active = true;
+		cm->active_prev = si->active_tail;
+		if (likely(si->active_tail)) {
+			si->active_tail->active_next = cm;
+		} else {
+			si->active_head = cm;
+		}
+		si->active_tail = cm;
+	}
+
+	xmit_dev = cm->xmit_dev;
+	skb->dev = xmit_dev;
+
+	/*
+	 * Check to see if we need to write a header.
+	 */
+	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
+		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
+			dev_hard_header(skb, xmit_dev, ETH_P_IP,
+					cm->xmit_dest_mac, cm->xmit_src_mac, len);
+		} else {
+			/*
+			 * For the simple case we write this really fast.
+			 */
+			struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
+			eth->h_proto = htons(ETH_P_IP);
+			eth->h_dest[0] = cm->xmit_dest_mac[0];
+			eth->h_dest[1] = cm->xmit_dest_mac[1];
+			eth->h_dest[2] = cm->xmit_dest_mac[2];
+			eth->h_source[0] = cm->xmit_src_mac[0];
+			eth->h_source[1] = cm->xmit_src_mac[1];
+			eth->h_source[2] = cm->xmit_src_mac[2];
+		}
+	}
+
+	/*
+	 * Update priority of skb.
+	 */
+	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
+		skb->priority = cm->priority;
+	}
+
+	/*
+	 * Mark outgoing packet.
+	 */
+	skb->mark = cm->connection->mark;
+	if (skb->mark) {
+		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
+	}
+
+	si->packets_forwarded++;
+	spin_unlock_bh(&si->lock);
+
+	/*
+	 * We're going to check for GSO flags when we transmit the packet so
+	 * start fetching the necessary cache line now.
+	 */
+	prefetch(skb_shinfo(skb));
+
+	/*
+	 * Mark that this packet has been fast forwarded.
+	 */
+	skb->fast_forwarded = 1;
+
+	/*
+	 * Send the packet on its way.
+	 */
+	dev_queue_xmit(skb);
+
+	return 1;
+}
+
+/*
+ * sfe_ipv4_process_tcp_option_sack()
+ *	Parse the TCP SACK option and update the ack accordingly.
+ */
+static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const u32 data_offs,
+					     u32 *ack)
+{
+	u32 length = sizeof(struct sfe_ipv4_tcp_hdr);
+	u8 *ptr = (u8 *)th + length;
+
+	/*
+	 * Skip processing if the TCP packet has only the TIMESTAMP option.
+	 */
+	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
+	    && likely(ptr[0] == TCPOPT_NOP)
+	    && likely(ptr[1] == TCPOPT_NOP)
+	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
+	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
+		return true;
+	}
+
+	/*
+	 * TCP options. Parse the SACK option.
+	 */
+	while (length < data_offs) {
+		u8 size;
+		u8 kind;
+
+		ptr = (u8 *)th + length;
+		kind = *ptr;
+
+		/*
+		 * NOP, used for padding. Handled before the other options so
+		 * we can skip it quickly without reading a size byte.
+		 */
+		if (kind == TCPOPT_NOP) {
+			length++;
+			continue;
+		}
+
+		if (kind == TCPOPT_SACK) {
+			u32 sack = 0;
+			u8 re = 1 + 1;
+
+			size = *(ptr + 1);
+			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
+			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
+			    || (size > (data_offs - length))) {
+				return false;
+			}
+
+			re += 4;
+			while (re < size) {
+				u32 sack_re;
+				u8 *sptr = ptr + re;
+				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
+				if (sack_re > sack) {
+					sack = sack_re;
+				}
+				re += TCPOLEN_SACK_PERBLOCK;
+			}
+			if (sack > *ack) {
+				*ack = sack;
+			}
+			length += size;
+			continue;
+		}
+		if (kind == TCPOPT_EOL) {
+			return true;
+		}
+		size = *(ptr + 1);
+		if (size < 2) {
+			return false;
+		}
+		length += size;
+	}
+
+	return true;
+}
+
+/*
+ * sfe_ipv4_recv_tcp()
+ *	Handle TCP packet receives and forwarding.
+ */
+static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
+			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
+{
+	struct sfe_ipv4_tcp_hdr *tcph;
+	__be32 src_ip;
+	__be32 dest_ip;
+	__be16 src_port;
+	__be16 dest_port;
+	struct sfe_ipv4_connection_match *cm;
+	struct sfe_ipv4_connection_match *counter_cm;
+	u8 ttl;
+	u32 flags;
+	struct net_device *xmit_dev;
+
+	/*
+	 * Is our packet too short to contain a valid TCP header?
+	 */
+	if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("packet too short for TCP header\n");
+		return 0;
+	}
+
+	/*
+	 * Read the IP address and port information. Read the IP header data first
+	 * because we've almost certainly got that in the cache. We may not yet have
+	 * the TCP header cached though so allow more time for any prefetching.
+	 */
+	src_ip = iph->saddr;
+	dest_ip = iph->daddr;
+
+	tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
+	src_port = tcph->source;
+	dest_port = tcph->dest;
+	flags = tcp_flag_word(tcph);
+
+	spin_lock_bh(&si->lock);
+
+	/*
+	 * Look for a connection match.
+	 */
+#ifdef CONFIG_NF_FLOW_COOKIE
+	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
+	if (unlikely(!cm)) {
+		cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
+	}
+#else
+	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
+#endif
+	if (unlikely(!cm)) {
+		/*
+		 * We didn't get a connection but as TCP is connection-oriented that
+		 * may be because this is a non-fast connection (not in the established state).
+		 * For diagnostic purposes we differentiate this here.
+		 */
+		if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
+			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
+			si->packets_not_forwarded++;
+			spin_unlock_bh(&si->lock);
+
+			DEBUG_TRACE("no connection found - fast flags\n");
+			return 0;
+		}
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
+			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
+		return 0;
+	}
+
+	/*
+	 * If our packet has been marked as "flush on find" we can't actually
+	 * forward it in the fast path, but now that we've found an associated
+	 * connection we can flush that out before we process the packet.
+	 */
+	if (unlikely(flush_on_find)) {
+		struct sfe_ipv4_connection *c = cm->connection;
+		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("flush on find\n");
+		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+#ifdef CONFIG_XFRM
+	/*
+	 * We can't accelerate the flow in this direction, so just let it go
+	 * through the slow path.
+	 */
+	if (unlikely(!cm->flow_accel)) {
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+		return 0;
+	}
+#endif
+	/*
+	 * Does our TTL allow forwarding?
+	 */
+	ttl = iph->ttl;
+	if (unlikely(ttl < 2)) {
+		struct sfe_ipv4_connection *c = cm->connection;
+		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("ttl too low\n");
+		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+	/*
+	 * If our packet is larger than the MTU of the transmit interface then
+	 * we can't forward it easily.
+	 */
+	if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
+		struct sfe_ipv4_connection *c = cm->connection;
+		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("larger than mtu\n");
+		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+	/*
+	 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
+	 * set is not a fast path packet.
+	 */
+	if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
+		struct sfe_ipv4_connection *c = cm->connection;
+		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
+			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
+		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+	counter_cm = cm->counter_match;
+
+	/*
+	 * Are we doing sequence number checking?
+	 */
+	if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
+		u32 seq;
+		u32 ack;
+		u32 sack;
+		u32 data_offs;
+		u32 end;
+		u32 left_edge;
+		u32 scaled_win;
+		u32 max_end;
+
+		/*
+		 * Is our sequence fully past the right hand edge of the window?
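+		 *
+		 * (Worked example of the wrap-safe comparison used below: with
+		 * max_end = 0xffffff00 and a wrapped seq = 0x00000100,
+		 * (s32)(seq - (max_end + 1)) is 0x1ff > 0, so the packet is
+		 * correctly treated as past the edge, whereas a plain unsigned
+		 * compare would conclude the opposite.)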
+ */ + seq = ntohl(tcph->seq); + if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) { + struct sfe_ipv4_connection *c = cm->connection; + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("seq: %u exceeds right edge: %u\n", + seq, cm->protocol_state.tcp.max_end + 1); + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Check that our TCP data offset isn't too short. + */ + data_offs = tcph->doff << 2; + if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) { + struct sfe_ipv4_connection *c = cm->connection; + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs); + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Update ACK according to any SACK option. + */ + ack = ntohl(tcph->ack_seq); + sack = ack; + if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) { + struct sfe_ipv4_connection *c = cm->connection; + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("TCP option SACK size is wrong\n"); + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Check that our TCP data offset isn't past the end of the packet. + */ + data_offs += sizeof(struct sfe_ipv4_ip_hdr); + if (unlikely(len < data_offs)) { + struct sfe_ipv4_connection *c = cm->connection; + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n", + data_offs, len); + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + end = seq + len - data_offs; + + /* + * Is our sequence fully before the left hand edge of the window? + */ + if (unlikely((s32)(end - (cm->protocol_state.tcp.end + - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) { + struct sfe_ipv4_connection *c = cm->connection; + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("seq: %u before left edge: %u\n", + end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1); + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Are we acking data that is to the right of what has been sent? + */ + if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) { + struct sfe_ipv4_connection *c = cm->connection; + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("ack: %u exceeds right edge: %u\n", + sack, counter_cm->protocol_state.tcp.end + 1); + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Is our ack too far before the left hand edge of the window? 
+		 */
+		left_edge = counter_cm->protocol_state.tcp.end
+			    - cm->protocol_state.tcp.max_win
+			    - SFE_IPV4_TCP_MAX_ACK_WINDOW
+			    - 1;
+		if (unlikely((s32)(sack - left_edge) < 0)) {
+			struct sfe_ipv4_connection *c = cm->connection;
+			sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
+			si->packets_not_forwarded++;
+			spin_unlock_bh(&si->lock);
+
+			DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
+			sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+			return 0;
+		}
+
+		/*
+		 * Have we just seen the largest window size yet for this connection? If yes
+		 * then we need to record the new value.
+		 */
+		scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
+		scaled_win += (sack - ack);
+		if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
+			cm->protocol_state.tcp.max_win = scaled_win;
+		}
+
+		/*
+		 * If our sequence and/or ack numbers have advanced then record the new state.
+		 */
+		if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
+			cm->protocol_state.tcp.end = end;
+		}
+
+		max_end = sack + scaled_win;
+		if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
+			counter_cm->protocol_state.tcp.max_end = max_end;
+		}
+	}
+
+	/*
+	 * From this point on we're good to modify the packet.
+	 */
+
+	/*
+	 * Check if the skb was cloned. If it was, unshare it, because
+	 * the data area is going to be written in this path and we don't want to
+	 * change the cloned skb's data section.
+	 */
+	if (unlikely(skb_cloned(skb))) {
+		DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
+		skb = skb_unshare(skb, GFP_ATOMIC);
+		if (!skb) {
+			DEBUG_WARN("Failed to unshare the cloned skb\n");
+			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
+			si->packets_not_forwarded++;
+			spin_unlock_bh(&si->lock);
+
+			return 0;
+		}
+
+		/*
+		 * Update the iph and tcph pointers with the unshared skb's data area.
+		 */
+		iph = (struct sfe_ipv4_ip_hdr *)skb->data;
+		tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
+	}
+
+	/*
+	 * Update DSCP.
+	 */
+	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
+		iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
+	}
+
+	/*
+	 * Decrement our TTL.
+	 */
+	iph->ttl = ttl - 1;
+
+	/*
+	 * Do we have to perform translations of the source address/port?
+	 */
+	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
+		u16 tcp_csum;
+		u32 sum;
+
+		iph->saddr = cm->xlate_src_ip;
+		tcph->source = cm->xlate_src_port;
+
+		/*
+		 * The TCP checksum is always non-zero, so we always need
+		 * to update it.
+		 */
+		tcp_csum = tcph->check;
+		if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+			sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
+		} else {
+			sum = tcp_csum + cm->xlate_src_csum_adjustment;
+		}
+
+		sum = (sum & 0xffff) + (sum >> 16);
+		tcph->check = (u16)sum;
+	}
+
+	/*
+	 * Do we have to perform translations of the destination address/port?
+	 */
+	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
+		u16 tcp_csum;
+		u32 sum;
+
+		iph->daddr = cm->xlate_dest_ip;
+		tcph->dest = cm->xlate_dest_port;
+
+		/*
+		 * The TCP checksum is always non-zero, so we always need
+		 * to update it.
+		 */
+		tcp_csum = tcph->check;
+		if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+			sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
+		} else {
+			sum = tcp_csum + cm->xlate_dest_csum_adjustment;
+		}
+
+		sum = (sum & 0xffff) + (sum >> 16);
+		tcph->check = (u16)sum;
+	}
+
+	/*
+	 * Replace the IP checksum.
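+	 *
+	 * (Editorial note: the IP header checksum is cheap to recompute in
+	 * full because the header is only 20 bytes here. The transport
+	 * checksums above instead use the precomputed adjustments; for
+	 * CHECKSUM_PARTIAL skbs the hardware has not yet summed the payload,
+	 * so only the address-only "partial" adjustment derived earlier
+	 * applies.)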
+ */ + iph->check = sfe_ipv4_gen_ip_csum(iph); + + /* + * Update traffic stats. + */ + cm->rx_packet_count++; + cm->rx_byte_count += len; + + /* + * If we're not already on the active list then insert ourselves at the tail + * of the current list. + */ + if (unlikely(!cm->active)) { + cm->active = true; + cm->active_prev = si->active_tail; + if (likely(si->active_tail)) { + si->active_tail->active_next = cm; + } else { + si->active_head = cm; + } + si->active_tail = cm; + } + + xmit_dev = cm->xmit_dev; + skb->dev = xmit_dev; + + /* + * Check to see if we need to write a header. + */ + if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { + if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { + dev_hard_header(skb, xmit_dev, ETH_P_IP, + cm->xmit_dest_mac, cm->xmit_src_mac, len); + } else { + /* + * For the simple case we write this really fast. + */ + struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN); + eth->h_proto = htons(ETH_P_IP); + eth->h_dest[0] = cm->xmit_dest_mac[0]; + eth->h_dest[1] = cm->xmit_dest_mac[1]; + eth->h_dest[2] = cm->xmit_dest_mac[2]; + eth->h_source[0] = cm->xmit_src_mac[0]; + eth->h_source[1] = cm->xmit_src_mac[1]; + eth->h_source[2] = cm->xmit_src_mac[2]; + } + } + + /* + * Update priority of skb. + */ + if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) { + skb->priority = cm->priority; + } + + /* + * Mark outgoing packet + */ + skb->mark = cm->connection->mark; + if (skb->mark) { + DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark); + } + + si->packets_forwarded++; + spin_unlock_bh(&si->lock); + + /* + * We're going to check for GSO flags when we transmit the packet so + * start fetching the necessary cache line now. + */ + prefetch(skb_shinfo(skb)); + + /* + * Mark that this packet has been fast forwarded. + */ + skb->fast_forwarded = 1; + + /* + * Send the packet on its way. + */ + dev_queue_xmit(skb); + + return 1; +} + +/* + * sfe_ipv4_recv_icmp() + * Handle ICMP packet receives. + * + * ICMP packets aren't handled as a "fast path" and always have us process them + * through the default Linux stack. What we do need to do is look for any errors + * about connections we are handling in the fast path. If we find any such + * connections then we want to flush their state so that the ICMP error path + * within Linux has all of the correct state should it need it. + */ +static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev, + unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl) +{ + struct icmphdr *icmph; + struct sfe_ipv4_ip_hdr *icmp_iph; + unsigned int icmp_ihl_words; + unsigned int icmp_ihl; + u32 *icmp_trans_h; + struct sfe_ipv4_udp_hdr *icmp_udph; + struct sfe_ipv4_tcp_hdr *icmp_tcph; + __be32 src_ip; + __be32 dest_ip; + __be16 src_port; + __be16 dest_port; + struct sfe_ipv4_connection_match *cm; + struct sfe_ipv4_connection *c; + u32 pull_len = sizeof(struct icmphdr) + ihl; + + /* + * Is our packet too short to contain a valid ICMP header? + */ + len -= ihl; + if (!pskb_may_pull(skb, pull_len)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("packet too short for ICMP header\n"); + return 0; + } + + /* + * We only handle "destination unreachable" and "time exceeded" messages. 
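+	 *
+	 * (For reference, the layout parsed below is: outer IP | ICMP header |
+	 * embedded IP header of the offending packet | first 8 bytes of its
+	 * transport header. RFC 792 guarantees those 8 bytes, which is
+	 * exactly enough to recover the ports for the 5-tuple lookup.)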
+ */ + icmph = (struct icmphdr *)(skb->data + ihl); + if ((icmph->type != ICMP_DEST_UNREACH) + && (icmph->type != ICMP_TIME_EXCEEDED)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type); + return 0; + } + + /* + * Do we have the full embedded IP header? + */ + len -= sizeof(struct icmphdr); + pull_len += sizeof(struct sfe_ipv4_ip_hdr); + if (!pskb_may_pull(skb, pull_len)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("Embedded IP header not complete\n"); + return 0; + } + + /* + * Is our embedded IP version wrong? + */ + icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1); + if (unlikely(icmp_iph->version != 4)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("IP version: %u\n", icmp_iph->version); + return 0; + } + + /* + * Do we have the full embedded IP header, including any options? + */ + icmp_ihl_words = icmp_iph->ihl; + icmp_ihl = icmp_ihl_words << 2; + pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr); + if (!pskb_may_pull(skb, pull_len)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("Embedded header not large enough for IP options\n"); + return 0; + } + + len -= icmp_ihl; + icmp_trans_h = ((u32 *)icmp_iph) + icmp_ihl_words; + + /* + * Handle the embedded transport layer header. + */ + switch (icmp_iph->protocol) { + case IPPROTO_UDP: + /* + * We should have 8 bytes of UDP header - that's enough to identify + * the connection. + */ + pull_len += 8; + if (!pskb_may_pull(skb, pull_len)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("Incomplete embedded UDP header\n"); + return 0; + } + + icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h; + src_port = icmp_udph->source; + dest_port = icmp_udph->dest; + break; + + case IPPROTO_TCP: + /* + * We should have 8 bytes of TCP header - that's enough to identify + * the connection. + */ + pull_len += 8; + if (!pskb_may_pull(skb, pull_len)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("Incomplete embedded TCP header\n"); + return 0; + } + + icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h; + src_port = icmp_tcph->source; + dest_port = icmp_tcph->dest; + break; + + default: + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol); + return 0; + } + + src_ip = icmp_iph->saddr; + dest_ip = icmp_iph->daddr; + + spin_lock_bh(&si->lock); + + /* + * Look for a connection match. Note that we reverse the source and destination + * here because our embedded message contains a packet that was sent in the + * opposite direction to the one in which we just received it. 
It will have
+	 * been sent on the interface from which we received it though so that's still
+	 * ok to use.
+	 */
+	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
+	if (unlikely(!cm)) {
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("no connection found\n");
+		return 0;
+	}
+
+	/*
+	 * We found a connection so now remove it from the connection list and flush
+	 * its state.
+	 */
+	c = cm->connection;
+	sfe_ipv4_remove_sfe_ipv4_connection(si, c);
+	si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
+	si->packets_not_forwarded++;
+	spin_unlock_bh(&si->lock);
+
+	sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
+	return 0;
+}
+
+/*
+ * sfe_ipv4_recv()
+ *	Handle packet receives and forwarding.
+ *
+ * Returns 1 if the packet is forwarded or 0 if it isn't.
+ */
+int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sfe_ipv4 *si = &__si;
+	unsigned int len;
+	unsigned int tot_len;
+	unsigned int frag_off;
+	unsigned int ihl;
+	bool flush_on_find;
+	bool ip_options;
+	struct sfe_ipv4_ip_hdr *iph;
+	u32 protocol;
+
+	/*
+	 * Check that we have space for an IP header here.
+	 */
+	len = skb->len;
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("len: %u is too short\n", len);
+		return 0;
+	}
+
+	/*
+	 * Check that our "total length" is large enough for an IP header.
+	 */
+	iph = (struct sfe_ipv4_ip_hdr *)skb->data;
+	tot_len = ntohs(iph->tot_len);
+	if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
+		return 0;
+	}
+
+	/*
+	 * Is our IP version wrong?
+	 */
+	if (unlikely(iph->version != 4)) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("IP version: %u\n", iph->version);
+		return 0;
+	}
+
+	/*
+	 * Does our datagram fit inside the skb?
+	 */
+	if (unlikely(tot_len > len)) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
+		return 0;
+	}
+
+	/*
+	 * Do we have a non-initial fragment?
+	 */
+	frag_off = ntohs(iph->frag_off);
+	if (unlikely(frag_off & IP_OFFSET)) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("non-initial fragment\n");
+		return 0;
+	}
+
+	/*
+	 * If we have a (first) fragment then mark it to cause any connection to flush.
+	 */
+	flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
+
+	/*
+	 * Do we have any IP options? That's definitely a slow path! If we do have IP
+	 * options we need to recheck our header size.
+	 */
+	ihl = iph->ihl << 2;
+	ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ?
true : false; + if (unlikely(ip_options)) { + if (unlikely(len < ihl)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl); + return 0; + } + + flush_on_find = true; + } + + protocol = iph->protocol; + if (IPPROTO_UDP == protocol) { + return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find); + } + + if (IPPROTO_TCP == protocol) { + return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find); + } + + if (IPPROTO_ICMP == protocol) { + return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl); + } + + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol); + return 0; +} + +static void +sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c, + struct sfe_connection_create *sic) +{ + struct sfe_ipv4_connection_match *orig_cm; + struct sfe_ipv4_connection_match *repl_cm; + struct sfe_ipv4_tcp_connection_match *orig_tcp; + struct sfe_ipv4_tcp_connection_match *repl_tcp; + + orig_cm = c->original_match; + repl_cm = c->reply_match; + orig_tcp = &orig_cm->protocol_state.tcp; + repl_tcp = &repl_cm->protocol_state.tcp; + + /* update orig */ + if (orig_tcp->max_win < sic->src_td_max_window) { + orig_tcp->max_win = sic->src_td_max_window; + } + if ((s32)(orig_tcp->end - sic->src_td_end) < 0) { + orig_tcp->end = sic->src_td_end; + } + if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) { + orig_tcp->max_end = sic->src_td_max_end; + } + + /* update reply */ + if (repl_tcp->max_win < sic->dest_td_max_window) { + repl_tcp->max_win = sic->dest_td_max_window; + } + if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) { + repl_tcp->end = sic->dest_td_end; + } + if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) { + repl_tcp->max_end = sic->dest_td_max_end; + } + + /* update match flags */ + orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; + repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; + if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { + orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; + repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; + } +} + +static void +sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c, + struct sfe_connection_create *sic) +{ + switch (sic->protocol) { + case IPPROTO_TCP: + sfe_ipv4_update_tcp_state(c, sic); + break; + } +} + +void sfe_ipv4_update_rule(struct sfe_connection_create *sic) +{ + struct sfe_ipv4_connection *c; + struct sfe_ipv4 *si = &__si; + + spin_lock_bh(&si->lock); + + c = sfe_ipv4_find_sfe_ipv4_connection(si, + sic->protocol, + sic->src_ip.ip, + sic->src_port, + sic->dest_ip.ip, + sic->dest_port); + if (c != NULL) { + sfe_ipv4_update_protocol_state(c, sic); + } + + spin_unlock_bh(&si->lock); +} + +/* + * sfe_ipv4_create_rule() + * Create a forwarding rule. 
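+ *
+ * Minimal usage sketch (illustrative only; the field values are made up,
+ * and in this tree the real caller is a connection manager such as
+ * fast-classifier, which fills these from conntrack state):
+ *
+ *	struct sfe_connection_create sic = { 0 };
+ *
+ *	sic.protocol = IPPROTO_TCP;
+ *	sic.src_dev = lan_dev;
+ *	sic.dest_dev = wan_dev;
+ *	sic.src_ip.ip = lan_ip;
+ *	sic.src_ip_xlate.ip = wan_ip;	/* masqueraded source */
+ *	sic.src_port = sic.src_port_xlate = client_port;
+ *	sic.dest_ip.ip = sic.dest_ip_xlate.ip = server_ip;
+ *	sic.dest_port = sic.dest_port_xlate = server_port;
+ *	... (MACs, MTUs and TCP window state follow the same pattern)
+ *	sfe_ipv4_create_rule(&sic);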
+ */ +int sfe_ipv4_create_rule(struct sfe_connection_create *sic) +{ + struct sfe_ipv4 *si = &__si; + struct sfe_ipv4_connection *c; + struct sfe_ipv4_connection_match *original_cm; + struct sfe_ipv4_connection_match *reply_cm; + struct net_device *dest_dev; + struct net_device *src_dev; + + dest_dev = sic->dest_dev; + src_dev = sic->src_dev; + + if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) || + (src_dev->reg_state != NETREG_REGISTERED))) { + return -EINVAL; + } + + spin_lock_bh(&si->lock); + si->connection_create_requests++; + + /* + * Check to see if there is already a flow that matches the rule we're + * trying to create. If there is then we can't create a new one. + */ + c = sfe_ipv4_find_sfe_ipv4_connection(si, + sic->protocol, + sic->src_ip.ip, + sic->src_port, + sic->dest_ip.ip, + sic->dest_port); + if (c != NULL) { + si->connection_create_collisions++; + + /* + * If we already have the flow then it's likely that this + * request to create the connection rule contains more + * up-to-date information. Check and update accordingly. + */ + sfe_ipv4_update_protocol_state(c, sic); + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n" + " s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n", + sic->mark, sic->protocol, + sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port), + sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port)); + return -EADDRINUSE; + } + + /* + * Allocate the various connection tracking objects. + */ + c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC); + if (unlikely(!c)) { + spin_unlock_bh(&si->lock); + return -ENOMEM; + } + + original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC); + if (unlikely(!original_cm)) { + spin_unlock_bh(&si->lock); + kfree(c); + return -ENOMEM; + } + + reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC); + if (unlikely(!reply_cm)) { + spin_unlock_bh(&si->lock); + kfree(original_cm); + kfree(c); + return -ENOMEM; + } + + /* + * Fill in the "original" direction connection matching object. + * Note that the transmit MAC address is "dest_mac_xlate" because + * we always know both ends of a connection by their translated + * addresses and not their public addresses. 
+ */ + original_cm->match_dev = src_dev; + original_cm->match_protocol = sic->protocol; + original_cm->match_src_ip = sic->src_ip.ip; + original_cm->match_src_port = sic->src_port; + original_cm->match_dest_ip = sic->dest_ip.ip; + original_cm->match_dest_port = sic->dest_port; + original_cm->xlate_src_ip = sic->src_ip_xlate.ip; + original_cm->xlate_src_port = sic->src_port_xlate; + original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip; + original_cm->xlate_dest_port = sic->dest_port_xlate; + original_cm->rx_packet_count = 0; + original_cm->rx_packet_count64 = 0; + original_cm->rx_byte_count = 0; + original_cm->rx_byte_count64 = 0; + original_cm->xmit_dev = dest_dev; + original_cm->xmit_dev_mtu = sic->dest_mtu; + memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN); + memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN); + original_cm->connection = c; + original_cm->counter_match = reply_cm; + original_cm->flags = 0; + if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { + original_cm->priority = sic->src_priority; + original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; + } + if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { + original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT; + original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; + } +#ifdef CONFIG_NF_FLOW_COOKIE + original_cm->flow_cookie = 0; +#endif +#ifdef CONFIG_XFRM + original_cm->flow_accel = sic->original_accel; +#endif + original_cm->active_next = NULL; + original_cm->active_prev = NULL; + original_cm->active = false; + + /* + * For PPP links we don't write an L2 header. For everything else we do. + */ + if (!(dest_dev->flags & IFF_POINTOPOINT)) { + original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; + + /* + * If our dev writes Ethernet headers then we can write a really fast + * version. + */ + if (dest_dev->header_ops) { + if (dest_dev->header_ops->create == eth_header) { + original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; + } + } + } + + /* + * Fill in the "reply" direction connection matching object. 
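+	 *
+	 * (Worked example with made-up addresses: for a masqueraded flow
+	 * 192.168.1.10:40000 -> 1.2.3.4:80 rewritten to source 100.64.0.1,
+	 * the "original" match above is (192.168.1.10:40000 -> 1.2.3.4:80)
+	 * while the "reply" match below is (1.2.3.4:80 -> 100.64.0.1:40000);
+	 * the reply direction matches on the translated tuple and its xlate
+	 * fields undo the translation.)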
+ */ + reply_cm->match_dev = dest_dev; + reply_cm->match_protocol = sic->protocol; + reply_cm->match_src_ip = sic->dest_ip_xlate.ip; + reply_cm->match_src_port = sic->dest_port_xlate; + reply_cm->match_dest_ip = sic->src_ip_xlate.ip; + reply_cm->match_dest_port = sic->src_port_xlate; + reply_cm->xlate_src_ip = sic->dest_ip.ip; + reply_cm->xlate_src_port = sic->dest_port; + reply_cm->xlate_dest_ip = sic->src_ip.ip; + reply_cm->xlate_dest_port = sic->src_port; + reply_cm->rx_packet_count = 0; + reply_cm->rx_packet_count64 = 0; + reply_cm->rx_byte_count = 0; + reply_cm->rx_byte_count64 = 0; + reply_cm->xmit_dev = src_dev; + reply_cm->xmit_dev_mtu = sic->src_mtu; + memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN); + memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN); + reply_cm->connection = c; + reply_cm->counter_match = original_cm; + reply_cm->flags = 0; + if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { + reply_cm->priority = sic->dest_priority; + reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; + } + if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { + reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT; + reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK; + } +#ifdef CONFIG_NF_FLOW_COOKIE + reply_cm->flow_cookie = 0; +#endif +#ifdef CONFIG_XFRM + reply_cm->flow_accel = sic->reply_accel; +#endif + reply_cm->active_next = NULL; + reply_cm->active_prev = NULL; + reply_cm->active = false; + + /* + * For PPP links we don't write an L2 header. For everything else we do. + */ + if (!(src_dev->flags & IFF_POINTOPOINT)) { + reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; + + /* + * If our dev writes Ethernet headers then we can write a really fast + * version. + */ + if (src_dev->header_ops) { + if (src_dev->header_ops->create == eth_header) { + reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; + } + } + } + + + if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) { + original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; + reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; + } + + if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) { + original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC; + reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST; + } + + c->protocol = sic->protocol; + c->src_ip = sic->src_ip.ip; + c->src_ip_xlate = sic->src_ip_xlate.ip; + c->src_port = sic->src_port; + c->src_port_xlate = sic->src_port_xlate; + c->original_dev = src_dev; + c->original_match = original_cm; + c->dest_ip = sic->dest_ip.ip; + c->dest_ip_xlate = sic->dest_ip_xlate.ip; + c->dest_port = sic->dest_port; + c->dest_port_xlate = sic->dest_port_xlate; + c->reply_dev = dest_dev; + c->reply_match = reply_cm; + c->mark = sic->mark; + c->debug_read_seq = 0; + c->last_sync_jiffies = get_jiffies_64(); + + /* + * Take hold of our source and dest devices for the duration of the connection. + */ + dev_hold(c->original_dev); + dev_hold(c->reply_dev); + + /* + * Initialize the protocol-specific information that we track. + */ + switch (sic->protocol) { + case IPPROTO_TCP: + original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale; + original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? 
sic->src_td_max_window : 1; + original_cm->protocol_state.tcp.end = sic->src_td_end; + original_cm->protocol_state.tcp.max_end = sic->src_td_max_end; + reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale; + reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1; + reply_cm->protocol_state.tcp.end = sic->dest_td_end; + reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end; + if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { + original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; + reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; + } + break; + } + + sfe_ipv4_connection_match_compute_translations(original_cm); + sfe_ipv4_connection_match_compute_translations(reply_cm); + sfe_ipv4_insert_sfe_ipv4_connection(si, c); + + spin_unlock_bh(&si->lock); + + /* + * We have everything we need! + */ + DEBUG_INFO("new connection - mark: %08x, p: %d\n" + " s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n" + " d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n", + sic->mark, sic->protocol, + sic->src_dev->name, sic->src_mac, sic->src_mac_xlate, + &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate), + dest_dev->name, sic->dest_mac, sic->dest_mac_xlate, + &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate)); + + return 0; +} + +/* + * sfe_ipv4_destroy_rule() + * Destroy a forwarding rule. + */ +void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid) +{ + struct sfe_ipv4 *si = &__si; + struct sfe_ipv4_connection *c; + + spin_lock_bh(&si->lock); + si->connection_destroy_requests++; + + /* + * Check to see if we have a flow that matches the rule we're trying + * to destroy. If there isn't then we can't destroy it. + */ + c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port, + sid->dest_ip.ip, sid->dest_port); + if (!c) { + si->connection_destroy_misses++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n", + sid->protocol, &sid->src_ip, ntohs(sid->src_port), + &sid->dest_ip, ntohs(sid->dest_port)); + return; + } + + /* + * Remove our connection details from the hash tables. + */ + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + spin_unlock_bh(&si->lock); + + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY); + + DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n", + sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port), + &sid->dest_ip.ip, ntohs(sid->dest_port)); +} + +/* + * sfe_ipv4_register_sync_rule_callback() + * Register a callback for rule synchronization. + */ +void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback) +{ + struct sfe_ipv4 *si = &__si; + + spin_lock_bh(&si->lock); + rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback); + spin_unlock_bh(&si->lock); +} + +/* + * sfe_ipv4_get_debug_dev() + */ +static ssize_t sfe_ipv4_get_debug_dev(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct sfe_ipv4 *si = &__si; + ssize_t count; + int num; + + spin_lock_bh(&si->lock); + num = si->debug_dev; + spin_unlock_bh(&si->lock); + + count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num); + return count; +} + +/* + * sysfs attributes. + */ +static const struct device_attribute sfe_ipv4_debug_dev_attr = + __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL); + +/* + * sfe_ipv4_destroy_all_rules_for_dev() + * Destroy all connections that match a particular device. 
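+ *
+ * Illustrative only, not part of this driver: a connection manager built on
+ * top of this API would typically call it from a netdevice notifier so that
+ * offloaded flows never outlive their interface, e.g.:
+ *
+ *	static int example_device_event(struct notifier_block *this,
+ *					unsigned long event, void *ptr)
+ *	{
+ *		struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ *
+ *		if (event == NETDEV_DOWN && dev)
+ *			sfe_ipv4_destroy_all_rules_for_dev(dev);
+ *
+ *		return NOTIFY_DONE;
+ *	}
+ *
+ * ("example_device_event" is a hypothetical name; only the call into this
+ * function is real.)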
+ * + * If we pass dev as NULL then this destroys all connections. + */ +void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev) +{ + struct sfe_ipv4 *si = &__si; + struct sfe_ipv4_connection *c; + +another_round: + spin_lock_bh(&si->lock); + + for (c = si->all_connections_head; c; c = c->all_connections_next) { + /* + * Does this connection relate to the device we are destroying? + */ + if (!dev + || (dev == c->original_dev) + || (dev == c->reply_dev)) { + break; + } + } + + if (c) { + sfe_ipv4_remove_sfe_ipv4_connection(si, c); + } + + spin_unlock_bh(&si->lock); + + if (c) { + sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY); + goto another_round; + } +} + +/* + * sfe_ipv4_periodic_sync() + */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) +static void sfe_ipv4_periodic_sync(struct timer_list *arg) +#else +static void sfe_ipv4_periodic_sync(unsigned long arg) +#endif /*KERNEL_VERSION(4, 15, 0)*/ +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) + struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg->cust_data; +#else + struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg; +#endif /*KERNEL_VERSION(4, 15, 0)*/ + u64 now_jiffies; + int quota; + sfe_sync_rule_callback_t sync_rule_callback; + + now_jiffies = get_jiffies_64(); + + rcu_read_lock(); + sync_rule_callback = rcu_dereference(si->sync_rule_callback); + if (!sync_rule_callback) { + rcu_read_unlock(); + goto done; + } + + spin_lock_bh(&si->lock); + sfe_ipv4_update_summary_stats(si); + + /* + * Get an estimate of the number of connections to parse in this sync. + */ + quota = (si->num_connections + 63) / 64; + + /* + * Walk the "active" list and sync the connection state. + */ + while (quota--) { + struct sfe_ipv4_connection_match *cm; + struct sfe_ipv4_connection_match *counter_cm; + struct sfe_ipv4_connection *c; + struct sfe_connection_sync sis; + + cm = si->active_head; + if (!cm) { + break; + } + + /* + * There's a possibility that our counter match is in the active list too. + * If it is then remove it. + */ + counter_cm = cm->counter_match; + if (counter_cm->active) { + counter_cm->active = false; + + /* + * We must have a connection preceding this counter match + * because that's the one that got us to this point, so we don't have + * to worry about removing the head of the list. + */ + counter_cm->active_prev->active_next = counter_cm->active_next; + + if (likely(counter_cm->active_next)) { + counter_cm->active_next->active_prev = counter_cm->active_prev; + } else { + si->active_tail = counter_cm->active_prev; + } + + counter_cm->active_next = NULL; + counter_cm->active_prev = NULL; + } + + /* + * Now remove the head of the active scan list. + */ + cm->active = false; + si->active_head = cm->active_next; + if (likely(cm->active_next)) { + cm->active_next->active_prev = NULL; + } else { + si->active_tail = NULL; + } + cm->active_next = NULL; + + /* + * Sync the connection state. + */ + c = cm->connection; + sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies); + + /* + * We don't want to be holding the lock when we sync! + */ + spin_unlock_bh(&si->lock); + sync_rule_callback(&sis); + spin_lock_bh(&si->lock); + } + + spin_unlock_bh(&si->lock); + rcu_read_unlock(); + +done: + mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); +} + +#define CHAR_DEV_MSG_SIZE 768 + +/* + * sfe_ipv4_debug_dev_read_start() + * Generate part of the XML output. 
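+ *
+ * Each of the sfe_ipv4_debug_dev_read_*() handlers below writes one fragment
+ * of the document into msg, copies it out to the user buffer and then
+ * advances ws->state, so a single read() call drives the state machine from
+ * SFE_IPV4_DEBUG_XML_STATE_START towards SFE_IPV4_DEBUG_XML_STATE_DONE one
+ * element at a time (see sfe_ipv4_debug_dev_read() below).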
+ */
+static bool sfe_ipv4_debug_dev_read_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+					  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	si->debug_read_seq++;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv4>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv4_debug_dev_read_connections_start()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv4_debug_dev_read_connections_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+						      int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv4_debug_dev_read_connections_connection()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+							    int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	struct sfe_ipv4_connection *c;
+	struct sfe_ipv4_connection_match *original_cm;
+	struct sfe_ipv4_connection_match *reply_cm;
+	int bytes_read;
+	int protocol;
+	struct net_device *src_dev;
+	__be32 src_ip;
+	__be32 src_ip_xlate;
+	__be16 src_port;
+	__be16 src_port_xlate;
+	u64 src_rx_packets;
+	u64 src_rx_bytes;
+	struct net_device *dest_dev;
+	__be32 dest_ip;
+	__be32 dest_ip_xlate;
+	__be16 dest_port;
+	__be16 dest_port_xlate;
+	u64 dest_rx_packets;
+	u64 dest_rx_bytes;
+	u64 last_sync_jiffies;
+	u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
+#ifdef CONFIG_NF_FLOW_COOKIE
+	int src_flow_cookie, dst_flow_cookie;
+#endif
+
+	spin_lock_bh(&si->lock);
+
+	for (c = si->all_connections_head; c; c = c->all_connections_next) {
+		if (c->debug_read_seq < si->debug_read_seq) {
+			c->debug_read_seq = si->debug_read_seq;
+			break;
+		}
+	}
+
+	/*
+	 * If there were no connections then move to the next state.
+	 */
+	if (!c) {
+		spin_unlock_bh(&si->lock);
+		ws->state++;
+		return true;
+	}
+
+	original_cm = c->original_match;
+	reply_cm = c->reply_match;
+
+	protocol = c->protocol;
+	src_dev = c->original_dev;
+	src_ip = c->src_ip;
+	src_ip_xlate = c->src_ip_xlate;
+	src_port = c->src_port;
+	src_port_xlate = c->src_port_xlate;
+	src_priority = original_cm->priority;
+	src_dscp = original_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
+
+	sfe_ipv4_connection_match_update_summary_stats(original_cm);
+	sfe_ipv4_connection_match_update_summary_stats(reply_cm);
+
+	src_rx_packets = original_cm->rx_packet_count64;
+	src_rx_bytes = original_cm->rx_byte_count64;
+	dest_dev = c->reply_dev;
+	dest_ip = c->dest_ip;
+	dest_ip_xlate = c->dest_ip_xlate;
+	dest_port = c->dest_port;
+	dest_port_xlate = c->dest_port_xlate;
+	dest_priority = reply_cm->priority;
+	dest_dscp = reply_cm->dscp >> SFE_IPV4_DSCP_SHIFT;
+	dest_rx_packets = reply_cm->rx_packet_count64;
+	dest_rx_bytes = reply_cm->rx_byte_count64;
+	last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
+	mark = c->mark;
+#ifdef CONFIG_NF_FLOW_COOKIE
+	src_flow_cookie = original_cm->flow_cookie;
+	dst_flow_cookie = reply_cm->flow_cookie;
+#endif
+	spin_unlock_bh(&si->lock);
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
+			      "protocol=\"%u\" "
+			      "src_dev=\"%s\" "
+			      "src_ip=\"%pI4\" src_ip_xlate=\"%pI4\" "
+			      "src_port=\"%u\" src_port_xlate=\"%u\" "
+			      "src_priority=\"%u\" src_dscp=\"%u\" "
+			      "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
+			      "dest_dev=\"%s\" "
+			      "dest_ip=\"%pI4\" dest_ip_xlate=\"%pI4\" "
+			      "dest_port=\"%u\" dest_port_xlate=\"%u\" "
+			      "dest_priority=\"%u\" dest_dscp=\"%u\" "
+			      "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
+#ifdef CONFIG_NF_FLOW_COOKIE
+			      "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
+#endif
+			      "last_sync=\"%llu\" "
+			      "mark=\"%08x\" />\n",
+			      protocol,
+			      src_dev->name,
+			      &src_ip, &src_ip_xlate,
+			      ntohs(src_port), ntohs(src_port_xlate),
+			      src_priority, src_dscp,
+			      src_rx_packets, src_rx_bytes,
+			      dest_dev->name,
+			      &dest_ip, &dest_ip_xlate,
+			      ntohs(dest_port), ntohs(dest_port_xlate),
+			      dest_priority, dest_dscp,
+			      dest_rx_packets, dest_rx_bytes,
+#ifdef CONFIG_NF_FLOW_COOKIE
+			      src_flow_cookie, dst_flow_cookie,
+#endif
+			      last_sync_jiffies, mark);
+
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	return true;
+}
+
+/*
+ * sfe_ipv4_debug_dev_read_connections_end()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv4_debug_dev_read_connections_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+						    int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv4_debug_dev_read_exceptions_start()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv4_debug_dev_read_exceptions_start(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+						     int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv4_debug_dev_read_exceptions_exception()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv4_debug_dev_read_exceptions_exception(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+							 int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	u64 ct;
+
+	spin_lock_bh(&si->lock);
+	ct = si->exception_events64[ws->iter_exception];
+	spin_unlock_bh(&si->lock);
+
+	if (ct) {
+		int bytes_read;
+
+		bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
+				      "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
+				      sfe_ipv4_exception_events_string[ws->iter_exception],
+				      ct);
+		if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+			return false;
+		}
+
+		*length -= bytes_read;
+		*total_read += bytes_read;
+	}
+
+	ws->iter_exception++;
+	if (ws->iter_exception >= SFE_IPV4_EXCEPTION_EVENT_LAST) {
+		ws->iter_exception = 0;
+		ws->state++;
+	}
+
+	return true;
+}
+
+/*
+ * sfe_ipv4_debug_dev_read_exceptions_end()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv4_debug_dev_read_exceptions_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+						   int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv4_debug_dev_read_stats()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv4_debug_dev_read_stats(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+					  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	int bytes_read;
+	unsigned int num_connections;
+	u64 packets_forwarded;
+	u64 packets_not_forwarded;
+	u64 connection_create_requests;
+	u64 connection_create_collisions;
+	u64 connection_destroy_requests;
+	u64 connection_destroy_misses;
+	u64 connection_flushes;
+	u64 connection_match_hash_hits;
+	u64 connection_match_hash_reorders;
+
+	spin_lock_bh(&si->lock);
+	sfe_ipv4_update_summary_stats(si);
+
+	num_connections = si->num_connections;
+	packets_forwarded = si->packets_forwarded64;
+	packets_not_forwarded = si->packets_not_forwarded64;
+	connection_create_requests = si->connection_create_requests64;
+	connection_create_collisions = si->connection_create_collisions64;
+	connection_destroy_requests = si->connection_destroy_requests64;
+	connection_destroy_misses = si->connection_destroy_misses64;
+	connection_flushes = si->connection_flushes64;
+	connection_match_hash_hits = si->connection_match_hash_hits64;
+	connection_match_hash_reorders = si->connection_match_hash_reorders64;
+	spin_unlock_bh(&si->lock);
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
+			      "num_connections=\"%u\" "
+			      "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
+			      "create_requests=\"%llu\" create_collisions=\"%llu\" "
+			      "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
+			      "flushes=\"%llu\" "
+			      "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
+			      num_connections,
+			      packets_forwarded,
+			      packets_not_forwarded,
+			      connection_create_requests,
+			      connection_create_collisions,
+			      connection_destroy_requests,
+			      connection_destroy_misses,
+			      connection_flushes,
+			      connection_match_hash_hits,
+			      connection_match_hash_reorders);
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv4_debug_dev_read_end()
+ *	Generate part of the XML output.
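+ *
+ * Taken together the handlers above and below produce a document of roughly
+ * this shape (element contents abridged, values invented for illustration):
+ *
+ *	<sfe_ipv4>
+ *		<connections>
+ *			<connection protocol="17" src_dev="br-lan" ... mark="00000000" />
+ *		</connections>
+ *		<exceptions>
+ *			<exception name="UDP_NO_CONNECTION" count="42" />
+ *		</exceptions>
+ *		<stats num_connections="1" ... hash_reorders="0" />
+ *	</sfe_ipv4>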
+ */
+static bool sfe_ipv4_debug_dev_read_end(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
+					int *total_read, struct sfe_ipv4_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv4>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * Array of write functions that write various XML elements that correspond to
+ * our XML output state machine.
+ */
+static sfe_ipv4_debug_xml_write_method_t sfe_ipv4_debug_xml_write_methods[SFE_IPV4_DEBUG_XML_STATE_DONE] = {
+	sfe_ipv4_debug_dev_read_start,
+	sfe_ipv4_debug_dev_read_connections_start,
+	sfe_ipv4_debug_dev_read_connections_connection,
+	sfe_ipv4_debug_dev_read_connections_end,
+	sfe_ipv4_debug_dev_read_exceptions_start,
+	sfe_ipv4_debug_dev_read_exceptions_exception,
+	sfe_ipv4_debug_dev_read_exceptions_end,
+	sfe_ipv4_debug_dev_read_stats,
+	sfe_ipv4_debug_dev_read_end,
+};
+
+/*
+ * sfe_ipv4_debug_dev_read()
+ *	Send info to userspace upon read request from user
+ */
+static ssize_t sfe_ipv4_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
+{
+	char msg[CHAR_DEV_MSG_SIZE];
+	int total_read = 0;
+	struct sfe_ipv4_debug_xml_write_state *ws;
+	struct sfe_ipv4 *si = &__si;
+
+	ws = (struct sfe_ipv4_debug_xml_write_state *)filp->private_data;
+	while ((ws->state != SFE_IPV4_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
+		if ((sfe_ipv4_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
+			continue;
+		}
+	}
+
+	return total_read;
+}
+
+/*
+ * sfe_ipv4_debug_dev_write()
+ *	Write to char device resets some stats
+ */
+static ssize_t sfe_ipv4_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
+{
+	struct sfe_ipv4 *si = &__si;
+
+	spin_lock_bh(&si->lock);
+	sfe_ipv4_update_summary_stats(si);
+
+	si->packets_forwarded64 = 0;
+	si->packets_not_forwarded64 = 0;
+	si->connection_create_requests64 = 0;
+	si->connection_create_collisions64 = 0;
+	si->connection_destroy_requests64 = 0;
+	si->connection_destroy_misses64 = 0;
+	si->connection_flushes64 = 0;
+	si->connection_match_hash_hits64 = 0;
+	si->connection_match_hash_reorders64 = 0;
+	spin_unlock_bh(&si->lock);
+
+	return length;
+}
+
+/*
+ * sfe_ipv4_debug_dev_open()
+ */
+static int sfe_ipv4_debug_dev_open(struct inode *inode, struct file *file)
+{
+	struct sfe_ipv4_debug_xml_write_state *ws;
+
+	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
+	if (!ws) {
+		ws = kzalloc(sizeof(struct sfe_ipv4_debug_xml_write_state), GFP_KERNEL);
+		if (!ws) {
+			return -ENOMEM;
+		}
+
+		ws->state = SFE_IPV4_DEBUG_XML_STATE_START;
+		file->private_data = ws;
+	}
+
+	return 0;
+}
+
+/*
+ * sfe_ipv4_debug_dev_release()
+ */
+static int sfe_ipv4_debug_dev_release(struct inode *inode, struct file *file)
+{
+	struct sfe_ipv4_debug_xml_write_state *ws;
+
+	ws = (struct sfe_ipv4_debug_xml_write_state *)file->private_data;
+	if (ws) {
+		/*
+		 * We've finished with our output so free the write state.
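+		 *
+		 * The state was allocated in sfe_ipv4_debug_dev_open() and has
+		 * lived in file->private_data since, so one open/read/close
+		 * cycle walks the whole XML state machine exactly once.  As a
+		 * sketch (device node path assumed, major number taken from
+		 * the sysfs attribute above), userspace can read a dump with:
+		 *
+		 *	major=$(cat /sys/sfe_ipv4/debug_dev)
+		 *	mknod /tmp/sfe_ipv4 c $major 0
+		 *	cat /tmp/sfe_ipv4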
+ */ + kfree(ws); + } + + return 0; +} + +/* + * File operations used in the debug char device + */ +static struct file_operations sfe_ipv4_debug_dev_fops = { + .read = sfe_ipv4_debug_dev_read, + .write = sfe_ipv4_debug_dev_write, + .open = sfe_ipv4_debug_dev_open, + .release = sfe_ipv4_debug_dev_release +}; + +#ifdef CONFIG_NF_FLOW_COOKIE +/* + * sfe_register_flow_cookie_cb + * register a function in SFE to let SFE use this function to configure flow cookie for a flow + * + * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE + * can use this function to configure flow cookie for a flow. + * return: 0, success; !=0, fail + */ +int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb) +{ + struct sfe_ipv4 *si = &__si; + + BUG_ON(!cb); + + if (si->flow_cookie_set_func) { + return -1; + } + + rcu_assign_pointer(si->flow_cookie_set_func, cb); + return 0; +} + +/* + * sfe_unregister_flow_cookie_cb + * unregister function which is used to configure flow cookie for a flow + * + * return: 0, success; !=0, fail + */ +int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb) +{ + struct sfe_ipv4 *si = &__si; + + RCU_INIT_POINTER(si->flow_cookie_set_func, NULL); + return 0; +} + +/* + * sfe_ipv4_get_flow_cookie() + */ +static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct sfe_ipv4 *si = &__si; + return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable); +} + +/* + * sfe_ipv4_set_flow_cookie() + */ +static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct sfe_ipv4 *si = &__si; + strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable); + + return size; +} + +/* + * sysfs attributes. + */ +static const struct device_attribute sfe_ipv4_flow_cookie_attr = + __ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie); +#endif /*CONFIG_NF_FLOW_COOKIE*/ + +/* + * sfe_ipv4_init() + */ +static int __init sfe_ipv4_init(void) +{ + struct sfe_ipv4 *si = &__si; + int result = -1; + + DEBUG_INFO("SFE IPv4 init\n"); + + /* + * Create sys/sfe_ipv4 + */ + si->sys_sfe_ipv4 = kobject_create_and_add("sfe_ipv4", NULL); + if (!si->sys_sfe_ipv4) { + DEBUG_ERROR("failed to register sfe_ipv4\n"); + goto exit1; + } + + /* + * Create files, one for each parameter supported by this module. + */ + result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr); + if (result) { + DEBUG_ERROR("failed to register debug dev file: %d\n", result); + goto exit2; + } + +#ifdef CONFIG_NF_FLOW_COOKIE + result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr); + if (result) { + DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result); + goto exit3; + } +#endif /* CONFIG_NF_FLOW_COOKIE */ + + /* + * Register our debug char device. + */ + result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops); + if (result < 0) { + DEBUG_ERROR("Failed to register chrdev: %d\n", result); + goto exit4; + } + + si->debug_dev = result; + + /* + * Create a timer to handle periodic statistics. 
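+	 *
+	 * The interval used below, jiffies + ((HZ + 99) / 100), is
+	 * ceil(HZ / 100) jiffies: 1 jiffy at HZ=100, 10 jiffies at HZ=1000,
+	 * i.e. roughly 10ms regardless of HZ.  Combined with the per-pass
+	 * quota of about num_connections / 64 in sfe_ipv4_periodic_sync(),
+	 * every active connection gets synced roughly once per 640ms.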
+	 */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
+	timer_setup(&si->timer, sfe_ipv4_periodic_sync, 0);
+	si->timer.cust_data = (unsigned long)si;
+#else
+	setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
+#endif /*KERNEL_VERSION(4, 15, 0)*/
+	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
+
+	spin_lock_init(&si->lock);
+
+	return 0;
+
+exit4:
+#ifdef CONFIG_NF_FLOW_COOKIE
+	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
+
+exit3:
+#endif /* CONFIG_NF_FLOW_COOKIE */
+	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
+
+exit2:
+	kobject_put(si->sys_sfe_ipv4);
+
+exit1:
+	return result;
+}
+
+/*
+ * sfe_ipv4_exit()
+ */
+static void __exit sfe_ipv4_exit(void)
+{
+	struct sfe_ipv4 *si = &__si;
+
+	DEBUG_INFO("SFE IPv4 exit\n");
+
+	/*
+	 * Destroy all connections.
+	 */
+	sfe_ipv4_destroy_all_rules_for_dev(NULL);
+
+	del_timer_sync(&si->timer);
+
+	unregister_chrdev(si->debug_dev, "sfe_ipv4");
+
+#ifdef CONFIG_NF_FLOW_COOKIE
+	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
+#endif /* CONFIG_NF_FLOW_COOKIE */
+	sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
+
+	kobject_put(si->sys_sfe_ipv4);
+}
+
+module_init(sfe_ipv4_init)
+module_exit(sfe_ipv4_exit)
+
+EXPORT_SYMBOL(sfe_ipv4_recv);
+EXPORT_SYMBOL(sfe_ipv4_create_rule);
+EXPORT_SYMBOL(sfe_ipv4_destroy_rule);
+EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
+EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
+EXPORT_SYMBOL(sfe_ipv4_mark_rule);
+EXPORT_SYMBOL(sfe_ipv4_update_rule);
+#ifdef CONFIG_NF_FLOW_COOKIE
+EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
+EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
+#endif
+
+MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
+MODULE_LICENSE("Dual BSD/GPL");
+
diff --git a/shortcut-fe/src/sfe_ipv6.c b/shortcut-fe/src/sfe_ipv6.c
new file mode 100755
index 000000000..3c5ef1263
--- /dev/null
+++ b/shortcut-fe/src/sfe_ipv6.c
@@ -0,0 +1,3628 @@
+/*
+ * sfe_ipv6.c
+ *	Shortcut forwarding engine - IPv6 support.
+ *
+ * Copyright (c) 2015-2016, 2019, The Linux Foundation. All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for
+ * any purpose with or without fee is hereby granted, provided that the
+ * above copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/sysfs.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/tcp.h>
+#include <linux/etherdevice.h>
+#include <linux/version.h>
+
+#include "sfe.h"
+#include "sfe_cm.h"
+
+/*
+ * By default Linux IP header and transport layer header structures are
+ * unpacked, assuming that such headers should be 32-bit aligned.
+ * Unfortunately some wireless adaptors can't cope with this requirement and
+ * some CPUs can't handle misaligned accesses.  For those platforms we
+ * define SFE_IPV6_UNALIGNED_IP_HEADER and mark the structures as packed.
+ * When we do this the compiler will generate slightly worse code than for the
+ * aligned case (on most platforms) but will be much quicker than fixing
+ * things up in an unaligned trap handler.
+ */
+#define SFE_IPV6_UNALIGNED_IP_HEADER 1
+#if SFE_IPV6_UNALIGNED_IP_HEADER
+#define SFE_IPV6_UNALIGNED_STRUCT __attribute__((packed))
+#else
+#define SFE_IPV6_UNALIGNED_STRUCT
+#endif
+
+#define CHAR_DEV_MSG_SIZE 768
+
+/*
+ * An Ethernet header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV6_UNALIGNED_STRUCT)
+ */
+struct sfe_ipv6_eth_hdr {
+	__be16 h_dest[ETH_ALEN / 2];
+	__be16 h_source[ETH_ALEN / 2];
+	__be16 h_proto;
+} SFE_IPV6_UNALIGNED_STRUCT;
+
+#define SFE_IPV6_DSCP_MASK 0xf03f
+#define SFE_IPV6_DSCP_SHIFT 2
+
+/*
+ * An IPv6 header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV6_UNALIGNED_STRUCT)
+ */
+struct sfe_ipv6_ip_hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 priority:4,
+	     version:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8 version:4,
+	     priority:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+	__u8 flow_lbl[3];
+	__be16 payload_len;
+	__u8 nexthdr;
+	__u8 hop_limit;
+	struct sfe_ipv6_addr saddr;
+	struct sfe_ipv6_addr daddr;
+
+	/*
+	 * The extension headers start here.
+	 */
+} SFE_IPV6_UNALIGNED_STRUCT;
+
+#define SFE_IPV6_EXT_HDR_HOP 0
+#define SFE_IPV6_EXT_HDR_ROUTING 43
+#define SFE_IPV6_EXT_HDR_FRAG 44
+#define SFE_IPV6_EXT_HDR_ESP 50
+#define SFE_IPV6_EXT_HDR_AH 51
+#define SFE_IPV6_EXT_HDR_NONE 59
+#define SFE_IPV6_EXT_HDR_DST 60
+#define SFE_IPV6_EXT_HDR_MH 135
+
+/*
+ * fragmentation header
+ */
+struct sfe_ipv6_frag_hdr {
+	__u8 nexthdr;
+	__u8 reserved;
+	__be16 frag_off;
+	__be32 identification;
+};
+
+#define SFE_IPV6_FRAG_OFFSET 0xfff8
+
+/*
+ * generic IPv6 extension header
+ */
+struct sfe_ipv6_ext_hdr {
+	__u8 next_hdr;
+	__u8 hdr_len;
+	__u8 padding[6];
+} SFE_IPV6_UNALIGNED_STRUCT;
+
+/*
+ * A UDP header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV6_UNALIGNED_STRUCT)
+ */
+struct sfe_ipv6_udp_hdr {
+	__be16 source;
+	__be16 dest;
+	__be16 len;
+	__sum16 check;
+} SFE_IPV6_UNALIGNED_STRUCT;
+
+/*
+ * A TCP header, but with an optional "packed" attribute to
+ * help with performance on some platforms (see the definition of
+ * SFE_IPV6_UNALIGNED_STRUCT)
+ */
+struct sfe_ipv6_tcp_hdr {
+	__be16 source;
+	__be16 dest;
+	__be32 seq;
+	__be32 ack_seq;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u16 res1:4,
+	      doff:4,
+	      fin:1,
+	      syn:1,
+	      rst:1,
+	      psh:1,
+	      ack:1,
+	      urg:1,
+	      ece:1,
+	      cwr:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u16 doff:4,
+	      res1:4,
+	      cwr:1,
+	      ece:1,
+	      urg:1,
+	      ack:1,
+	      psh:1,
+	      rst:1,
+	      syn:1,
+	      fin:1;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+	__be16 window;
+	__sum16 check;
+	__be16 urg_ptr;
+} SFE_IPV6_UNALIGNED_STRUCT;
+
+/*
+ * Specifies the lower bound on ACK numbers carried in the TCP header
+ */
+#define SFE_IPV6_TCP_MAX_ACK_WINDOW 65520
+
+/*
+ * IPv6 TCP connection match additional data.
+ */
+struct sfe_ipv6_tcp_connection_match {
+	u8 win_scale;		/* Window scale */
+	u32 max_win;		/* Maximum window size seen */
+	u32 end;		/* Sequence number of the next byte to send (seq + segment length) */
+	u32 max_end;		/* Sequence number of the last byte to ack */
+};
+
+/*
+ * Bit flags for IPv6 connection matching entry.
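+ *
+ * The translation flags pair up across the two directions of a connection.
+ * For illustration, a destination-NAT'd flow would be set up as:
+ *
+ *	original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST;
+ *	reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC;
+ *
+ * because an address rewritten as the destination on the way out has to be
+ * rewritten as the source on the way back (see the IPv4 rule creation code
+ * above for the same pattern).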
+ */ +#define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0) + /* Perform source translation */ +#define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1) + /* Perform destination translation */ +#define SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2) + /* Ignore TCP sequence numbers */ +#define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3) + /* Fast Ethernet header write */ +#define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4) + /* Fast Ethernet header write */ +#define SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5) + /* remark priority of SKB */ +#define SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6) + /* remark DSCP of packet */ + +/* + * IPv6 connection matching structure. + */ +struct sfe_ipv6_connection_match { + /* + * References to other objects. + */ + struct sfe_ipv6_connection_match *next; + struct sfe_ipv6_connection_match *prev; + struct sfe_ipv6_connection *connection; + struct sfe_ipv6_connection_match *counter_match; + /* Matches the flow in the opposite direction as the one in connection */ + struct sfe_ipv6_connection_match *active_next; + struct sfe_ipv6_connection_match *active_prev; + bool active; /* Flag to indicate if we're on the active list */ + + /* + * Characteristics that identify flows that match this rule. + */ + struct net_device *match_dev; /* Network device */ + u8 match_protocol; /* Protocol */ + struct sfe_ipv6_addr match_src_ip[1]; /* Source IP address */ + struct sfe_ipv6_addr match_dest_ip[1]; /* Destination IP address */ + __be16 match_src_port; /* Source port/connection ident */ + __be16 match_dest_port; /* Destination port/connection ident */ + + /* + * Control the operations of the match. + */ + u32 flags; /* Bit flags */ +#ifdef CONFIG_NF_FLOW_COOKIE + u32 flow_cookie; /* used flow cookie, for debug */ +#endif +#ifdef CONFIG_XFRM + u32 flow_accel; /* The flow accelerated or not */ +#endif + + /* + * Connection state that we track once we match. + */ + union { /* Protocol-specific state */ + struct sfe_ipv6_tcp_connection_match tcp; + } protocol_state; + /* + * Stats recorded in a sync period. These stats will be added to + * rx_packet_count64/rx_byte_count64 after a sync period. + */ + u32 rx_packet_count; + u32 rx_byte_count; + + /* + * Packet translation information. + */ + struct sfe_ipv6_addr xlate_src_ip[1]; /* Address after source translation */ + __be16 xlate_src_port; /* Port/connection ident after source translation */ + u16 xlate_src_csum_adjustment; + /* Transport layer checksum adjustment after source translation */ + struct sfe_ipv6_addr xlate_dest_ip[1]; /* Address after destination translation */ + __be16 xlate_dest_port; /* Port/connection ident after destination translation */ + u16 xlate_dest_csum_adjustment; + /* Transport layer checksum adjustment after destination translation */ + + /* + * QoS information + */ + u32 priority; + u32 dscp; + + /* + * Packet transmit information. + */ + struct net_device *xmit_dev; /* Network device on which to transmit */ + unsigned short int xmit_dev_mtu; + /* Interface MTU */ + u16 xmit_dest_mac[ETH_ALEN / 2]; + /* Destination MAC address to use when forwarding */ + u16 xmit_src_mac[ETH_ALEN / 2]; + /* Source MAC address to use when forwarding */ + + /* + * Summary stats. + */ + u64 rx_packet_count64; + u64 rx_byte_count64; +}; + +/* + * Per-connection data structure. 
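+ *
+ * Each connection owns exactly two of the match entries defined above, one
+ * per direction, cross-linked through counter_match:
+ *
+ *	c->original_match->counter_match == c->reply_match
+ *	c->reply_match->counter_match == c->original_match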
+ */ +struct sfe_ipv6_connection { + struct sfe_ipv6_connection *next; + /* Pointer to the next entry in a hash chain */ + struct sfe_ipv6_connection *prev; + /* Pointer to the previous entry in a hash chain */ + int protocol; /* IP protocol number */ + struct sfe_ipv6_addr src_ip[1]; /* Src IP addr pre-translation */ + struct sfe_ipv6_addr src_ip_xlate[1]; /* Src IP addr post-translation */ + struct sfe_ipv6_addr dest_ip[1]; /* Dest IP addr pre-translation */ + struct sfe_ipv6_addr dest_ip_xlate[1]; /* Dest IP addr post-translation */ + __be16 src_port; /* Src port pre-translation */ + __be16 src_port_xlate; /* Src port post-translation */ + __be16 dest_port; /* Dest port pre-translation */ + __be16 dest_port_xlate; /* Dest port post-translation */ + struct sfe_ipv6_connection_match *original_match; + /* Original direction matching structure */ + struct net_device *original_dev; + /* Original direction source device */ + struct sfe_ipv6_connection_match *reply_match; + /* Reply direction matching structure */ + struct net_device *reply_dev; /* Reply direction source device */ + u64 last_sync_jiffies; /* Jiffies count for the last sync */ + struct sfe_ipv6_connection *all_connections_next; + /* Pointer to the next entry in the list of all connections */ + struct sfe_ipv6_connection *all_connections_prev; + /* Pointer to the previous entry in the list of all connections */ + u32 mark; /* mark for outgoing packet */ + u32 debug_read_seq; /* sequence number for debug dump */ +}; + +/* + * IPv6 connections and hash table size information. + */ +#define SFE_IPV6_CONNECTION_HASH_SHIFT 12 +#define SFE_IPV6_CONNECTION_HASH_SIZE (1 << SFE_IPV6_CONNECTION_HASH_SHIFT) +#define SFE_IPV6_CONNECTION_HASH_MASK (SFE_IPV6_CONNECTION_HASH_SIZE - 1) + +#ifdef CONFIG_NF_FLOW_COOKIE +#define SFE_FLOW_COOKIE_SIZE 2048 +#define SFE_FLOW_COOKIE_MASK 0x7ff + +struct sfe_ipv6_flow_cookie_entry { + struct sfe_ipv6_connection_match *match; + unsigned long last_clean_time; +}; +#endif + +enum sfe_ipv6_exception_events { + SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION, + SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT, + SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL, + SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION, + SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS, + SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS, + SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT, + SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL, + SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION, + SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS, + SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE, + SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS, + SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK, + SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS, + SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE, + SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE, + SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE, + SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE, + SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6, + SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_IP_OPTIONS_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UDP_HEADER_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_TCP_HEADER_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL, + SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION, + SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION, + 
SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_BAD_TOTAL_LENGTH, + SFE_IPV6_EXCEPTION_EVENT_NON_V6, + SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT, + SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE, + SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL, + SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL, + SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR, + SFE_IPV6_EXCEPTION_EVENT_LAST +}; + +static char *sfe_ipv6_exception_events_string[SFE_IPV6_EXCEPTION_EVENT_LAST] = { + "UDP_HEADER_INCOMPLETE", + "UDP_NO_CONNECTION", + "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT", + "UDP_SMALL_TTL", + "UDP_NEEDS_FRAGMENTATION", + "TCP_HEADER_INCOMPLETE", + "TCP_NO_CONNECTION_SLOW_FLAGS", + "TCP_NO_CONNECTION_FAST_FLAGS", + "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT", + "TCP_SMALL_TTL", + "TCP_NEEDS_FRAGMENTATION", + "TCP_FLAGS", + "TCP_SEQ_EXCEEDS_RIGHT_EDGE", + "TCP_SMALL_DATA_OFFS", + "TCP_BAD_SACK", + "TCP_BIG_DATA_OFFS", + "TCP_SEQ_BEFORE_LEFT_EDGE", + "TCP_ACK_EXCEEDS_RIGHT_EDGE", + "TCP_ACK_BEFORE_LEFT_EDGE", + "ICMP_HEADER_INCOMPLETE", + "ICMP_UNHANDLED_TYPE", + "ICMP_IPV6_HEADER_INCOMPLETE", + "ICMP_IPV6_NON_V6", + "ICMP_IPV6_IP_OPTIONS_INCOMPLETE", + "ICMP_IPV6_UDP_HEADER_INCOMPLETE", + "ICMP_IPV6_TCP_HEADER_INCOMPLETE", + "ICMP_IPV6_UNHANDLED_PROTOCOL", + "ICMP_NO_CONNECTION", + "ICMP_FLUSHED_CONNECTION", + "HEADER_INCOMPLETE", + "BAD_TOTAL_LENGTH", + "NON_V6", + "NON_INITIAL_FRAGMENT", + "DATAGRAM_INCOMPLETE", + "IP_OPTIONS_INCOMPLETE", + "UNHANDLED_PROTOCOL", + "FLOW_COOKIE_ADD_FAIL", + "CLONED_SKB_UNSHARE_ERROR" +}; + +/* + * Per-module structure. + */ +struct sfe_ipv6 { + spinlock_t lock; /* Lock for SMP correctness */ + struct sfe_ipv6_connection_match *active_head; + /* Head of the list of recently active connections */ + struct sfe_ipv6_connection_match *active_tail; + /* Tail of the list of recently active connections */ + struct sfe_ipv6_connection *all_connections_head; + /* Head of the list of all connections */ + struct sfe_ipv6_connection *all_connections_tail; + /* Tail of the list of all connections */ + unsigned int num_connections; /* Number of connections */ + struct timer_list timer; /* Timer used for periodic sync ops */ + sfe_sync_rule_callback_t __rcu sync_rule_callback; + /* Callback function registered by a connection manager for stats syncing */ + struct sfe_ipv6_connection *conn_hash[SFE_IPV6_CONNECTION_HASH_SIZE]; + /* Connection hash table */ + struct sfe_ipv6_connection_match *conn_match_hash[SFE_IPV6_CONNECTION_HASH_SIZE]; + /* Connection match hash table */ +#ifdef CONFIG_NF_FLOW_COOKIE + struct sfe_ipv6_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE]; + /* flow cookie table*/ + sfe_ipv6_flow_cookie_set_func_t flow_cookie_set_func; + /* function used to configure flow cookie in hardware*/ + int flow_cookie_enable; + /* Enable/disable flow cookie at runtime */ +#endif + + /* + * Stats recorded in a sync period. These stats will be added to + * connection_xxx64 after a sync period. 
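+	 *
+	 * For example, if 5 connection flushes happen within a sync period,
+	 * the next call to sfe_ipv6_update_summary_stats() folds them into
+	 * the 64-bit running total:
+	 *
+	 *	connection_flushes64 += connection_flushes;	(total grows by 5)
+	 *	connection_flushes = 0;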
+ */ + u32 connection_create_requests; + /* Number of IPv6 connection create requests */ + u32 connection_create_collisions; + /* Number of IPv6 connection create requests that collided with existing hash table entries */ + u32 connection_destroy_requests; + /* Number of IPv6 connection destroy requests */ + u32 connection_destroy_misses; + /* Number of IPv6 connection destroy requests that missed our hash table */ + u32 connection_match_hash_hits; + /* Number of IPv6 connection match hash hits */ + u32 connection_match_hash_reorders; + /* Number of IPv6 connection match hash reorders */ + u32 connection_flushes; /* Number of IPv6 connection flushes */ + u32 packets_forwarded; /* Number of IPv6 packets forwarded */ + u32 packets_not_forwarded; /* Number of IPv6 packets not forwarded */ + u32 exception_events[SFE_IPV6_EXCEPTION_EVENT_LAST]; + + /* + * Summary statistics. + */ + u64 connection_create_requests64; + /* Number of IPv6 connection create requests */ + u64 connection_create_collisions64; + /* Number of IPv6 connection create requests that collided with existing hash table entries */ + u64 connection_destroy_requests64; + /* Number of IPv6 connection destroy requests */ + u64 connection_destroy_misses64; + /* Number of IPv6 connection destroy requests that missed our hash table */ + u64 connection_match_hash_hits64; + /* Number of IPv6 connection match hash hits */ + u64 connection_match_hash_reorders64; + /* Number of IPv6 connection match hash reorders */ + u64 connection_flushes64; /* Number of IPv6 connection flushes */ + u64 packets_forwarded64; /* Number of IPv6 packets forwarded */ + u64 packets_not_forwarded64; + /* Number of IPv6 packets not forwarded */ + u64 exception_events64[SFE_IPV6_EXCEPTION_EVENT_LAST]; + + /* + * Control state. + */ + struct kobject *sys_sfe_ipv6; /* sysfs linkage */ + int debug_dev; /* Major number of the debug char device */ + u32 debug_read_seq; /* sequence number for debug dump */ +}; + +/* + * Enumeration of the XML output. + */ +enum sfe_ipv6_debug_xml_states { + SFE_IPV6_DEBUG_XML_STATE_START, + SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_START, + SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_CONNECTION, + SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_END, + SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_START, + SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION, + SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_END, + SFE_IPV6_DEBUG_XML_STATE_STATS, + SFE_IPV6_DEBUG_XML_STATE_END, + SFE_IPV6_DEBUG_XML_STATE_DONE +}; + +/* + * XML write state. + */ +struct sfe_ipv6_debug_xml_write_state { + enum sfe_ipv6_debug_xml_states state; + /* XML output file state machine state */ + int iter_exception; /* Next exception iterator */ +}; + +typedef bool (*sfe_ipv6_debug_xml_write_method_t)(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length, + int *total_read, struct sfe_ipv6_debug_xml_write_state *ws); + +static struct sfe_ipv6 __si6; + +/* + * sfe_ipv6_get_debug_dev() + */ +static ssize_t sfe_ipv6_get_debug_dev(struct device *dev, struct device_attribute *attr, char *buf); + +/* + * sysfs attributes. 
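+ *
+ * As in the IPv4 module, reading this attribute (e.g. via
+ * "cat /sys/sfe_ipv6/debug_dev", path assumed) returns the major number
+ * allocated for the debug character device.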
+ */ +static const struct device_attribute sfe_ipv6_debug_dev_attr = + __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv6_get_debug_dev, NULL); + +/* + * sfe_ipv6_is_ext_hdr() + * check if we recognize ipv6 extension header + */ +static inline bool sfe_ipv6_is_ext_hdr(u8 hdr) +{ + return (hdr == SFE_IPV6_EXT_HDR_HOP) || + (hdr == SFE_IPV6_EXT_HDR_ROUTING) || + (hdr == SFE_IPV6_EXT_HDR_FRAG) || + (hdr == SFE_IPV6_EXT_HDR_AH) || + (hdr == SFE_IPV6_EXT_HDR_DST) || + (hdr == SFE_IPV6_EXT_HDR_MH); +} + +/* + * sfe_ipv6_change_dsfield() + * change dscp field in IPv6 packet + */ +static inline void sfe_ipv6_change_dsfield(struct sfe_ipv6_ip_hdr *iph, u8 dscp) +{ + __be16 *p = (__be16 *)iph; + + *p = ((*p & htons(SFE_IPV6_DSCP_MASK)) | htons((u16)dscp << 4)); +} + +/* + * sfe_ipv6_get_connection_match_hash() + * Generate the hash used in connection match lookups. + */ +static inline unsigned int sfe_ipv6_get_connection_match_hash(struct net_device *dev, u8 protocol, + struct sfe_ipv6_addr *src_ip, __be16 src_port, + struct sfe_ipv6_addr *dest_ip, __be16 dest_port) +{ + u32 idx, hash = 0; + size_t dev_addr = (size_t)dev; + + for (idx = 0; idx < 4; idx++) { + hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx]; + } + hash = ((u32)dev_addr) ^ hash ^ protocol ^ ntohs(src_port ^ dest_port); + return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK; +} + +/* + * sfe_ipv6_find_connection_match() + * Get the IPv6 flow match info that corresponds to a particular 5-tuple. + * + * On entry we must be holding the lock that protects the hash table. + */ +static struct sfe_ipv6_connection_match * +sfe_ipv6_find_connection_match(struct sfe_ipv6 *si, struct net_device *dev, u8 protocol, + struct sfe_ipv6_addr *src_ip, __be16 src_port, + struct sfe_ipv6_addr *dest_ip, __be16 dest_port) +{ + struct sfe_ipv6_connection_match *cm; + struct sfe_ipv6_connection_match *head; + unsigned int conn_match_idx; + + conn_match_idx = sfe_ipv6_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port); + cm = si->conn_match_hash[conn_match_idx]; + + /* + * If we don't have anything in this chain then bail. + */ + if (unlikely(!cm)) { + return NULL; + } + + /* + * Hopefully the first entry is the one we want. + */ + if ((cm->match_src_port == src_port) + && (cm->match_dest_port == dest_port) + && (sfe_ipv6_addr_equal(cm->match_src_ip, src_ip)) + && (sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip)) + && (cm->match_protocol == protocol) + && (cm->match_dev == dev)) { + si->connection_match_hash_hits++; + return cm; + } + + /* + * Unfortunately we didn't find it at head, so we search it in chain and + * move matching entry to the top of the hash chain. We presume that this + * will be reused again very quickly. + */ + head = cm; + do { + cm = cm->next; + } while (cm && (cm->match_src_port != src_port + || cm->match_dest_port != dest_port + || !sfe_ipv6_addr_equal(cm->match_src_ip, src_ip) + || !sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip) + || cm->match_protocol != protocol + || cm->match_dev != dev)); + + /* + * Not found then we're done. + */ + if (unlikely(!cm)) { + return NULL; + } + + /* + * We found a match so move it. + */ + if (cm->next) { + cm->next->prev = cm->prev; + } + cm->prev->next = cm->next; + cm->prev = NULL; + cm->next = head; + head->prev = cm; + si->conn_match_hash[conn_match_idx] = cm; + si->connection_match_hash_reorders++; + + return cm; +} + +/* + * sfe_ipv6_connection_match_update_summary_stats() + * Update the summary stats for a connection match entry. 
+ */ +static inline void sfe_ipv6_connection_match_update_summary_stats(struct sfe_ipv6_connection_match *cm) +{ + cm->rx_packet_count64 += cm->rx_packet_count; + cm->rx_packet_count = 0; + cm->rx_byte_count64 += cm->rx_byte_count; + cm->rx_byte_count = 0; +} + +/* + * sfe_ipv6_connection_match_compute_translations() + * Compute port and address translations for a connection match entry. + */ +static void sfe_ipv6_connection_match_compute_translations(struct sfe_ipv6_connection_match *cm) +{ + u32 diff[9]; + u32 *idx_32; + u16 *idx_16; + + /* + * Before we insert the entry look to see if this is tagged as doing address + * translations. If it is then work out the adjustment that we need to apply + * to the transport checksum. + */ + if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC) { + u32 adj = 0; + u32 carry = 0; + + /* + * Precompute an incremental checksum adjustment so we can + * edit packets in this stream very quickly. The algorithm is from RFC1624. + */ + idx_32 = diff; + *(idx_32++) = cm->match_src_ip->addr[0]; + *(idx_32++) = cm->match_src_ip->addr[1]; + *(idx_32++) = cm->match_src_ip->addr[2]; + *(idx_32++) = cm->match_src_ip->addr[3]; + + idx_16 = (u16 *)idx_32; + *(idx_16++) = cm->match_src_port; + *(idx_16++) = ~cm->xlate_src_port; + idx_32 = (u32 *)idx_16; + + *(idx_32++) = ~cm->xlate_src_ip->addr[0]; + *(idx_32++) = ~cm->xlate_src_ip->addr[1]; + *(idx_32++) = ~cm->xlate_src_ip->addr[2]; + *(idx_32++) = ~cm->xlate_src_ip->addr[3]; + + /* + * When we compute this fold it down to a 16-bit offset + * as that way we can avoid having to do a double + * folding of the twos-complement result because the + * addition of 2 16-bit values cannot cause a double + * wrap-around! + */ + for (idx_32 = diff; idx_32 < diff + 9; idx_32++) { + u32 w = *idx_32; + adj += carry; + adj += w; + carry = (w > adj); + } + adj += carry; + adj = (adj & 0xffff) + (adj >> 16); + adj = (adj & 0xffff) + (adj >> 16); + cm->xlate_src_csum_adjustment = (u16)adj; + } + + if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST) { + u32 adj = 0; + u32 carry = 0; + + /* + * Precompute an incremental checksum adjustment so we can + * edit packets in this stream very quickly. The algorithm is from RFC1624. + */ + idx_32 = diff; + *(idx_32++) = cm->match_dest_ip->addr[0]; + *(idx_32++) = cm->match_dest_ip->addr[1]; + *(idx_32++) = cm->match_dest_ip->addr[2]; + *(idx_32++) = cm->match_dest_ip->addr[3]; + + idx_16 = (u16 *)idx_32; + *(idx_16++) = cm->match_dest_port; + *(idx_16++) = ~cm->xlate_dest_port; + idx_32 = (u32 *)idx_16; + + *(idx_32++) = ~cm->xlate_dest_ip->addr[0]; + *(idx_32++) = ~cm->xlate_dest_ip->addr[1]; + *(idx_32++) = ~cm->xlate_dest_ip->addr[2]; + *(idx_32++) = ~cm->xlate_dest_ip->addr[3]; + + /* + * When we compute this fold it down to a 16-bit offset + * as that way we can avoid having to do a double + * folding of the twos-complement result because the + * addition of 2 16-bit values cannot cause a double + * wrap-around! + */ + for (idx_32 = diff; idx_32 < diff + 9; idx_32++) { + u32 w = *idx_32; + adj += carry; + adj += w; + carry = (w > adj); + } + adj += carry; + adj = (adj & 0xffff) + (adj >> 16); + adj = (adj & 0xffff) + (adj >> 16); + cm->xlate_dest_csum_adjustment = (u16)adj; + } +} + +/* + * sfe_ipv6_update_summary_stats() + * Update the summary stats. 
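+ *
+ * Callers are expected to hold si->lock.  The usual pattern, as in the
+ * debug read and periodic sync paths of the IPv4 module, is:
+ *
+ *	spin_lock_bh(&si->lock);
+ *	sfe_ipv6_update_summary_stats(si);
+ *	(read the 64-bit totals)
+ *	spin_unlock_bh(&si->lock);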
+ */ +static void sfe_ipv6_update_summary_stats(struct sfe_ipv6 *si) +{ + int i; + + si->connection_create_requests64 += si->connection_create_requests; + si->connection_create_requests = 0; + si->connection_create_collisions64 += si->connection_create_collisions; + si->connection_create_collisions = 0; + si->connection_destroy_requests64 += si->connection_destroy_requests; + si->connection_destroy_requests = 0; + si->connection_destroy_misses64 += si->connection_destroy_misses; + si->connection_destroy_misses = 0; + si->connection_match_hash_hits64 += si->connection_match_hash_hits; + si->connection_match_hash_hits = 0; + si->connection_match_hash_reorders64 += si->connection_match_hash_reorders; + si->connection_match_hash_reorders = 0; + si->connection_flushes64 += si->connection_flushes; + si->connection_flushes = 0; + si->packets_forwarded64 += si->packets_forwarded; + si->packets_forwarded = 0; + si->packets_not_forwarded64 += si->packets_not_forwarded; + si->packets_not_forwarded = 0; + + for (i = 0; i < SFE_IPV6_EXCEPTION_EVENT_LAST; i++) { + si->exception_events64[i] += si->exception_events[i]; + si->exception_events[i] = 0; + } +} + +/* + * sfe_ipv6_insert_connection_match() + * Insert a connection match into the hash. + * + * On entry we must be holding the lock that protects the hash table. + */ +static inline void sfe_ipv6_insert_connection_match(struct sfe_ipv6 *si, + struct sfe_ipv6_connection_match *cm) +{ + struct sfe_ipv6_connection_match **hash_head; + struct sfe_ipv6_connection_match *prev_head; + unsigned int conn_match_idx + = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol, + cm->match_src_ip, cm->match_src_port, + cm->match_dest_ip, cm->match_dest_port); + + hash_head = &si->conn_match_hash[conn_match_idx]; + prev_head = *hash_head; + cm->prev = NULL; + if (prev_head) { + prev_head->prev = cm; + } + + cm->next = prev_head; + *hash_head = cm; + +#ifdef CONFIG_NF_FLOW_COOKIE + if (!si->flow_cookie_enable || !(cm->flags & (SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC | SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST))) + return; + + /* + * Configure hardware to put a flow cookie in packet of this flow, + * then we can accelerate the lookup process when we received this packet. + */ + for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) { + struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx]; + + if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) { + sfe_ipv6_flow_cookie_set_func_t func; + + rcu_read_lock(); + func = rcu_dereference(si->flow_cookie_set_func); + if (func) { + if (!func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port, + cm->match_dest_ip->addr, cm->match_dest_port, conn_match_idx)) { + entry->match = cm; + cm->flow_cookie = conn_match_idx; + } else { + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL]++; + } + } + rcu_read_unlock(); + + break; + } + } +#endif +} + +/* + * sfe_ipv6_remove_connection_match() + * Remove a connection match object from the hash. + * + * On entry we must be holding the lock that protects the hash table. 
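+ *
+ * Note that flow cookie slot 0 means "no cookie assigned": the allocation
+ * loop in sfe_ipv6_insert_connection_match() starts at index 1, and the
+ * removal path below resets cm->flow_cookie to 0 when it releases a slot.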
+ */
+static inline void sfe_ipv6_remove_connection_match(struct sfe_ipv6 *si, struct sfe_ipv6_connection_match *cm)
+{
+#ifdef CONFIG_NF_FLOW_COOKIE
+	if (si->flow_cookie_enable) {
+		/*
+		 * Tell hardware that we no longer need a flow cookie in packets of this flow
+		 */
+		unsigned int conn_match_idx;
+
+		for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
+			struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
+
+			if (cm == entry->match) {
+				sfe_ipv6_flow_cookie_set_func_t func;
+
+				rcu_read_lock();
+				func = rcu_dereference(si->flow_cookie_set_func);
+				if (func) {
+					func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port,
+					     cm->match_dest_ip->addr, cm->match_dest_port, 0);
+				}
+				rcu_read_unlock();
+
+				cm->flow_cookie = 0;
+				entry->match = NULL;
+				entry->last_clean_time = jiffies;
+				break;
+			}
+		}
+	}
+#endif
+
+	/*
+	 * Unlink the connection match entry from the hash.
+	 */
+	if (cm->prev) {
+		cm->prev->next = cm->next;
+	} else {
+		unsigned int conn_match_idx
+			= sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol,
+							     cm->match_src_ip, cm->match_src_port,
+							     cm->match_dest_ip, cm->match_dest_port);
+		si->conn_match_hash[conn_match_idx] = cm->next;
+	}
+
+	if (cm->next) {
+		cm->next->prev = cm->prev;
+	}
+
+	/*
+	 * If the connection match entry is in the active list remove it.
+	 */
+	if (cm->active) {
+		if (likely(cm->active_prev)) {
+			cm->active_prev->active_next = cm->active_next;
+		} else {
+			si->active_head = cm->active_next;
+		}
+
+		if (likely(cm->active_next)) {
+			cm->active_next->active_prev = cm->active_prev;
+		} else {
+			si->active_tail = cm->active_prev;
+		}
+	}
+}
+
+/*
+ * sfe_ipv6_get_connection_hash()
+ *	Generate the hash used in connection lookups.
+ */
+static inline unsigned int sfe_ipv6_get_connection_hash(u8 protocol, struct sfe_ipv6_addr *src_ip, __be16 src_port,
+							struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
+{
+	u32 idx, hash = 0;
+
+	for (idx = 0; idx < 4; idx++) {
+		hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx];
+	}
+	hash = hash ^ protocol ^ ntohs(src_port ^ dest_port);
+	return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK;
+}
+
+/*
+ * sfe_ipv6_find_connection()
+ *	Get the IPv6 connection info that corresponds to a particular 5-tuple.
+ *
+ * On entry we must be holding the lock that protects the hash table.
+ */
+static inline struct sfe_ipv6_connection *sfe_ipv6_find_connection(struct sfe_ipv6 *si, u32 protocol,
+								   struct sfe_ipv6_addr *src_ip, __be16 src_port,
+								   struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
+{
+	struct sfe_ipv6_connection *c;
+	unsigned int conn_idx = sfe_ipv6_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
+	c = si->conn_hash[conn_idx];
+
+	/*
+	 * If we don't have anything in this chain then bail.
+	 */
+	if (unlikely(!c)) {
+		return NULL;
+	}
+
+	/*
+	 * Hopefully the first entry is the one we want.
+	 */
+	if ((c->src_port == src_port)
+	    && (c->dest_port == dest_port)
+	    && (sfe_ipv6_addr_equal(c->src_ip, src_ip))
+	    && (sfe_ipv6_addr_equal(c->dest_ip, dest_ip))
+	    && (c->protocol == protocol)) {
+		return c;
+	}
+
+	/*
+	 * Unfortunately we didn't find it at head, so we search it in chain.
+ */ + do { + c = c->next; + } while (c && (c->src_port != src_port + || c->dest_port != dest_port + || !sfe_ipv6_addr_equal(c->src_ip, src_ip) + || !sfe_ipv6_addr_equal(c->dest_ip, dest_ip) + || c->protocol != protocol)); + + /* + * Will need connection entry for next create/destroy metadata, + * So no need to re-order entry for these requests + */ + return c; +} + +/* + * sfe_ipv6_mark_rule() + * Updates the mark for a current offloaded connection + * + * Will take hash lock upon entry + */ +void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark) +{ + struct sfe_ipv6 *si = &__si6; + struct sfe_ipv6_connection *c; + + spin_lock_bh(&si->lock); + c = sfe_ipv6_find_connection(si, mark->protocol, + mark->src_ip.ip6, mark->src_port, + mark->dest_ip.ip6, mark->dest_port); + if (c) { + WARN_ON((0 != c->mark) && (0 == mark->mark)); + c->mark = mark->mark; + } + spin_unlock_bh(&si->lock); + + if (c) { + DEBUG_TRACE("Matching connection found for mark, " + "setting from %08x to %08x\n", + c->mark, mark->mark); + } +} + +/* + * sfe_ipv6_insert_connection() + * Insert a connection into the hash. + * + * On entry we must be holding the lock that protects the hash table. + */ +static void sfe_ipv6_insert_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c) +{ + struct sfe_ipv6_connection **hash_head; + struct sfe_ipv6_connection *prev_head; + unsigned int conn_idx; + + /* + * Insert entry into the connection hash. + */ + conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port, + c->dest_ip, c->dest_port); + hash_head = &si->conn_hash[conn_idx]; + prev_head = *hash_head; + c->prev = NULL; + if (prev_head) { + prev_head->prev = c; + } + + c->next = prev_head; + *hash_head = c; + + /* + * Insert entry into the "all connections" list. + */ + if (si->all_connections_tail) { + c->all_connections_prev = si->all_connections_tail; + si->all_connections_tail->all_connections_next = c; + } else { + c->all_connections_prev = NULL; + si->all_connections_head = c; + } + + si->all_connections_tail = c; + c->all_connections_next = NULL; + si->num_connections++; + + /* + * Insert the connection match objects too. + */ + sfe_ipv6_insert_connection_match(si, c->original_match); + sfe_ipv6_insert_connection_match(si, c->reply_match); +} + +/* + * sfe_ipv6_remove_connection() + * Remove a sfe_ipv6_connection object from the hash. + * + * On entry we must be holding the lock that protects the hash table. + */ +static void sfe_ipv6_remove_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c) +{ + /* + * Remove the connection match objects. + */ + sfe_ipv6_remove_connection_match(si, c->reply_match); + sfe_ipv6_remove_connection_match(si, c->original_match); + + /* + * Unlink the connection. + */ + if (c->prev) { + c->prev->next = c->next; + } else { + unsigned int conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port, + c->dest_ip, c->dest_port); + si->conn_hash[conn_idx] = c->next; + } + + if (c->next) { + c->next->prev = c->prev; + } + + /* + * Unlink connection from all_connections list + */ + if (c->all_connections_prev) { + c->all_connections_prev->all_connections_next = c->all_connections_next; + } else { + si->all_connections_head = c->all_connections_next; + } + + if (c->all_connections_next) { + c->all_connections_next->all_connections_prev = c->all_connections_prev; + } else { + si->all_connections_tail = c->all_connections_prev; + } + + si->num_connections--; +} + +/* + * sfe_ipv6_gen_sync_connection() + * Sync a connection. 
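+ *
+ * The sfe_connection_sync snapshot filled in here is what the registered
+ * sync_rule_callback consumes: the new packet/byte counts together with
+ * delta_jiffies (time since the previous sync) let a connection manager
+ * keep its own accounting and timeouts in step with the offloaded flow.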
+ * + * On entry to this function we expect that the lock for the connection is either + * already held or isn't required. + */ +static void sfe_ipv6_gen_sync_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c, + struct sfe_connection_sync *sis, sfe_sync_reason_t reason, + u64 now_jiffies) +{ + struct sfe_ipv6_connection_match *original_cm; + struct sfe_ipv6_connection_match *reply_cm; + + /* + * Fill in the update message. + */ + sis->is_v6 = 1; + sis->protocol = c->protocol; + sis->src_ip.ip6[0] = c->src_ip[0]; + sis->src_ip_xlate.ip6[0] = c->src_ip_xlate[0]; + sis->dest_ip.ip6[0] = c->dest_ip[0]; + sis->dest_ip_xlate.ip6[0] = c->dest_ip_xlate[0]; + sis->src_port = c->src_port; + sis->src_port_xlate = c->src_port_xlate; + sis->dest_port = c->dest_port; + sis->dest_port_xlate = c->dest_port_xlate; + + original_cm = c->original_match; + reply_cm = c->reply_match; + sis->src_td_max_window = original_cm->protocol_state.tcp.max_win; + sis->src_td_end = original_cm->protocol_state.tcp.end; + sis->src_td_max_end = original_cm->protocol_state.tcp.max_end; + sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win; + sis->dest_td_end = reply_cm->protocol_state.tcp.end; + sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end; + + sis->src_new_packet_count = original_cm->rx_packet_count; + sis->src_new_byte_count = original_cm->rx_byte_count; + sis->dest_new_packet_count = reply_cm->rx_packet_count; + sis->dest_new_byte_count = reply_cm->rx_byte_count; + + sfe_ipv6_connection_match_update_summary_stats(original_cm); + sfe_ipv6_connection_match_update_summary_stats(reply_cm); + + sis->src_dev = original_cm->match_dev; + sis->src_packet_count = original_cm->rx_packet_count64; + sis->src_byte_count = original_cm->rx_byte_count64; + + sis->dest_dev = reply_cm->match_dev; + sis->dest_packet_count = reply_cm->rx_packet_count64; + sis->dest_byte_count = reply_cm->rx_byte_count64; + + sis->reason = reason; + + /* + * Get the time increment since our last sync. + */ + sis->delta_jiffies = now_jiffies - c->last_sync_jiffies; + c->last_sync_jiffies = now_jiffies; +} + +/* + * sfe_ipv6_flush_connection() + * Flush a connection and free all associated resources. + * + * We need to be called with bottom halves disabled locally as we need to acquire + * the connection hash lock and release it again. In general we're actually called + * from within a BH and so we're fine, but we're also called when connections are + * torn down. + */ +static void sfe_ipv6_flush_connection(struct sfe_ipv6 *si, + struct sfe_ipv6_connection *c, + sfe_sync_reason_t reason) +{ + struct sfe_connection_sync sis; + u64 now_jiffies; + sfe_sync_rule_callback_t sync_rule_callback; + + rcu_read_lock(); + spin_lock_bh(&si->lock); + si->connection_flushes++; + sync_rule_callback = rcu_dereference(si->sync_rule_callback); + spin_unlock_bh(&si->lock); + + if (sync_rule_callback) { + /* + * Generate a sync message and then sync. + */ + now_jiffies = get_jiffies_64(); + sfe_ipv6_gen_sync_connection(si, c, &sis, reason, now_jiffies); + sync_rule_callback(&sis); + } + + rcu_read_unlock(); + + /* + * Release our hold of the source and dest devices and free the memory + * for our connection objects. + */ + dev_put(c->original_dev); + dev_put(c->reply_dev); + kfree(c->original_match); + kfree(c->reply_match); + kfree(c); +} + +/* + * sfe_ipv6_recv_udp() + * Handle UDP packet receives and forwarding. 
+ */
+static int sfe_ipv6_recv_udp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
+			     unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
+{
+	struct sfe_ipv6_udp_hdr *udph;
+	struct sfe_ipv6_addr *src_ip;
+	struct sfe_ipv6_addr *dest_ip;
+	__be16 src_port;
+	__be16 dest_port;
+	struct sfe_ipv6_connection_match *cm;
+	struct net_device *xmit_dev;
+
+	/*
+	 * Is our packet too short to contain a valid UDP header?
+	 */
+	if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_udp_hdr) + ihl))) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("packet too short for UDP header\n");
+		return 0;
+	}
+
+	/*
+	 * Read the IP address and port information. Read the IP header data first
+	 * because we've almost certainly got that in the cache. We may not yet have
+	 * the UDP header cached though so allow more time for any prefetching.
+	 */
+	src_ip = &iph->saddr;
+	dest_ip = &iph->daddr;
+
+	udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl);
+	src_port = udph->source;
+	dest_port = udph->dest;
+
+	spin_lock_bh(&si->lock);
+
+	/*
+	 * Look for a connection match.
+	 */
+#ifdef CONFIG_NF_FLOW_COOKIE
+	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
+	if (unlikely(!cm)) {
+		cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
+	}
+#else
+	cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
+#endif
+	if (unlikely(!cm)) {
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("no connection found\n");
+		return 0;
+	}
+
+	/*
+	 * If our packet has been marked as "flush on find" we can't actually
+	 * forward it in the fast path, but now that we've found an associated
+	 * connection we can flush that out before we process the packet.
+	 */
+	if (unlikely(flush_on_find)) {
+		struct sfe_ipv6_connection *c = cm->connection;
+		sfe_ipv6_remove_connection(si, c);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("flush on find\n");
+		sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+#ifdef CONFIG_XFRM
+	/*
+	 * We can't accelerate the flow in this direction, just let it go
+	 * through the slow path.
+	 */
+	if (unlikely(!cm->flow_accel)) {
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+		return 0;
+	}
+#endif
+
+	/*
+	 * Does our hop_limit allow forwarding?
+	 */
+	if (unlikely(iph->hop_limit < 2)) {
+		struct sfe_ipv6_connection *c = cm->connection;
+		sfe_ipv6_remove_connection(si, c);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("hop_limit too low\n");
+		sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+	/*
+	 * If our packet is larger than the MTU of the transmit interface then
+	 * we can't forward it easily.
+ */ + if (unlikely(len > cm->xmit_dev_mtu)) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("larger than mtu\n"); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * From this point on we're good to modify the packet. + */ + + /* + * Check if skb was cloned. If it was, unshare it. Because + * the data area is going to be written in this path and we don't want to + * change the cloned skb's data section. + */ + if (unlikely(skb_cloned(skb))) { + DEBUG_TRACE("%p: skb is a cloned skb\n", skb); + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) { + DEBUG_WARN("Failed to unshare the cloned skb\n"); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + return 0; + } + + /* + * Update the iph and udph pointers with the unshared skb's data area. + */ + iph = (struct sfe_ipv6_ip_hdr *)skb->data; + udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl); + } + + /* + * Update DSCP + */ + if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) { + sfe_ipv6_change_dsfield(iph, cm->dscp); + } + + /* + * Decrement our hop_limit. + */ + iph->hop_limit -= 1; + + /* + * Do we have to perform translations of the source address/port? + */ + if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) { + u16 udp_csum; + + iph->saddr = cm->xlate_src_ip[0]; + udph->source = cm->xlate_src_port; + + /* + * Do we have a non-zero UDP checksum? If we do then we need + * to update it. + */ + udp_csum = udph->check; + if (likely(udp_csum)) { + u32 sum = udp_csum + cm->xlate_src_csum_adjustment; + sum = (sum & 0xffff) + (sum >> 16); + udph->check = (u16)sum; + } + } + + /* + * Do we have to perform translations of the destination address/port? + */ + if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) { + u16 udp_csum; + + iph->daddr = cm->xlate_dest_ip[0]; + udph->dest = cm->xlate_dest_port; + + /* + * Do we have a non-zero UDP checksum? If we do then we need + * to update it. + */ + udp_csum = udph->check; + if (likely(udp_csum)) { + u32 sum = udp_csum + cm->xlate_dest_csum_adjustment; + sum = (sum & 0xffff) + (sum >> 16); + udph->check = (u16)sum; + } + } + + /* + * Update traffic stats. + */ + cm->rx_packet_count++; + cm->rx_byte_count += len; + + /* + * If we're not already on the active list then insert ourselves at the tail + * of the current list. + */ + if (unlikely(!cm->active)) { + cm->active = true; + cm->active_prev = si->active_tail; + if (likely(si->active_tail)) { + si->active_tail->active_next = cm; + } else { + si->active_head = cm; + } + si->active_tail = cm; + } + + xmit_dev = cm->xmit_dev; + skb->dev = xmit_dev; + + /* + * Check to see if we need to write a header. + */ + if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) { + if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) { + dev_hard_header(skb, xmit_dev, ETH_P_IPV6, + cm->xmit_dest_mac, cm->xmit_src_mac, len); + } else { + /* + * For the simple case we write this really fast. 
+			 */
+			struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN);
+			eth->h_proto = htons(ETH_P_IPV6);
+			eth->h_dest[0] = cm->xmit_dest_mac[0];
+			eth->h_dest[1] = cm->xmit_dest_mac[1];
+			eth->h_dest[2] = cm->xmit_dest_mac[2];
+			eth->h_source[0] = cm->xmit_src_mac[0];
+			eth->h_source[1] = cm->xmit_src_mac[1];
+			eth->h_source[2] = cm->xmit_src_mac[2];
+		}
+	}
+
+	/*
+	 * Update priority of skb.
+	 */
+	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
+		skb->priority = cm->priority;
+	}
+
+	/*
+	 * Mark outgoing packet.
+	 */
+	skb->mark = cm->connection->mark;
+	if (skb->mark) {
+		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
+	}
+
+	si->packets_forwarded++;
+	spin_unlock_bh(&si->lock);
+
+	/*
+	 * We're going to check for GSO flags when we transmit the packet so
+	 * start fetching the necessary cache line now.
+	 */
+	prefetch(skb_shinfo(skb));
+
+	/*
+	 * Mark that this packet has been fast forwarded.
+	 */
+	skb->fast_forwarded = 1;
+
+	/*
+	 * Send the packet on its way.
+	 */
+	dev_queue_xmit(skb);
+
+	return 1;
+}
+
+/*
+ * sfe_ipv6_process_tcp_option_sack()
+ *	Parse the TCP SACK option and update the ack accordingly.
+ */
+static bool sfe_ipv6_process_tcp_option_sack(const struct sfe_ipv6_tcp_hdr *th, const u32 data_offs,
+					     u32 *ack)
+{
+	u32 length = sizeof(struct sfe_ipv6_tcp_hdr);
+	u8 *ptr = (u8 *)th + length;
+
+	/*
+	 * Ignore processing if the TCP packet has only the TIMESTAMP option.
+	 */
+	if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
+	    && likely(ptr[0] == TCPOPT_NOP)
+	    && likely(ptr[1] == TCPOPT_NOP)
+	    && likely(ptr[2] == TCPOPT_TIMESTAMP)
+	    && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
+		return true;
+	}
+
+	/*
+	 * TCP options. Parse the SACK option.
+	 */
+	while (length < data_offs) {
+		u8 size;
+		u8 kind;
+
+		ptr = (u8 *)th + length;
+		kind = *ptr;
+
+		/*
+		 * NOP, for padding.
+		 * Handled before the other options so we can escape quickly and
+		 * avoid reading the size byte.
+		 */
+		if (kind == TCPOPT_NOP) {
+			length++;
+			continue;
+		}
+
+		if (kind == TCPOPT_SACK) {
+			u32 sack = 0;
+			u8 re = 1 + 1;
+
+			size = *(ptr + 1);
+			if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
+			    || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
+			    || (size > (data_offs - length))) {
+				return false;
+			}
+
+			re += 4;
+			while (re < size) {
+				u32 sack_re;
+				u8 *sptr = ptr + re;
+				sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
+				if (sack_re > sack) {
+					sack = sack_re;
+				}
+				re += TCPOLEN_SACK_PERBLOCK;
+			}
+			if (sack > *ack) {
+				*ack = sack;
+			}
+			length += size;
+			continue;
+		}
+		if (kind == TCPOPT_EOL) {
+			return true;
+		}
+		size = *(ptr + 1);
+		if (size < 2) {
+			return false;
+		}
+		length += size;
+	}
+
+	return true;
+}
+
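The SACK walk above is dense; the following standalone userspace sketch (an illustration only, not part of this patch; all names are hypothetical) mirrors its core logic of scanning a TCP option block and keeping the highest SACK right edge, per RFC 2018 layout (kind byte, length byte, then 8-byte left/right edge pairs):

```c
#include <stdint.h>
#include <stdio.h>

#define TCPOPT_NOP   1
#define TCPOPT_SACK  5

static uint32_t highest_sack_edge(const uint8_t *opt, uint32_t opt_len)
{
	uint32_t i = 0, best = 0;

	while (i < opt_len) {
		uint8_t kind = opt[i];

		if (kind == TCPOPT_NOP) {	/* single-byte padding */
			i++;
			continue;
		}
		if (i + 1 >= opt_len || opt[i + 1] < 2)
			break;			/* malformed option */
		if (kind == TCPOPT_SACK) {
			uint8_t size = opt[i + 1];
			/* each block: 4-byte left edge, then 4-byte right edge */
			for (uint32_t b = i + 2; b + 8 <= (uint32_t)i + size; b += 8) {
				uint32_t right = (opt[b + 4] << 24) | (opt[b + 5] << 16) |
						 (opt[b + 6] << 8) | opt[b + 7];
				if (right > best)
					best = right;
			}
		}
		i += opt[i + 1];
	}
	return best;
}

int main(void)
{
	/* one SACK block: left edge 100, right edge 200 */
	uint8_t opt[] = { TCPOPT_NOP, TCPOPT_NOP, TCPOPT_SACK, 10,
			  0, 0, 0, 100, 0, 0, 0, 200 };
	printf("%u\n", highest_sack_edge(opt, sizeof(opt)));	/* prints 200 */
	return 0;
}
```

+/*
+ * sfe_ipv6_recv_tcp()
+ *	Handle TCP packet receives and forwarding.
+ */
+static int sfe_ipv6_recv_tcp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
+			     unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
+{
+	struct sfe_ipv6_tcp_hdr *tcph;
+	struct sfe_ipv6_addr *src_ip;
+	struct sfe_ipv6_addr *dest_ip;
+	__be16 src_port;
+	__be16 dest_port;
+	struct sfe_ipv6_connection_match *cm;
+	struct sfe_ipv6_connection_match *counter_cm;
+	u32 flags;
+	struct net_device *xmit_dev;
+
+	/*
+	 * Is our packet too short to contain a valid TCP header?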
+	 */
+	if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_tcp_hdr) + ihl))) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("packet too short for TCP header\n");
+		return 0;
+	}
+
+	/*
+	 * Read the IP address and port information. Read the IP header data first
+	 * because we've almost certainly got that in the cache. We may not yet have
+	 * the TCP header cached though so allow more time for any prefetching.
+	 */
+	src_ip = &iph->saddr;
+	dest_ip = &iph->daddr;
+
+	tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
+	src_port = tcph->source;
+	dest_port = tcph->dest;
+	flags = tcp_flag_word(tcph);
+
+	spin_lock_bh(&si->lock);
+
+	/*
+	 * Look for a connection match.
+	 */
+#ifdef CONFIG_NF_FLOW_COOKIE
+	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
+	if (unlikely(!cm)) {
+		cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
+	}
+#else
+	cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
+#endif
+	if (unlikely(!cm)) {
+		/*
+		 * We didn't get a connection but as TCP is connection-oriented that
+		 * may be because this is a non-fast connection (not yet established).
+		 * For diagnostic purposes we differentiate this here.
+		 */
+		if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
+			si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
+			si->packets_not_forwarded++;
+			spin_unlock_bh(&si->lock);
+
+			DEBUG_TRACE("no connection found - fast flags\n");
+			return 0;
+		}
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
+			    flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
+		return 0;
+	}
+
+	/*
+	 * If our packet has been marked as "flush on find" we can't actually
+	 * forward it in the fast path, but now that we've found an associated
+	 * connection we can flush that out before we process the packet.
+	 */
+	if (unlikely(flush_on_find)) {
+		struct sfe_ipv6_connection *c = cm->connection;
+		sfe_ipv6_remove_connection(si, c);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("flush on find\n");
+		sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+#ifdef CONFIG_XFRM
+	/*
+	 * We can't accelerate the flow in this direction, just let it go
+	 * through the slow path.
+	 */
+	if (unlikely(!cm->flow_accel)) {
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+		return 0;
+	}
+#endif
+
+	/*
+	 * Does our hop_limit allow forwarding?
+	 */
+	if (unlikely(iph->hop_limit < 2)) {
+		struct sfe_ipv6_connection *c = cm->connection;
+		sfe_ipv6_remove_connection(si, c);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("hop_limit too low\n");
+		sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
+		return 0;
+	}
+
+	/*
+	 * If our packet is larger than the MTU of the transmit interface then
+	 * we can't forward it easily.
+ */ + if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("larger than mtu\n"); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN + * set is not a fast path packet. + */ + if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("TCP flags: 0x%x are not fast\n", + flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + counter_cm = cm->counter_match; + + /* + * Are we doing sequence number checking? + */ + if (likely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) { + u32 seq; + u32 ack; + u32 sack; + u32 data_offs; + u32 end; + u32 left_edge; + u32 scaled_win; + u32 max_end; + + /* + * Is our sequence fully past the right hand edge of the window? + */ + seq = ntohl(tcph->seq); + if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("seq: %u exceeds right edge: %u\n", + seq, cm->protocol_state.tcp.max_end + 1); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Check that our TCP data offset isn't too short. + */ + data_offs = tcph->doff << 2; + if (unlikely(data_offs < sizeof(struct sfe_ipv6_tcp_hdr))) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Update ACK according to any SACK option. + */ + ack = ntohl(tcph->ack_seq); + sack = ack; + if (unlikely(!sfe_ipv6_process_tcp_option_sack(tcph, data_offs, &sack))) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("TCP option SACK size is wrong\n"); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Check that our TCP data offset isn't past the end of the packet. 
+ */ + data_offs += sizeof(struct sfe_ipv6_ip_hdr); + if (unlikely(len < data_offs)) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n", + data_offs, len); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + end = seq + len - data_offs; + + /* + * Is our sequence fully before the left hand edge of the window? + */ + if (unlikely((s32)(end - (cm->protocol_state.tcp.end + - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("seq: %u before left edge: %u\n", + end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Are we acking data that is to the right of what has been sent? + */ + if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("ack: %u exceeds right edge: %u\n", + sack, counter_cm->protocol_state.tcp.end + 1); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Is our ack too far before the left hand edge of the window? + */ + left_edge = counter_cm->protocol_state.tcp.end + - cm->protocol_state.tcp.max_win + - SFE_IPV6_TCP_MAX_ACK_WINDOW + - 1; + if (unlikely((s32)(sack - left_edge) < 0)) { + struct sfe_ipv6_connection *c = cm->connection; + sfe_ipv6_remove_connection(si, c); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge); + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH); + return 0; + } + + /* + * Have we just seen the largest window size yet for this connection? If yes + * then we need to record the new value. + */ + scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale; + scaled_win += (sack - ack); + if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) { + cm->protocol_state.tcp.max_win = scaled_win; + } + + /* + * If our sequence and/or ack numbers have advanced then record the new state. + */ + if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) { + cm->protocol_state.tcp.end = end; + } + + max_end = sack + scaled_win; + if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) { + counter_cm->protocol_state.tcp.max_end = max_end; + } + } + + /* + * From this point on we're good to modify the packet. + */ + + /* + * Check if skb was cloned. If it was, unshare it. Because + * the data area is going to be written in this path and we don't want to + * change the cloned skb's data section. 
+	 */
+	if (unlikely(skb_cloned(skb))) {
+		DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
+		skb = skb_unshare(skb, GFP_ATOMIC);
+		if (!skb) {
+			DEBUG_WARN("Failed to unshare the cloned skb\n");
+			si->exception_events[SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
+			si->packets_not_forwarded++;
+			spin_unlock_bh(&si->lock);
+
+			return 0;
+		}
+
+		/*
+		 * Update the iph and tcph pointers with the unshared skb's data area.
+		 */
+		iph = (struct sfe_ipv6_ip_hdr *)skb->data;
+		tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
+	}
+
+	/*
+	 * Update DSCP
+	 */
+	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
+		sfe_ipv6_change_dsfield(iph, cm->dscp);
+	}
+
+	/*
+	 * Decrement our hop_limit.
+	 */
+	iph->hop_limit -= 1;
+
+	/*
+	 * Do we have to perform translations of the source address/port?
+	 */
+	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
+		u16 tcp_csum;
+		u32 sum;
+
+		iph->saddr = cm->xlate_src_ip[0];
+		tcph->source = cm->xlate_src_port;
+
+		/*
+		 * Update the TCP checksum. Unlike UDP, a TCP checksum is
+		 * mandatory, so there is no zero-checksum special case.
+		 */
+		tcp_csum = tcph->check;
+		sum = tcp_csum + cm->xlate_src_csum_adjustment;
+		sum = (sum & 0xffff) + (sum >> 16);
+		tcph->check = (u16)sum;
+	}
+
+	/*
+	 * Do we have to perform translations of the destination address/port?
+	 */
+	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
+		u16 tcp_csum;
+		u32 sum;
+
+		iph->daddr = cm->xlate_dest_ip[0];
+		tcph->dest = cm->xlate_dest_port;
+
+		/*
+		 * Update the TCP checksum. Unlike UDP, a TCP checksum is
+		 * mandatory, so there is no zero-checksum special case.
+		 */
+		tcp_csum = tcph->check;
+		sum = tcp_csum + cm->xlate_dest_csum_adjustment;
+		sum = (sum & 0xffff) + (sum >> 16);
+		tcph->check = (u16)sum;
+	}
+
+	/*
+	 * Update traffic stats.
+	 */
+	cm->rx_packet_count++;
+	cm->rx_byte_count += len;
+
+	/*
+	 * If we're not already on the active list then insert ourselves at the tail
+	 * of the current list.
+	 */
+	if (unlikely(!cm->active)) {
+		cm->active = true;
+		cm->active_prev = si->active_tail;
+		if (likely(si->active_tail)) {
+			si->active_tail->active_next = cm;
+		} else {
+			si->active_head = cm;
+		}
+		si->active_tail = cm;
+	}
+
+	xmit_dev = cm->xmit_dev;
+	skb->dev = xmit_dev;
+
+	/*
+	 * Check to see if we need to write a header.
+	 */
+	if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
+		if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
+			dev_hard_header(skb, xmit_dev, ETH_P_IPV6,
+					cm->xmit_dest_mac, cm->xmit_src_mac, len);
+		} else {
+			/*
+			 * For the simple case we write this really fast.
+			 */
+			struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN);
+			eth->h_proto = htons(ETH_P_IPV6);
+			eth->h_dest[0] = cm->xmit_dest_mac[0];
+			eth->h_dest[1] = cm->xmit_dest_mac[1];
+			eth->h_dest[2] = cm->xmit_dest_mac[2];
+			eth->h_source[0] = cm->xmit_src_mac[0];
+			eth->h_source[1] = cm->xmit_src_mac[1];
+			eth->h_source[2] = cm->xmit_src_mac[2];
+		}
+	}
+
+	/*
+	 * Update priority of skb.
+	 */
+	if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
+		skb->priority = cm->priority;
+	}
+
+	/*
+	 * Mark outgoing packet.
+	 */
+	skb->mark = cm->connection->mark;
+	if (skb->mark) {
+		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
+	}
+
+	si->packets_forwarded++;
+	spin_unlock_bh(&si->lock);
+
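Both translation branches above apply a delta that was precomputed at rule-creation time (`cm->xlate_*_csum_adjustment`) and fold the carry once, RFC 1624 style. A standalone userspace sketch of that folding step (illustrative only, not part of the patch; the input values are hypothetical):

```c
#include <stdint.h>
#include <stdio.h>

/* Add a precomputed one's-complement delta to an existing checksum and
 * fold the carry back into the low 16 bits.  One fold is enough: the sum
 * of two 16-bit values produces at most a single carry bit. */
static uint16_t csum_apply_adjustment(uint16_t old_csum, uint16_t adjustment)
{
	uint32_t sum = (uint32_t)old_csum + adjustment;

	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	/* hypothetical old checksum and delta */
	printf("0x%04x\n", csum_apply_adjustment(0xfffe, 0x0003));	/* 0x0002 */
	return 0;
}
```

+	/*
+	 * We're going to check for GSO flags when we transmit the packet so
+	 * start fetching the necessary cache line now.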
+ */ + prefetch(skb_shinfo(skb)); + + /* + * Mark that this packet has been fast forwarded. + */ + skb->fast_forwarded = 1; + + /* + * Send the packet on its way. + */ + dev_queue_xmit(skb); + + return 1; +} + +/* + * sfe_ipv6_recv_icmp() + * Handle ICMP packet receives. + * + * ICMP packets aren't handled as a "fast path" and always have us process them + * through the default Linux stack. What we do need to do is look for any errors + * about connections we are handling in the fast path. If we find any such + * connections then we want to flush their state so that the ICMP error path + * within Linux has all of the correct state should it need it. + */ +static int sfe_ipv6_recv_icmp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev, + unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl) +{ + struct icmp6hdr *icmph; + struct sfe_ipv6_ip_hdr *icmp_iph; + struct sfe_ipv6_udp_hdr *icmp_udph; + struct sfe_ipv6_tcp_hdr *icmp_tcph; + struct sfe_ipv6_addr *src_ip; + struct sfe_ipv6_addr *dest_ip; + __be16 src_port; + __be16 dest_port; + struct sfe_ipv6_connection_match *cm; + struct sfe_ipv6_connection *c; + u8 next_hdr; + + /* + * Is our packet too short to contain a valid ICMP header? + */ + len -= ihl; + if (!pskb_may_pull(skb, ihl + sizeof(struct icmp6hdr))) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("packet too short for ICMP header\n"); + return 0; + } + + /* + * We only handle "destination unreachable" and "time exceeded" messages. + */ + icmph = (struct icmp6hdr *)(skb->data + ihl); + if ((icmph->icmp6_type != ICMPV6_DEST_UNREACH) + && (icmph->icmp6_type != ICMPV6_TIME_EXCEED)) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->icmp6_type); + return 0; + } + + /* + * Do we have the full embedded IP header? + * We should have 8 bytes of next L4 header - that's enough to identify + * the connection. + */ + len -= sizeof(struct icmp6hdr); + ihl += sizeof(struct icmp6hdr); + if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ip_hdr) + sizeof(struct sfe_ipv6_ext_hdr))) { + spin_lock_bh(&si->lock); + si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE]++; + si->packets_not_forwarded++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("Embedded IP header not complete\n"); + return 0; + } + + /* + * Is our embedded IP version wrong? 
+	 */
+	icmp_iph = (struct sfe_ipv6_ip_hdr *)(icmph + 1);
+	if (unlikely(icmp_iph->version != 6)) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
+		return 0;
+	}
+
+	len -= sizeof(struct sfe_ipv6_ip_hdr);
+	ihl += sizeof(struct sfe_ipv6_ip_hdr);
+	next_hdr = icmp_iph->nexthdr;
+	while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) {
+		struct sfe_ipv6_ext_hdr *ext_hdr;
+		unsigned int ext_hdr_len;
+
+		ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl);
+		if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) {
+			struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr;
+			unsigned int frag_off = ntohs(frag_hdr->frag_off);
+
+			if (frag_off & SFE_IPV6_FRAG_OFFSET) {
+				spin_lock_bh(&si->lock);
+				si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
+				si->packets_not_forwarded++;
+				spin_unlock_bh(&si->lock);
+
+				DEBUG_TRACE("non-initial fragment\n");
+				return 0;
+			}
+		}
+
+		ext_hdr_len = ext_hdr->hdr_len;
+		ext_hdr_len <<= 3;
+		ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr);
+		len -= ext_hdr_len;
+		ihl += ext_hdr_len;
+		/*
+		 * We should have 8 bytes of next header - that's enough to identify
+		 * the connection.
+		 */
+		if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
+			spin_lock_bh(&si->lock);
+			si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
+			si->packets_not_forwarded++;
+			spin_unlock_bh(&si->lock);
+
+			DEBUG_TRACE("extension header %d not complete\n", next_hdr);
+			return 0;
+		}
+
+		next_hdr = ext_hdr->next_hdr;
+	}
+
+	/*
+	 * Handle the embedded transport layer header.
+	 */
+	switch (next_hdr) {
+	case IPPROTO_UDP:
+		icmp_udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl);
+		src_port = icmp_udph->source;
+		dest_port = icmp_udph->dest;
+		break;
+
+	case IPPROTO_TCP:
+		icmp_tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
+		src_port = icmp_tcph->source;
+		dest_port = icmp_tcph->dest;
+		break;
+
+	default:
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", next_hdr);
+		return 0;
+	}
+
+	src_ip = &icmp_iph->saddr;
+	dest_ip = &icmp_iph->daddr;
+
+	spin_lock_bh(&si->lock);
+
+	/*
+	 * Look for a connection match. Note that we reverse the source and destination
+	 * here because our embedded message contains a packet that was sent in the
+	 * opposite direction to the one in which we just received it. It will have
+	 * been sent on the interface from which we received it though so that's still
+	 * ok to use. Note also that we use next_hdr, not icmp_iph->nexthdr, since
+	 * the latter may still name an extension header rather than the transport
+	 * protocol.
+	 */
+	cm = sfe_ipv6_find_connection_match(si, dev, next_hdr, dest_ip, dest_port, src_ip, src_port);
+	if (unlikely(!cm)) {
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("no connection found\n");
+		return 0;
+	}
+
+	/*
+	 * We found a connection so now remove it from the connection list and flush
+	 * its state.
+	 */
+	c = cm->connection;
+	sfe_ipv6_remove_connection(si, c);
+	si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
+	si->packets_not_forwarded++;
+	spin_unlock_bh(&si->lock);
+
+	sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
+	return 0;
+}
+
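The reversed lookup above is the key idea of the ICMP path: the quoted (embedded) packet travelled in the opposite direction to the error that reports it. A trivial userspace sketch of that tuple reversal (illustrative only, not part of the patch; all names and addresses are hypothetical):

```c
#include <stdio.h>

struct tuple {
	const char *sip, *dip;
	unsigned short sport, dport;
};

/* The flow that sent the offending packet is found by swapping
 * source and destination of the quoted inner header. */
static struct tuple reverse(struct tuple inner)
{
	struct tuple t = { inner.dip, inner.sip, inner.dport, inner.sport };
	return t;
}

int main(void)
{
	struct tuple inner = { "2001:db8::1", "2001:db8::2", 50000, 443 };
	struct tuple key = reverse(inner);

	printf("lookup: %s:%u -> %s:%u\n", key.sip, key.sport, key.dip, key.dport);
	return 0;
}
```

+/*
+ * sfe_ipv6_recv()
+ *	Handle packet receives and forwarding.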
+ *
+ * Returns 1 if the packet is forwarded or 0 if it isn't.
+ */
+int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sfe_ipv6 *si = &__si6;
+	unsigned int len;
+	unsigned int payload_len;
+	unsigned int ihl = sizeof(struct sfe_ipv6_ip_hdr);
+	bool flush_on_find = false;
+	struct sfe_ipv6_ip_hdr *iph;
+	u8 next_hdr;
+
+	/*
+	 * Check that we have space for an IP header and an upper-layer header here.
+	 */
+	len = skb->len;
+	if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("len: %u is too short\n", len);
+		return 0;
+	}
+
+	/*
+	 * Is our IP version wrong?
+	 */
+	iph = (struct sfe_ipv6_ip_hdr *)skb->data;
+	if (unlikely(iph->version != 6)) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_V6]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("IP version: %u\n", iph->version);
+		return 0;
+	}
+
+	/*
+	 * Does our datagram fit inside the skb?
+	 */
+	payload_len = ntohs(iph->payload_len);
+	if (unlikely(payload_len > (len - ihl))) {
+		spin_lock_bh(&si->lock);
+		si->exception_events[SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
+		si->packets_not_forwarded++;
+		spin_unlock_bh(&si->lock);
+
+		DEBUG_TRACE("payload_len: %u, exceeds len: %u\n", payload_len, (len - sizeof(struct sfe_ipv6_ip_hdr)));
+		return 0;
+	}
+
+	next_hdr = iph->nexthdr;
+	while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) {
+		struct sfe_ipv6_ext_hdr *ext_hdr;
+		unsigned int ext_hdr_len;
+
+		ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl);
+		if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) {
+			struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr;
+			unsigned int frag_off = ntohs(frag_hdr->frag_off);
+
+			if (frag_off & SFE_IPV6_FRAG_OFFSET) {
+				spin_lock_bh(&si->lock);
+				si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
+				si->packets_not_forwarded++;
+				spin_unlock_bh(&si->lock);
+
+				DEBUG_TRACE("non-initial fragment\n");
+				return 0;
+			}
+		}
+
+		ext_hdr_len = ext_hdr->hdr_len;
+		ext_hdr_len <<= 3;
+		ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr);
+		ihl += ext_hdr_len;
+		if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
+			spin_lock_bh(&si->lock);
+			si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
+			si->packets_not_forwarded++;
+			spin_unlock_bh(&si->lock);
+
+			DEBUG_TRACE("extension header %d not complete\n", next_hdr);
+			return 0;
+		}
+
+		flush_on_find = true;
+		next_hdr = ext_hdr->next_hdr;
+	}
+
+	if (IPPROTO_UDP == next_hdr) {
+		return sfe_ipv6_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
+	}
+
+	if (IPPROTO_TCP == next_hdr) {
+		return sfe_ipv6_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
+	}
+
+	if (IPPROTO_ICMPV6 == next_hdr) {
+		return sfe_ipv6_recv_icmp(si, skb, dev, len, iph, ihl);
+	}
+
+	spin_lock_bh(&si->lock);
+	si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
+	si->packets_not_forwarded++;
+	spin_unlock_bh(&si->lock);
+
+	DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", next_hdr);
+	return 0;
+}
+
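The extension-header walk above relies on the generic IPv6 rule that an extension header's length field counts 8-octet units beyond a mandatory first 8 octets (which is what the `+ sizeof(struct sfe_ipv6_ext_hdr)` term supplies, that struct being the generic 8-byte header). A tiny userspace sketch of the size computation (illustrative only, not part of the patch):

```c
#include <stdio.h>

/* hdr_len counts 8-octet units beyond the first 8 octets, so the
 * total size in bytes is (hdr_len << 3) + 8. */
static unsigned int ext_hdr_bytes(unsigned char hdr_len)
{
	return ((unsigned int)hdr_len << 3) + 8;
}

int main(void)
{
	printf("%u %u\n", ext_hdr_bytes(0), ext_hdr_bytes(2));	/* prints: 8 24 */
	return 0;
}
```

+/*
+ * sfe_ipv6_update_tcp_state()
+ *	Update TCP window variables.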
+ */
+static void
+sfe_ipv6_update_tcp_state(struct sfe_ipv6_connection *c,
+			  struct sfe_connection_create *sic)
+{
+	struct sfe_ipv6_connection_match *orig_cm;
+	struct sfe_ipv6_connection_match *repl_cm;
+	struct sfe_ipv6_tcp_connection_match *orig_tcp;
+	struct sfe_ipv6_tcp_connection_match *repl_tcp;
+
+	orig_cm = c->original_match;
+	repl_cm = c->reply_match;
+	orig_tcp = &orig_cm->protocol_state.tcp;
+	repl_tcp = &repl_cm->protocol_state.tcp;
+
+	/* update orig */
+	if (orig_tcp->max_win < sic->src_td_max_window) {
+		orig_tcp->max_win = sic->src_td_max_window;
+	}
+	if ((s32)(orig_tcp->end - sic->src_td_end) < 0) {
+		orig_tcp->end = sic->src_td_end;
+	}
+	if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
+		orig_tcp->max_end = sic->src_td_max_end;
+	}
+
+	/* update reply */
+	if (repl_tcp->max_win < sic->dest_td_max_window) {
+		repl_tcp->max_win = sic->dest_td_max_window;
+	}
+	if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) {
+		repl_tcp->end = sic->dest_td_end;
+	}
+	if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
+		repl_tcp->max_end = sic->dest_td_max_end;
+	}
+
+	/* update match flags */
+	orig_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
+	repl_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
+	if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
+		orig_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
+		repl_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
+	}
+}
+
+/*
+ * sfe_ipv6_update_protocol_state()
+ *	Update the protocol-specific state machine.
+ */
+static void
+sfe_ipv6_update_protocol_state(struct sfe_ipv6_connection *c,
+			       struct sfe_connection_create *sic)
+{
+	switch (sic->protocol) {
+	case IPPROTO_TCP:
+		sfe_ipv6_update_tcp_state(c, sic);
+		break;
+	}
+}
+
+/*
+ * sfe_ipv6_update_rule()
+ *	Update a forwarding rule after it has been created.
+ */
+void sfe_ipv6_update_rule(struct sfe_connection_create *sic)
+{
+	struct sfe_ipv6_connection *c;
+	struct sfe_ipv6 *si = &__si6;
+
+	spin_lock_bh(&si->lock);
+
+	c = sfe_ipv6_find_connection(si,
+				     sic->protocol,
+				     sic->src_ip.ip6,
+				     sic->src_port,
+				     sic->dest_ip.ip6,
+				     sic->dest_port);
+	if (c != NULL) {
+		sfe_ipv6_update_protocol_state(c, sic);
+	}
+
+	spin_unlock_bh(&si->lock);
+}
+
+/*
+ * sfe_ipv6_create_rule()
+ *	Create a forwarding rule.
+ */
+int sfe_ipv6_create_rule(struct sfe_connection_create *sic)
+{
+	struct sfe_ipv6 *si = &__si6;
+	struct sfe_ipv6_connection *c;
+	struct sfe_ipv6_connection_match *original_cm;
+	struct sfe_ipv6_connection_match *reply_cm;
+	struct net_device *dest_dev;
+	struct net_device *src_dev;
+
+	dest_dev = sic->dest_dev;
+	src_dev = sic->src_dev;
+
+	if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
+		     (src_dev->reg_state != NETREG_REGISTERED))) {
+		return -EINVAL;
+	}
+
+	spin_lock_bh(&si->lock);
+	si->connection_create_requests++;
+
+	/*
+	 * Check to see if there is already a flow that matches the rule we're
+	 * trying to create. If there is then we can't create a new one.
+	 */
+	c = sfe_ipv6_find_connection(si,
+				     sic->protocol,
+				     sic->src_ip.ip6,
+				     sic->src_port,
+				     sic->dest_ip.ip6,
+				     sic->dest_port);
+	if (c != NULL) {
+		si->connection_create_collisions++;
+
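The `(s32)(a - b) < 0` comparisons in `sfe_ipv6_update_tcp_state()` above are wraparound-safe serial-number arithmetic: subtracting in 32-bit space and testing the sign treats sequence numbers as a circular space, so a value that has wrapped past 0xffffffff still compares as "newer". A standalone userspace sketch (illustrative only, not part of the patch):

```c
#include <stdint.h>
#include <stdio.h>

/* Returns nonzero if a is strictly older than b in sequence space. */
static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	uint32_t end = 0xfffffff0u;		/* near the wrap point */
	uint32_t new_end = 0x00000010u;		/* just after wrapping */

	printf("%d\n", seq_before(end, new_end));	/* 1: new_end is newer */
	printf("%d\n", seq_before(new_end, end));	/* 0 */
	return 0;
}
```

+		/*
+		 * If we already have the flow then it's likely that this
+		 * request to create the connection rule contains more
+		 * up-to-date information. Check and update accordingly.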
+ */ + sfe_ipv6_update_protocol_state(c, sic); + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n" + " s: %s:%pM:%pI6:%u, d: %s:%pM:%pI6:%u\n", + sic->mark, sic->protocol, + sic->src_dev->name, sic->src_mac, sic->src_ip.ip6, ntohs(sic->src_port), + sic->dest_dev->name, sic->dest_mac, sic->dest_ip.ip6, ntohs(sic->dest_port)); + return -EADDRINUSE; + } + + /* + * Allocate the various connection tracking objects. + */ + c = (struct sfe_ipv6_connection *)kmalloc(sizeof(struct sfe_ipv6_connection), GFP_ATOMIC); + if (unlikely(!c)) { + spin_unlock_bh(&si->lock); + return -ENOMEM; + } + + original_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC); + if (unlikely(!original_cm)) { + spin_unlock_bh(&si->lock); + kfree(c); + return -ENOMEM; + } + + reply_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC); + if (unlikely(!reply_cm)) { + spin_unlock_bh(&si->lock); + kfree(original_cm); + kfree(c); + return -ENOMEM; + } + + /* + * Fill in the "original" direction connection matching object. + * Note that the transmit MAC address is "dest_mac_xlate" because + * we always know both ends of a connection by their translated + * addresses and not their public addresses. + */ + original_cm->match_dev = src_dev; + original_cm->match_protocol = sic->protocol; + original_cm->match_src_ip[0] = sic->src_ip.ip6[0]; + original_cm->match_src_port = sic->src_port; + original_cm->match_dest_ip[0] = sic->dest_ip.ip6[0]; + original_cm->match_dest_port = sic->dest_port; + original_cm->xlate_src_ip[0] = sic->src_ip_xlate.ip6[0]; + original_cm->xlate_src_port = sic->src_port_xlate; + original_cm->xlate_dest_ip[0] = sic->dest_ip_xlate.ip6[0]; + original_cm->xlate_dest_port = sic->dest_port_xlate; + original_cm->rx_packet_count = 0; + original_cm->rx_packet_count64 = 0; + original_cm->rx_byte_count = 0; + original_cm->rx_byte_count64 = 0; + original_cm->xmit_dev = dest_dev; + original_cm->xmit_dev_mtu = sic->dest_mtu; + memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN); + memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN); + original_cm->connection = c; + original_cm->counter_match = reply_cm; + original_cm->flags = 0; + if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { + original_cm->priority = sic->src_priority; + original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; + } + if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { + original_cm->dscp = sic->src_dscp << SFE_IPV6_DSCP_SHIFT; + original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK; + } +#ifdef CONFIG_NF_FLOW_COOKIE + original_cm->flow_cookie = 0; +#endif +#ifdef CONFIG_XFRM + original_cm->flow_accel = sic->original_accel; +#endif + original_cm->active_next = NULL; + original_cm->active_prev = NULL; + original_cm->active = false; + + /* + * For PPP links we don't write an L2 header. For everything else we do. + */ + if (!(dest_dev->flags & IFF_POINTOPOINT)) { + original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; + + /* + * If our dev writes Ethernet headers then we can write a really fast + * version. + */ + if (dest_dev->header_ops) { + if (dest_dev->header_ops->create == eth_header) { + original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; + } + } + } + + /* + * Fill in the "reply" direction connection matching object. 
+ */ + reply_cm->match_dev = dest_dev; + reply_cm->match_protocol = sic->protocol; + reply_cm->match_src_ip[0] = sic->dest_ip_xlate.ip6[0]; + reply_cm->match_src_port = sic->dest_port_xlate; + reply_cm->match_dest_ip[0] = sic->src_ip_xlate.ip6[0]; + reply_cm->match_dest_port = sic->src_port_xlate; + reply_cm->xlate_src_ip[0] = sic->dest_ip.ip6[0]; + reply_cm->xlate_src_port = sic->dest_port; + reply_cm->xlate_dest_ip[0] = sic->src_ip.ip6[0]; + reply_cm->xlate_dest_port = sic->src_port; + reply_cm->rx_packet_count = 0; + reply_cm->rx_packet_count64 = 0; + reply_cm->rx_byte_count = 0; + reply_cm->rx_byte_count64 = 0; + reply_cm->xmit_dev = src_dev; + reply_cm->xmit_dev_mtu = sic->src_mtu; + memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN); + memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN); + reply_cm->connection = c; + reply_cm->counter_match = original_cm; + reply_cm->flags = 0; + if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) { + reply_cm->priority = sic->dest_priority; + reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK; + } + if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) { + reply_cm->dscp = sic->dest_dscp << SFE_IPV6_DSCP_SHIFT; + reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK; + } +#ifdef CONFIG_NF_FLOW_COOKIE + reply_cm->flow_cookie = 0; +#endif +#ifdef CONFIG_XFRM + reply_cm->flow_accel = sic->reply_accel; +#endif + reply_cm->active_next = NULL; + reply_cm->active_prev = NULL; + reply_cm->active = false; + + /* + * For PPP links we don't write an L2 header. For everything else we do. + */ + if (!(src_dev->flags & IFF_POINTOPOINT)) { + reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR; + + /* + * If our dev writes Ethernet headers then we can write a really fast + * version. + */ + if (src_dev->header_ops) { + if (src_dev->header_ops->create == eth_header) { + reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR; + } + } + } + + + if (!sfe_ipv6_addr_equal(sic->dest_ip.ip6, sic->dest_ip_xlate.ip6) || sic->dest_port != sic->dest_port_xlate) { + original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST; + reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC; + } + + if (!sfe_ipv6_addr_equal(sic->src_ip.ip6, sic->src_ip_xlate.ip6) || sic->src_port != sic->src_port_xlate) { + original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC; + reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST; + } + + c->protocol = sic->protocol; + c->src_ip[0] = sic->src_ip.ip6[0]; + c->src_ip_xlate[0] = sic->src_ip_xlate.ip6[0]; + c->src_port = sic->src_port; + c->src_port_xlate = sic->src_port_xlate; + c->original_dev = src_dev; + c->original_match = original_cm; + c->dest_ip[0] = sic->dest_ip.ip6[0]; + c->dest_ip_xlate[0] = sic->dest_ip_xlate.ip6[0]; + c->dest_port = sic->dest_port; + c->dest_port_xlate = sic->dest_port_xlate; + c->reply_dev = dest_dev; + c->reply_match = reply_cm; + c->mark = sic->mark; + c->debug_read_seq = 0; + c->last_sync_jiffies = get_jiffies_64(); + + /* + * Take hold of our source and dest devices for the duration of the connection. + */ + dev_hold(c->original_dev); + dev_hold(c->reply_dev); + + /* + * Initialize the protocol-specific information that we track. + */ + switch (sic->protocol) { + case IPPROTO_TCP: + original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale; + original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? 
sic->src_td_max_window : 1; + original_cm->protocol_state.tcp.end = sic->src_td_end; + original_cm->protocol_state.tcp.max_end = sic->src_td_max_end; + reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale; + reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1; + reply_cm->protocol_state.tcp.end = sic->dest_td_end; + reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end; + if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) { + original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; + reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK; + } + break; + } + + sfe_ipv6_connection_match_compute_translations(original_cm); + sfe_ipv6_connection_match_compute_translations(reply_cm); + sfe_ipv6_insert_connection(si, c); + + spin_unlock_bh(&si->lock); + + /* + * We have everything we need! + */ + DEBUG_INFO("new connection - mark: %08x, p: %d\n" + " s: %s:%pM(%pM):%pI6(%pI6):%u(%u)\n" + " d: %s:%pM(%pM):%pI6(%pI6):%u(%u)\n", + sic->mark, sic->protocol, + sic->src_dev->name, sic->src_mac, sic->src_mac_xlate, + sic->src_ip.ip6, sic->src_ip_xlate.ip6, ntohs(sic->src_port), ntohs(sic->src_port_xlate), + dest_dev->name, sic->dest_mac, sic->dest_mac_xlate, + sic->dest_ip.ip6, sic->dest_ip_xlate.ip6, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate)); + + return 0; +} + +/* + * sfe_ipv6_destroy_rule() + * Destroy a forwarding rule. + */ +void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid) +{ + struct sfe_ipv6 *si = &__si6; + struct sfe_ipv6_connection *c; + + spin_lock_bh(&si->lock); + si->connection_destroy_requests++; + + /* + * Check to see if we have a flow that matches the rule we're trying + * to destroy. If there isn't then we can't destroy it. + */ + c = sfe_ipv6_find_connection(si, sid->protocol, sid->src_ip.ip6, sid->src_port, + sid->dest_ip.ip6, sid->dest_port); + if (!c) { + si->connection_destroy_misses++; + spin_unlock_bh(&si->lock); + + DEBUG_TRACE("connection does not exist - p: %d, s: %pI6:%u, d: %pI6:%u\n", + sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port), + sid->dest_ip.ip6, ntohs(sid->dest_port)); + return; + } + + /* + * Remove our connection details from the hash tables. + */ + sfe_ipv6_remove_connection(si, c); + spin_unlock_bh(&si->lock); + + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY); + + DEBUG_INFO("connection destroyed - p: %d, s: %pI6:%u, d: %pI6:%u\n", + sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port), + sid->dest_ip.ip6, ntohs(sid->dest_port)); +} + +/* + * sfe_ipv6_register_sync_rule_callback() + * Register a callback for rule synchronization. + */ +void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback) +{ + struct sfe_ipv6 *si = &__si6; + + spin_lock_bh(&si->lock); + rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback); + spin_unlock_bh(&si->lock); +} + +/* + * sfe_ipv6_get_debug_dev() + */ +static ssize_t sfe_ipv6_get_debug_dev(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct sfe_ipv6 *si = &__si6; + ssize_t count; + int num; + + spin_lock_bh(&si->lock); + num = si->debug_dev; + spin_unlock_bh(&si->lock); + + count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num); + return count; +} + +/* + * sfe_ipv6_destroy_all_rules_for_dev() + * Destroy all connections that match a particular device. + * + * If we pass dev as NULL then this destroys all connections. 
+ */ +void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev) +{ + struct sfe_ipv6 *si = &__si6; + struct sfe_ipv6_connection *c; + +another_round: + spin_lock_bh(&si->lock); + + for (c = si->all_connections_head; c; c = c->all_connections_next) { + /* + * Does this connection relate to the device we are destroying? + */ + if (!dev + || (dev == c->original_dev) + || (dev == c->reply_dev)) { + break; + } + } + + if (c) { + sfe_ipv6_remove_connection(si, c); + } + + spin_unlock_bh(&si->lock); + + if (c) { + sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY); + goto another_round; + } +} + +/* + * sfe_ipv6_periodic_sync() + */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) +static void sfe_ipv6_periodic_sync(struct timer_list *arg) +#else +static void sfe_ipv6_periodic_sync(unsigned long arg) +#endif /*KERNEL_VERSION(4, 15, 0)*/ +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) + struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg->cust_data; +#else + struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg; +#endif /*KERNEL_VERSION(4, 15, 0)*/ + u64 now_jiffies; + int quota; + sfe_sync_rule_callback_t sync_rule_callback; + + now_jiffies = get_jiffies_64(); + + rcu_read_lock(); + sync_rule_callback = rcu_dereference(si->sync_rule_callback); + if (!sync_rule_callback) { + rcu_read_unlock(); + goto done; + } + + spin_lock_bh(&si->lock); + sfe_ipv6_update_summary_stats(si); + + /* + * Get an estimate of the number of connections to parse in this sync. + */ + quota = (si->num_connections + 63) / 64; + + /* + * Walk the "active" list and sync the connection state. + */ + while (quota--) { + struct sfe_ipv6_connection_match *cm; + struct sfe_ipv6_connection_match *counter_cm; + struct sfe_ipv6_connection *c; + struct sfe_connection_sync sis; + + cm = si->active_head; + if (!cm) { + break; + } + + /* + * There's a possibility that our counter match is in the active list too. + * If it is then remove it. + */ + counter_cm = cm->counter_match; + if (counter_cm->active) { + counter_cm->active = false; + + /* + * We must have a connection preceding this counter match + * because that's the one that got us to this point, so we don't have + * to worry about removing the head of the list. + */ + counter_cm->active_prev->active_next = counter_cm->active_next; + + if (likely(counter_cm->active_next)) { + counter_cm->active_next->active_prev = counter_cm->active_prev; + } else { + si->active_tail = counter_cm->active_prev; + } + + counter_cm->active_next = NULL; + counter_cm->active_prev = NULL; + } + + /* + * Now remove the head of the active scan list. + */ + cm->active = false; + si->active_head = cm->active_next; + if (likely(cm->active_next)) { + cm->active_next->active_prev = NULL; + } else { + si->active_tail = NULL; + } + cm->active_next = NULL; + + /* + * Sync the connection state. + */ + c = cm->connection; + sfe_ipv6_gen_sync_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies); + + /* + * We don't want to be holding the lock when we sync! + */ + spin_unlock_bh(&si->lock); + sync_rule_callback(&sis); + spin_lock_bh(&si->lock); + } + + spin_unlock_bh(&si->lock); + rcu_read_unlock(); + +done: + mod_timer(&si->timer, jiffies + ((HZ + 99) / 100)); +} + +/* + * sfe_ipv6_debug_dev_read_start() + * Generate part of the XML output. 
+ */
+static bool sfe_ipv6_debug_dev_read_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+					  int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	si->debug_read_seq++;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv6>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv6_debug_dev_read_connections_start()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv6_debug_dev_read_connections_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+						      int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv6_debug_dev_read_connections_connection()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+							   int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	struct sfe_ipv6_connection *c;
+	struct sfe_ipv6_connection_match *original_cm;
+	struct sfe_ipv6_connection_match *reply_cm;
+	int bytes_read;
+	int protocol;
+	struct net_device *src_dev;
+	struct sfe_ipv6_addr src_ip;
+	struct sfe_ipv6_addr src_ip_xlate;
+	__be16 src_port;
+	__be16 src_port_xlate;
+	u64 src_rx_packets;
+	u64 src_rx_bytes;
+	struct net_device *dest_dev;
+	struct sfe_ipv6_addr dest_ip;
+	struct sfe_ipv6_addr dest_ip_xlate;
+	__be16 dest_port;
+	__be16 dest_port_xlate;
+	u64 dest_rx_packets;
+	u64 dest_rx_bytes;
+	u64 last_sync_jiffies;
+	u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
+#ifdef CONFIG_NF_FLOW_COOKIE
+	int src_flow_cookie, dst_flow_cookie;
+#endif
+
+	spin_lock_bh(&si->lock);
+
+	for (c = si->all_connections_head; c; c = c->all_connections_next) {
+		if (c->debug_read_seq < si->debug_read_seq) {
+			c->debug_read_seq = si->debug_read_seq;
+			break;
+		}
+	}
+
+	/*
+	 * If there were no connections then move to the next state.
+	 */
+	if (!c) {
+		spin_unlock_bh(&si->lock);
+		ws->state++;
+		return true;
+	}
+
+	original_cm = c->original_match;
+	reply_cm = c->reply_match;
+
+	protocol = c->protocol;
+	src_dev = c->original_dev;
+	src_ip = c->src_ip[0];
+	src_ip_xlate = c->src_ip_xlate[0];
+	src_port = c->src_port;
+	src_port_xlate = c->src_port_xlate;
+	src_priority = original_cm->priority;
+	src_dscp = original_cm->dscp >> SFE_IPV6_DSCP_SHIFT;
+
+	sfe_ipv6_connection_match_update_summary_stats(original_cm);
+	sfe_ipv6_connection_match_update_summary_stats(reply_cm);
+
+	src_rx_packets = original_cm->rx_packet_count64;
+	src_rx_bytes = original_cm->rx_byte_count64;
+	dest_dev = c->reply_dev;
+	dest_ip = c->dest_ip[0];
+	dest_ip_xlate = c->dest_ip_xlate[0];
+	dest_port = c->dest_port;
+	dest_port_xlate = c->dest_port_xlate;
+	dest_priority = reply_cm->priority;
+	dest_dscp = reply_cm->dscp >> SFE_IPV6_DSCP_SHIFT;
+	dest_rx_packets = reply_cm->rx_packet_count64;
+	dest_rx_bytes = reply_cm->rx_byte_count64;
+	last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
+	mark = c->mark;
+#ifdef CONFIG_NF_FLOW_COOKIE
+	src_flow_cookie = original_cm->flow_cookie;
+	dst_flow_cookie = reply_cm->flow_cookie;
+#endif
+	spin_unlock_bh(&si->lock);
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
+			      "protocol=\"%u\" "
+			      "src_dev=\"%s\" "
+			      "src_ip=\"%pI6\" src_ip_xlate=\"%pI6\" "
+			      "src_port=\"%u\" src_port_xlate=\"%u\" "
+			      "src_priority=\"%u\" src_dscp=\"%u\" "
+			      "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
+			      "dest_dev=\"%s\" "
+			      "dest_ip=\"%pI6\" dest_ip_xlate=\"%pI6\" "
+			      "dest_port=\"%u\" dest_port_xlate=\"%u\" "
+			      "dest_priority=\"%u\" dest_dscp=\"%u\" "
+			      "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
+#ifdef CONFIG_NF_FLOW_COOKIE
+			      "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
+#endif
+			      "last_sync=\"%llu\" "
+			      "mark=\"%08x\" />\n",
+			      protocol,
+			      src_dev->name,
+			      &src_ip, &src_ip_xlate,
+			      ntohs(src_port), ntohs(src_port_xlate),
+			      src_priority, src_dscp,
+			      src_rx_packets, src_rx_bytes,
+			      dest_dev->name,
+			      &dest_ip, &dest_ip_xlate,
+			      ntohs(dest_port), ntohs(dest_port_xlate),
+			      dest_priority, dest_dscp,
+			      dest_rx_packets, dest_rx_bytes,
+#ifdef CONFIG_NF_FLOW_COOKIE
+			      src_flow_cookie, dst_flow_cookie,
+#endif
+			      last_sync_jiffies, mark);
+
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	return true;
+}
+
+/*
+ * sfe_ipv6_debug_dev_read_connections_end()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv6_debug_dev_read_connections_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+						    int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv6_debug_dev_read_exceptions_start()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv6_debug_dev_read_exceptions_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+						     int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv6_debug_dev_read_exceptions_exception()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv6_debug_dev_read_exceptions_exception(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+							 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	u64 ct;
+
+	spin_lock_bh(&si->lock);
+	ct = si->exception_events64[ws->iter_exception];
+	spin_unlock_bh(&si->lock);
+
+	if (ct) {
+		int bytes_read;
+
+		bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
+				      "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
+				      sfe_ipv6_exception_events_string[ws->iter_exception],
+				      ct);
+		if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+			return false;
+		}
+
+		*length -= bytes_read;
+		*total_read += bytes_read;
+	}
+
+	ws->iter_exception++;
+	if (ws->iter_exception >= SFE_IPV6_EXCEPTION_EVENT_LAST) {
+		ws->iter_exception = 0;
+		ws->state++;
+	}
+
+	return true;
+}
+
+/*
+ * sfe_ipv6_debug_dev_read_exceptions_end()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv6_debug_dev_read_exceptions_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+						   int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv6_debug_dev_read_stats()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv6_debug_dev_read_stats(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+					  int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	int bytes_read;
+	unsigned int num_connections;
+	u64 packets_forwarded;
+	u64 packets_not_forwarded;
+	u64 connection_create_requests;
+	u64 connection_create_collisions;
+	u64 connection_destroy_requests;
+	u64 connection_destroy_misses;
+	u64 connection_flushes;
+	u64 connection_match_hash_hits;
+	u64 connection_match_hash_reorders;
+
+	spin_lock_bh(&si->lock);
+	sfe_ipv6_update_summary_stats(si);
+
+	num_connections = si->num_connections;
+	packets_forwarded = si->packets_forwarded64;
+	packets_not_forwarded = si->packets_not_forwarded64;
+	connection_create_requests = si->connection_create_requests64;
+	connection_create_collisions = si->connection_create_collisions64;
+	connection_destroy_requests = si->connection_destroy_requests64;
+	connection_destroy_misses = si->connection_destroy_misses64;
+	connection_flushes = si->connection_flushes64;
+	connection_match_hash_hits = si->connection_match_hash_hits64;
+	connection_match_hash_reorders = si->connection_match_hash_reorders64;
+	spin_unlock_bh(&si->lock);
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
+			      "num_connections=\"%u\" "
+			      "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
+			      "create_requests=\"%llu\" create_collisions=\"%llu\" "
+			      "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
+			      "flushes=\"%llu\" "
+			      "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
+			      num_connections,
+			      packets_forwarded,
+			      packets_not_forwarded,
+			      connection_create_requests,
+			      connection_create_collisions,
+			      connection_destroy_requests,
+			      connection_destroy_misses,
+			      connection_flushes,
+			      connection_match_hash_hits,
+			      connection_match_hash_reorders);
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * sfe_ipv6_debug_dev_read_end()
+ *	Generate part of the XML output.
+ */
+static bool sfe_ipv6_debug_dev_read_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
+					int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
+{
+	int bytes_read;
+
+	bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv6>\n");
+	if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
+		return false;
+	}
+
+	*length -= bytes_read;
+	*total_read += bytes_read;
+
+	ws->state++;
+	return true;
+}
+
+/*
+ * Array of write functions that write various XML elements that correspond to
+ * our XML output state machine.
+ */
+static sfe_ipv6_debug_xml_write_method_t sfe_ipv6_debug_xml_write_methods[SFE_IPV6_DEBUG_XML_STATE_DONE] = {
+	sfe_ipv6_debug_dev_read_start,
+	sfe_ipv6_debug_dev_read_connections_start,
+	sfe_ipv6_debug_dev_read_connections_connection,
+	sfe_ipv6_debug_dev_read_connections_end,
+	sfe_ipv6_debug_dev_read_exceptions_start,
+	sfe_ipv6_debug_dev_read_exceptions_exception,
+	sfe_ipv6_debug_dev_read_exceptions_end,
+	sfe_ipv6_debug_dev_read_stats,
+	sfe_ipv6_debug_dev_read_end,
+};
+
+/*
+ * sfe_ipv6_debug_dev_read()
+ *	Send info to userspace upon read request from user.
+ */
+static ssize_t sfe_ipv6_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
+{
+	char msg[CHAR_DEV_MSG_SIZE];
+	int total_read = 0;
+	struct sfe_ipv6_debug_xml_write_state *ws;
+	struct sfe_ipv6 *si = &__si6;
+
+	ws = (struct sfe_ipv6_debug_xml_write_state *)filp->private_data;
+	while ((ws->state != SFE_IPV6_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
+		if ((sfe_ipv6_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
+			continue;
+		}
+
+		/*
+		 * The write method failed to copy to userspace; stop rather
+		 * than retrying the same state indefinitely.
+		 */
+		break;
+	}
+
+	return total_read;
+}
+
+/*
+ * sfe_ipv6_debug_dev_write()
+ *	Writing to the char device resets the summary statistics.
+ */
+static ssize_t sfe_ipv6_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
+{
+	struct sfe_ipv6 *si = &__si6;
+
+	spin_lock_bh(&si->lock);
+	sfe_ipv6_update_summary_stats(si);
+
+	si->packets_forwarded64 = 0;
+	si->packets_not_forwarded64 = 0;
+	si->connection_create_requests64 = 0;
+	si->connection_create_collisions64 = 0;
+	si->connection_destroy_requests64 = 0;
+	si->connection_destroy_misses64 = 0;
+	si->connection_flushes64 = 0;
+	si->connection_match_hash_hits64 = 0;
+	si->connection_match_hash_reorders64 = 0;
+	spin_unlock_bh(&si->lock);
+
+	return length;
+}
+
+/*
+ * sfe_ipv6_debug_dev_open()
+ */
+static int sfe_ipv6_debug_dev_open(struct inode *inode, struct file *file)
+{
+	struct sfe_ipv6_debug_xml_write_state *ws;
+
+	ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data;
+	if (ws) {
+		return 0;
+	}
+
+	ws = kzalloc(sizeof(struct sfe_ipv6_debug_xml_write_state), GFP_KERNEL);
+	if (!ws) {
+		return -ENOMEM;
+	}
+
+	ws->state = SFE_IPV6_DEBUG_XML_STATE_START;
+	file->private_data = ws;
+
+	return 0;
+}
+
+/*
+ * sfe_ipv6_debug_dev_release()
+ */
+static int sfe_ipv6_debug_dev_release(struct inode *inode, struct file *file)
+{
+	struct sfe_ipv6_debug_xml_write_state *ws;
+
+	ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data;
+	if (ws) {
+		/*
+		 * We've finished with our output so free the write state.
+		 */
+		kfree(ws);
+	}
+
+	return 0;
+}
+
+/*
+ * File operations used in the debug char device.
+ */
+static struct file_operations sfe_ipv6_debug_dev_fops = {
+	.read = sfe_ipv6_debug_dev_read,
+	.write = sfe_ipv6_debug_dev_write,
+	.open = sfe_ipv6_debug_dev_open,
+	.release = sfe_ipv6_debug_dev_release
+};
+
+#ifdef CONFIG_NF_FLOW_COOKIE
+/*
+ * sfe_ipv6_register_flow_cookie_cb()
+ *	Register a function that SFE calls to configure the flow cookie for a flow.
+ *
+ * Hardware drivers that support flow cookies should register a callback with
+ * SFE; SFE then uses it to configure the flow cookie for each flow.
+ * Return: 0 on success, non-zero on failure.
+ */
+int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb)
+{
+	struct sfe_ipv6 *si = &__si6;
+
+	BUG_ON(!cb);
+
+	if (si->flow_cookie_set_func) {
+		return -1;
+	}
+
+	rcu_assign_pointer(si->flow_cookie_set_func, cb);
+	return 0;
+}
+
+/*
+ * sfe_ipv6_unregister_flow_cookie_cb()
+ *	Unregister the function used to configure the flow cookie for a flow.
+ *
+ * Return: 0 on success, non-zero on failure.
+ */
+int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb)
+{
+	struct sfe_ipv6 *si = &__si6;
+
+	RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
+	return 0;
+}
+
+/*
+ * sfe_ipv6_get_flow_cookie()
+ */
+static ssize_t sfe_ipv6_get_flow_cookie(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct sfe_ipv6 *si = &__si6;
+
+	return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
+}
+
+/*
+ * sfe_ipv6_set_flow_cookie()
+ */
+static ssize_t sfe_ipv6_set_flow_cookie(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t size)
+{
+	struct sfe_ipv6 *si = &__si6;
+
+	strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable);
+
+	return size;
+}
+
+/*
+ * sysfs attributes.
+ */
+static const struct device_attribute sfe_ipv6_flow_cookie_attr =
+	__ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv6_get_flow_cookie, sfe_ipv6_set_flow_cookie);
+#endif /*CONFIG_NF_FLOW_COOKIE*/
+
+/*
+ * sfe_ipv6_init()
+ */
+static int __init sfe_ipv6_init(void)
+{
+	struct sfe_ipv6 *si = &__si6;
+	int result = -1;
+
+	DEBUG_INFO("SFE IPv6 init\n");
+
+	/*
+	 * Create sys/sfe_ipv6
+	 */
+	si->sys_sfe_ipv6 = kobject_create_and_add("sfe_ipv6", NULL);
+	if (!si->sys_sfe_ipv6) {
+		DEBUG_ERROR("failed to register sfe_ipv6\n");
+		goto exit1;
+	}
+
+	/*
+	 * Create files, one for each parameter supported by this module.
+	 */
+	result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr);
+	if (result) {
+		DEBUG_ERROR("failed to register debug dev file: %d\n", result);
+		goto exit2;
+	}
+
+#ifdef CONFIG_NF_FLOW_COOKIE
+	result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
+	if (result) {
+		DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
+		goto exit3;
+	}
+#endif /* CONFIG_NF_FLOW_COOKIE */
+
+	/*
+	 * Register our debug char device.
+	 */
+	result = register_chrdev(0, "sfe_ipv6", &sfe_ipv6_debug_dev_fops);
+	if (result < 0) {
+		DEBUG_ERROR("Failed to register chrdev: %d\n", result);
+		goto exit4;
+	}
+
+	si->debug_dev = result;
+
+	/*
+	 * Initialise the lock before the periodic timer can fire, since the
+	 * timer callback takes si->lock.
+	 */
+	spin_lock_init(&si->lock);
+
+	/*
+	 * Create a timer to handle periodic statistics.
+	 */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
+	timer_setup(&si->timer, sfe_ipv6_periodic_sync, 0);
+	si->timer.cust_data = (unsigned long)si;
+#else
+	setup_timer(&si->timer, sfe_ipv6_periodic_sync, (unsigned long)si);
+#endif /*KERNEL_VERSION(4, 15, 0)*/
+	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
+
+	return 0;
+
+exit4:
+#ifdef CONFIG_NF_FLOW_COOKIE
+	sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
+
+exit3:
+#endif /* CONFIG_NF_FLOW_COOKIE */
+	sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr);
+
+exit2:
+	kobject_put(si->sys_sfe_ipv6);
+
+exit1:
+	return result;
+}
+
+/*
+ * sfe_ipv6_exit()
+ */
+static void __exit sfe_ipv6_exit(void)
+{
+	struct sfe_ipv6 *si = &__si6;
+
+	DEBUG_INFO("SFE IPv6 exit\n");
+
+	/*
+	 * Destroy all connections.
+	 */
+	sfe_ipv6_destroy_all_rules_for_dev(NULL);
+
+	del_timer_sync(&si->timer);
+
+	unregister_chrdev(si->debug_dev, "sfe_ipv6");
+
+#ifdef CONFIG_NF_FLOW_COOKIE
+	sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
+#endif /* CONFIG_NF_FLOW_COOKIE */
+	sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr);
+
+	kobject_put(si->sys_sfe_ipv6);
+}
+
+module_init(sfe_ipv6_init)
+module_exit(sfe_ipv6_exit)
+
+EXPORT_SYMBOL(sfe_ipv6_recv);
+EXPORT_SYMBOL(sfe_ipv6_create_rule);
+EXPORT_SYMBOL(sfe_ipv6_destroy_rule);
+EXPORT_SYMBOL(sfe_ipv6_destroy_all_rules_for_dev);
+EXPORT_SYMBOL(sfe_ipv6_register_sync_rule_callback);
+EXPORT_SYMBOL(sfe_ipv6_mark_rule);
+EXPORT_SYMBOL(sfe_ipv6_update_rule);
+#ifdef CONFIG_NF_FLOW_COOKIE
+EXPORT_SYMBOL(sfe_ipv6_register_flow_cookie_cb);
+EXPORT_SYMBOL(sfe_ipv6_unregister_flow_cookie_cb);
+#endif
+
+MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv6 support");
+MODULE_LICENSE("Dual BSD/GPL");
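
For reference, the debug char device added above can be exercised from userspace once a device node exists. The sketch below is not part of this patch; it assumes the node is created from the dynamically assigned major number, e.g. "mknod /dev/sfe_ipv6 c $(cat /sys/sfe_ipv6/debug_dev) 0", where the debug_dev sysfs attribute is the one registered earlier in sfe_ipv6.c. Each read() drives the per-open XML state machine kept in filp->private_data, a read of 0 bytes means the document is complete, and any write() resets the 64-bit summary statistics.

/*
 * sfe_ipv6_dump.c - minimal userspace sketch, not part of this patch.
 * Assumes /dev/sfe_ipv6 was created as described above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/*
	 * The buffer must be larger than the kernel's CHAR_DEV_MSG_SIZE:
	 * the read handler only makes progress while length > CHAR_DEV_MSG_SIZE.
	 */
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/sfe_ipv6", O_RDWR);

	if (fd < 0) {
		perror("open /dev/sfe_ipv6");
		return 1;
	}

	/*
	 * Successive reads continue the XML document; 0 means the state
	 * machine reached SFE_IPV6_DEBUG_XML_STATE_DONE.
	 */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t)n, stdout);

	/* Any write resets the 64-bit summary statistics. */
	if (write(fd, "\n", 1) < 0)
		perror("write");

	close(fd);
	return 0;
}

The shortcut-fe/files/usr/bin/sfe_dump script shipped in this patch performs similar steps in shell for routine debugging.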