/**************************************************************************\
*//*! \file ef_bend_fwd.c Forwarding packets from back end to front end driver

Copyright 2006 Solarflare Communications Inc,
               9501 Jeronimo Road, Suite 250,
               Irvine, CA 92618, USA

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License version 2 as published by the Free
Software Foundation, incorporated herein by reference.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*//*
\**************************************************************************/

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <asm/io.h>
#include "ci/driver/virtual/ef_hyperops.h"
#include "ci/driver/efab/open.h"
#include "ef_bend.h"
#include "ef_char_bend.h"
#include "ef_bend_fwd.h"
#include "ef_bend_vnic.h"
#include "ef_filter.h"
#include "ef_mcast.h"
#include "ef_iputil.h"
#include "ci/xen/ef_cuckoo_hash.h"

/* Singly linked list of forwarding table entries.  Entries are linked
   by their index in fwd_array rather than by pointer. */
typedef struct fwd_list_s {
/* Index value meaning "no entry": an empty list head or the end of the
   chain */
#define LIST_END -1
  /* Index of the first entry, or LIST_END when the list is empty */
  ci_int32 head;
  /* Index of the last entry; enqueue only reads it when the list is
     not empty */
  ci_int32 tail;
} fwd_list_t;

/* State stored in the forward table: one slot per local MAC address */
struct fwd_struct {
  /* Index in fwd_array of the next entry in the list, or LIST_END */
  ci_int32 next;
  /* Caller-supplied pointer set by ef_bend_fwd_add() and
     ef_bend_fwd_set_context(); the forwarding code reads it back as a
     struct ef_bend pointer and skips the entry when it is NULL */
  void * context;
  /* Non-zero while this slot is in use */
  __u8 valid;
  /* MAC address this entry was added for (the hash table key) */
  __u8 mac[ETH_ALEN];
};

/* Capacity of the forwarding table.  NUM_FWDS_BITS is the size handed
   to ef_cuckoo_hash_init(); NUM_FWDS (2^NUM_FWDS_BITS) is the number of
   slots allocated in fwd_array. */
#define NUM_FWDS_BITS 8
#define NUM_FWDS (1 << NUM_FWDS_BITS)
/* Bit mask covering the valid index range 0 .. NUM_FWDS-1 */
#define FWD_MASK (NUM_FWDS - 1)

/* Hash table mapping a MAC address to the index of its fwd_struct in
   fwd_array */
static ef_cuckoo_hash_table fwd_hash_table;
/* The array of fwd_structs (NUM_FWDS entries, allocated in
   ef_bend_init_fwd()) */
static struct fwd_struct *fwd_array;
/* Linked list of entries in use. */
static fwd_list_t fwd_list;
/* Guards the hash table, the array and the list, and is also held
   while forward_to_vnic() writes to a receive fifo.
   Could do something clever with a reader/writer lock. */
static spinlock_t fwd_lock;

/* Convert a pointer to an element of fwd_array into that element's
   index */
static inline int fwd_index(struct fwd_struct *fwd)
{
  return (int)(fwd - fwd_array);
}

/* Find the forwarding table entry for a MAC address.  The caller must
   hold the table lock.  Returns the entry, or NULL if the MAC is not
   in the hash table. */
struct fwd_struct *fwd_find_entry(const __u8 *mac)
{
  ef_cuckoo_hash_value value;
  struct fwd_struct *fwd;

  if (!ef_cuckoo_hash_lookup(&fwd_hash_table, mac, &value))
    return NULL;

  /* The hash value is the entry's index in fwd_array */
  fwd = &fwd_array[value];
  ci_assert(memcmp(fwd->mac, mac, ETH_ALEN) == 0);
  return fwd;
}


/* Returns non-zero when the list holds no entries */
inline int fwd_list_is_empty(fwd_list_t *flist)
{
  return LIST_END == flist->head;
}


/* Returns non-zero when the list holds at least one entry */
inline int fwd_list_not_empty(fwd_list_t *flist)
{
  return !(flist->head == LIST_END);
}


/* Append a fwd_struct to the tail of a fwd_list.  The entry must be
   marked valid and must not currently be linked into any list. */
inline void fwd_list_enqueue(fwd_list_t *flist, struct fwd_struct *fwd)
{
  int idx;

  ci_assert_equal(fwd->next, LIST_END);
  ci_assert(fwd->valid);
  ci_assert_ge(fwd_index(fwd), 0);
  ci_assert_lt(fwd_index(fwd), NUM_FWDS);

  idx = fwd_index(fwd);
  ci_log("adding fwd %d to list", idx);

  if (flist->head == LIST_END) {
    /* List was empty, so the new entry is also the head */
    flist->head = idx;
  } else {
    /* Chain the new entry after the current tail */
    fwd_array[flist->tail].next = idx;
  }

  fwd->next = LIST_END;
  flist->tail = idx;
}


/* Unlink a fwd_struct from a fwd_list.  The entry must be present in
   the list.

   This walks the list from the head.  A cheaper version that only
   ever dequeues the head is possible, but nothing needs it yet; see
   ci_async_op_queue_dequeue_head() for an example if it is ever
   wanted. */
inline void fwd_list_dequeue(fwd_list_t *flist, struct fwd_struct *fwd)
{
  ci_int32 id = fwd_index(fwd);
  ci_int32 last_seen = LIST_END;
  ci_int32 *p_next;

  ci_assert(fwd_list_not_empty(flist));
  ci_assert_ge(id, 0);
  ci_assert_lt(id, NUM_FWDS);
  ci_assert_impl(fwd->next == LIST_END, id == flist->tail);

  ci_log("removing fwd %d from list", fwd_index(fwd));

  /* p_next always points at the link (the list head or some entry's
     "next" field) that leads to the entry under inspection.  Walk
     forward until that link refers to the entry being removed,
     remembering the index of the entry before it.  The assertion
     catches running off the end of the list without a match. */
  p_next = &flist->head;
  while (*p_next != id) {
    ci_assert_ge(*p_next, 0);
    last_seen = *p_next;
    p_next = &(fwd_array[*p_next].next);
  }

  /* Splice the entry out by pointing the incoming link at its
     successor */
  *p_next = fwd->next;

  /* If it was the last entry, the tail moves back to its predecessor
     (LIST_END when the list is now empty) */
  if (flist->tail == id)
    flist->tail = last_seen;

  CI_DEBUG(fwd->next = LIST_END);
}


/* Execute _code once for each in-use forwarding table entry whose
   context pointer is non-NULL.  _bend names a struct ef_bend pointer
   variable, declared by the macro, that holds the entry's context
   while _code runs.

   _code is executed with fwd_lock held and interrupts disabled, so it
   must not try to take fwd_lock itself.

   The saved interrupt state is kept in an unsigned long as
   spin_lock_irqsave() requires, and the list cursor is signed so that
   it compares cleanly against LIST_END.

   TODO would be nice to be able to do this without needing the lock */
#define for_each_vnic( _bend, _code)                        \
  do {                                                      \
    struct ef_bend * _bend;                                 \
    ci_int32 _i;                                            \
    unsigned long _flags;                                   \
    spin_lock_irqsave(&fwd_lock, _flags);                   \
    _i = fwd_list.head;                                     \
    while(_i != LIST_END) {                                 \
      struct fwd_struct *_fwd;                              \
      BEND_VERB(ci_log("doing thing to fwd %d", (int)_i));  \
      _fwd = &fwd_array[_i];                                \
      ci_assert(_fwd->valid);                               \
      _bend = _fwd->context;                                \
      if(_bend != NULL) { _code; }                          \
      _i = _fwd->next;                                      \
    }                                                       \
    spin_unlock_irqrestore(&fwd_lock, _flags);              \
  } while(0)


/* Publish a new MTU to one vnic through its shared page and interrupt
   the other end unless an MTU change is already flagged */
static void bend_set_vnic_mtu(struct ef_bend *bend, int new_mtu)
{
  /* An MTU update that is still in progress may be overwritten here.
     Provided integer writes are atomic the value is at least
     consistent. */
  bend->shared_page->mtu = new_mtu;

  if (ci_bit_test_and_set(&bend->shared_page->aflags,
                          MSG_AFLAGS_MTUCHANGE_B)) {
    /* Flag was already set, so a notification is already pending */
    ci_log("mtu change already in progress, not signalling");
    return;
  }

  ef_hyperop_remote_irq(bend->channel);
}

/* Entry point for the net driver to report an MTU change */
void ef_bend_mtu_changed(int new_mtu)
{
  ci_log("MTU changed to %d.", new_mtu);

  /* Propagate the new value to every vnic in the forwarding table */
  for_each_vnic(bend, bend_set_vnic_mtu(bend, new_mtu));
}

/* Publish a new link state to one vnic through its shared page and
   interrupt the other end unless a link state change is already
   flagged */
static void bend_set_vnic_linkstate(struct ef_bend *bend, int link_up)
{
  /* A link state change that is still in progress may be overwritten
     here.  Provided integer writes are atomic the value is at least
     consistent. */
  bend->shared_page->link_state = link_up;

  if (ci_bit_test_and_set(&bend->shared_page->aflags,
                          MSG_AFLAGS_LINKSTATE_B)) {
    /* Flag was already set, so a notification is already pending */
    ci_log("link state change already in progress, not signalling");
    return;
  }

  ef_hyperop_remote_irq(bend->channel);
}

/* Entry point for the net driver to report a link state change */
void ef_bend_link_change(int link_up )
{
  /* Propagate the new state to every vnic in the forwarding table */
  for_each_vnic(bend, bend_set_vnic_linkstate(bend, link_up));
}

/* Request that a packet is forwarded from the back end to the front
   end driver by copying it into the vnic's receive fifo.

   The Ethernet header, which has been stripped by the time we get
   here, is pushed back onto the skb for the copy and stripped again
   before returning, so the skb is left the way it was found.  Each
   packet is written to the fifo as a two byte length (low byte first)
   followed by the frame data.

   bend      - vnic to deliver the packet to
   skb       - the packet
   have_lock - non-zero if the caller already holds fwd_lock (as
               for_each_vnic does); zero to take the lock here

   Returns 0 on success, -ENOMEM if the fifo does not have room, or
   -EMSGSIZE if the frame is too long to describe in the two byte
   length prefix.

   The fifo accesses made here are protected by fwd_lock, so concurrent
   calls to this function leave the fifo consistent.  However, other
   uses of the fifo may not be protected by the same lock. */
int forward_to_vnic(struct ef_bend *bend, struct sk_buff *skb, int have_lock )
{
  unsigned space;
  /* spin_lock_irqsave() stores the interrupt state in an unsigned long */
  unsigned long flags = 0;
  char *dest;

  /* The MAC header has been stripped by the time we get here. Put it
   * back for the benefit of the recipient domain */
  skb_push(skb, ETH_HLEN);

  BEND_VERB(ci_log("Forward pkt to domain %d (%d bytes)", 
            bend->far_end, skb->len));

  /* Only 16 bits of length are written to the fifo, so a longer frame
     would be recorded with a truncated length and corrupt the framing
     for the reader.  Refuse it instead. */
  if (skb->len > 0xffff) {
    ci_log("Packet too long to forward (%d bytes)", skb->len);
    skb_pull(skb, ETH_HLEN);
    return -EMSGSIZE;
  }

  /* Take lock before fiddling with fifo.  This is necessary as
     although access from driverlink is already serialised, the slow
     path can call into here too */
  if (!have_lock)
    spin_lock_irqsave(&fwd_lock, flags);

  /* Check for space in the fifo: the frame plus its two length bytes */
  if (sh_fifo2_space(&bend->rcv_pkts) < skb->len + 2) {
    ci_log("No FIFO space. Have %d, need %d", 
           sh_fifo2_space(&bend->rcv_pkts), skb->len + 2);
    /* Re-strip the mac header before giving up */
    skb_pull(skb, ETH_HLEN);
    if (!have_lock)
      spin_unlock_irqrestore(&fwd_lock, flags);
    return -ENOMEM;
  }

  /* Tell it how much - fifo is byte access so do in two chunks */
  sh_fifo2_put(&bend->rcv_pkts, skb->len & 0xff);
  sh_fifo2_put(&bend->rcv_pkts, (skb->len >> 8) & 0xff);

  space = sh_fifo2_contig_space(&bend->rcv_pkts);
  /* Virtual ring buffer means we can copy in one chunk even if that
     extends over the end of the fifo as we map the first page twice
     (once at start, once just after the end).  Only works if the
     amount over the end is less than a page in size */
  ci_assert(space > skb->len || skb->len - space < PAGE_SIZE);

  /* Take the address to copy to while we hold the lock */
  dest = sh_fifo2_pokep(&bend->rcv_pkts);
  /* Copy the data into the queue - would be nice to do this without
     the lock. */
  memcpy(dest, skb->data, skb->len);

  /* Advance the write pointer to the end of the packet */
  sh_fifo2_wr_adv(&bend->rcv_pkts, skb->len);

  /* Release the lock - we're done fiddling with the fifo now */
  if (!have_lock)
    spin_unlock_irqrestore(&fwd_lock, flags);

  /* Tell the other end there's something waiting for it */
  ef_hyperop_remote_irq(bend->net_irq);

  /* Put the SKB back the way we found it. */
  skb_pull(skb, ETH_HLEN);

  return 0;
}


/* Look up the destination MAC of this skb in the forward table.
   Returns the context pointer stored for the matching entry (which may
   itself be NULL), or NULL if there is no entry for that MAC.

   The context is read while fwd_lock is still held, so the entry
   cannot be removed or have its context changed between the lookup
   and the read. */
static struct ef_bend *for_a_vnic(struct sk_buff *skb )
{
  struct fwd_struct *fwd;
  struct ef_bend *retval = NULL;
  /* spin_lock_irqsave() stores the interrupt state in an unsigned long */
  unsigned long flags;

  BEND_VERB(ci_log("Looking up mac " MAC_FMT, MAC_ARG(skb->mac.raw)));

  spin_lock_irqsave(&fwd_lock, flags);
  fwd = fwd_find_entry(skb->mac.raw);
  if (fwd != NULL)
    retval = fwd->context;
  spin_unlock_irqrestore(&fwd_lock, flags);

  BEND_VERB(ci_log(retval ? "Found it" : "Not found"));
  return retval;
}


/* Copy the packet to every vnic in the forwarding table.  Used for
   broadcast. */
static void replicate_to_vnics(struct sk_buff *skb )
{
  /* for_each_vnic already holds fwd_lock, hence have_lock == 1 */
  for_each_vnic(bend, forward_to_vnic(bend, skb, 1) );
}

/* Returns non-zero if the first six bytes of the MAC header are all
   0xff, i.e. the packet is addressed to the broadcast address */
static inline int packet_is_bcast(struct sk_buff *skb)
{
  int all_bits = skb->mac.raw[0];
  int i;

  /* AND the bytes together: the result is 0xff only if every byte is */
  for (i = 1; i < 6; i++)
    all_bits &= skb->mac.raw[i];

  return all_bits == 0xff;
}

/* Simple test to see if it's a multicast packet: returns non-zero if
   the lowest bit of the first byte of the MAC header is set */
static inline int packet_is_mcast(struct sk_buff *skb)
{
  return skb->mac.raw[0] & 0x01;
}



/* Receive a packet and hand it to whichever vnic(s) should see it.
 *
 *  - broadcast: copied to every vnic
 *  - multicast: copied to the vnics returned by ef_mcast_process_pkt()
 *  - unicast:   copied to the vnic registered for the destination MAC
 *
 * Returns 1 to take exclusive ownership of the packet, which only
 * happens for unicast packets that match a vnic, and 0 otherwise. */
int ef_bend_rx_packet(struct sk_buff *skb )
{
  struct ef_bend *bend;

  BEND_VERB(ci_log("MAC: " MAC_FMT, MAC_ARG(skb->mac.raw)));

  /* Checking for bcast is cheaper so do that first */
  if (packet_is_bcast(skb)) {
    BEND_VERB(ci_log("Will replicate bcast"));
    replicate_to_vnics(skb);
    return 0;
  }

  if (packet_is_mcast(skb)) {
    struct ef_filter_spec fs;
    struct ef_filter_spec *pfs = NULL;
    struct ef_bend **bends;
    int nbends;
    int i;

    /* We will happily forward any packet with the right ethernet mcast
     * but we can only add filters for IP ones, so only those get a
     * filter spec */
    if (skb->protocol == htons(ETH_P_IP)) {
      ef_iphdr_to_filt(skb->data, &fs);
      pfs = &fs;
    }

    bends = ef_mcast_process_pkt( skb->mac.raw, pfs, &nbends);
    for (i = 0; i < nbends; i++)
      forward_to_vnic(bends[i], skb, 0);
    return 0;
  }

  /* Unicast: nothing to do unless the MAC belongs to one of our vnics */
  bend = for_a_vnic(skb);
  if (bend == NULL)
    return 0;

  if (skb->protocol == htons(ETH_P_IP)) {
    struct ef_filter_spec fs;
    ef_iphdr_to_filt(skb->data, &fs);
    /* We don't need to hold onto the handle returned because fastpath
     * unicast filters never need explicit deletion (except when we
     * tear the bend down), they just hang around until something else
     * evicts them. */
    (void)ef_bend_filter_check_add(bend, &fs);
  }
  forward_to_vnic(bend, skb, 0);

  /* It's for one of us and we have sent it ourselves, so prevent Xen
     passing it up too */
  return 1;
}


/* Initialise the forwarding table: its lock, the array of entries, the
   MAC hash table and the (empty) in-use list.

   Returns 0 on success, -ENOMEM if the entry array cannot be
   allocated, or the error returned by ef_cuckoo_hash_init().  On
   failure nothing is left allocated and fwd_array is NULL. */
int ef_bend_init_fwd(void)
{
  int rc, i;

  spin_lock_init(&fwd_lock);

  fwd_array = kzalloc(sizeof (struct fwd_struct) * NUM_FWDS, GFP_KERNEL);
  if (fwd_array == NULL)
    return -ENOMEM;

  rc = ef_cuckoo_hash_init(&fwd_hash_table, NUM_FWDS_BITS);
  if (rc != 0) {
    kfree(fwd_array);
    /* Don't leave a dangling pointer behind for later code to trip
       over (e.g. a second kfree in ef_bend_shutdown_fwd) */
    fwd_array = NULL;
    return rc;
  }

  /* Initialise the new entries to have suitable "next" list
     entries */
  for (i = 0; i < NUM_FWDS; i++)
    fwd_array[i].next = LIST_END;

  fwd_list.head = fwd_list.tail = LIST_END;

  return 0;
}

/* Tear down the forwarding table: destroy the hash table and free the
   entry array */
void ef_bend_shutdown_fwd()
{
  /* By now every source of packets has been shut down, so there will
   * be no further callbacks and the forward table can safely go. */
  ef_cuckoo_hash_destroy(&fwd_hash_table);

  kfree(fwd_array);
  fwd_array = NULL;
}


/* Search the fwd_array for an unused entry.  The scan starts at the
   slot returned by the previous successful search and wraps round the
   array once.  Returns the index of a free slot, or -ENOMEM if every
   slot is in use. */
static int find_free_fwd_struct(void){
  static int last_free_index = 0;
  int probe = last_free_index;
  int tried;

  for (tried = 0; tried < NUM_FWDS; tried++) {
    if (!fwd_array[probe].valid) {
      /* Remember where we got to so the next search starts here */
      last_free_index = probe;
      return probe;
    }
    if (++probe >= NUM_FWDS)
      probe = 0;
  }

  return -ENOMEM;
}


/* Add an entry to the forwarding table.
 *
 * mac     - MAC address to add (ETH_ALEN bytes)
 * context - pointer to store with the entry
 *
 * Returns 0 on success, -ENOMEM if there is no free slot, or the error
 * returned by ef_cuckoo_hash_add().  The other vnics are only told
 * about the new local MAC when the entry was actually added.
 *
 * This may sleep, do not call in atomic. */
int ef_bend_fwd_add(const __u8 *mac, void *context)
{
  /* spin_lock_irqsave() stores the interrupt state in an unsigned long */
  unsigned long flags;
  struct fwd_struct *fwd;
  int rc, index;

  spin_lock_irqsave(&fwd_lock, flags);
  BEND_VERB(ci_log("Adding mac " MAC_FMT, MAC_ARG(mac)));

  index = find_free_fwd_struct();
  if (index < 0) {
    spin_unlock_irqrestore(&fwd_lock, flags);
    return index;
  }

  /* Only claim the slot once the MAC is safely in the hash table */
  rc = ef_cuckoo_hash_add(&fwd_hash_table, mac, index, 1);
  if (rc == 0) {
    fwd = &fwd_array[index];
    fwd->valid = 1;
    fwd->context = context;
    memcpy(fwd->mac, mac, ETH_ALEN);
    fwd_list_enqueue(&fwd_list, fwd);
  }

  spin_unlock_irqrestore(&fwd_lock, flags);

  /* Tell the world about this mac address being local, but not if the
     add failed: in that case it is not in the table */
  if (rc == 0)
    ef_bend_for_each_bend(ef_bend_vnic_new_localmac, mac);

  return rc;
}

/* Set the context pointer for the forwarding table entry of a MAC.
   Returns 0 on success, or -ENOENT if there is no entry for that
   MAC. */
int ef_bend_fwd_set_context(const __u8 *mac, void *context)
{
  struct fwd_struct *fwd;
  /* spin_lock_irqsave() stores the interrupt state in an unsigned long */
  unsigned long flags;
  int rc = -ENOENT;

  spin_lock_irqsave(&fwd_lock, flags);
  fwd = fwd_find_entry(mac);
  if (fwd != NULL) {
    fwd->context = context;
    rc = 0;
  }
  spin_unlock_irqrestore(&fwd_lock, flags);

  return rc;
}

/* Remove the entry for a MAC from the forwarding table, if there is
   one: unlink it from the in-use list, mark its slot free and drop it
   from the hash table.  The other vnics are then told that the MAC is
   no longer local, whether or not an entry was found. */
void ef_bend_fwd_remove(const __u8 *mac)
{
  struct fwd_struct *fwd;
  /* spin_lock_irqsave() stores the interrupt state in an unsigned long */
  unsigned long flags;

  spin_lock_irqsave(&fwd_lock, flags);
  BEND_VERB(ci_log("Removing mac " MAC_FMT, MAC_ARG(mac)));
  fwd = fwd_find_entry(mac);
  if (fwd != NULL) {
    fwd_list_dequeue(&fwd_list, fwd);
    fwd->valid = 0;
    ef_cuckoo_hash_remove(&fwd_hash_table, fwd->mac);
  }
  spin_unlock_irqrestore(&fwd_lock, flags);

  /* Tell the world about this mac address no longer being local */
  ef_bend_for_each_bend(ef_bend_vnic_old_localmac, mac);
}

/* Call the supplied function once for every vnic in the forwarding
   table, passing it the vnic and the caller's context pointer.  The
   callback is invoked from inside for_each_vnic. */
void ef_bend_for_each_bend(void (*callback)(struct ef_bend*, const void *),
                           const void *context)
{
  for_each_vnic(bend, callback(bend, context));
}
