/**************************************************************************\
*//*! \file ef_bend.c Xen back end driver

Copyright 2006 Solarflare Communications Inc,
               9501 Jeronimo Road, Suite 250,
               Irvine, CA 92618, USA

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License version 2 as published by the Free
Software Foundation, incorporated herein by reference.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*//*
\**************************************************************************/

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/if_ether.h>
#include <asm/io.h>
#include <xen/xenbus.h>
#include <xen/evtchn.h>
#include "ci/xen/ef_xen_util.h"
#include "ci/xen/ef_msg_iface.h"
#include "ef_bend.h"
#include "ef_char_bend.h"
#include "ef_bend_fwd.h"
#include "ef_bend_vnic.h"
#include "ef_filter.h"
#include "ef_bend_netdev.h"
#include "ef_mcast.h"
#include "ef_bend_accel.h"

#define XEN_VM_NAMELEN (45)

static int init_etherfabric_bend(void);
static void cleanup_etherfabric_bend(void);

/* Values for the unsafe_unload module parameter.  They select how much we
 * trust the domU when deciding whether hardware resources may be freed. */
/* UU_RIGOROUS : cannot unload driver until all H/W resources are free,
 * cannot free hardware resources until domain they were passed to is gone. */
#define UU_RIGOROUS (0)
/* UU_GULLIBLE: cannot unload driver until all H/W resources are free, but we
 * believe the domU when it says it has let go of them and free them */
#define UU_GULLIBLE (1)
/* UU_WASTEFUL: we don't trust the domU to let go of resources, but we are
 * prepared to unload anyway and just leak the resources. */
#define UU_WASTEFUL (2)
/* Current policy; defaults to the strictest setting.  Within this file only
 * UU_GULLIBLE is tested explicitly (in bend_vnic_changed). */
static int unsafe_unload = UU_RIGOROUS;

/*  We distinguish between the DomU going away, in which case we free the
 * resources and destroy the backend, and simply disconnecting (e.g.
 * due to driver unload) in which case we hang on to the backend and
 * resources and give them back to the domain if it tries to reconnect.
 * It isn't safe to give them to another domain since if the original
 * owner still exists we have no proof that it won't touch them again, but
 * giving them back to the same owner is safe. */

/* Module system hooks: entry/exit points, licence, and the unsafe_unload
 * load-time parameter (permissions 0, so it is not exposed through sysfs). */
module_init(init_etherfabric_bend);
module_exit(cleanup_etherfabric_bend);
MODULE_LICENSE("GPL");
module_param(unsafe_unload, int, 0);

/* Forward declarations of the xenbus driver callbacks and their helpers,
 * needed because bend_driver below refers to them before they are defined. */
static int bend_probe(struct xenbus_device *dev,
		      const struct xenbus_device_id *id);
static int bend_resume(struct xenbus_device *dev);

static void bend_vnic_changed(struct xenbus_device *dev, 
                              XenbusState vnic_state);
/* (repeat of the bend_resume declaration above; harmless) */
static int bend_resume(struct xenbus_device *dev);
static int bend_schedule_remove(struct xenbus_device *dev);
static void bend_remove(void *context /* Not used */);

/* xenbus watch callback registered by setup_domu_watch() */
static void bend_domu_change(struct xenbus_watch *,
                              const char **vec, unsigned int len);

/* Global state for workqueue work lists and protection.
 * One node of the singly-linked queue of pending removals: bend_schedule_remove()
 * appends nodes and bend_remove() pops and frees them. */
struct workqueue_workitem {
  struct workqueue_workitem *next;   /* next queued item, NULL at the tail */
  void *context;                     /* the struct ef_bend to tear down */
};

/* Queue of bends awaiting removal.  Head and tail are both NULL when the
 * queue is empty and are only modified with remove_work_lock held. */
struct workqueue_workitem *remove_work_head = NULL;
struct workqueue_workitem *remove_work_tail = NULL;
spinlock_t remove_work_lock;        /* initialised in init_etherfabric_bend() */
struct work_struct remove_work;     /*!< Defer bend cleanup */

/* Device types this backend handles; the list ends with an empty-string
 * entry. */
static struct xenbus_device_id bend_ids[] = {
        { "ef1" },
        { "" }
};

/* Function hooks for xen to call us.  Note that .remove only queues the
 * teardown (bend_schedule_remove); the real work happens later in
 * bend_remove() on the shared workqueue. */
static struct xenbus_driver bend_driver = {
        .name = "ef1",
        .owner = THIS_MODULE,
        .ids = bend_ids,
        .probe = bend_probe,
        .remove = bend_schedule_remove,
        .resume = bend_resume,
        .otherend_changed = bend_vnic_changed,
};

/* List of all the bends currently in existence, singly linked through
 * ef_bend::next_bend.  I think the Xen callbacks are serialised such that
 * we don't need a lock round this, but it may be a good place to look in
 * the event of mysterious crashes.
 * NOTE(review): unlink_bend() is also called from bend_remove(), which runs
 * as deferred work rather than as a Xen callback -- confirm that cannot
 * race with list walkers. */
struct ef_bend *bend_list = NULL;

/* Push 'bend' onto the front of the global bend_list. */
static void link_bend(struct ef_bend *bend)
{
  struct ef_bend *old_head = bend_list;

  bend_list = bend;
  bend->next_bend = old_head;
}

/* Remove 'bend' from the global bend_list.  Only the first occurrence is
 * removed; nothing happens if 'bend' is not on the list.  The removed
 * element's own next_bend pointer is left untouched. */
static void unlink_bend(struct ef_bend *bend)
{
  /* 'link' always addresses the pointer that leads to the current element:
   * first the list head itself, then each element's next_bend field. */
  struct ef_bend **link;

  for (link = &bend_list; *link != NULL; link = &(*link)->next_bend) {
    if (*link == bend) {
      *link = bend->next_bend;
      return;
    }
  }
}

/* Called by linux when module initialised.
 *
 * Brings up, in order: the forwarding table, multicast support, the
 * acceleration layer, and finally the xenbus backend registration.
 * Returns 0 on success or a negative errno; on failure every subsystem
 * that was already initialised is shut down again so nothing is leaked
 * when the module load is rejected.
 *
 * Only ef_bend_accel_init() is allowed to report -ENOENT ("no acceleration
 * hardware"); that case is not treated as an error.  -ENOENT from the other
 * init steps is a genuine failure. */
int init_etherfabric_bend(void)
{
  int rc;

  ci_set_log_prefix("BEND:" );
  ci_log("So far so good...\n");

  /* The removal queue must be usable before xenbus can call us back. */
  spin_lock_init(&remove_work_lock);
  INIT_WORK(&remove_work, bend_remove, NULL);

  /* Set up hash table, lock, and driverlink stuff to keep track of
     connections that we've forwarded */
  rc = ef_bend_init_fwd();
  if (rc != 0)
    goto fail_fwd;

  rc = ef_mcast_init();
  if (rc != 0)
    goto fail_mcast;

  rc = ef_bend_accel_init();
  if (rc == -ENOENT) {
    /* This is not an error that prevents module loading. */
    ci_log("No acceleration found.");
    rc = 0;
  }
  if (rc != 0)
    goto fail_accel;

  rc = xenbus_register_backend(&bend_driver);
  if (rc != 0)
    goto fail_register;

  return 0;

  /* Unwind in reverse order of initialisation.  The acceleration layer is
   * shut down even when it reported -ENOENT, which matches what
   * cleanup_etherfabric_bend() does on a normal unload. */
 fail_register:
  ef_bend_accel_shutdown();
 fail_accel:
  ef_mcast_shutdown();
 fail_mcast:
  ef_bend_shutdown_fwd();
 fail_fwd:
  return rc;
}


/* Called by linux on module exit.  Tears the driver down in a fixed order:
 * unregister from xenbus, shut down acceleration, flush the shared
 * workqueue, then release the forwarding table and multicast state. */
void cleanup_etherfabric_bend()
{
  ci_log("%s", __FUNCTION__);
  /* Make all the backend instances go away. This must be done before
   * we let go of any symbols we need to release the acceleration
   * resources.  This should stick a load of stuff on the work
   * queue */
  xenbus_unregister_driver(&bend_driver);
  ci_log("%s: Shutting down acceleration", __FUNCTION__);
  /* This must be done before we make the forwarding table go away. */
  ef_bend_accel_shutdown();

  /* Flush work queue to ensure all deferred tasks are done.
   * NOTE(review): the queued bend_remove() work calls bend->accel_shutdown()
   * and may therefore run after ef_bend_accel_shutdown() above -- confirm
   * that ordering is intended. */
  ci_log("%s: Flushing work queue", __FUNCTION__);
  flush_scheduled_work();

  ci_log("%s: Calling ef_bend_shutdown_fwd", __FUNCTION__);
  ef_bend_shutdown_fwd();
  ef_mcast_shutdown();

  ci_log("%s: All done.", __FUNCTION__);
}


/* Back end driver's implementation of the xenbus_driver probe
   function.  Nothing is allocated here: the device is simply moved to
   XenbusStateInitialised, and the per-device state is created later by
   setup_vnic() when the other end reaches Initialised.  'id' is unused.
   Always returns 0; the result of xenbus_switch_state() is not checked. */
int bend_probe(struct xenbus_device *dev,
               const struct xenbus_device_id *id)
{
  ci_log("Probe passed device %s\n", dev->nodename);
  xenbus_switch_state(dev, XenbusStateInitialised);
  return 0;
}


/* Back end driver's implementation of the xenbus_driver resume
   function.  We don't really support this at the moment: it is a no-op
   that reports success (0) without touching 'dev'. */
int bend_resume(struct xenbus_device *dev)
{
  return 0;
}

/* Release everything a bend holds apart from the bend structure itself
 * (the caller unlinks and frees that).
 *
 * Note that although this calls accel_shutdown, it may not result in
 * the hardware being freed. In particular, if it was called due to
 * a module unload, the hardware will be kept and re-associated
 * with the VNIC if the module is reloaded.
 *
 * NOTE(review): bend->accel_shutdown is called unconditionally; confirm it
 * is always set by the time a bend can be removed. */
static void cleanup_bend(struct xenbus_device *dev, struct ef_bend *bend)
{
  ci_log("%s: Detach from network stack.", __FUNCTION__);
  ef_bend_netdev_remove(bend);
  ci_log("%s: Remove %p's mac from fwd table...", __FUNCTION__, bend);
  ef_bend_fwd_remove(bend->mac);
  ci_log("%s: Release hardware resources...", __FUNCTION__);
  bend->accel_shutdown(bend);
  /* NOTE(review): setup_vnic() stores event-channel ports in net_irq and
   * channel via xenbus_bind_evtchn(), and does not record the IRQ returned
   * by bind_evtchn_to_irqhandler() -- verify these are the values
   * unbind_from_irqhandler() expects. */
  unbind_from_irqhandler(bend->net_irq, dev);
  unbind_from_irqhandler(bend->channel, dev);
  ci_log("%s: Unmap grants", __FUNCTION__);
  /* Each handle is cleared after unmapping so that it is not unmapped
   * twice. */
  if (bend->sh_pages_unmap) {
    ef_hyperop_unmap_contig(dev, bend->sh_pages_unmap);
    bend->sh_pages_unmap = NULL;
  }
  if (bend->tx_fifo_unmap) {
    ef_hyperop_unmap_contig(dev, bend->tx_fifo_unmap);
    bend->tx_fifo_unmap = NULL;
  } 
  if (bend->rx_fifo_unmap) {
    ef_hyperop_unmap_contig(dev, bend->rx_fifo_unmap);
    bend->rx_fifo_unmap = NULL;
  }
}


/* Deferred-work handler that performs the actual teardown of bends queued
 * by bend_schedule_remove().  It drains the whole removal queue: each item
 * is popped under remove_work_lock and then processed with the lock
 * released, because the teardown itself may sleep.
 *
 * 'context' is unused; the bend to destroy travels in the work item. */
static void bend_remove(void *context /* Not used */)
{
  /* spin_lock_irqsave() stores the saved IRQ state in an unsigned long;
   * a plain 'unsigned' is too narrow on 64-bit builds. */
  unsigned long flags;
  struct ef_bend *bend;
  struct workqueue_workitem *workitem;

  for (;;) {
    /* Pop the head of the queue, if any. */
    spin_lock_irqsave(&remove_work_lock, flags);
    workitem = remove_work_head;
    if (workitem != NULL) {
      if (remove_work_head == remove_work_tail) {
        /* That was the only item: the queue is now empty. */
        remove_work_head = remove_work_tail = NULL;
      } else {
        ci_assert(remove_work_head->next != NULL);
        remove_work_head = remove_work_head->next;
      }
    }
    spin_unlock_irqrestore(&remove_work_lock, flags);

    if (workitem == NULL)
      return;

    bend = (struct ef_bend *)workitem->context;
    kfree(workitem);

    /* A non-NULL node pointer means the domU watch is registered. */
    ci_log("Unwatch xenbus");
    if (bend->hinfo.domu_watch.node) {
      unregister_xenbus_watch(&bend->hinfo.domu_watch);
      kfree(bend->hinfo.domu_watch.node);
      bend->hinfo.domu_watch.node = NULL;
    }

    cleanup_bend((struct xenbus_device *)bend->hdev_data, bend);
    unlink_bend(bend);
    kfree(bend);

    /* Loop and see if there's anything else in the workqueue work
       list */
  }
}



/* Back end driver's implementation of the xenbus_driver remove
   function.

   Detaches the bend from 'dev' (driver_data is set to NULL) and queues it
   for teardown by bend_remove() on the shared workqueue.  The detach is
   done under remove_work_lock, so if this is called more than once for
   the same device only the first call queues anything.

   Always returns 0.  If the queue node cannot be allocated the removal is
   dropped: the bend has already been detached from 'dev' and is not torn
   down. */
static int bend_schedule_remove(struct xenbus_device *dev)
{
  /* spin_lock_irqsave() stores the saved IRQ state in an unsigned long;
   * a plain 'unsigned' is too narrow on 64-bit builds. */
  unsigned long flags;
  int do_schedule = 0;
  struct ef_bend *bend;
  struct workqueue_workitem *workitem;

  spin_lock_irqsave(&remove_work_lock, flags);

  /* Store bend in a list for processing by the work queue */
  bend = (struct ef_bend *)dev->dev.driver_data;
  dev->dev.driver_data = NULL;
  if (bend != NULL) {
    /* We hold a spinlock with IRQs off, so the allocation must not sleep. */
    workitem = kzalloc(sizeof(*workitem), GFP_ATOMIC);
    if (workitem == NULL) {
      /* aww, crap */
      ci_log("%s: couldn't allocate memory, dropping remove", __FUNCTION__);
      spin_unlock_irqrestore(&remove_work_lock, flags);
      return 0;
    }

    /* Append to the tail; workitem->next is already NULL from kzalloc. */
    workitem->context = bend;
    if (remove_work_tail != NULL)
      remove_work_tail->next = workitem;
    else
      remove_work_head = workitem;
    remove_work_tail = workitem;
    do_schedule = 1;
  }
  spin_unlock_irqrestore(&remove_work_lock, flags);

  /* Kick the work queue into life if it's not already on it */
  if (do_schedule)
    schedule_work(&remove_work);

  return 0;
}


/* Format of the per-domain path that is watched to detect a domU going
 * away.  setup_domu_watch() formats it with the domain id and
 * bend_domu_change() parses the id back out with sscanf(). */
#define WATCH_PATH_FMT "/local/domain/%d"

/* Test whether 'bend' belongs to the vanished domain 'domain' and, if so,
 * mark it dead.  A bend whose hardware state is EF_RES_ALLOC is moved to
 * EF_RES_DONE first, since the domain won't be needing it again.
 *
 * Returns 1 if the bend matched (the caller stops iterating), 0 otherwise.
 *
 * NOTE(review): this was written as a callback for iterating over fwd table
 * entries with the fwd lock held; the only caller visible in this file,
 * bend_domu_change(), walks bend_list instead -- confirm the locking. */
static int it_is_an_ex_domain(struct ef_bend *bend, int domain)
{
  if (bend->far_end != domain)
    return 0;

  ci_log("bend %p is for dead domain %d", bend, domain);
  if (bend->hw_state == EF_RES_ALLOC)
    bend->hw_state = EF_RES_DONE;
  ef_bend_mark_dead(bend, -ENOTTY, "domain has gone away");
  return 1;
}


/* xenbus watch callback for the per-domain path registered by
 * setup_domu_watch().  It fires on any change under that path; action is
 * taken only once the path no longer exists, i.e. the domU has gone away.
 * In that case the first bend on bend_list belonging to that domain is
 * marked dead.  'vec' and 'len' are unused.
 *
 * NOTE(review): the walk stops at the first matching bend, so if a domain
 * can own more than one bend the others are not marked here -- confirm
 * whether that is intended. */
void bend_domu_change(struct xenbus_watch *watch,
                       const char **vec, unsigned int len)
{
  struct ef_bend *bend;
  int n;

  if (xenbus_exists(XBT_NIL, watch->node, ""))
    return;

  ci_log("DomU '%s' has gone away", watch->node);

  /* Recover the domain id from the watched path.  Without this check a
   * path that does not match the format would leave 'n' uninitialised. */
  if (sscanf(watch->node, WATCH_PATH_FMT, &n) != 1) {
    ci_log("%s: cannot parse domain id from '%s'", __FUNCTION__, watch->node);
    return;
  }

  ci_log("Updating bends associated with domain %d", n);
  for (bend = bend_list; bend != NULL; bend = bend->next_bend) {
    if (it_is_an_ex_domain(bend, n))
      break;
  }
}

/* Load the per-VNIC hardware limits (filters, buffer pages, tx rate and
 * multicast entries) from the "limits/" keys under this device's xenbus
 * node into bend->quotas.  Returns 0 on success, otherwise the error from
 * xenbus_gather(), which is also logged. */
static int ef_bend_cfg_hw_quotas(struct xenbus_device *dev, struct ef_bend *bend)
{
  int rc;

  rc = xenbus_gather(XBT_NIL, dev->nodename,
                     "limits/max-filters", "%d", &bend->quotas.max_filters,
                     "limits/max-buf-pages", "%d", &bend->quotas.max_buf_pages,
                     "limits/max-tx-rate", "%d", &bend->quotas.max_tx_rate,
                     "limits/max-mcasts", "%d", &bend->quotas.max_mcasts,
                     NULL);
  if (rc != 0)
    ci_log("Failed to read quotas from xenbus: %d", rc);

  return rc;
}

/* Carry out a state change (we can't do from IRQ as it may sleep) */
static void bend_state_change(void *context)
{
  struct xenbus_device *dev = context;
  struct ef_bend *bend = (struct ef_bend *)dev->dev.driver_data;
  if (bend->newt_state >= 0) {
    xenbus_switch_state(dev, bend->newt_state);
  } else {
    switch(bend->newt_state) {
      case -1:
        bend_schedule_remove(dev);
        break;
      default:
        ci_log("Request for unknown state %d", bend->newt_state);
        break;
    }
  }
}

/* Ask for the bend to be moved to 'state' from the workqueue.  'state' is
 * either a XenbusState (>= 0) or -1 to request removal; it is consumed by
 * bend_state_change().  Only the most recent request is remembered: a
 * second call before the work runs overwrites the first. */
static inline void bend_schedule_state(struct ef_bend *bend, int state)
{
  /* Store state for scheduled function to access.  This should really
     be a list, and protected with a lock */
  bend->newt_state = state;
  schedule_work(&bend->state_change);
}

/* Move the xenbus device to XenbusStateConnected, or rather schedule it
 * to happen in a sleepable context. This can be called in atomic.
 * The actual switch is performed later by bend_state_change(). */
void ef_bend_mark_connected(struct ef_bend *bend)
{
  bend_schedule_state(bend, XenbusStateConnected);
}

/* Report a fatal error on the bend's xenbus device and schedule the bend
 * for removal.
 *
 *   reason - error code passed to xenbus_dev_fatal()
 *   where  - human-readable description of the failure
 *
 * May be called repeatedly for the same bend: setup_vnic() initialises
 * 'destroying' to -1, so atomic_inc_and_test() is true only for the first
 * call and the removal is scheduled exactly once. */
void ef_bend_mark_dead(struct ef_bend *bend, int reason, const char *where)
{
  /* xenbus_dev_fatal() takes a printf-style format; pass the caller's text
   * as an argument so any '%' in it is not interpreted. */
  xenbus_dev_fatal(bend->hdev_data, reason, "%s", where);
  if (atomic_inc_and_test(&bend->destroying))
    bend_schedule_state(bend, -1);
}



/*  Demultiplex an IRQ from the frontend driver.  */
static irqreturn_t irq_from_vnic(int irq, void *context, struct pt_regs *unused)
{
  struct xenbus_device *dev = context;
  struct ef_bend *bend_data = (struct ef_bend *)dev->dev.driver_data;
  BEND_VERB(ci_log("irq %d from device %s\n", irq, dev->nodename));
  schedule_work(&bend_data->handle_msg);
  return IRQ_HANDLED;
}

/* Signature expected by INIT_WORK() for a work function; setup_vnic() uses
 * it to cast ef_bend_vnic_msghandler. */
typedef void (*workhandler)(void *);

/* Get the uuid of the VM for the otherend and set a watch on, so we
 * can tell if it gets destroyed. */
static int setup_domu_watch(struct ef_bend *bend)
{
  int err;
  bend->hinfo.domu_watch.node = kmalloc(XEN_VM_NAMELEN, GFP_KERNEL);
  if (bend->hinfo.domu_watch.node == NULL) {
    return -ENOMEM;
  }
  /* It's tempting to watch for /vm/<uuid> to go away, but that doesn't
   * seem to happen until an explicit destroy is done (so it won't happen
   * if a domain crashes and reboots, for example) */
  sprintf((char *)bend->hinfo.domu_watch.node, WATCH_PATH_FMT, bend->far_end);
  bend->hinfo.domu_watch.callback = bend_domu_change;
  err = register_xenbus_watch(&bend->hinfo.domu_watch);
  if (err) {
    ci_log("Failed to register xenbus watch: %d", err);
    goto fail;
  }
  return 0;
fail:
   /* The shutdown code takes a non-NULL node pointer to mean that
    * the watch is registered and needs unregistering */
  kfree(bend->hinfo.domu_watch.node);
  bend->hinfo.domu_watch.node = NULL;
  return err;
}


/* Setup all the comms needed to chat with the front end driver.
 *
 * Called when the frontend reaches XenbusStateInitialised.  Allocates the
 * bend for 'dev' (or reuses the one already attached to it), reads the
 * event channels, grant references and configuration from xenbus, watches
 * the frontend's domain, maps the shared message pages, binds the message
 * IRQ, reserves a forwarding-table entry, reads the hardware quotas and
 * finally sends the hello message.
 *
 * Returns 0 on success or a negative errno.  On failure everything
 * acquired here is released again, the bend is taken off bend_list and
 * detached from 'dev' before being freed, so no stale pointer to it
 * remains. */
static int setup_vnic(struct xenbus_device *dev)
{
  struct ef_bend *bend_data;
  unsigned int channel, net_chan;
  int grants[2];
  int err, irq, msgs_per_queue;
  int domain;

  /* Allocate structure to store all our state... */
  if (dev->dev.driver_data == NULL) {
    bend_data = kzalloc(sizeof(struct ef_bend), GFP_KERNEL);
    if (bend_data == NULL) {
      err = -ENOMEM;
      goto fail0;
    }

    /* ...and store it where we can get at it */
    dev->dev.driver_data = bend_data;
    /* And vice-versa */
    bend_data->hdev_data = dev;

    /* Only a freshly allocated bend goes on the list.  A reused bend is
     * already linked, and linking it a second time would make the list
     * point back at itself. */
    link_bend(bend_data);
  } else {
    bend_data = dev->dev.driver_data;
    ci_log("dev '%s' already has a bend structure at %p", dev->nodename, bend_data);
  }

  /* Set up the deferred work handlers */
  INIT_WORK(&bend_data->state_change, bend_state_change, dev);
  INIT_WORK(&bend_data->handle_msg, (workhandler)ef_bend_vnic_msghandler, bend_data);

  /* Set up destruction marker so inc_and_test will return true once when called */
  atomic_set(&bend_data->destroying, -1);

  /* Get some info from xenbus on the event channel and shmem grant */
  err = xenbus_gather(XBT_NIL, dev->otherend, 
                      "event-channel", "%u", &channel, 
                      "ctrl-page", "%d", grants,
                      "msg-page", "%d", grants + 1,
                      "net-channel", "%u", &net_chan,
                      NULL);

  if (err) {
    ci_log("failed to read event channel or shmem grant: error %d\n", err);
    goto fail_bend;
  }

  /* Request the info passed from userspace: the frontend ID and the port
   * number.
   * NOTE(review): "dev-name" is read with an unbounded "%s" into
   * bend_data->nicname; confirm the buffer is large enough for any value
   * that can appear in xenbus. */
  err = xenbus_gather(XBT_NIL, dev->nodename, "frontend-id", "%d", &domain,
                      "phys-port", "%d", &bend_data->phys_port,
                      "dev-name", "%s", bend_data->nicname,
                      "no-accel", "%d", &bend_data->unaccelerated,
                      NULL);
  if (err) {
    ci_log("failed to read vnic configuration: error %d\n", err);
    goto fail_bend;
  }

  bend_data->far_end = domain;

  /* A reused bend may already have its domU watch registered. */
  if (bend_data->hinfo.domu_watch.node == NULL) {
    err = setup_domu_watch(bend_data);
    if (err)
      goto fail_bend;
  }

  /* Request the frontend mac */
  err = xen_net_read_mac(dev, bend_data->mac);
  if (err)
    goto fail_bend;

  /* Bind the message event channel to a handler */
  ci_log("vnic is using event channel %d\n", channel);
  err = xenbus_bind_evtchn(dev, channel, &bend_data->channel);
  if (err) {
    ci_log("failed to bind event channel: %d\n", err);
    goto fail_bend;
  }
  ci_log("our end of the event channel is %d\n", bend_data->channel);

  /* Likewise the event channel used for network packets. */
  err = xenbus_bind_evtchn(dev, net_chan, &bend_data->net_irq);
  if (err) {
    ci_log("failed to bind net event channel: %d\n", err);
    goto fail_bend;
  }

  /* Set up the shared page. */
  bend_data->shared_page = ef_map_grants_contig(dev, grants, 2, 
                                                &bend_data->sh_pages_unmap);

  if (bend_data->shared_page == NULL) {
    ci_log("failed to map shared page for %s\n", dev->otherend);
    /* No error code comes back from the mapping call, so supply one;
     * otherwise we would report success after freeing the bend. */
    err = -ENOMEM;
    goto fail_bend;
  }

  /* Initialise the shared page(s) used for comms */
  ef_msg_init_page(bend_data->shared_page, PAGE_SIZE);

  /* The second mapped page is split in half: the first half carries the
   * to-domU queue and the second half the from-domU queue. */
  msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct ef_msg);

  ef_msg_init_queue(&bend_data->to_domU, &bend_data->shared_page->queue0,
                    (struct ef_msg *)((__u8*)bend_data->shared_page + PAGE_SIZE),
                    msgs_per_queue);

  ef_msg_init_queue(&bend_data->from_domU, &bend_data->shared_page->queue1, 
                    (struct ef_msg *)((__u8*)bend_data->shared_page + (3 * PAGE_SIZE / 2)),
                    msgs_per_queue);

  /* Note that we will probably get a spurious interrupt when we do this, so 
  * it must not be done until we have set up everything we need to handle it. */
  irq = bind_evtchn_to_irqhandler(bend_data->channel, irq_from_vnic,
                                  SA_SAMPLE_RANDOM, "ef1back", dev);
  if (irq < 0) {
    ci_log("failed to bind event channel to IRQ: %d\n", irq);
    err = irq;
    goto fail_unmap;
  }

  /* Look up the device name in the list of NICs provided by driverlink to
  * get the hardware type. */
  err = ef_bend_hwtype(bend_data);
  if (err)
    goto fail_irq;

  /* Grab ourselves an entry in the forwarding hash table. We do this now so
   * we don't have the embarrassment of sorting out an allocation failure
   * while at IRQ. Because we pass NULL as the context, the actual hash lookup
   * will succeed for this NIC, but the check for somewhere to forward to will
   * fail. */
  err = ef_bend_fwd_add(bend_data->mac, NULL);
  if (err)
    goto fail_irq;

  /* Get the hardware quotas for the VNIC in question.  This is required to
   * succeed: someone must have populated xenbus for us to be running, so
   * there is no good reason for the values not to be there. */
  err = ef_bend_cfg_hw_quotas(dev, bend_data);
  if (err)
    goto fail_fwd;

  /* Say hello */
  ef_bend_vnic_hello(bend_data);
  return err;

  /* Error unwinding, in reverse order of acquisition.
   * NOTE(review): event channels obtained with xenbus_bind_evtchn() are not
   * explicitly released here, and handle_msg work queued by a spurious
   * interrupt may still be pending when the bend is freed -- both need
   * confirming against the xenbus/evtchn API in use. */
 fail_fwd:
  ef_bend_fwd_remove(bend_data->mac);
 fail_irq:
  unbind_from_irqhandler(irq, dev);
 fail_unmap:
  if (bend_data->sh_pages_unmap) {
    ef_hyperop_unmap_contig(dev, bend_data->sh_pages_unmap);
    bend_data->sh_pages_unmap = NULL;
  }
 fail_bend:
  /* A non-NULL node pointer means the watch is registered; it must be
   * unregistered before its path buffer and the bend are freed. */
  if (bend_data->hinfo.domu_watch.node != NULL) {
    unregister_xenbus_watch(&bend_data->hinfo.domu_watch);
    kfree(bend_data->hinfo.domu_watch.node);
    bend_data->hinfo.domu_watch.node = NULL;
  }
  /* Make sure nothing can find the bend once it has been freed. */
  unlink_bend(bend_data);
  dev->dev.driver_data = NULL;
  kfree(bend_data);
 fail0:
  return err;
}



/* Contact point with xenbus when other end changes state.
 *
 *  - Initialised: set up communication with the frontend (setup_vnic) and,
 *    if that worked, create the bridge network device.  Failures are
 *    logged.
 *  - Connected:   logged only.
 *  - Closed:      the bend, if there is one, is marked dead, which
 *    schedules its removal.
 *  - anything else is ignored. */
void bend_vnic_changed(struct xenbus_device *dev, XenbusState vnic_state)
{
  struct ef_bend *bend = (struct ef_bend *)dev->dev.driver_data;
  int err;
  ci_log("bend_vnic_changed called, state now %d.\n", vnic_state);
  switch(vnic_state) {
  case XenbusStateInitialised:
    err = setup_vnic(dev);
    if (err) {
      ci_log("%s: setup_vnic failed for %s: %d", __FUNCTION__,
             dev->nodename, err);
      break;
    }
    /* create the bridge network device; setup_vnic() has just attached
     * the bend to dev, so re-read it rather than using 'bend' from above */
    err = ef_bend_netdev_probe((struct ef_bend *)dev->dev.driver_data);
    if (err)
      ci_log("%s: ef_bend_netdev_probe failed for %s: %d", __FUNCTION__,
             dev->nodename, err);
    break;
  case XenbusStateConnected:
    ci_log("VNIC %s is now connected.\n", dev->otherend);
    break;
  case XenbusStateClosed:
    if (bend) {
      /* In gullible mode we believe the domU has let go of the hardware,
       * so the resources may be released. */
      if(unsafe_unload == UU_GULLIBLE) {
        if(bend->hw_state == EF_RES_ALLOC)
          bend->hw_state = EF_RES_DONE;
      }
      /* This schedules the bend's removal (bend_schedule_remove(), then
       * cleanup_bend() from the workqueue) if not already done */
      ef_bend_mark_dead(bend, -ENOTTY, "domain has gone away");
    }
    break;
  default:
    break;
  }
}

