/*-*-C-*-
 * Copyright 2006  Petter Urkedal
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This file is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef CUFLOW_CACHE_H
#define CUFLOW_CACHE_H

#include <cuflow/fwd.h>
#include <cu/thread.h>
#include <cu/clos.h>

CU_BEGIN_DECLARATIONS
/*!\defgroup cuflow_cache_h cuflow/cache.h: Function Call Cache (unfinished)
 *@{\ingroup cuflow_mod
 */

/* Base-2 logarithm of the number of bins in a cache. */
#define CUFLOWP_CACHE_LOG2_BIN_COUNT 5
/* Number of bins in a cache (2^5 = 32).  This is the length of the bin_arr
 * member of struct cuflow_cache_s. */
#define CUFLOWP_CACHE_BIN_COUNT (1 << CUFLOWP_CACHE_LOG2_BIN_COUNT)

/*!Builds the function code for slot number \a index of a cache whose key is
 * \a key_wsize words long.  The slot number is shifted above the low 16 bits
 * and the key size is added to it, so a \a key_wsize of 0x10000 or more
 * carries into the slot number. */
#define CUFLOW_FNCODE(index, key_wsize) ((key_wsize) + ((index) << 16))

/*!Extracts the key size in words from the function code \a code, that is,
 * the low 16 bits of \a code. */
#define CUFLOW_FNCODE_KEY_SIZEW(code) ((code) & ((1 << 16) - 1))

/*!The slot number of \a code, that is, the bits above the low 16, which is
 * where \ref CUFLOW_FNCODE places its index argument. */
#define CUFLOW_FNCODE_SLOT(code) ((code) >> 16)

/* Casts obj to a header pointer and steps back one element, yielding the
 * address of the struct cuflowP_cacheobjhdr_s located immediately before obj.
 * NOTE(review): presumably only valid for objects obtained from
 * cuflow_cacheobj_new -- confirm against the implementation. */
#define CUFLOWP_CACHEOBJ_HDR(obj) ((cuflowP_cacheobjhdr_t)(obj) - 1)

/* Pointer typedefs for the structs defined in this header. */
typedef struct cuflow_cacheconf_s *cuflow_cacheconf_t;
typedef struct cuflowP_cachebin_s *cuflowP_cachebin_t;
typedef struct cuflow_cache_s *cuflow_cache_t;
typedef struct cuflowP_cacheobjhdr_s *cuflowP_cacheobjhdr_t;
typedef struct cuflow_cacheobj_s *cuflow_cacheobj_t;

/* Configuration parameters of a cache.  A cache refers to one of these
 * through the conf member of struct cuflow_cache_s. */
struct cuflow_cacheconf_s
{
    /* The multiplier for the walltime used in the exponent of the weighting
     * function for the integral over past accesses.  The inverse of this is
     * roughly the period of time over which accesses are summed. */
    float decay_per_walltime_unit;

    /* The cost of the CPU running for one unit of process time. */
    float cost_per_proctime_unit;

    /* The cost of keeping one byte for one cycle. */
    float cost_per_cycle_per_byte;
};

/* One bin of a cache.  struct cuflow_cache_s embeds
 * CUFLOWP_CACHE_BIN_COUNT of these in its bin_arr member. */
struct cuflowP_cachebin_s
{
    /* NOTE(review): presumably guards the remaining fields of this bin --
     * confirm against the implementation. */
    cu_mutex_t mutex;

    /* NOTE(review): presumably the allocated length of link_arr and the
     * number of objects currently stored in the bin -- confirm. */
    size_t cap;
    size_t size;

    /* Array of cache object pointers.  NOTE(review): presumably the heads of
     * chains linked through the next field of the object headers -- confirm. */
    cuflow_cacheobj_t *link_arr;
};

/* A function call cache: a configuration, a fixed number of bins and the
 * callbacks used to compute objects. */
struct cuflow_cache_s
{
    /* The configuration parameters of this cache. */
    cuflow_cacheconf_t conf;

    /* The bins holding the cached objects. */
    struct cuflowP_cachebin_s bin_arr[CUFLOWP_CACHE_BIN_COUNT];

    /* Array of callbacks, each declared to take a cuflow_cacheobj_t key and
     * give a cuflow_cacheobj_t.  NOTE(review): presumably indexed by the slot
     * number encoded in the fncode of a key -- confirm. */
    cu_clop(*fn_arr, cuflow_cacheobj_t, cuflow_cacheobj_t key);
};

/* Header of a cache object.  CUFLOWP_CACHEOBJ_HDR locates it immediately
 * before the address of the object it is applied to. */
struct cuflowP_cacheobjhdr_s
{
    /* NOTE(review): presumably the next object in the same bin chain --
     * confirm against the implementation. */
    cuflow_cacheobj_t next;

    /* NOTE(review): presumably the time of the most recent access and the
     * decayed sum over past accesses (cf. decay_per_walltime_unit in
     * struct cuflow_cacheconf_s) -- confirm. */
    float t_access;
    float access_function;

    /* gain = cost_of_call / size_of_cache_object */
    float gain;
};

/*!The base struct for both cache keys and cache objects.  Typical use is to
 * \ref cu_inherit this in the cache key, and cu_inherit the cache key in the
 * full cache object.  This contains one field, \e fncode, which must be
 * assigned an integer obtained with \ref CUFLOW_FNCODE, which identifies the
 * callback and key size. */
struct cuflow_cacheobj_s
{
    /*!The function code, identifying the slot number of the cache callback
     * and the size of the key, including this struct. */
    cu_word_t fncode;
};

/*!Creates a cache with the configuration \a conf and a unique set of
 * callbacks stored in \a fn_arr.  The cache is passed in as \a cache and
 * nothing is returned.
 * NOTE(review): \a cache presumably points to caller-provided storage, and
 * \a fn_arr is presumably indexed by the slot number of a key's function code
 * (see \ref CUFLOW_FNCODE_SLOT) -- confirm against the implementation. */
void
cuflow_cache_cct(cuflow_cache_t cache, cuflow_cacheconf_t conf,
		 cu_clop(*fn_arr, cuflow_cacheobj_t, cuflow_cacheobj_t key));

/*!\copydoc cuflow_cache_cct
 * This variant takes no cache argument and returns the cache instead. */
cuflow_cache_t
cuflow_cache_new(cuflow_cacheconf_t conf,
		 cu_clop(*fn_arr, cuflow_cacheobj_t, cuflow_cacheobj_t key));

/*!Return a pointer into the data area of a newly allocated cache object.  The
 * cache callbacks must use this to allocate objects and must call \ref
 * cuflow_cacheobj_set_gain before returning them.
 * NOTE(review): \a key is presumably the key the new object is created for,
 * and \a full_size presumably the size in bytes of the full object including
 * its key part -- confirm against the implementation. */
cuflow_cacheobj_t
cuflow_cacheobj_new(cuflow_cacheobj_t key, size_t full_size);

/*!Stores \a gain as the gain of \a obj.  The gain is an estimate of
 * <i>C</i>/<i>S</i>, where <i>C</i> is the cost of computing the returned
 * object in CPU cycles and <i>S</i> is the size of the returned object in
 * bytes, including a fixed overhead of about 5 words.  <i>C</i> may also
 * include the cost of using other resources, multiplied with suitable
 * constants to make them cycle-equivalent according to the desired resource
 * utilization.
 *
 * These quantities are hard to determine precisely.  Available CPU clocks are
 * typically not precise enough to measure <i>C</i>, and computing <i>S</i>
 * may be expensive for tree-structures, or even ambiguous when sharing is
 * involved.  Rule of thumb estimates will therefore have to do.  Some
 * suggestions:
 * <ul>
 *   <li>If the complexity of the computation is linear in the size of
 *   \a obj, then <i>C</i>/<i>S</i> can be taken to be a constant.  Note that
 *   there is no need to know the size of \a obj, since it cancels out.</li>
 *
 *   <li>If the complexity of the computation is quadratic, make an estimate
 *   of the final size of \a obj and multiply with a constant to get
 *   \a gain.  Assuming that the size can be computed in linear time, the real
 *   computation will dominate for sufficiently large input.  Alternatively,
 *   time the computation and use the square root to estimate the object size.
 *   If the time is not granular enough, then neglect the quadratic
 *   behaviour.</li>
 * </ul> */
CU_SINLINE void
cuflow_cacheobj_set_gain(cuflow_cacheobj_t obj, float gain)
{
    /* The gain is kept in the header located just before the object. */
    (*CUFLOWP_CACHEOBJ_HDR(obj)).gain = gain;
}

/*!Return the computed object with key-part equal to \a key.  \a key may be a
 * stack object or static storage.  The callback and key size are determined
 * from the \e fncode field of \a key.  The callback is only called if \a
 * cache does not already contain the requested object. */
cuflow_cacheobj_t
cuflow_cache_call(cuflow_cache_t cache, cuflow_cacheobj_t key);

/*!@}*/
CU_END_DECLARATIONS

#endif
