;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;; ;;;;;;;;
;;;;;; All files in this directory or any subdirectories are ;;;;;;;;
;;;;;; copyright 1997, 1998, 1999, 2000, 2002, 2003. ;;;;;;;;
;;;;;; by Rafael D. Sorkin. All rights reserved. ;;;;;;;;
;;;;;; ;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; bibliotek.general Time-stamp: < 2003-Nov-14 03:03:03 16308.35895 >
; This file contains functions of general utility.
;: Roster of functions defined herein (OR IN SUB-BIBLIOTEKS OF THIS ONE)
; iff (the usual predicate of that name)
;
; firstn (initial segment of a list)
;
; omitnth (a list with the nth element omitted)
;
; merge-lists (merges two ordered lists of numbers)
;
; sort-list (essentially redundant since `sort' is built in)
;
; menge-p
;
; menge%%
; n-menge%
; menge
; n-menge
;
; member-member-p
;
; equal%%
; equal-as-sets
;
; card (merely an alias for `length')
;
; singleton-p
;
; meetp%%
; meetp (asks whether two sets overlap)
;
; disjointp%%
; disjointp
;
; less%%
; less (set difference, relative complement)
; n-less
;
; intersection%%
; intersect (intersection of two sets, using `eq')(CL has `intersection')
; intersect-alt
;
; symmetric-difference%%
; symmetric-difference (CL has `set-exclusive-or')
;
; union%%
; union*/unite (union of two arbitrary lists-as-sets)
; (CL has `union' built-in, hence this name)
; union-m%%/union%%-m
; union-m (union of a list of sets)
; (the `m' stands for `members' or `menge' or `multiple')
;
; subsetp%% (the suffix "%%" connotes versions for sets of symbols)
;
; relate
;
; leaves (the set of "leaves" of a "tree of cons cells")
;
; fibers (forms equvalence classes)
;
; multiplicity (number of times an element occurs in a multiset (list))
; (duplicates the CL function `count')
;
; plist-to-alist
; alist-to-plist
;
; random-draw
;
; shuffle (randomly permutes the elements of a list)
;
; sum-m/sum (sums the elements of a list)
; product-m/product (multiplies the elements of a list)
;
; ! (factorial function of integer argument)
;
; choose (binomial coefficient)
;
; sgn [currently in dialect-specific biblioteks]
;
; square
;
; reciprocal (alias 1/) [currently in bibliotek.elisp.coda]
;
; log_2
;
; sup
; inf
;
; median
; mean
; sigma/std-dev/standard-deviation
; std-err-in-mean
; stats
; stats-xy
; correlator
; combine-stats (commented out)
;
; sharep
; set-of-cells
;
; invert-cons-cell
;
; interpolate
;
;: package specification
; Omit this unless some lisp demands it: (in-package user) (in-package 'user)
;: The functions themselves
(defun iff (x y) " True when x and y are either both nil or both not"
(or
(and x y)
(not (or x y))))
(defun firstn (n A)
" Returns a (new) list comprising the first n elements of A."
;; (declare (type list A) (type fixnum n) (optimize (speed 3)))
(nreverse (nthcdr (- (length A) n) (reverse A))))
;
; This function is almost useless, because you
; can get the same sublist using (subseq A 0 n) (which seems to copy too).
;
; We get a new list because `reverse' is used.
;
; Localization is unnecessary, because nothing is evaluated inside it other
; than the parameters n and A themselves.
(defun omitnth (n L)
" Returns a new list omitting element number n from L"
;; (declare (type list L) (type fixnum n) (optimize (speed 3)))
(append (firstn n L) (nthcdr (1+ n) L) nil))
;
; Comments
;
; This has not proved very useful
;
; The first element is numbered `0'.
;
; The list returned is "new": it shares no cons-cells with L (which would
; not have been the case had `nil' been omitted from the end).
;
; Alternate plan: copy the list first, then splice out by resetting the
; relevant cdr-pointer.
(deff merge-lists (A B)
"\
Merges two NUMERICAL lists which are ASSUMED to be in increasing order. \
"
(varbind
x (car A)
y (car B))
(cond
( (null A) B ) ; return B if A is empty
( (null B) A ) ; or vice versa
( (< x y) (cons x (merge-lists (cdr A) B)))
( t (cons y (merge-lists (cdr B) A)))))
;
; Comments
;
; 1. If extend to more general lists than just numerical, remember to
; localize the internal variables.
;
; 2. In general the merged list will share cons-cells with A and B.
;
; 3. Has not found much use yet
(deff sort-list (A)
"\
Sorts a list of NUMBERS into increasing order.
The built-in function `sort' is (in elisp) much faster (tho destructive). \
"
(declare (optimize (speed 3)))
(cond
((o null cdr A) A) ; case of length 0 or 1
(t ; general case
(varbind N1 (truncate (/ (length A) 2)))
(merge-lists
(sort-list (firstn N1 A))
(sort-list (nthcdr N1 A))))))
;
; Comments
;
; The built in function `sort' is much faster but destructive.
; To use it instead, just copy arg first, if desire to protect it.
;
; In TCL `truncate' is really needed; in elisp its use is only for style,
; since integer division automatically truncates (unfortunately).
;
; In case the list is of length 1 it is not copied.
;
; Could speed this up by not using firstn, which does a lot of
; copying, instead could really break the list in the middle.
;
; Not localized since intended only for numbers
(deff shuffle (L)
" Randomly permutes the elements of a list. Returns a totally new list. "
(&bind-too n x y z)
(loop
;-------------------------
;/Copy list to protect it.
;-------------------------
with L = (copy-list L)
;-----------
;/start loop
;-----------
with length = (length L)
while (< 0 length)
;; do (debug L)
;------------------------------------------
;/Randomly select a cons y of the list,
;------------------------------------------
do (setq n (Random length))
;---------------
;/pluck it out,
;---------------
do
(cond
((= n 0)
(setf
y L
L (cdr L)))
(t
(setf
x (nthcdr (1- n) L) ; the preceding cons
y (cdr x) ; the selected cons
z (cdr y) ; the succeding cons
(cdr x) z)))
;--------------------------------
;/and collect the @ list element.
;--------------------------------
collect (car y)
;-------------------------------
;/Decrement `length' and repeat.
;-------------------------------
do (decf length)))
(deff menge%% (R)
"\
A version of `menge' for lists of lisp SYMBOLS.
It non-destructively returns a new list with all duplications removed.
The ordering is ``stable from the left''.
HATARI If this is interrupted, some plists can be ruined! \
"
;------------------
;/make a new marker
;------------------
(varbind iko (cons nil nil))
;--------------------------------------------------------------------
;/examine each x in R, if x is unmarked, collect it in M and mark it.
;--------------------------------------------------------------------
(varbind M
(loop for x in R
unless (eq (o car symbol-plist x) iko)
collect x
and do (setf (symbol-plist x) (cons iko (symbol-plist x)))))
;---------------------------------
;/erase all the marks and return M
;---------------------------------
(image on M of (setf (symbol-plist $) (o cdr symbol-plist $)))
M)
(defun n-menge (L)
"\
Destructively removes duplicates from a list of lisp objects.
Comparison done with `eq'.
Preserves order from left, assuming `delq' does.
BEWARE not all equal integers are `eq' \
"
(if (null L) ()
(cons (car L) (n-menge (delq (car L) (cdr L))))))
(deff menge (X)
" This is just `n-menge' but it copies the list first to protect it."
(o n-menge copy-list X))
(deff n-menge% (L)
"\
A DESTRUCTIVE version of `menge' for SORTED NUMERICAL lists.
It removes duplicate elements from such a list (or adjacent duplicates from
any numerical list) using only the storage for the list itself. \
"
(varbind cons L)
(while (cdr cons)
(if (= (car cons) (cadr cons))
(setf (cdr cons) (cddr cons))
(setq cons (cdr cons))))
L)
;
; This "zipper" version uses no recursion. It is overall much faster than
; menge% was (except where the list is mostly repetition).
(defun member-member-p (x y)
"\
Returns t if x is an element of an element of y, nil otherwise.
Membership assessed by `member', which tests equality with `equal'.\
"
(cond
((null y) nil)
((atom (car y)) (member-member-p x (cdr y)))
((member x (car y)) t)
((member-member-p x (cdr y)))))
;
; Notes
;
; This version is not paralellizable, as we might want for big sets.
;
; No need for localization, it would seem
(defun singleton-p (S) " True if S is a singleton"
(and
(consp S)
(null (cdr S))))
(deff union-m%% (SS)
"\
A version of `union-m' for sets of SYMBOLS.
The single argument should be a list, each of whose members is a set
of symbols.
Their union is returned, non-destructively, as a new list
ordered ``stably from the left''.
If *carefully* then we check that the elements really are symbols.
HATARI If this is interrupted, some plists may be ruined!
(The ``m'' in the name stands for ``multiple'' or ``menge''.) "
;------------------------------------------------------------------
;/check that the elements are all symbols (if *carefully*)
;------------------------------------------------------------------
(when *carefully*
(loop for S in SS do
(loop for x in S unless (symbolp x) do
(error "Some argument to `union-m%%' is not a list of symbols"))))
;------------------------------------------------------------
;/make an anonymous marker and prepare `U' to hold the union
;------------------------------------------------------------
(varbind iko (cons nil nil))
(varbind U nil)
;----------------------------------------------------------------------
;/examine each element x, if it's unmarked, put it into U and mark it
;----------------------------------------------------------------------
(loop for S in SS do
(loop for x in S do
(unless (eq (o car symbol-plist x) iko)
(push x U)
(setf (symbol-plist x) (cons iko (symbol-plist x))))))
;;(push iko (symbol-plist x))
;----------------------------------------------------------------
;/unmark the elements (each occurs precisely once in U of course)
;----------------------------------------------------------------
(image on U of (setf (symbol-plist $) (o cdr symbol-plist $)))
;------------------------------
;/reverse to get original order
;------------------------------
(nreverse U))
;
; NB It is supposed to be an error to use this on lists with repetitions,
; but no harm should come of it. Any duplications will get eliminated
; automatically.
(defalias 'union%%-m 'union-m%%)
(deff union%% (&rest SS)
"\
The set theoretic union of any number of sets of lisp SYMBOLS.
See `union-m%%' for more info."
(union-m%% SS))
(deff union-m (L)
"\
Accepts a single argument, which should be a list of sets,
and returns the union of those sets (as an entirely new set)
by repeatedly calling `union*'.
The ``m'' in the name stands for ``multiple'' or ``menge''. \
"
(varbind
Y nil
length-L (length L))
(&bind-too j)
(kwa
j from 0 upto length-L
(setq Y (union* Y (nth j L))))
Y)
;
; Could also use `reduce' to do this more compactly, but it does more
; rearrangement (see tests)
; (reduce 'union L)
;
; "Union" might be a nicer name, but you can't use it with case-folding
; lisps (which unfortunately, are most of them)
(deff union* (A B)
"\
Accepts a pair of ``sets'' (lists bila duplications) and returns their
union (as an entirely new set of cons cells).
Equality predicate is `eq'.
The efect is to delete from A all elements shared with B and then form
(append A B).
For sets of symbols, union%% should be much faster. \
"
(cond
((null A) (copy-list B))
((memq (car A) B) (union* (cdr A) B))
(t (cons (car A) (union* (cdr A) B)))))
;
; To get a nicer order, do this:
;
; (reverse (union* (reverse B) (reverse A)))
(defalias 'unite 'union*)
(defalias 'union-of 'unite)
(deff meetp%% (A B)
"\
Asks whether two sets of symbols overlap. If they do, a common element is
returned, otherwise nil.
BEWARE If this is interrupted (eg if you get impatient or if B contains a
non-symbol) it will ruin some of the plists! \
"
(&localize A B x)
;--------------
;/make a marker
;--------------
(varbind mark (cons t nil))
;-----------------------
;/mark the elements of A
;-----------------------
(loop for x in A do (setf (symbol-plist x) (cons mark (symbol-plist x))))
;---------------------------------------------
;/look for a marked element of B, then cleanup
;---------------------------------------------
(prog1
;----------------------------------------------------------------
;/look for a marked element of B and return it if found, else nil
;----------------------------------------------------------------
(loop for x in B if (eq mark (o car (symbol-plist x))) return x)
;-----------------
;/remove the marks
;-----------------
(loop for x in A do (setf (symbol-plist x) (cdr (symbol-plist x))))))
(defun meetp (A B)
"\
Tests whether two sets A and B overlap.
Their elements can be any lisp objects, equality in sense of `eq' "
(cond
((null A) nil)
((memq (car A) B) t)
(t (meetp (cdr A) B))))
(deff disjointp%% (A B)
"\
Are these two sets of symbols disjoint?
BEWARE See warning with `meetp%%' !
"
(&localize A B)
(not (meetp%% A B)))
(defun disjointp (A B) "True iff A and B are disjoint." (not (meetp A B)))
(deff subsetp%% (A B)
"\
An O(N) version of `subsetp' for sets of lisp symbols.
HATARI Will invalidate some plists if interrupted, eg by the occurrence of an
error or by an impatient user.
NOTE Officially A and B must be free of duplications, but even if they are
not, everything should still work and the result should be as if we had first
removed the duplicates. \
"
;--------------
;/make a marker
;--------------
(varbind iko (cons nil nil))
;-----------------------
;/mark all elements of B
;-----------------------
(loop for x in B do (setf (symbol-plist x) (cons iko (symbol-plist x))))
;----------------------------------------------------
;/check A for unmarked elements and return the result
;----------------------------------------------------
(prog1
(loop for x in A
unless (eq (o car symbol-plist x) iko)
return nil
finally (return t))
;---------------------------------
;/clean up: unmark the elts of B
;---------------------------------
(loop for x in B do (setf (symbol-plist x) (o cdr symbol-plist x)))))
;
; REMARK Since this is intended for sets, B should not contain
; duplications. If it does, some of its element symbols will be marked
; twice, but then they'll be unmarked twice at the end, so no harm done.
(deff less%% (A &rest BB)
"\
Relative complement (``set difference'') for sets of lisp SYMBOLS.
The call `(less%% A B C...D)' yields A \\ (B U C U...U D).
Non-destructive, returns new set, preserves order within A.
Equality in the sense of `eq'.
HATARI If it gets interrupted for any reason (eg an error) some of the
elements' plists can have been invalidated. "
;--------------
;/make a marker
;--------------
(varbind iko (cons nil nil))
;-----------------------
;/mark the elements of A
;-----------------------
(loop for x in A do (setf (symbol-plist x) (cons iko (symbol-plist x))))
;---------------------------------
;/unmark the marked elements of BB
;---------------------------------
(loop for B in BB do
(loop for x in B
if (eq (o car symbol-plist x) iko)
do (setf (symbol-plist x) (o cdr symbol-plist x))))
;--------------------------------------------
;/collect and unmark the marked elements of A
;--------------------------------------------
(loop for x in A
if (eq (o car symbol-plist x) iko)
collect x
and
do (setf (symbol-plist x) (o cdr symbol-plist x))))
(defun less (A B &rest C)
"\
Returns A\\B for sets A and B. The surviving elements appear in the same
order as in A and the original lists A and B are unchanged.
Comparison is done with `eq'.
If there are further arguments we treat them as with `-' or `/'. \
"
(cond
((not C)
(if (null A) nil
(if
(memq (car A) B) (less (cdr A) B)
(cons (car A) (less (cdr A) B)))))
(t
(apply (function less) (cons (less A B) C)))))
(defun n-less (A B)
" Destructively returns A\\B for sets A and B, equality in sense of `eq'"
(loop for bw in B do (setq A (delq bw A)))
A)
;
; This is of same speed, but could be useful since not recursive
; aside from that, could be deleted
(deff intersection%% (A B)
"\
The intersection of a pair of sets, A and B, of lisp symbols.
Non-destructive, returns new set, preserves order within A.
Equality in the sense of `eq'.
HATARI If this is interrupted, some plists can be invalidated! \
"
;------------------------------------------------------------------
;/check both args are sets (japo lists) of symbols (if *carefully*)
;------------------------------------------------------------------
;; still to be done
;--------------
;/make a marker
;--------------
(varbind iko (cons nil nil))
;------------------------
;/mark the elements of B
;------------------------
(loop for x in B do (setf (symbol-plist x) (cons iko (symbol-plist x))))
;---------------------------------
;/collect the marked elements of A
;---------------------------------
(varbind C nil)
(loop for x in A if (eq (o car symbol-plist x) iko) do (push x C))
;-------------------------
;/unmark the elements of B
;-------------------------
(loop for x in B do (setf (symbol-plist x) (o cdr symbol-plist x)))
;------------------------------
;/reverse to get original order
;------------------------------
(nreverse C))
;
; REMARK It is an error to use this on lists with repetitions,
; but no harm should come of it.
(deff intersect (A B)
"\
Returns the intersection of two ``sets as lists''.
Comparison done with `eq' (not `equal') \
"
(&bind-too x)
(varbind C nil)
(while A
(setq
x (car A)
A (cdr A))
(if
(memq x B)
(setq C (cons x C))))
(reverse C))
(defun intersect-alt (A B)
" Returns the intersection of two ``sets as lists''."
(less A (less A B)))
;
; This more perspicuous version also works.
; It may be slowed down by copying lists and recursion, and in fact it
; can sometimes be much slower, though often not.
; seems of comparable speed if smaller set comes first (as it clearly should,
; so add this feature if use it!)
(deff symmetric-difference%% (A B)
" Symmetric difference of two sets of lisp symbols"
(append (less%% A B) (less%% B A)))
(defun symmetric-difference (A B)
" Self-explanatory set function. Comparison done however `less' does it."
(append (less A B) (less B A)))
(deff equal%% (A B)
"\
Are these two sets of symbols equal (as sets)?
(Officially A and B must be free of duplications, but if they are not
everything should still work, ie the result should be as if we had first
removed the duplicates.) \
"
(and
(subsetp%% A B)
(subsetp%% B A)))
(defun equal-as-sets (A B)
"\
Self-explanatory set function.
Comparison of elements done however `symmetric-difference' does it. \
"
(not (symmetric-difference A B)))
(deff fibers (S &key ((:key zug) (function identity))
((:test equiv) (function eq)))
"\
The equivalence classes into which the set S is fibered,
as specified by :key and :test
(the defaults being :key = `identity' and :test = `eq').
The order of elements in S is preserved in the fibers.
Remark: S can be a multiset, and then the fibers will be too. \
"
(&localize S zug equiv fibres x zug-x @matching-fibre f)
(&bind-too zug-x @matching-fibre)
(varbind fibres nil)
;------------------------
;/construct the fibration
;------------------------
(loop for x in S do
(setq zug-x (funcall zug x))
;---------------------------
;/is there a matching fibre?
;---------------------------
(setq @matching-fibre
(loop for f on fibres
if (funcall equiv zug-x (funcall zug (caar f)))
return f))
;------------------------------------------------
;/if so, add x to it, else make a new fibre for x
;------------------------------------------------
(if @matching-fibre (push x (car @matching-fibre)) (push (list x) fibres)))
;------------------------------------
;/reorder the elements and the fibers
;------------------------------------
(nreverse (mapcar (function nreverse) fibres)))
;
; We take each member of S in turn. If it has a matching fibre we add it to
; that fibre, otherwise we make a new fibre for it.
(deff relate (R x &key (test 'equal))
"\
Apply the RELATION R to the element x. That is, we return the set
(NOT the multiset) of all y such that (x . y) is an element of R.
Arguments:
R x &key test [default: equal]
"
(setq R (Member x R :key (function car) :test test))
(when R (adjoin (cdar R) (relate (cdr R) x))))
(defun leaves (x)
"\
The set of leaves of a so-called ``tree of cons-cells''.
If x is a cons cell then (leaves x) => a list of the ``leaves'' at the ends
of the ``branches'' of the tissue of cons cells originating with x.
If x is not a cons cell [and is not the symbol nil] then (leaves x) => (x).
If x is the symbol `nil' then (leaves x) => ().
[The symbol `nil' can never occur as a leaf (in elisp and TCL anyhow), since
it is identified (in these lisp dialects anyhow) with the empty list `()', and
it is also used to mark the end of a list.]
REMARK Although a tissue of cons cells is often called a ``tree'' it in general
is not a tree in the graph theoretic sense of the word. \
"
(cond
((null x) nil)
((atom x) (list x))
(t
(union*
(leaves (car x))
(leaves (cdr x))))))
(deff multiplicity (x M)
"\
The multiplicity of x in the list (or multiset, if you will) M.
Equality in sense of `eq' (not `eql').
This essentially the CL function `count'. \
"
(loop for y in M count (eq x y)))
;
; This duplicates `count' but with less flexibility. However, it's much
; faster in gcl (for some reason).
; (deff sgn (x)
; "\
; The signum function, returning -1 0 1
; This turns out to coincide with the builtin `signum' (except for complex
; arguments in the case of TCL). \
; "
; (cond
; ((> x 0) 1)
; ((< x 0) -1)
; (t 0)))
(defun square (x) " (square x) == (* x x)" (* x x))
(defun log_2 (x) " Base 2 logarithm" (log (Float x 1.0) 2.0))
;
; The `Float' and the 2.0 are for cmucl nuisance, elisp doesn't need it!
;:: The function `!'
; The elisp version is in bib.float rather than here, because it converts its
; argument to float. The TCL versions are each in their own
; sub-sub-bibliotek. Since `!' is built into some TCL implementations, we
; don't define it commonly for all. (It seems not to be present in CLtL1,
; cmucl or gcl, but Clisp definitely does have it.)
;:: Separate definitions of some arithmetic functions for different lisps
; Here, we define some arithmetic fcns differently in elisp, in order to
; circumvent three problems: first and foremost the "large integer problem"
; (i.e. that integer overflow goes undetected!) and second the stupidity that
; integer division is truncated in elisp (as in fortran and `C'. This seems
; wrong japo elisp -- unlike TCL -- lacks rationals.) To avoid these elisp
; problems, we convert integers to floats where it seems appropriate. See also
; some similar defs in bibliotek.elisp.el. The third problem is with gcl,
; which limits length of arg lists, so we avoid using `apply' for it.
; Actually, it might be we could quitar this problem by raising the value of
; `call-arguments-limit'. If so we could use the same defs for gcl as for
; cmucl.
; In following must add appropriate defs for dialects besides elisp, cmucl and
; GCL (eg for Clisp SCL)
;;; NB GCL def's have gone to their own sub-bibliotek. Perhaps all TCL defs
;;; should, to make sure they will be compiled
;::: definitions of `sum-m' and `product-m'
(when *elisp*
(deff product-m (X)
"\
The argument should be a list of numbers.
We convert them to float (sigh) and return their product."
(apply (function *) (mapcar 'float X)))
(defun sum-m (X)
" Adds up the numbers in a list (beware integer wrapping!)"
(apply (function +) X))
(defalias 'product 'product-m)
(defalias 'sum 'sum-m))
;::: definitions of `sup' and `inf'
(when *elisp*
(defsubst sup (seq)
"\
Maximum of the numbers in a sequence. This is just like `max' but
instead of multiple arguments it takes a single sequence."
(apply (function max) (coerce seq 'list)))
(defsubst inf (seq)
"\
Minimum of the numbers in a sequence. This is just like `min' but
instead of multiple arguments it takes a single sequence."
(apply (function min) (coerce seq 'list))))
;:: Some statistical functions
(deff median (L)
"\
Median of a list of numbers: if N is odd we take the middle value, if N is
even we take the mean of the two middle values.
(If the list is empty, we return nil.) \
"
(varbind N (length L))
(setq L (sort (copy-list L) '<))
;-------------------------------------
; handle odd and even cases separately
;-------------------------------------
(cond
((zerop N) nil)
((oddp N) (nth (/ (1- N) 2) L))
(else
(mean
(list
(nth (/ N 2) L)
(nth (1- (/ N 2)) L))))))
(when *elisp*
(defun mean (L) " The arithmetic mean of a list of numbers (elisp version)"
(setq L (mapcar 'float L))
(ratio
(sum-m L)
(length L))))
(deff sigma (L) " The ``sample standard deviation'' of a list of numbers"
(varbind
N (length L)
mu (mean L)
sigma (mean (image of (square (- $ mu)) on L))
r (ratio N (1- N)))
(sqrt (* r sigma)))
;
(defalias 'std-dev 'sigma)
(defalias 'standard-deviation 'sigma)
(defun std-err-in-mean (L)
"\
The ``std error in the mean'' of a list of numbers, defined as the
sample standard deviation divided by sqrt N. \
"
(ratio (std-dev L) (sqrt (length L))))
(deff stats (x)
"\
The argument should be a list of numbers (regarded as independent samplings
of some random variable x). We return a plist containing a grab bag of
statistics derived from the list. The main plist indicators
are
N mean variance sigma U-mean U-var U-sigma
and have
obvious meanings: `N' is sample size, while `mean,' `variance', `sigma'
estimate the corresponding parameters of the underlying probability
distribution of x. For example the underlying variance, var(x):=C(x,x),
is estimated as the mean-square variation in the list, weighted by the
standard factor of N/N-1.
The U-prefix is short for ``uncertainty in'' (or ``standard error in'', to
use another term). For example `U-mean' means ``stderr-in-the-mean''.
In addition to these statistics, we return in raw form the second third
and fourth sample moments about the sample mean as `P2' `P3' and `P4'.
They can be useful for further processing.
More explanation of U-var and U-sigma:
The new statistic `U-var' is an estimate of the amount by which the estimated
variance deviates from the true variance C(x,x). This estimate is slightly
on the conservative side for finite sample size N, but ``asymptotically just''
as N--> infty. More precisely, let var-est be our estimated variance, let
var-true be the true variance, and let U := (var-est - var-true)^2 be the
squared error in our estimate. We employ an estimator of U whose expectation
value is asymptotically equal to that of U (thus equal to the the mean square
fluctuation of var-est about var-true), and greater than it for finite N.
This estimator is
_________________
N^2 ____ 2
-------------- ( x^x^ - x^x^ )
(N-1) (N-2)^2
where x^ = x - {bar x} and bar denotes sample mean. Our value
for U-var is then the square root of this formula.
This approach should yield reasonable results in general, though it clearly
can be ``way off'' for certain cooked up data (eg one can easily concoct
data for which U-var = 0).
The estimator U-sigma is a bit dodgy, being just trivially derived
from U-var. It is sensible when U-var << variance, but arguably too small
otherwise. You might want to double it in that case. \
"
;-------------------------------------------
;/ count data points and signal error if < 3
;-------------------------------------------
(varbind
N_points (length x) ; number of data points
N (* 1.0 N_points)) ; the same number as a float
(if
(< N_points 3)
(error "Can't compute all the statistics with fewer than 3 points"))
;------------------------------
;/ define some local functions
;------------------------------
(fbind minus (X y) (image on X of (- $ y))) ; subtract y from every elt of X
(fbind bar (L) (/ (sum L) N)) ; sample mean
(fbind cube (x) (* x x x))
(fbind quart (x) (o square square x))
;----------------------------------------------
;/ compute the statistics (recall N is a float)
;----------------------------------------------
(varbind
N-1 (1- N)
N-2 (1- N-1)
N/N-1 (/ N N-1) ; the ubiquitous correction factor
fac2 (/ (square N) N-1 N-2 N-2) ; another correction factor
xbar (bar x) ; sample mean of x, bar(x)
x^ (minus x xbar) ; x-hat = x - xbar
x^sqr (mapcar #'square x^)
Pxx (bar x^sqr) ; 2nd sample moment about mean
Pxxx (bar (mapcar #'cube x^)) ; third sample moment about mean
Pxxxx (bar (mapcar #'quart x^)) ; fourth sample moment about mean
Cxx (* N/N-1 Pxx) ; estimates (variance x)
sigma (sqrt Cxx) ; estimates (std-deviation x)
U-var (sqrt ; uncertainty of (variance x) estimate
(*
fac2
(bar (mapcar #'square (minus x^sqr (bar x^sqr))))))
U-sigma (max
(-
(sqrt(+ Cxx U-var))
sigma)
(-
sigma
(sqrt(max 0 (- Cxx U-var))))))
; ^^^^^ protection for sqrt
;-----------------------
;/ return the statistics
;-----------------------
(list
'N N_points ; return an integer, not a float
'mean xbar
'U-mean (sqrt (/ Cxx N))
'sigma sigma
'U-sigma U-sigma
'variance Cxx
'U-var U-var
'P2 Pxx
'P3 Pxxx
'P4 Pxxxx))
;
; Notes
;
; It may be that the sqrt above doesn't really need its "protection",
; because its arg can never really be negative. Modulo arithmetic, the
; condition is that <= (N^2-3N+3)/(N-1) for any vble y of
; mean zero and N>2), so for big N it's almost certain.
;
; We compute the third moment p(xxx) even though it isn't needed for any of
; the other statistics. We do need it in order to combine two samples, and
; it's also useful in general, eg to test for Gaussian.
;
; Changing N to a float by (* 1.0 N) is equivalent to using (Float N 0.0)
; We have to do this for elisp to avoid the integer division problem, and
; even in TCL we really wouldn't want to be computing sigma as a rational
; number.
;
; Following is more efficient, but no longer used for reason given.
; U-var (sqrt ; uncertainty of (variance x) estimate
; (*
; fac2
; (- Pxxxx (square Pxx)))) ;;; can be negative due to roundoff!
;;; Next probably has same trouble stats had with roundoff error fix it in same
;; way!
(deff stats-xy (x y)
"\
Input is a pair of numerical lists regarded as corresponding samples
of the random variables x and y. We return a plist beginning with
Cxy
UCxy
where Cxy estimates the correlator
C(x,y):=- ,
and UCxy
estimates the uncertainty in our estimate of C(x,y) (and may be called
the ``standard error of Cxy''. Additionally, the plist contains N and the
raw sample moments (about the sample means), Pxy, Pxxy, Pxyy, Pxxyy.
The moments Pxxxy and Pxyyy are NOT given.
The moments Pxx, Pxxx, Pxxxx and (x-->y) are not given either,
but they can be obtained from `stats'.
For the correlator C(x,y) we use the standard unbiased
estimate
N __ _ _
------- ( xy - x y ) (C-est)
N - 1
where bar
denotes sample mean. To estimate the uncertainty UC in C-est we first
estimate {UC}^2 := (variance C-est). In order that this latter estimate be
identically non-negative, we use a formula which is slightly biased in a
conservative direction: its expectation is slightly greater than the true
value of (variance C-est). The bias is only O(1/N) however. This formula is
the following expression (where x^ := x - x-bar, and similarly for y^):
_________________
N^2 ____ 2
-------------- ( x^y^ - x^y^ )
(N-1) (N-2)^2
Finally, to get UC itself, we just take sqrt of this expression. \
"
;--------------------------------------
;/ define the auxiliary function `hat'
;--------------------------------------
(fbind hat (w) "deviation of w from its mean"
(varbind wbar (mean w))
(image on w of (- $ wbar)))
;-----------------------------------------------
;/ count the data points and signal error if < 3
;-----------------------------------------------
(varbind
N_points (length x)
N (float N_points)) ; to avoid integer division in elisp
(if
(< N 3)
(error
(concat
"Can't compute all the requested correlation "
"coefficients with < 3 points.")))
;------------------------------------
;/ check that x and y have same length
;------------------------------------
(unless (= N (length y))
(error "Can't compute correlator for lists of unequal length."))
;------------------------------------------------------------
;/ compute x^ y^ etc. and then the desired statistics
;------------------------------------------------------------
(varbind
N/N-1 (/ N (- N 1))
fac2 (/
(square N)
(- N 1)
(square (- N 2)))
x^ (hat x)
y^ (hat y)
x^y^ (map 'list (function *) x^ y^)
Pxy (mean x^y^)
Pxxy (mean (map 'list (function *) x^ x^y^))
Pxyy (mean (map 'list (function *) x^y^ y^))
Pxxyy (mean (map 'list (function *) x^y^ x^y^))
Cxy (* N/N-1 Pxy)
UCxy (sqrt (* fac2 (- Pxxyy (square Pxy)))))
;-------------------------------------------------------------
;/ return Cxy and its "std-err", as well as certain moments
;-------------------------------------------------------------
(list
'Cxy Cxy
'UCxy UCxy
'Pxy Pxy
'Pxxy Pxxy
'Pxyy Pxyy
'Pxxyy Pxxyy
'N N))
;
; Things like Pxxy are sample moments, things like Cxy are correlators.
(defun correlator (x y)
" The correlator of two lists of numbers (see stats-xy)"
(getf (stats-xy x y) 'Cxy))
(deff random-draw (seq) "Select at random one element of a sequence"
(elt seq (o Random length seq)))
(deff set-of-cells (L)
"\
The argument should be a nil-terminated and non-circular list L.
We return a list whose members are the cells of L.
"
(o assert listp L)
(if (null L) nil (cons L (set-of-cells (cdr L)))))
;
; Now re-written to be safer. Could be slower, but would it ever be used st
; speed would matter?
(defun sharep (A B)
" Do these two (undotted and non-circular) lists share cons-cells?"
(meetp (set-of-cells A) (set-of-cells B)))
(defun plist-to-alist (P)
" Converts a plist to an alist (checking that its length is even)"
(assert (o evenp length P)
nil "Argument not a valid plist, has odd length of %d." (length P))
(if (null P) nil
(cons
(cons (car P) (cadr P))
(plist-to-alist (cddr P)))))
(defun alist-to-plist (A) " Converts an alist to a plist"
(if (null A) nil
(append
(list (caar A) (cdar A))
(alist-to-plist (cdr A)))))
(defun invert-cons-cell (cell)
" (X . Y) --> (Y . X) Apparently not in CLtL1 !"
(cons (cdr cell) (car cell)))
(deff interpolate (x1 y1 x2 y2 x)
" Find y, assuming that (x1 y1) (x2 y2) and (x y) lie on a straight line"
(ratio
(+
(* (- x1 x) y2)
(* (- x x2) y1))
(- x1 x2)))
(deff choose (n &optional m &rest D)
"\
Generalized binomial coefficient for INTEGER args.
(It should work now for negative integer arguments as well, at least in the
simple binomial case.)
What is computed depends on the number of arguments:
if 1 arg then return factorial
if 2 args n m then return ``n choose m''
if 3 or more args n a b ... c then return n!/(a! b!...c!)
BEWARE Elisp may not yield an exact integer if the arguments are too big. "
;-------------------------------------
;/check that all the args are integers
;-------------------------------------
(assert (integerp n))
(if m (assert (integerp m)))
(ewe of (assert (integerp $)) on D)
;--------------
;/various cases
;--------------
(cond
;----------------
;/single argument
;----------------
((not m) (! n))
;--------------
;/two arguments
;--------------
((not D)
;--------------------------------------------------------
;/let `a' be the bigger of m and n-m with `b' the smaller
;--------------------------------------------------------
(varbind
n-m (- n m)
a (max m n-m)
b (min m n-m))
;----------------------------------------------
;/different cases according to signs of n a b
;----------------------------------------------
(case (sgn n)
;---------
;/n = 0
;---------
((0) (if (= a 0) 1 0))
;-----------------------
;/n > 0 (three subcases)
;-----------------------
((+1)
(case (sgn b)
((0) 1)
((-1) 0)
((+1)
(/ (product (loop for k from (1+ a) to n collect k)) (! b)))))
;-----------------------
;/n < 0 (three subcases)
;-----------------------
((-1)
(case (sgn a)
((-1) 0)
((0) 1)
((+1)
;-------------------------
;/convert to case of n > 0
;-------------------------
(varbind
-n (- n)
sign (if (evenp a) 1 -1))
; (assert (natnump -n))
(* sign (choose (atl -n + a - 1) a)))))))
;------------------------
;/three or more arguments
;------------------------
(t
(assert (= n (sum (cons m D))) nil "Invalid arguments to `choose'")
(*
(choose n (- n m))
(apply (function choose) (- n m) D)))))
;
; NOTES
; The various cases and subcases could probably be combined somewhat, but
; this is convenient and perspicuous
;
; KUFANYA
; 1. Trap negative args in D too (or do we need to?)
; 2. Allow 1/2 integer args?
;
; Should this just be called `!' itself, or should we reserve that name
; for (! n m) = n(n-1)(n-2)...(n-m+1)?
(deff menge-p (S &key test)
"\
SYNOPSIS (menge-p S :test)
Is S a list bila repetitions?
The keyword :test specifies the equality predicate to use.
It defaults to `eq' (NOT `equal' or `eql'). \
"
(and
(listp S)
(cond
;--------------------------------
;/case where comparson is by `eq'
;--------------------------------
((or (not test) (eq test #'eq) (eq test 'eq))
(loop for cell on S
never (memq (car cell) (cdr cell))))
;---------------------------------------------------
;/elisp case where comparson is by `equal'
;---------------------------------------------------
((and *elisp* (or (eq test #'equal) (eq test 'equal)))
(loop for cell on S
never (member (car cell) (cdr cell))))
;----------------------------------------------
;/non-elisp case where comparson is not by `eq'
;----------------------------------------------
(else
(loop for cell on S
never (Member (car cell) (cdr cell) :test test))))))
; (defalias 'setp 'menge-p)
(defalias 'card 'length)
;: End