% COLT proceedings titles, one macro per year.
@string{colt88 =
  {Proceedings of the 1988 Workshop on Computational Learning Theory}}
@string{colt89 =
  {Proceedings of the Second Annual Workshop on Computational Learning Theory}}
@string{colt90 =
  {Proceedings of the Third Annual Workshop on Computational Learning Theory}}
@string{colt91 =
  {Proceedings of the Fourth Annual Workshop on Computational Learning Theory}}
@string{colt92 =
  {Proceedings of the Fifth Annual ACM Workshop on Computational Learning Theory}}
@string{colt93 =
  {Proceedings of the Sixth Annual ACM Conference on Computational Learning Theory}}
@string{colt94 =
  {Proceedings of the Seventh Annual ACM Conference on Computational Learning Theory}}
@string{colt95 =
  {Proceedings of the Eighth Annual Conference on Computational Learning Theory}}
@string{colt96 =
  {Proceedings of the Ninth Annual Conference on Computational Learning Theory}}
@string{colt97 =
  {Proceedings of the Tenth Annual Conference on Computational Learning Theory}}
@string{colt98 =
  {Proceedings of the Eleventh Annual Conference on Computational Learning Theory}}
@string{colt99 =
  {Proceedings of the Twelfth Annual Conference on Computational Learning Theory}}
@string{colt00 =
  {Proceedings of the Thirteenth Annual Conference on Computational Learning Theory}}
@string{colt01 =
  {Proceedings 14th Annual Conference on Computational Learning Theory and
   5th European Conference on Computational Learning Theory}}
@string{colt02 =
  {15th Annual Conference on Computational Learning Theory}}
@string{colt04 =
  {17th Annual Conference on Learning Theory}}

% EuroCOLT proceedings titles.
@string{eurocolt93 =
  {Computational Learning Theory: EuroCOLT '93}}
@string{eurocolt95 =
  {Computational Learning Theory: Second European Conference, EuroCOLT~'95}}
@string{eurocolt99 =
  {Computational Learning Theory: Fourth European Conference, EuroCOLT~'99}}

% FOCS proceedings titles, one macro per year.
@string{focs79 =
  {20th Annual Symposium on Foundations of Computer Science}}
@string{focs80 =
  {21st Annual Symposium on Foundations of Computer Science}}
@string{focs81 =
  {22nd Annual Symposium on Foundations of Computer Science}}
@string{focs82 =
  {23rd Annual Symposium on Foundations of Computer Science}}
@string{focs83 =
  {24th Annual Symposium on Foundations of Computer Science}}
@string{focs84 =
  {25th Annual Symposium on Foundations of Computer Science}}
@string{focs85 =
  {26th Annual Symposium on Foundations of Computer Science}}
@string{focs86 =
  {27th Annual Symposium on Foundations of Computer Science}}
@string{focs87 =
  {28th Annual Symposium on Foundations of Computer Science}}
@string{focs88 =
  {29th Annual Symposium on Foundations of Computer Science}}
@string{focs89 =
  {30th Annual Symposium on Foundations of Computer Science}}
@string{focs90 =
  {31st Annual Symposium on Foundations of Computer Science}}
@string{focs91 =
  {32nd Annual Symposium on Foundations of Computer Science}}
@string{focs92 =
  {33rd Annual Symposium on Foundations of Computer Science}}
@string{focs93 =
  {34th Annual Symposium on Foundations of Computer Science}}
@string{focs94 =
  {35th Annual Symposium on Foundations of Computer Science}}
@string{focs95 =
  {36th Annual Symposium on Foundations of Computer Science}}
@string{focs96 =
  {37th Annual Symposium on Foundations of Computer Science}}

% SODA proceedings titles.
@string{soda90 =
  {Proceedings of the First Annual ACM-SIAM Symposium on Discrete Algorithms}}
@string{soda91 =
  {Proceedings of the Second Annual ACM-SIAM Symposium on Discrete Algorithms}}

% STOC proceedings titles, one macro per year.  All ordinals are hyphenated
% and every title ends in "Symposium on Theory of Computing" so that the
% rendered venue name is uniform across years.
@string{stoc80 = {Proceedings of the Twelfth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc81 = {Proceedings of the Thirteenth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc82 = {Proceedings of the Fourteenth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc83 = {Proceedings of the Fifteenth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc84 = {Proceedings of the Sixteenth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc85 = {Proceedings of the Seventeenth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc86 = {Proceedings of the Eighteenth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc87 = {Proceedings of the Nineteenth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc88 = {Proceedings of the Twentieth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc89 = {Proceedings of the Twenty-First Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc90 = {Proceedings of the Twenty-Second Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc91 = {Proceedings of the Twenty-Third Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc92 = {Proceedings of the Twenty-Fourth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc93 = {Proceedings of the Twenty-Fifth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc94 = {Proceedings of the Twenty-Sixth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc95 = {Proceedings of the Twenty-Seventh Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc96 = {Proceedings of the Twenty-Eighth Annual ACM Symposium on
		  Theory of Computing}}
@string{stoc97 = {Proceedings of the Twenty-Ninth Annual ACM Symposium on
		  Theory of Computing}}

% International Conference on Machine Learning proceedings titles.
% Values carry no leading or trailing blanks, so they concatenate cleanly.
@string{ml94 = {Machine Learning: Proceedings of the Eleventh
		International Conference}}
@string{ml95 = {Proceedings of the Twelfth International Conference
		on Machine Learning}}
@string{ml96 = {Machine Learning: Proceedings of the Thirteenth
		International Conference}}
@string{ml97 = {Machine Learning: Proceedings of the Fourteenth
		International Conference}}
@string{ml98 = {Machine Learning: Proceedings of the Fifteenth
		International Conference}}
@string{ml99 = {Machine Learning: Proceedings of the Sixteenth
		International Conference}}
@string{ml00 = {Proceedings of the Seventeenth
		International Conference on Machine Learning}}
@string{ml02 = {Proceedings of the Nineteenth
		International Conference on Machine Learning}}
		  
% ECML and AAAI proceedings titles.
@string{ecml94 =
  {Machine Learning: ECML-94}}
@string{aaai97 =
  {Proceedings of the Fourteenth National Conference on Artificial Intelligence}}
@string{aaai99 =
  {Proceedings of the Sixteenth National Conference on Artificial Intelligence}}
		  
% Journals, institutions and a few one-off venues.
@string{anprob    = {Annals of Probability}}
@string{anappprob = {Annals of Applied Probability}}
@string{annstat   = {The Annals of Statistics}}
@string{cacm      = {Communications of the ACM}}
@string{eurosam79 = {Symbolic and Algebraic Computation}}
@string{icalp92   =
  {Automata, Languages and Programming: 19th International Colloquium}}
@string{ijcai85   =
  {Proceedings of the 9th International Joint Conference on Artificial Intelligence}}
@string{infcomp   = {Information and Computation}}
@string{infctrl   = {Information and Control}}
@string{ieeeit    = {IEEE Transactions on Information Theory}}
@string{jacm      = {Journal of the Association for Computing Machinery}}
@string{jair      = {Journal of Artificial Intelligence Research}}
@string{jcss      = {Journal of Computer and System Sciences}}
@string{jmlr      = {Journal of Machine Learning Research}}
@string{mit       = {Massachusetts Institute of Technology}}
@string{mitlcs    = {MIT Laboratory for Computer Science}}
@string{ml        = {Machine Learning}}
@string{sicomp    = {SIAM Journal on Computing}}
@string{symcomp   = {Journal of Symbolic Computation}}
@string{tcs       = {Theoretical Computer Science}}
@string{ucsccrl   =
  {University of California Santa Cruz, Computer Research Laboratory}}

% NIPS volume titles, plus the editor list for volume 5.
@string{nips5    = {Advances in Neural Information Processing Systems 5}}
@string{nips7    = {Advances in Neural Information Processing Systems 7}}
@string{nips8    = {Advances in Neural Information Processing Systems 8}}
@string{nips10   = {Advances in Neural Information Processing Systems 10}}
@string{nips11   = {Advances in Neural Information Processing Systems 11}}
@string{nips12   = {Advances in Neural Information Processing Systems 12}}
@string{nips13   = {Advances in Neural Information Processing Systems 13}}
@string{nips14   = {Advances in Neural Information Processing Systems 14}}
@string{nips15   = {Advances in Neural Information Processing Systems 15}}
@string{nips5eds =
  {Stephen Jos\'e Hanson and Jack D. Cowan and C. Lee Giles}}
		  
% SIGIR proceedings titles, one macro per year.
@string{sigir88 =
  {Proceedings of the 11th Annual International ACM SIGIR Conference on
   Research and Development in Information Retrieval}}
@string{sigir94 =
  {Proceedings of the 17th Annual International ACM SIGIR Conference on
   Research and Development in Information Retrieval}}
@string{sigir95 =
  {Proceedings of the 18th Annual International ACM SIGIR Conference on
   Research and Development in Information Retrieval}}
@string{sigir96 =
  {Proceedings of the 19th Annual International ACM SIGIR Conference on
   Research and Development in Information Retrieval}}
@string{sigir97 =
  {Proceedings of the 20th Annual International ACM SIGIR Conference on
   Research and Development in Information Retrieval}}
@string{sigir99 =
  {Proceedings of the 22nd Annual International ACM SIGIR Conference on
   Research and Development in Information Retrieval}}
		  
		  
% Publisher name macro.
@string{wiley = {John Wiley \& Sons}}

@inproceedings{Abe89,
  author    = {Abe, Naoki},
  title     = {Polynomial Learnability of Semilinear Sets (Extended Abstract)},
  booktitle = colt89,
  month     = jul,
  year      = 1989,
}

@inproceedings{AbeMa98,
  author    = {Abe, Naoki and Mamitsuka, Hiroshi},
  title     = {Query Learning Strategies using Boosting and Bagging},
  booktitle = ml98,
  year      = {1998},
}

@inproceedings{AbeTaWa91,
  author    = {Naoki Abe and {Jun-ichi} Takeuchi and Manfred K. Warmuth},
  title     = {Polynomial Learnability of Probabilistic Concepts with
               Respect to the {K}ullback-{L}eibler Divergence},
  booktitle = colt91,
  pages     = {277--289},
  month     = aug,
  year      = 1991,
}

@article{AbeWa92,
  author  = {Abe, Naoki and Warmuth, Manfred K.},
  title   = {On the Computational Complexity of Approximating Distributions
             by Probabilistic Automata},
  journal = ml,
  volume  = {9},
  number  = {2--3},
  pages   = {205--260},
  year    = {1992},
}

@inproceedings{AbneyScSi99,
  author    = {Abney, Steven and Schapire, Robert E. and Singer, Yoram},
  title     = {Boosting applied to tagging and {PP} attachment},
  booktitle = {Proceedings of the Joint SIGDAT Conference on Empirical
               Methods in Natural Language Processing and Very Large Corpora},
  year      = {1999},
}

% NOTE(review): journal, volume, number, pages and year were added from
% memory to satisfy the article entry type; confirm against the publication.
@article{AdlerPr92,
  author  = {Michael Adler and Bhaskar Prasad},
  title   = {On universal currency hedges},
  journal = {Journal of Financial and Quantitative Analysis},
  volume  = 27,
  number  = 1,
  pages   = {19--38},
  year    = 1992,
}

@inproceedings{AielloMi91,
  author    = {Aiello, William and Mihail, Milena},
  title     = {Learning the {F}ourier Spectrum of Probabilistic Lists and Trees},
  booktitle = soda91,
  month     = jan,
  year      = {1991},
}

@article{AizermanBrRo64,
  author  = {M. A. Aizerman and E. M. Braverman and L. I. Rozonoer},
  title   = {Theoretical foundations of the potential function
             method in pattern recognition learning},
  journal = {Automation and Remote Control},
  volume  = 25,
  pages   = {821--837},
  year    = 1964,
}

@inproceedings{AldousVa90,
  author    = {Aldous, David and Vazirani, Umesh},
  title     = {A {M}arkovian Extension of {V}aliant's Learning Model},
  booktitle = focs90,
  pages     = {392--404},
  month     = oct,
  year      = {1990},
}

@inproceedings{AleliunasKaLiLoRa79,
  author    = {Aleliunas, Romas and Karp, Richard M. and Lipton, Richard J.
               and Lov\'asz, Laszlo and Rackoff, Charles},
  title     = {Random Walks, Universal Traversal Sequences, and the
               Complexity of Maze Problems},
  booktitle = focs79,
  pages     = {218--223},
  month     = oct,
  year      = {1979},
}

% NOTE(review): journal, volume, number, pages and year were added from
% memory to satisfy the article entry type; confirm against the publication.
@article{Algoet92,
  author  = {Paul Algoet},
  title   = {Universal schemes for prediction, gambling and
             portfolio selection},
  journal = anprob,
  volume  = 20,
  number  = 2,
  pages   = {901--941},
  year    = 1992,
}

@article{Algoet94,
  author  = {Algoet, Paul H.},
  title   = {The Strong Law of Large Numbers for Sequential Decisions
             Under Uncertainty},
  journal = ieeeit,
  volume  = {40},
  number  = {3},
  pages   = {609--633},
  month   = may,
  year    = {1994},
}

@article{AlgoetCo88,
  author  = {P. Algoet and T. M. Cover},
  title   = {Asymptotic optimality and asymptotic equipartition
             property of log-optimal investment},
  journal = anprob,
  volume  = 16,
  pages   = {876--898},
  year    = 1988,
}

@article{AllweinScSi00,
  author  = {Erin L. Allwein and Robert E. Schapire and Yoram Singer},
  title   = {Reducing multiclass to binary:
             A unifying approach for margin classifiers},
  journal = jmlr,
  volume  = 1,
  pages   = {113--141},
  year    = 2000,
}

@book{AlonSp92,
  author     = {Alon, Noga and Spencer, Joel H.},
  title      = {The probabilistic method},
  publisher  = {John Wiley \& Sons Inc.},
  address    = {New York},
  year       = {1992},
  pages      = {xvi+254},
  note       = {With an appendix by Paul Erd\H os,
                A Wiley-Interscience Publication},
  isbn       = {0-471-53588-5},
  mrclass    = {60-02 (05C80 11K99 60C05)},
  mrnumber   = {93h:60002},
  mrreviewer = {Bert Fristedt},
}

% NOTE(review): journal, volume, number and pages were added from memory to
% satisfy the article entry type; confirm against the publication.
@article{AmitGe97,
  author  = {Yali Amit and Donald Geman},
  title   = {Shape quantization and recognition with randomized trees},
  journal = {Neural Computation},
  volume  = 9,
  number  = 7,
  pages   = {1545--1588},
  year    = 1997,
}

@mastersthesis{Amsterdam88,
  author = {Amsterdam, Jonathan},
  title  = {The {Valiant} Learning Model:  Extensions and Assessment},
  school = mit,
  month  = jan,
  year   = 1988,
}

@inproceedings{Amsterdam88b,
  author       = {Amsterdam, Jonathan},
  title        = {Some Philosophical Problems with Formal Learning Theory},
  booktitle    = {Proceedings AAAI-88},
  pages        = {580--584},
  organization = {American Association for Artificial Intelligence},
  address      = {Saint Paul, Minn.},
  month        = aug,
  year         = {1988},
  comment      = {A philosophical attack on computational learning theory},
}

@book{AndersonRo88,
  editor    = {James A. Anderson and Edward Rosenfeld},
  title     = {Neurocomputing: Foundations of Research},
  publisher = {MIT Press},
  year      = {1988},
}

@techreport{Andreae85,
  author      = {Peter Merrett Andreae},
  title       = {Justified Generalization: Acquiring Procedures from Examples},
  institution = {MIT Artificial Intelligence Laboratory},
  number      = {AI-TR-834},
  month       = jan,
  year        = {1985},
  comment     = {Ph.D. thesis.  Examines traces of a correct procedure
                 (physical motion procedure) to infer loops, etc.},
}


@article{Angluin78,
  author  = {Dana Angluin},
  title   = {On the complexity of minimum inference of regular sets},
  journal = infctrl,
  volume  = {39},
  pages   = {337--350},
  year    = {1978},
}

@article{Angluin80,
  author  = {Dana Angluin},
  title   = {Inductive Inference of Formal Languages from Positive Data},
  journal = infctrl,
  volume  = {45},
  number  = {2},
  pages   = {117--135},
  month   = may,
  year    = {1980},
  comment = {Can be done iff each language Li contains a finite subset Ti
             such that Ti is a subset of Li and if i!=j and Ti is a subset
             of Lj, then Lj is not a proper subset of Li; and Ti is
             computable from i.},
}

@article{Angluin80b,
  author  = {Dana Angluin},
  title   = {Finding patterns common to a set of strings},
  journal = jcss,
  volume  = {21},
  number  = {1},
  pages   = {46--62},
  month   = aug,
  year    = {1980},
}

@article{Angluin81,
  author  = {Dana Angluin},
  title   = {A note on the number of queries needed to identify regular
             languages},
  journal = infctrl,
  volume  = {51},
  pages   = {76--87},
  year    = {1981},
}

@article{Angluin82,
  author  = {Dana Angluin},
  title   = {Inference of Reversible Languages},
  journal = jacm,
  volume  = {29},
  number  = {3},
  pages   = {741--765},
  month   = jul,
  year    = {1982},
}

@techreport{Angluin86a,
  author      = {Dana Angluin},
  title       = {Learning Regular Sets from queries and counter-examples},
  institution = {Yale University Department of Computer Science},
  number      = {YALEU/DCS/TR-464},
  month       = mar,
  year        = {1986},
  comment     = {Learning regular sets from a teacher who answers membership
                 queries, and responds to false conjectures with
                 counterexamples.  Based on Gold's approach.  Also learning
                 context-free languages from teacher.},
}

@techreport{Angluin86b,
  author      = {Angluin, Dana},
  title       = {Types of queries for concept learning},
  institution = {Yale University Department of Computer Science},
  address     = {New Haven, CT},
  number      = {YALEU/DCS/TR-479},
  month       = jun,
  year        = 1986,
  comment     = {Surveys kinds of learning possible with equivalence,
                 membership, subset, superset, and disjointness queries, and
                 with random sampling.},
}

@article{Angluin87,
  author  = {Dana Angluin},
  title   = {Learning Regular Sets from Queries and Counterexamples},
  journal = infcomp,
  volume  = {75},
  pages   = {87--106},
  month   = nov,
  year    = {1987},
  comment = {Learning regular sets from a teacher who answers membership
             queries, and responds to false conjectures with
             counterexamples.  Based on Gold's approach.  Also learning
             context-free languages from teacher.},
}

@unpublished{Angluin87b,
  author = {Dana Angluin},
  title  = {A Note on Diversity},
  note   = {Unpublished},
  month  = dec,
  year   = {1987},
}

@techreport{Angluin88,
  author      = {Dana Angluin},
  title       = {Negative results for equivalence queries},
  institution = {Yale University Department of Computer Science},
  number      = {YALEU/DCS/RR-648},
  month       = sep,
  year        = {1988},
}

@article{Angluin88b,
  author  = {Dana Angluin},
  title   = {Queries and Concept Learning},
  journal = ml,
  volume  = {2},
  number  = {4},
  pages   = {319--342},
  month   = apr,
  year    = {1988},
}

@article{Angluin90,
  author  = {Dana Angluin},
  title   = {Negative results for equivalence queries},
  journal = ml,
  volume  = {5},
  number  = {2},
  pages   = {121--150},
  year    = {1990},
}

@inproceedings{AngluinFrPi90,
  author    = {Angluin, Dana and Frazier, Michael and Pitt, Leonard},
  title     = {Learning Conjunctions of {H}orn Clauses},
  booktitle = focs90,
  pages     = {186--192},
  month     = oct,
  year      = {1990},
}

@article{AngluinFrPi92,
  author  = {Angluin, Dana and Frazier, Michael and Pitt, Leonard},
  title   = {Learning Conjunctions of {H}orn Clauses},
  journal = ml,
  volume  = {9},
  number  = {2/3},
  pages   = {147--164},
  year    = {1992},
}

@techreport{AngluinGaSm87,
  author      = {Dana Angluin and William I. Gasarch and Carl H. Smith},
  title       = {Training Sequences},
  institution = {University of Maryland Institute for Advanced Computer
                 Studies},
  number      = {UMIACS-TR-87-37},
  month       = aug,
  year        = {1987},
  comment     = {Shows how to learn hard total functions by being taught
                 relevant other functions first.},
}

@book{boyd,
  author    = {Boyd, Stephen and Vandenberghe, Lieven},
  title     = {Convex Optimization},
  publisher = {Cambridge University Press},
  year      = {2006},
}

@techreport{AngluinHeKa89,
  author      = {Angluin, Dana and Lisa Hellerstein and Marek Karpinski},
  title       = {Learning Read-Once Formulas with Queries},
  institution = {University of California Berkeley, Computer Science Division},
  number      = {UCB/CSD 89/528},
  month       = aug,
  year        = 1989,
  note        = {To appear, \emph{Journal of the Association for
                 Computing Machinery}},
}

@article{AngluinHeKa93,
  author  = {Dana Angluin and Lisa Hellerstein and Marek Karpinski},
  title   = {Learning Read-Once Formulas with Queries},
  journal = jacm,
  volume  = {40},
  number  = {1},
  pages   = {185--210},
  year    = {1993},
}

@inproceedings{AngluinKh91,
  author    = {Angluin, Dana and Kharitonov, Michael},
  title     = {When won't membership queries help?},
  booktitle = stoc91,
  pages     = {444--454},
  month     = may,
  year      = {1991},
}

@techreport{AngluinLa86,
  author      = {Angluin, Dana and P. D. Laird},
  title       = {Identifying {k-CNF} formulas from noisy examples},
  institution = {Yale University Department of Computer Science},
  number      = {YALEU/DCS/TR-478},
  month       = jun,
  year        = 1986,
  comment     = {Error model assumes random error rate < 1/2 in classifications
                 provided for examples. Algorithm tries to minimize number of
                 examples which are misclassified by chosen hypothesis.},
}

@article{AngluinLa88,
  author  = {Dana Angluin and Philip Laird},
  title   = {Learning from noisy examples},
  journal = ml,
  volume  = {2},
  number  = {4},
  pages   = {343--370},
  year    = {1988},
  comment = {Error model assumes random error rate < 1/2 in classifications
             provided for examples. Algorithm tries to minimize number of
             examples which are misclassified by chosen hypothesis.},
}

@inproceedings{AngluinSl91,
  author    = {Angluin, Dana and Slonim, Donna K.},
  title     = {Learning Monotone {DNF} with an Incomplete Membership Oracle},
  booktitle = colt91,
  pages     = {139--146},
  month     = aug,
  year      = {1991},
}

@article{AngluinSm83,
  author  = {Dana Angluin and Carl H. Smith},
  title   = {Inductive Inference: Theory and Methods},
  journal = {Computing Surveys},
  volume  = {15},
  number  = {3},
  pages   = {237--269},
  month   = sep,
  year    = {1983},
  comment = {Comprehensive survey of inductive inference a la Gold [Go67].},
}

@article{AngluinVa79,
  author  = {Dana Angluin and Leslie G. Valiant},
  title   = {Fast probabilistic algorithms for {H}amiltonian circuits and
             matchings},
  journal = jcss,
  volume  = {18},
  number  = {2},
  pages   = {155--193},
  month   = apr,
  year    = {1979},
  comment = {States nice form of Chernoff bounds.},
}

@article{AnlaufBi89,
  author  = {J. K. Anlauf and M. Biehl},
  title   = {The {AdaTron}: an adaptive perceptron algorithm},
  journal = {Europhysics Letters},
  volume  = 10,
  number  = 7,
  pages   = {687--692},
  month   = dec,
  year    = 1989,
}

@inproceedings{ApteDaWe94,
  author    = {Chidanand Apt{\'e} and Fred Damerau and Sholom M. Weiss},
  title     = {Towards Language Independent Automated Learning of
               Text Categorization Models},
  booktitle = sigir94,
  pages     = {23--30},
  year      = {1994},
}

@inproceedings{ArLiRuSu92,
  author    = {Ar, Sigal and Lipton, Richard J. and Rubinfeld, Ronitt
               and Sudan, Madhu},
  title     = {Reconstructing Algebraic Functions from Mixed Data},
  booktitle = focs92,
  pages     = {503--512},
  month     = oct,
  year      = {1992},
}


@inproceedings{AslamDh91,
  author    = {Javed A. Aslam and Aditi Dhagat},
  title     = {Searching in the Presence of Linearly Bounded Errors},
  booktitle = stoc91,
  month     = may,
  year      = {1991},
}

@article{AslamDh93,
  author     = {Aslam, Javed A. and Dhagat, Aditi},
  title      = {On-line algorithms for $2$-coloring hypergraphs via chip
                games},
  journal    = tcs,
  fjournal   = {Theoretical Computer Science},
  volume     = {112},
  number     = {2},
  pages      = {355--369},
  year       = {1993},
  issn       = {0304-3975},
  coden      = {TCSDI},
  mrclass    = {05C85 (68R10 90D35)},
  mrnumber   = {94b:05178},
  mrreviewer = {J. Spencer},
}

% NOTE(review): institution, number and year were added from memory to
% satisfy the techreport entry type; confirm against the report itself.
@techreport{AslamDe94,
  author      = {Javed A. Aslam and Scott E. Decatur},
  title       = {Improved noise-tolerant learning and generalized
                 statistical queries},
  institution = {Harvard University},
  number      = {TR-17-94},
  year        = 1994,
}

@phdthesis{Aslam95,
  author = {Javed Aslam},
  title  = {Noise Tolerant Algorithms for Learning and Searching},
  school = {Massachusetts Institute of Technology},
  note   = {{MIT} technical report {MIT/LCS/TR-657}},
  year   = {1995},
}

% NOTE(review): the note field is required for unpublished entries; the
% generic wording below should be replaced if the manuscript's status is known.
@unpublished{AslamDe95,
  author = {Javed A. Aslam and Scott E. Decatur},
  title  = {General bounds on statistical query learning and
            {PAC} learning with noise via hypothesis boosting},
  note   = {Unpublished manuscript},
  year   = 1995,
}


% The journal name is spelled out here because no `ipl' string macro appears
% among the string definitions visible at the top of this file.
@article{AslamDe96,
  author  = {Javed A. Aslam and Scott E. Decatur},
  title   = {On the sample complexity of noise-tolerant learning},
  journal = {Information Processing Letters},
  volume  = 57,
  number  = 4,
  pages   = {189--195},
  month   = {26~} # feb,
  year    = 1996,
}
		  		  
@inproceedings{AuerCeFrSc95,
  author    = {Peter Auer and Nicol\`o Cesa-Bianchi and Yoav Freund
               and Robert E. Schapire},
  title     = {Gambling in a rigged casino: The adversarial
               multi-armed bandit problem},
  booktitle = focs95,
  pages     = {322--331},
  year      = 1995,
}

@book{AumannHa92,
  editor    = {Aumann, Robert J. and Hart, Sergiu},
  title     = {Handbook of Game Theory with Economic Applications},
  volume    = {1},
  publisher = {North-Holland},
  year      = {1992},
}

@inproceedings{AzarBrKaLiPh92,
  author    = {Azar, Yossi and Broder, Andrei Z. and Karlin, Anna R. and
               Linial, Nathan and Phillips, Steven},
  title     = {Biased Random Walks},
  booktitle = stoc92,
  pages     = {1--9},
  month     = may,
  year      = {1992},
}

@article{AzouryWa01,
  author  = {Katy S. Azoury and M. K. Warmuth},
  title   = {Relative Loss Bounds for On-Line Density Estimation
             with the Exponential Family of Distributions},
  journal = ml,
  volume  = 43,
  pages   = {211--246},
  year    = 2001,
}

@article{BahlJeMe83,
  author  = {Lalit R. Bahl and Frederick Jelinek and Robert L. Mercer},
  title   = {A Maximum Likelihood Approach to Continuous Speech
             Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine
             Intelligence},
  volume  = {PAMI-5},
  number  = {2},
  pages   = {179--190},
  month   = mar,
  year    = {1983},
  comment = {Describes Markov modeling and analysis techniques.},
}

@incollection{Bainbridge77,
  author    = {Bainbridge, E. S.},
  title     = {The fundamental duality of system theory},
  booktitle = {Systems: Approaches, Theories, Applications},
  editor    = {Hartnett, W. E.},
  pages     = {45--61},
  publisher = {Reidel},
  year      = {1977},
}

@article{Banos68,
  author  = {Ba{\~{n}}os, Alfredo},
  title   = {On pseudo-games},
  journal = {The Annals of Mathematical Statistics},
  volume  = 39,
  number  = 6,
  pages   = {1932--1945},
  year    = 1968,
}

% NOTE(review): article entries require journal and year; neither is present
% here, so BibTeX will warn and the reference will print without a venue.
% The key suggests 1994 -- confirm the publication details before citing.
@article{BarkaiSeSo94
,author=	{N. Barkai and H. S. Seung and H. Sompolinsky}
,title=		{On-line learning of dichotomies}
}

% NOTE(review): the note field is required for unpublished entries; the
% generic wording below should be replaced if the manuscript's status is known.
@unpublished{BarkaiSo??,
  author = {N. Barkai and H. Sompolinsky},
  title  = {Statistical mechanics of maximum-likelihood density
            estimation},
  note   = {Unpublished manuscript},
}

@article{Barron93,
  author  = {Barron, Andrew R.},
  title   = {Universal approximation bounds for superposition of a
             sigmoidal function},
  journal = ieeeit,
  volume  = {39},
  number  = {3},
  pages   = {930--945},
  year    = {1993},
}

% NOTE(review): the note field is required for unpublished entries; the
% generic wording below should be replaced if the manuscript's status is known.
@unpublished{Barron97,
  author = {Andrew R. Barron},
  title  = {Information theory in probability, statistics,
            learning, and neural nets},
  note   = {Unpublished manuscript},
}


@article{BarronRiYu98,
  author  = {Barron, Andrew and Rissanen, Jorma and Yu, Bin},
  title   = {The minimum description length principle in coding and
             modeling},
  journal = ieeeit,
  volume  = 44,
  number  = 6,
  pages   = {2743--2760},
  year    = 1998,
  note    = {Information theory: 1948--1998},
}

@inproceedings{BartellCoBe94,
  author    = {Bartell, Brian T. and Cottrell, Garrison W. and Belew, Richard K.},
  title     = {Automatic combination of multiple ranked retrieval systems},
  booktitle = sigir94,
  year      = {1994},
}

@article{Bartlett52,
  author  = {M. S. Bartlett},
  title   = {The Statistical Significance of Odd Bits of Information},
  journal = {Biometrika},
  volume  = {39},
  pages   = {228--237},
  year    = {1952},
  comment = {Variation on likelihood measure proposed.},
}

@inproceedings{Bartlett96,
  author    = {Bartlett, Peter L.},
  title     = {For valid generalization, the size of the weights is more
               important than the size of the network},
  booktitle = {Advances in Neural Information Processing Systems 9},
  year      = 1997,
}

@article{Bartlett98,
  author  = {Peter L. Bartlett},
  title   = {The sample complexity of pattern classification with neural
             networks: the size of the weights is more important than the
             size of the network},
  journal = ieeeit,
  volume  = 44,
  number  = 2,
  pages   = {525--536},
  month   = mar,
  year    = 1998,
}

@article{Barzdin70,
  author  = {Ya. M. Barzdin'},
  title   = {Deciphering of Sequential Networks in the Absence of an
             Upper Limit on the Number of States},
  journal = {Soviet Physics Doklady},
  volume  = {15},
  number  = {2},
  pages   = {94--97},
  month   = aug,
  year    = {1970},
}

@article{BarzdinFr72,
  author  = {J. M. Barzdin and R. V. Freivald},
  title   = {On the prediction of general recursive functions},
  journal = {Soviet Mathematics Doklady},
  volume  = {13},
  pages   = {1224--1228},
  year    = {1972},
  comment = {Introduces the on-line mistake bound learning model
             (although in a Gold-style framework)},
}

@article{BauerKo99,
  author  = {Eric Bauer and Ron Kohavi},
  title   = {An empirical comparison of voting classification
             algorithms: Bagging, boosting, and variants},
  journal = ml,
  volume  = 36,
  number  = {1/2},
  pages   = {105--139},
  year    = 1999,
}

@article{Baum72,
  author  = {Baum, Leonard E. and J. A. Eagon},
  title   = {An Inequality with Applications to Statistical Estimation for
             Probabilistic Functions of {Markov} Processes and to a Model
             for Ecology},
  journal = {Bulletin of the American Mathematical Society},
  volume  = 73,
  pages   = {360--363},
  year    = 1967,
  comment = {Gives a technique for maximizing a polynomial with
             nonnegative coefficients and homogeneous of degree d.},
}

@unpublished{Baum89b,
  author = {Eric B. Baum},
  title  = {The Perceptron Algorithm Is Fast for Non-malicious
            Distributions},
  note   = {Unpublished manuscript},
  month  = jul,
  year   = {1989},
}

@article{Baum90,
  author  = {Eric B. Baum},
  title   = {On Learning a Union of Half Spaces},
  journal = {Journal of Complexity},
  volume  = {6},
  number  = {1},
  pages   = {67--101},
  month   = mar,
  year    = {1990},
}

@article{BaumHa89,
  author  = {Eric B. Baum and David Haussler},
  title   = {What Size Net Gives Valid Generalization?},
  journal = {Neural Computation},
  volume  = {1},
  number  = {1},
  pages   = {151--160},
  year    = {1989},
}

@article{BearCoEb87,
  author  = {Mark F. Bear and Leon N. Cooper and Ford F. Ebner},
  title   = {A Physiological Basis for a Theory of Synapse Modification},
  journal = {Science},
  volume  = 237,
  pages   = {42--48},
  month   = jul # {~3},
  year    = 1987,
  comment = {Proposes that change in weight w_ij from i to j goes as
                 d(w_ij)/dt = \phi(a_i,avg(a_i)) a_j
             where \phi is negative below (say) avg(a_i)**2, and positive
             above it.},
}

@phdthesis{Beer89,
  author  = {Beer, Randall Dean},
  title   = {Intelligence as Adaptive Behavior: An Experiment in
             Computational Neuroethology},
  school  = {Case Western Reserve University},
  month   = aug,
  year    = {1989},
  comment = {Simulates a cockroach using specially-designed neural
             circuitry},
}

@inproceedings{Ben-DavidChGoLu89,
  author    = {Ben-David, Shai and Chor, Benny and Goldreich, Oded and
               Luby, Michael},
  title     = {On the Theory of Average Case Complexity},
  booktitle = stoc89,
  pages     = {204--216},
  month     = may,
  year      = {1989},
}

@article{Ben-DavidChGoLu92,
  author  = {Ben-David, Shai and Chor, Benny and Goldreich, Oded and
             Luby, Michael},
  title   = {On the Theory of Average Case Complexity},
  journal = jcss,
  volume  = {44},
  number  = {2},
  pages   = {193--219},
  year    = {1992},
}

% NOTE(review): techreport entries require an institution; none is given
% here, so BibTeX will warn.  Supply the issuing institution (and report
% number, if any) before citing.
@techreport{Ben-TalRo94
,author=	{Aharon Ben-Tal and Gil Roth}
,title=		{A truncated log barrier algorithm for large scale
		 convex programming and minmax problems:
		 implementation and computational results}
,year=		1994
}

@article{BenderWi85,
  author  = {Edward A. Bender and Herbert S. Wilf},
  title   = {A theoretical analysis of backtracking in the graph
             coloring problem},
  journal = {Journal of Algorithms},
  volume  = 6,
  number  = 2,
  pages   = {275--282},
  month   = jun,
  year    = 1985,
}

@unpublished{Benedek88,
  author = {Gyora M. Benedek},
  title  = {Ph.D. dissertation, in preparation},
  note   = {(To appear.)},
  year   = {1988},
}

@inproceedings{BenedekIt88,
  author    = {Gyora M. Benedek and Alon Itai},
  title     = {Nonuniform Learnability},
  booktitle = {ICALP},
  pages     = {82--92},
  month     = jul,
  year      = {1988},
}

@inproceedings{BenedekIt88a,
  author    = {Benedek, Gyora M. and Itai, Alon},
  title     = {Learnability by Fixed Distributions},
  booktitle = colt88,
  pages     = {80--90},
  month     = aug,
  year      = 1988,
}

@article{BenedekIt91,
  author  = {Benedek, Gyora M. and Itai, Alon},
  title   = {Learnability with Respect to Fixed Distributions},
  journal = tcs,
  volume  = {86},
  number  = {2},
  pages   = {377--389},
  month   = sep,
  year    = {1991},
}

% NOTE(review): techreport entries require institution and year; neither is
% present here, so BibTeX will warn.  The key suggests 1992 -- confirm the
% issuing institution and report number before citing.
@techreport{Bennett92
,author=	{Kristin P. Bennett}
,title=		{Decision tree construction via linear programming}
}

@InProceedings{BennettDeMa02,
  author = 	 {Kristin P. Bennett and Ayhan Demiriz and Richard Maclin},
  title = 	 {Exploiting Unlabeled Data in Ensemble Methods},
  booktitle = 	 {Proceedings of SIGKDD International Conference on
                  Knowledge Discovery and Data Mining},
  year =	 2002
}

@inproceedings{BenorTi88,
author=		{Michael Ben-Or and Prasoon Tiwari},
title=		{A deterministic algorithm for sparse multivariate
		 polynomial interpolation},
booktitle=	stoc88,
year=		1988,
month=		may,
pages=		{301--309}
}

@Article{BergerDeDe96,
  author = 	 {Berger, Adam L. and Della Pietra, Stephen A. and
                  Della Pietra, Vincent J.},
  title = 	 {A maximum entropy approach to natural language processing},
  journal = 	 {Computational Linguistics},
  year = 	 1996,
  volume =	 22,
  number =	 1,
  pages =	 {39--71}
}

@article{BerlekampMcTi78,
author=		{E. Berlekamp and R. McEliece and H. van Tilborg},
title=		{On the inherent intractability of certain coding
		 problems},
journal=	{IEEE Transactions on Information Theory},
volume=		24,
number=		3,
month=		may,
year=		1978,
pages=		{384--386}
}

@techreport{BerlinerGo84,
author=   	{Berliner, Hans and Gordon Goetsch},
title=    	{A Quantitative Study of Search Methods and the Effect of
	  	Constraint Satisfaction},
institution= 	{CMU Computer Science Department},
year=     	1984,
month=    	Jul,
number=   	{CMU-CS-84-147},
comment=  	{Empirical comparative study of search heuristics for 
		Superpuzz, a card solitaire game.}
}

@book{BermanPl79,
author=		{Abraham Berman and Robert J. Plemmons},
title=		{Nonnegative Matrices in the Mathematical Sciences},
publisher=	{Academic Press},
year=		1979
}

@Manual{AbramowitzSt70,
  title = 	 {Handbook of Mathematical Functions},
  author =	 {Abramowitz, Milton and Stegun, Irene A.},
  organization = {National Bureau of Standards},
  year =	 1970
}

@Article{Bernardo79,
  author = 	 {J. M. Bernardo},
  title = 	 {Reference posterior distributions for {Bayesian} inference},
  journal = 	 {Journal of the Royal Statistical Society, Series B},
  year = 	 1979,
  volume =	 41,
  number =	 2,
  pages =	 {113--147}
}

@inproceedings{BernsteinVa93,
  author    = {Ethan Bernstein and Umesh Vazirani},
  title     = {Quantum complexity theory},
  booktitle = stoc93,
  pages     = {11--20},
  month     = may,
  year      = 1993
}

@inproceedings{BiebricherFuLuScKn88
,author=        {Peter Biebricher and Norbert Fuhr and Gerhard Lustig
                  and Michael Schwantner and Gerhard Knorz}
,title=         {The automatic indexing system {AIR/PHYS} --- from
                  research to application}
,booktitle=     sigir88
,pages=         {333--342}
,year=          1988
}

@book{Billingsley86,
author=		{Patrick Billingsley},
title=		{Probability and Measure},
edition=	{Second},
publisher=	{Wiley},
address=	{New York},
year=		1986
}

@article{Blackwell56,
  author  = {David Blackwell},
  title   = {An analog of the minimax theorem for vector payoffs},
  journal = {Pacific Journal of Mathematics},
  volume  = 6,
  number  = 1,
  pages   = {1--8},
  month   = {Spring},
  year    = 1956
}

@Misc{Blackwell56b,
  author =	 {David Blackwell},
  title =	 {Controlled random walks},
  howpublished = {invited address, Institute of Mathematical
		  Statistics Meeting, Seattle, Washington},
  year =	 1956
}

@Book{BlackwellGi54,
  author = 	 {David Blackwell and M. A. Girshick},
  title = 	 {Theory of games and statistical decisions},
  publisher = 	 {Dover},
  year = 	 1954
}

@Article{Block62,
  author = 	 {H. D. Block},
  title = 	 {The Perceptron: A Model for Brain Functioning},
  journal = 	 {Reviews of Modern Physics},
  year = 	 1962,
  volume =	 34,
  pages =	 {123--135},
  note =         {Reprinted in ``Neurocomputing'' by Anderson and Rosenfeld}
}

@mastersthesis{Blum89,
author=   	{Blum, Avrim},
title=    	{On the Computational Complexity of Training Simple
		Neural Networks},
school=   	{MIT Department of Electrical Engineering and Computer 
		Science},
year=     	1989,
month=    	May,
note = 		{(Published as Laboratory for Computer Science Technical
		  Report MIT/LCS/TR-445 (May, 1989).)}
}

@inproceedings{Blum90,
author=		{Avrim Blum},
title=		{Separating Distribution-free and Mistake-bound
		 Learning Models over the {B}oolean Domain},
booktitle=	focs90,
pages=		{211--218},
month=		oct,
year=		1990
}

@inproceedings{Blum90b,
author=		{Avrim Blum},
title=		{Some Tools for Approximate 3-Coloring},
booktitle=	focs90,
pages=		{554--562},
month=		oct,
year=		1990
}

@article{Blum92,
author=		{Avrim Blum},
title=		{Learning Boolean Functions in an Infinite Attribute
		 Space},
journal=	ml,
volume=		9,
number=		4,
year=		1992,
pages=		{373--386}
}

@article{Blum95,
  author = 	 {Avrim Blum},
  title = 	 {Empirical support for {Winnow} and {Weighted-Majority} based
		  algorithms: results on a calendar scheduling domain},
  journal =	 ml,
  volume =	 26,
  number =	 1,
  pages =	 {5--23},
  year =	 1997
}

@article{BlumBl75,
author=   	{Blum, Lenore and Manuel Blum},
title=    	{Toward a Mathematical Theory of Inductive Inference},
journal=  	InfCtrl,
year=     	1975,
month=   	Jun,
volume=   	28,
number=   	2,
pages=    	{125--155},
comment=  	{Recursion-theoretic, a la Gold [Go67].}
}

@article{BlumBlSh86
,author=	{L. Blum and M. Blum and M. Shub}
,title=		{A simple unpredictable pseudo-random number
		 generator}
,journal=	sicomp
,volume=	15
,number=	2
,pages=		{364--383}
,month=		may
,year=		1986
}


@inproceedings{BlumCh92,
author=		{Avrim Blum and Prasad Chalasani},
title=		{Learning Switching Concepts},
booktitle=	colt92,
year=		1992,
month=		jul,
pages=		{231--242}
}

@inproceedings{BlumFuKeLi93,
author=		{Avrim Blum and Merrick Furst and Michael Kearns and
		 Richard J. Lipton},
title=		{Cryptographic Primitives Based on Hard Learning Problems},
booktitle=	{Pre-Proceedings of CRYPTO~'93},
year=		1993,
pages=		{24.1--24.10}
}

@InProceedings{BlumKa97,
  author = 	 {Avrim Blum and Adam Kalai},
  title = 	 {Universal Portfolios With and Without Transaction Costs},
  booktitle = 	 colt97,
  year =	 1997,
  pages =	 {309--313}
}

@incollection{BlumRi89,
author =	{Blum, Avrim and Ronald L. Rivest},
title = 	{Training a 3-node neural net is {NP-Complete}},
booktitle = 	{Advances in Neural Information Processing Systems I},
publisher = 	{Morgan Kaufmann},
year = 		1989,
editor = 	{David S. Touretzky},
pages = 	{494--501}
}

@InProceedings{BlumMi98,
  author = 	 {Avrim Blum and Tom Mitchell},
  title = 	 {Combining Labeled and Unlabeled Data with Co-Training},
  booktitle = 	 colt98,
  pages =	 {92--100},
  year =	 1998
}

@inproceedings{BlumSi90,
author=		{Avrim Blum and Mona Singh},
title=		{Learning functions of {$k$} terms},
booktitle=	colt90,
year=		1990,
month=		aug,
pages=		{144--153}
}

@inproceedings{BlumerEhHaWa86a,
author=   	{Blumer, Anselm and Andrzej Ehrenfeucht and David Haussler and
	   	Manfred K. Warmuth},
title=    	{Classifying Learnable Geometric Concepts with the 
	  	{V}apnik-{C}hervonenkis Dimension},
booktitle= 	stoc86,
address=  	{Berkeley, California},
year=     	1986,
month=    	May,
pages=    	{273--282},
comment=  	{Shows equivalence between finite VC dimension and 
		learnability of geometric concepts.}
}

@techreport{BlumerEhHaWa86b,
author=   	{Blumer, Anselm and Andrzej Ehrenfeucht and David Haussler and
	   	Manfred K. Warmuth},
title=    	{Occam's Razor},
institution= 	ucsccrl,
number=  	{UCSC-CRL-86-2},
year=     	1986,
month=    	Feb,
comment=  	{Defines `Occam-algorithm' which may produce a hypothesis of
	   	complexity $n^c m^\alpha$ for fixed $c$ and $\alpha < 1$, and
	   	shows that Occam-algorithms need only polynomially many 
		samples.}
}

@article{BlumerEhHaWa87,
author=   	{Blumer, Anselm and Andrzej Ehrenfeucht and David Haussler and
	   	Manfred K. Warmuth},
title=    	{Occam's Razor},
journal=  	{Information Processing Letters},
volume=   	24,
number=		6,
year=     	1987,
month=    	Apr,
pages=    	{377--380},
comment=  	{Defines `Occam-algorithm' which may produce a hypothesis of
	   	complexity $n^c m^\alpha$ for fixed $c$ and $\alpha < 1$, and
	   	shows that Occam-algorithms need only polynomially many 
		samples.}
}

@techreport{BlumerEhHaWa87b,
author=   	{Blumer, Anselm and Andrzej Ehrenfeucht and David Haussler and
	   	Manfred K. Warmuth},
title=    	{Learnability and the 
	  	{V}apnik-{C}hervonenkis Dimension},
institution= 	ucsccrl,
number=   	{UCSC-CRL-87-20},
year=     	1987,
month=   	Nov,
}

@article{BlumerEhHaWa89,
author=   	{Blumer, Anselm and Andrzej Ehrenfeucht and David Haussler and
	   	Manfred K. Warmuth},
title=    	{Learnability and the {V}apnik-{C}hervonenkis Dimension},
journal= 	jacm,
month=		Oct,
year=		1989,
volume=		36,
number=		4,
pages=		{929--965},
comment=	{An earlier version is available as
		U. C. Santa Cruz Computer Science Laboratory Tech.\ Report
		UCSC-CRL-87-20, November, 1987.}
}

@phdthesis{Board90
,author=	{Raymond Acton Board}
,title=		{Topics in computational learning theory and graph
		 algorithms}
,year=		1990
,month=		jul
,school=	{University of Illinois at Urbana-Champaign}
,note=		{Available as technical report UIUCDCS-R-90-1611}
}

@inproceedings{BoardPi90,
author=		{Board, Raymond and Leonard Pitt},
title=		{On the Necessity of {O}ccam Algorithms},
booktitle=	stoc90,
year=		1990,
month=		May,
pages=		{54--63}
}

@article{BoardPi92,
author=		{Board, Raymond and Leonard Pitt},
title=		{On the Necessity of {O}ccam Algorithms},
journal=	tcs,
year=		1992,
volume=		100,
number=		1,
pages=		{157--184}
}

@article{BohanecBr94
,author=        {Marko Bohanec and Ivan Bratko}
,title=         {Trading accuracy for simplicity in decision trees}
,journal=       ml
,volume=        15
,number=        3
,pages=         {223--250}
,year=          1994
}

@book{Bongard??,
author=   	{Bongard, M.},
title=    	{Pattern Recognition},
publisher= 	{Spartan Books},
year=      	1970,
comment=	{English edition published by Spartan Books in 1970;
		the citation key is kept unchanged for existing documents.}
}

@inproceedings{Boppana85,
  author    = {Boppana, Ravi B.},
  title     = {Amplification of Probabilistic {B}oolean Formulas},
  booktitle = focs85,
  pages     = {20--29},
  month     = oct,
  year      = 1985
}

@phdthesis{Boppana86,
author=		{Ravi Babu Boppana},
title=		{Lower Bounds for Monotone Circuits and Formulas},
year=		1986,
school=		mit
}

@incollection{Boppana89,
author=		"Boppana, Ravi B.",
title=		"Amplification of Probabilistic {B}oolean Formulas",
booktitle=	{Advances in Computing Research 5: Randomness and Computation},
editor=		{S. Micali},
year=		1989,
publisher=	{JAI Press},
pages=		{27--45}
}

@unpublished{BorgersSa94
,author=	{Tilman B{\"o}rgers and Rajiv Sarin}
,title=		{Learning through reinforcement and replicator dynamics}
,note=		{Unpublished manuscript}
}

@unpublished{BorgersSa95
,author=	{Tilman B{\"o}rgers and Rajiv Sarin}
,title=		{Naive reinforcement learning with endogenous
		aspirations}
,note=		{Unpublished manuscript}
}

@Book{BorweinLe00,
  author =	 {Jonathan M. Borwein and Adrian S. Lewis},
  title = 	 {Convex Analysis and Nonlinear Optimization},
  publisher = 	 {Springer-Verlag},
  year = 	 2000
}

@InProceedings{BoserGuVa92,
  author = 	 {Bernhard E. Boser and Isabelle M. Guyon and Vladimir
		  N. Vapnik},
  title = 	 {A Training Algorithm for Optimal Margin Classifiers},
  booktitle = 	 colt92,
  year =	 1992,
  pages =	 {144--152}
}

@unpublished{Boucheron88,
author = 	{St{\'e}phane Boucheron},
title = 	{Learnability from positive examples in the {V}aliant 
		framework},
note = 		{Unpublished manuscript},
year = 		{1988}
}

@inproceedings{BoucheronSa88,
author=   	{Boucheron, St{\'e}phane and Jean Sallantin},
title=    	{Some remarks about space-complexity of learning, and
		 circuit complexity of recognizing},
booktitle=	colt88,
month=    	aug,
year=     	1988,
pages = 	{125--138}
}

@article{BoultonWa73,
author=   	{Boulton, D. M. and C. S. Wallace},
title=    	{An information measure for hierarchic classification},
journal=  	{The Computer Journal},
volume=   	16,
number=   	3,
year=     	1973,
month=    	Aug,
pages=    	{254--261}
}

@article{BoultonWa75,
author=   	{Boulton, D. M. and C. S. Wallace},
title=    	{An information measure for single-link classification},
journal=  	{The Computer Journal},
volume=   	18,
number=   	3,
year=     	1975,
month=    	aug,
pages=    	{236--238}
}

@inproceedings{BousquetEl01,
	author    = {Olivier Bousquet and Andr{\'e} Elisseeff},
	title     = {Algorithmic Stability and Generalization Performance},
	booktitle = nips13,
	year      = 2001
}

@techreport{BoyanFrJo94,
	author      = {Justin Boyan and Dayne Freitag and Thorsten Joachims},
	title       = {A machine learning architecture for optimizing web search engines},
	institution = {American Association for Artificial Intelligence},
	number      = {WS-96-05},
	year        = 1996,
	note        = {In AAAI Workshop on Internet-Based Information Systems},
	comment     = {Report number WS-96-05 belongs to the 1996 AAAI workshop
		series; the citation key is kept unchanged for existing documents.}
}

@article{BoylanEl91
,author=	{Richard T. Boylan and Mahmoud A. El-Gamal}
,title=		{Fictitious play: a statistical study of multiple
		economic experiments}
}

@article{BradtkeBa??
,author=	{Steven J. Bradtke and Andrew G. Barto}
,title=		{New Algorithms for temporal difference learning}
}

@Article{Bregman67,
  author = 	 {L. M. Bregman},
  title = 	 {The relaxation method of finding the common point of
                  convex sets and its application to the solution of
                  problems in convex programming},
  journal = 	 {U.S.S.R. Computational Mathematics and Mathematical Physics},
  year = 	 1967,
  volume =	 7,
  number =	 1,
  pages =	 {200--217}
}

@inproceedings{BreeseHeKa98,
  author    = {John S. Breese and David Heckerman and Carl Kadie},
  title     = {Empirical analysis of predictive algorithms for
               collaborative filtering},
  booktitle = {Proceedings of the Fourteenth Conference on Uncertainty
               in Artificial Intelligence},
  year      = 1998,
  pages     = {43--52}
}
	
@Book{Breiman92,
  author =	 {Leo Breiman},
  title = 	 {Probability},
  publisher = 	 {SIAM},
  year = 	 1992,
  edition =	 {Classics}, 
  note =         {Original edition first published in 1968}
}

@techreport{Breiman96
,author=	{Leo Breiman}
,title=		{Bias, variance, and arcing classifiers}
,institution=	{Statistics Department, University of California at
		 Berkeley}
,year=		1996
,number=	460
,comment=	{Available from
		ftp://ftp.stat.berkeley.edu/pub/users/breiman/arcall.ps.Z.}
}

@article{Breiman96b
,author=	{Leo Breiman}
,title=		{Bagging predictors}
,journal=	ml
,volume=	24
,number=	2
,pages=		{123--140}
,year=		1996
}

@Article{Breiman96c,
  author = 	 {Leo Breiman},
  title = 	 {The heuristics of instability in model selection},
  journal = 	 annstat,
  year = 	 1996,
  volume =	 24,
  pages =	 {2350--2383}
}


@TechReport{Breiman97,
  author = 	 {Leo Breiman},
  title = 	 {Prediction games and arcing classifiers},
  institution =  {Statistics Department, University of California at Berkeley},
  year = 	 1997,
  number =	 504
}

@TechReport{Breiman97b,
  author = 	 {Leo Breiman},
  title = 	 {Arcing the Edge},
  institution =  {Statistics Department, University of California at Berkeley},
  year = 	 1997,
  number =	 486
}

@Article{Breiman98,
  author = 	 {Leo Breiman},
  title = 	 {Arcing classifiers},
  journal = 	 annstat,
  volume =	 26,
  number =	 3,
  pages =	 {801--849},
  year = 	 1998
}

@article{Breiman99,
  author = 	 {Leo Breiman},
  title = 	 {Prediction games and arcing classifiers},
  journal =	 {Neural Computation},
  volume =	 11,
  number =	 7,
  pages =	 {1493--1517},
  year = 	 1999
}

@book{BreimanFrOlSt84,
author=   	{Breiman, Leo and Jerome H. Friedman and Richard A. Olshen and
		Charles J. Stone},
title=    	{Classification and Regression Trees},
publisher= 	{Wadsworth \& Brooks},
year=      	1984,
comment=   {Review of procedures for inferring decision trees from data.}
}

@inproceedings{Brill92
,author=	{Eric Brill}
,title=		{A simple rule-based part of speech tagger}
,booktitle=	{Proceedings of the Third Conference on Applied Natural
		Language Processing}
,pages=		{152--155}
,year=		1992
}

@article{BrownNe??
,author=        {G. W. Brown and J. von Neumann}
,title=         {Solutions of games by differential equations}
}

@Proceedings{BrussFeSa90,
  title =        {Strategies for Sequential Search and Selection in Real Time},
  year =         1990,
  editor =       {F. Thomas Bruss and Thomas S. Ferguson and Stephen M. Samuels},
  volume =       125,
  series =       {Contemporary Mathematics},
  publisher =    {American Mathematical Society}
}

@inproceedings{Bshouty93,
author=		{Nader H. Bshouty},
title=		{Exact Learning via the Monotone Theory},
booktitle=	focs93,
month=		nov,
year=		1993
}

@inproceedings{BshoutyGoHaMa93
,author=	{Nader H. Bshouty and Sally A. Goldman and Thomas R.
		 Hancock and Sleiman Matar}
,title=		{Asking questions to minimize errors}
,booktitle=	colt93
,month=		jul
,year=		1993
,pages=		{41--50}
}

@inproceedings{BshoutyHaHe92,
author=		{Nader H. Bshouty and Thomas R. Hancock and Lisa
		 Hellerstein},
title=		{Learning Arithmetic Read-Once Formulas},
booktitle=	stoc92,
year=		1992,
month=		may,
pages=		{370--381}
}

@Article{BshoutyGr02,
  author = 	 {Nader H. Bshouty and Dmitry Gavinsky},
  title = 	 {On Boosting with Polynomially Bounded Distributions},
  journal = 	 jmlr,
  year = 	 2002,
  volume =	 3,
  pages =	 {483--506},
  month =	 nov
}

@inproceedings{BshoutyHaHe92b,
author=		{Nader H. Bshouty and Thomas R. Hancock and Lisa
		 Hellerstein},
title=		{Learning Boolean Read-Once Formulas with Arbitrary
		 Symmetric and Constant Fan-in Gates},
booktitle=	colt92,
pages=		{1--15},
month=		jul,
year=		1992
}

@unpublished{BshoutyHaHeKa91,
author=   	{Nader H. Bshouty and Thomas R. Hancock and Lisa
		 Hellerstein and Marek Karpinski},
title=    	{Read-Once Threshold Formulas, Justifying Assignments,
		 and Generic Transformations},
year=     	1991,
note =          {Unpublished manuscript}
}

@inproceedings{BuckleySaAl94
,author=	{Chris Buckley and Gerard Salton and James Allan}
,title=		{The effect of adding relevance information in the
		relevance feedback environment}
}

@incollection{Buehler70,
author=   	{Buehler, Robert J.},
title=    	{Measuring Information and Uncertainty},
booktitle=  	{Foundations of Statistical Inference},
editor=   	{V. P. Godambe and D. A. Sprott},
year=     	1970,
publisher=  	{Holt, Rinehart and Winston},
comment=  	{General study of payoff functions that encourage honesty for
           	someone making probabilistic predictions (e.g. a weatherman).}
}

@phdthesis{Buntine90
,author=	{Wray Lindsay Buntine}
,title=		{A Theory of Learning Classification Rules}
,school=	{University of Technology, Sydney}
,year=		1990
}

@article{Buntine92
,author=	{Wray Buntine}
,title=		{Learning Classification Trees}
,journal=	{Statistics and Computing}
,volume=	2
,year=		1992
,pages=		{63--73}
}

@article{BuntineNi92
,author=	{Wray Buntine and Tim Niblett}
,title=		{A further comparison of splitting rules for
		 decision-tree induction}
,journal=	ml
,volume=	8
,number=	1
,pages=		{75--85}
,year=		1992
}

@inproceedings{CarbonellG87,
author=   	{Carbonell, Jaime G. and Yolanda Gil},
title=    	{Learning by Experimentation},
booktitle=	{Proceedings of the Fourth International Workshop on Machine
		Learning},
month=    	Jun,
year=     	1987,
editor=   	{Pat Langley},
pages=    	{256--266},
publisher=	{Morgan Kaufmann}
}

@inproceedings{CaruanaBaMi96,
	author = "Rich Caruana and Shumeet Baluja and Tom Mitchell",
	title = {Using the Future to ``Sort Out'' the Present:
		{R}ankprop and Multitask Learning for Medical Risk Evaluation},
	booktitle = nips8,
	pages = {959--965},
	year = 1996
}

@inproceedings{Catlett9?
,author=	{Jason Catlett}
,title=		{Overpruning large decision trees}
}

@Article{CensorLe81,
  author = 	 {Y. Censor and A. Lent},
  title = 	 {An Iterative Row-Action Method for Interval Convex
                  Programming},
  journal = 	 {Journal of Optimization Theory and Applications},
  year = 	 1981,
  volume =	 34,
  number =	 3,
  pages =	 {321--353},
  month =	 jul
}

@Book{CensorZe97,
  author =	 {Yair Censor and Stavros A. Zenios},
  title = 	 {Parallel Optimization: Theory, Algorithms, and Applications},
  publisher = 	 {Oxford University Press},
  year = 	 1997
}

@inproceedings{CesabianchiFrHeHaScWa92,
author=		{Nicol{\`o} Cesa-Bianchi and Yoav Freund and David P.
		 Helmbold and David Haussler and Robert E. Schapire
		 and Manfred K. Warmuth},
title=		{How to use expert advice},
booktitle=	stoc93,
year=		1993,
pages=		{382--391}
}

@article{CesabianchiFrHeHaScWa97,
  author  = {Nicol{\`o} Cesa-Bianchi and Yoav Freund and David Haussler and
             David P. Helmbold and Robert E. Schapire and Manfred K. Warmuth},
  title   = {How to Use Expert Advice},
  journal = jacm,
  volume  = 44,
  number  = 3,
  pages   = {427--485},
  month   = may,
  year    = 1997
}

@Article{CesabianchiFrHeWa96,
  author = 	 {Nicol{\`o} Cesa-Bianchi and Yoav Freund and David
                  P. Helmbold and Manfred K. Warmuth},
  title = 	 {On-line Prediction and Conversion Strategies},
  journal = 	 ml,
  year = 	 1996,
  volume =	 25,
  pages =	 {71--110}
}


@Article{CesabianchiLu99,
  author = 	 {Nicol{\`o} Cesa-Bianchi and G{\'a}bor Lugosi},
  title = 	 {On prediction of individual sequences},
  journal = 	 annstat,
  year = 	 1999,
  volume =	 27,
  number =	 6,
  pages =	 {1865--1895}
}

@Article{CesabianchiKrWa94,
  author = 	 {Nicol{\`o} Cesa-Bianchi and Anders Krogh and Manfred
                  K. Warmuth},
  title = 	 {Bounds on Approximate Steepest Descent for
                  Likelihood Maximization in Exponential Families},
  journal = 	 ieeeit,
  year = 	 1994,
  volume =	 40,
  number =	 4,
  pages =	 {1215--1220},
  month =	 jul
}

@inproceedings{CesabianchiLoWa93,
author=		{Nicol{\`o} Cesa-Bianchi and Philip M. Long and Manfred
		 K. Warmuth},
title=		{Worst-case Quadratic Loss Bounds for a Generalization
		 of the {W}idrow-{H}off Rule},
booktitle=	colt93,
month=		jul,
year=		1993,
pages=		{429--438}
}

@inproceedings{CestnikKoBr87,
author=	   	{Cestnik, B. and Kononenko, I. and Bratko, I.},
title=	   	{Assistant 86: A Knowledge-Elicitation Tool for Sophisticated
	   	Users},
booktitle= 	{Progress in Machine Learning---Proceedings of {EWSL}~87:
	   	2nd European Working Session on Learning},
address=   	{Bled, Yugoslavia},
year=	   	1987,
editor=	   	{Bratko, I. and N. Lavra{\v{c}}},
month=	   	may,
pages=	   	{31--45}
}

@book{ChambersHa92
,author=	{John M. Chambers and Trevor J. Hastie}
,title=		{Statistical Models in {S}}
,year=		1992
,publisher=	{Wadsworth \& Brooks/Cole}
}

@unpublished{Charniak??,
author=   	{Charniak, Eugene},
title=    	{The Bayesian Basis of Common Sense Medical Diagnosis},
year=     	{??},
comment=  	{Where was this published??.
	   	Argues in favor of Bayesian approach for medical diagnosis.}
}

@inproceedings{Charniak00,
  author    = {Charniak, Eugene},
  title     = {A Maximum-Entropy-Inspired Parser},
  booktitle = {Proceedings of the 1st Meeting of the North
               American Chapter of the Association for
               Computational Linguistics},
  year      = 2000,
  pages     = {132--139}
}

@inproceedings{Cheeseman83,
author=   	{Cheeseman, Peter C.},
title=    	{A Method of Computing Generalized Bayesian Probability Values
	   	for Expert Systems},
booktitle= 	{Proceedings of the Eighth International Joint Conference on
	   	Artificial Intelligence (Karlsruhe, West Germany)},
year=      	1983,
month=     	aug,
pages=     	{198--202},
comment=   	{Describes iterative graph-oriented method for computing 
	   	a maximum-entropy distribution subject to constraints.}
}

@inproceedings{Cheeseman84,
author=   	{Cheeseman, Peter C.},
title=    	{Learning of Expert Systems from Data},
booktitle= 	{Proceedings of the Workshop on Principles of Knowledge-Based
	   	Systems},
year=      	1984,
month=     	Dec,
pages=     	{115--122},
comment=   	{Describes a `message-length' approach to inferring significant
	   	contingencies in a contingency table.}
}

@inproceedings{Cheeseman85,
author=   	{Cheeseman, Peter C.},
title=    	{In Defense of Probability},
booktitle= 	{Proceedings of the Ninth International Joint Conference on
	   	Artificial Intelligence},
year=     	1985,
pages=    	{1002--1009},
comment=  	{Argues in favor of subjective Bayesianism by refuting some
	  	common misconceptions.}
}

@article{Cheeseman88,
author = 	{Cheeseman, Peter C.},
title = 	{An inquiry into computer understanding},
journal = 	{Computational Intelligence},
year = 		1988,
month = 	Feb,
volume = 	4,
number = 	1,
pages = 	{58--66},
comment = 	{More defense of Bayesian inference}
}

@inproceedings{Cheeseman88b,
author = 	{Cheeseman, Peter C. and Matthew Self and
		Jim Kelly and Will Taylor and Don Freeman and
		John Stutz},
title = 	{Bayesian Classification},
booktitle = 	{AAAI 88 Proceedings},
year = 		1988,
pages = 	{607--611}
}

@InProceedings{ChelbaJe98,
	author={Ciprian Chelba and Frederick Jelinek},
	title={Exploiting Syntactic Structure for Language Modeling},
	booktitle={Proceedings of the Thirty-Sixth Annual Meeting of
                  the Association for Computational Linguistics and
                  Seventeenth International Conference on
                  Computational Linguistics},
	year=1998
}

@PhdThesis{Chen96,
  author = 	 {Stanley F. Chen},
  title = 	 {Building probabilistic models for natural language},
  school = 	 {Harvard University},
  year = 	 1996,
  month =	 may
}

@phdthesis{Chen97
,author=        {Lei Chen}
,title=         {Applications of play against past strategies in
                  repetitions of a game}
}

@TechReport{ChenGo98,
	author={Stanley Chen and Joshua Goodman},
	title={An empirical study of smoothing techniques for language modeling},
	year=1998,
  institution =  {Harvard University},
  number =	 {TR-10-98},
  month =	 aug
}

@inproceedings{ChoiHiHiMaNaPeSiWh98,
	author = "J. Choi and D. Hindle and J. Hirschberg and
		I. Magrin-Chagnolleau and C. Nakatani and F. Pereira
		and A. Singhal and S. Whittaker",
	title = "{SCAN} --- speech content based audio navigator:
		A systems overview",
	booktitle = "Proceedings of the Fifth International Conference on
		Spoken Language Processing",
	year = 1998
}

@article{ChorGo88,
author=		{B. Chor and O. Goldreich},
title=		{Unbiased bits from sources of weak randomness and
		 probabilistic communication complexity},
journal=	sicomp,
volume=		17,
year=		1988,
pages=		{230--261}
}

@inproceedings{Chung94,
  author    = {Thomas H. Chung},
  title     = {Approximate methods for sequential decision making using expert advice},
  booktitle = colt94,
  pages     = {183--189},
  year      = 1994
}

@phdthesis{Chung94b
,author=	{Thomas H. Chung}
,title=		{Minimax learning in iterated games via distributional
		 majorization}
}

@Article{ChurchGa91,
  author = 	 {Kenneth W. Church and William A. Gale},
  title = 	 {A comparison of the enhanced {Good-Turing} and
                   deleted estimation methods for estimating probabilities
                   of {English} bigrams},
  journal = 	 {Computer Speech and Language},
  year = 	 1991,
  volume =	 5,
  pages =	 {19--54}
}

@unpublished{ChurchGa??
,author=	{Kenneth W. Church and William A. Gale}
,title=		{Poisson mixtures}
}

@article{Chvatal79,
author=		{V. Chv{\'a}tal},
title=		{A greedy heuristic for the set covering problem},
journal=	{Mathematics of Operations Research},
volume=		4,
number=		3,
year=		1979,
pages=		{233--235}
}

@Article{ClarkeBar94,
  author = 	 {Bertrand S. Clarke and Andrew R. Barron},
  title = 	 {{Jeffreys'} prior is asymptotically least favorable
		  under entropy risk},
  journal = 	 {Journal of Statistical Planning and Inference},
  year = 	 1994,
  volume =	 41,
  pages =	 {37--60}
}

@techreport{ClarkNi87,
author=   	{Peter Clark and Tim Niblett},
title=    	{The {CN2} Induction Algorithm},
institution= 	{The Turing Institute},
year=     	1987,
comment=  	{Experimental study of rule induction, with accommodation for
		noise.}
}

@inproceedings{ClarkNi87b,
author=	   	{Clark, P. and T. Niblett},
title=	   	{Induction in Noisy Domains},
booktitle= 	{Progress in Machine Learning---Proceedings of {EWSL}~87:
	   	2nd European Working Session on Learning},
address=   	{Bled, Yugoslavia},
year=	   	1987,
editor=	   	{Bratko, I. and N. Lavra{\v{c}}},
month=	   	may,
pages=	   	{11--30}
}

@article{ClausenDrGrKa91,
author=		{Michael Clausen and Andreas Dress and Johannes
		 Grabmeier and Marek Karpinski},
title=		{On zero-testing and interpolation of {$k$}-sparse
		 multivariate polynomials over finite fields},
journal=	tcs,
volume=		84,
year=		1991,
pages=		{151--164}
}

@inproceedings{Cohen95
,author=	{William Cohen}
,title=		{Fast Effective Rule Induction}
,booktitle=	{Proceedings of the Twelfth International Conference
		on Machine Learning}
,pages=		{115--123}
,year=		1995
}

@inbook{CohenFe68,
editor=   	{Cohen, Paul R. and Edward A. Feigenbaum},
title=    	{The Handbook of Artificial Intelligence},
volume=   	3,
year=     	1982,
chapter=  	{XIV: {\em Learning and Inductive Inference}},
publisher= 	{William Kaufmann, Inc.},
address=   	{Los Altos, California},
pages=     	{324--511},
comment=	{Volume 3 of the Handbook appeared in 1982; the citation key
		is kept unchanged for existing documents.}
}

@InProceedings{CohenScSi97,
  author = 	 {William W. Cohen and Robert E. Schapire and Yoram Singer},
  title = 	 {Learning to order things},
  booktitle = nips10,
  year =	 1998
}

@Article{CohenScSi99,
  author = 	 {William W. Cohen and Robert E. Schapire and Yoram Singer},
  title = 	 {Learning to order things},
  journal = 	 jair,
  year = 	 1999,
  volume =	 10,
  pages =	 {243--270}
}

@InProceedings{CohenSi96,
  author = 	 {William W. Cohen and Yoram Singer},
  title = 	 {Context-sensitive learning methods for text categorization},
  booktitle = 	 sigir96,
  year =	 1996,
  pages =	 {307--315}
}

@InProceedings{CohenSi99,
  author = 	 {William W. Cohen and Yoram Singer},
  title = 	 {A Simple, Fast, and Effective Rule Learner},
  booktitle = 	 aaai99,
  year =	 1999
}

@Article{CohnAtLa94,
  author = 	 {David Cohn and Les Atlas and Richard Ladner},
  title = 	 {Improving generalization with active learning},
  journal = 	 ml,
  year = 	 1994,
  volume =	 15,
  number =	 2,
  pages =	 {201--221}
}

@inproceedings{Collins97,
  author    = {Michael Collins},
  title     = {Three Generative, Lexicalised Models for Statistical Parsing},
  booktitle = {Proceedings of the 35th Annual Meeting of the ACL},
  year      = 1997
}

@InProceedings{Collins00,
  author = 	 {Michael Collins},
  title = 	 {Discriminative Reranking for Natural Language Parsing},
  booktitle = 	 ml00,
  year =	 2000
}

@InProceedings{CollinsScSi00,
  author = 	 {Michael Collins and Robert E. Schapire and Yoram Singer},
  title = 	 {Logistic regression, {AdaBoost} and {Bregman} distances},
  booktitle = 	 colt00,
  year =	 2000
}

@article{CollinsScSi02,
  author = 	 {Michael Collins and Robert E. Schapire and Yoram Singer},
  title = 	 {Logistic regression, {AdaBoost} and {Bregman} distances},
journal=         ml,
volume=          48,
number=          {1/2/3},
year=            2002
}

@InProceedings{CollinsSi99,
  author = 	 {Michael Collins and Yoram Singer},
  title = 	 {Unsupervised Models for Named Entity Classification},
  booktitle = 	 {Empirical Methods in Natural Language Processing and
                  Very Large Corpora},
  year =	 1999
}

@article{Conlisk93
,author=	{John Conlisk}
,title=		{Adaptation in games: Two solutions to the {Crawford} puzzle}
,year=		1993
}

@article{Conlisk93b
,author=	{John Conlisk}
,title=		{Adaptive tactics in games: Further solutions to the
		 {Crawford} puzzle}
,year=		1993
}

@article{Cooper62,
author = 	{P. Cooper},
title = 	{The hypersphere in pattern recognition},
journal = 	infctrl,
year = 		1962,
volume = 	5,
pages = 	{324--346}
}

@book{CormenLeRi90,
author=		{Thomas H. Cormen and Charles E. Leiserson and
		 Ronald L. Rivest},
title=		{Introduction to Algorithms},
publisher=	{MIT Press},
year=		{1990}
}

@Article{CortesVa95,
  author = 	 {Corinna Cortes and Vladimir Vapnik},
  title = 	 {Support-Vector Networks},
  journal = 	 ml,
  year = 	 1995,
  volume =	 20,
  number =	 3,
  month =	 sep,
  pages =	 {273--297}
}

@article{Cover65,
author = 	{Cover, Thomas M.},
title = 	{Geometrical and Statistical Properties of Systems of
		 Linear inequalities with applications to pattern
		 recognition},
journal = 	{IEEE Transactions on Electronic Computers},
volume = 	{EC-14},
year = 		1965,
number = 	3,
pages = 	{326--334}
}

@article{Cover67,
  author  = {Cover, T. M. and Hart, P. E.},
  title   = {Nearest Neighbor Pattern Classification},
  journal = {IEEE Transactions on Information Theory},
  year    = 1967,
  month   = jan,
  volume  = {IT-13},
  number  = 1,
  pages   = {21--27}
}

@incollection{Cover67b,
  author    = {Cover, Thomas M.},
  title     = {Behavior of sequential predictors of binary sequences},
  booktitle = {Trans. Fourth Prague Conf. on Information Theory, Statistical
               Decision Functions, Random Processes (Prague, 1965)},
  publisher = {Academia},
  address   = {Prague},
  year      = 1967,
  pages     = {263--272},
}

@incollection{Cover69,
  author    = {Cover, Thomas M.},
  title     = {Learning in Pattern Recognition},
  booktitle = {Methodologies of Pattern Recognition},
  publisher = {Academic Press},
  pages     = {111--132},
  year      = {1969},
  comment   = {Bayesian classification procedures. Learning with finite
               memory.},
}

@article{Cover84,
  author = {Thomas M. Cover},
  title  = {An algorithm for maximizing expected log investment
            return},
}

@article{Cover91,
  author  = {Thomas M. Cover},
  title   = {Universal Portfolios},
  journal = {Mathematical Finance},
  year    = {1991},
  month   = jan,
  volume  = {1},
  number  = {1},
  pages   = {1--29},
}

@article{CoverOr96,
  author  = {T. M. Cover and E. Ordentlich},
  title   = {Universal Portfolios With Side Information},
  journal = ieeeit,
  year    = 1996,
  month   = mar
}

@article{CoverSh77,
  author  = {Thomas M. Cover and Aaron Shenhar},
  title   = {Compound {B}ayes predictors for sequences with
             apparent {M}arkov structure},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  year    = {1977},
  month   = jun,
  volume  = {SMC-7},
  number  = {6},
  pages   = {421--424},
}

@book{CoverTh91,
  author    = {Thomas M. Cover and Joy A. Thomas},
  title     = {Elements of Information Theory},
  publisher = {Wiley},
  year      = {1991},
}

@incollection{CoverWa76,
  author    = {Cover, T. M. and T. J. Wagner},
  title     = {Topics in Statistical Pattern Recognition},
  booktitle = {Communication and Cybernetics: Digital Pattern Recognition},
  editor    = {K. S. Fu},
  chapter   = {2},
  volume    = {10},
  publisher = {Springer-Verlag},
  year      = {1976},
  comment   = {Learning classifiers. Distribution-free techniques. Modelling
               by gambling games.  Finite memory learning.},
}

@article{Cox46,
  author = {R. T. Cox},
  title  = {Probability, Frequency and Reasonable Expectation},
  year   = 1946
}

@book{CoxeterMo72,
  author    = {H. S. M. Coxeter and W. O. J. Moser},
  title     = {Generators and Relations for Discrete Groups},
  edition   = {third},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = 1972,
  comment   = {high-powered book on groups with, among other things,
               good discussion of Cayley graphs.},
}

@inproceedings{CrammerSi00,
  author    = {Koby Crammer and Yoram Singer},
  title     = {On the Learnability and Design of Output Codes for
               Multiclass Problems},
  booktitle = colt00,
  year      = {2000},
}

@inproceedings{CrammerSi02,
  author    = {Koby Crammer and Yoram Singer},
  title     = {Pranking with Ranking},
  booktitle = nips14,
  year      = {2002},
}

@phdthesis{Craven96,
  author = {Mark W. Craven},
  title  = {Extracting Comprehensible Models from Trained Neural Networks},
  school = {University of Wisconsin-Madison},
  note   = {Also appears as UW Technical Report CS-TR-96-1326},
  year   = {1996},
}

@article{Csiszar75,
  author  = {Csisz\'ar, I.},
  title   = {I-Divergence Geometry of Probability Distributions and
             Minimization Problems},
  journal = {The Annals of Probability},
  volume  = {3},
  number  = {1},
  pages   = {146--158},
  year    = {1975},
  comment = {Generalized proof of iterative techniques for computing
             maximum-entropy distributions.},
}

@article{Csiszar84,
  author  = {I. Csisz{\'a}r},
  title   = {Sanov property, generalized {I}-projection and a
             conditional limit theorem},
  journal = {Annals of Probability},
  year    = 1984,
  volume  = 12,
  pages   = {768--793}
}

@article{Csiszar89,
  author = {Imre Csisz{\'a}r},
  title  = {A geometric interpretation of {D}arroch and
            {R}atcliff's generalized iterative scaling}
}

@article{Csiszar89b,
  author  = {Imre Csisz{\'a}r},
  title   = {Why least squares and maximum entropy?  {An} axiomatic
             approach to inference for linear inverse problems},
  journal = annstat,
  year    = 1991,
  volume  = 19,
  number  = 4,
  pages   = {2032--2066}
}

@inproceedings{Csiszar95,
  author    = {I. Csisz{\'a}r},
  title     = {{MaxEnt}, Mathematics, and Information Theory},
  booktitle = {Proceedings of the Fifteenth International Workshop
               on Maximum Entropy and Bayesian Methods},
  pages     = {35--50},
  year      = 1995
}

@article{Csiszar95b,
  author  = {I. Csisz{\'a}r},
  title   = {Generalized projections for non-negative functions},
  journal = {Acta Mathematica Hungarica},
  year    = 1995,
  volume  = 68,
  number  = {1--2},
  pages   = {161--185}
}

@article{CsiszarTu84,
  author  = {I. Csisz\'ar and G. Tusn\'ady},
  title   = {Information geometry and alternating minimization procedures},
  journal = {Statistics and Decisions, Supplement Issue},
  year    = {1984},
  volume  = 1,
  pages   = {205--237},
}

@article{CurramMi94,
  author = {Stephen P. Curram and John Mingers},
  title  = {Neural networks, decision tree induction and
            discriminant analysis: an empirical comparison},
}

@inproceedings{CuttingKuPeSi92,
  author = {Doug Cutting and Julian Kupiec and Jan Pedersen and
            Penelope Sibun},
  title  = {A practical part-of-speech tagger},
}

@unpublished{Cybenko88,
  author = {G. Cybenko},
  title  = {Approximation by Superpositions of a Sigmoidal Function},
  note   = {To appear.},
  year   = {1989},
}

@unpublished{Cybenko88b,
  author = {G. Cybenko},
  title  = {Continuous-Valued Neural Networks with Two Hidden
            Layers are Sufficient},
  note   = {To appear.},
  year   = {1989},
}


@unpublished{DaganEn95,
  author = {Ido Dagan and Sean P. Engelson},
  title  = {Committee-based sampling for training probabilistic
            classifiers},
}

@techreport{Dalkey85,
  author      = {Dalkey, Norman C.},
  title       = {Prior Probabilities Revisited},
  institution = {UCLA Computer Science Department},
  year        = {1985},
  number      = {CSD-850007},
  comment     = {Justification of maximum-entropy via proper scoring rules for
                 probability distributions.  Some consequences.},
}

@article{DarrochRa72,
  author  = {J. N. Darroch and D. Ratcliff},
  title   = {Generalized iterative scaling for log-linear models},
  journal = {The Annals of Mathematical Statistics},
  year    = 1972,
  volume  = 43,
  number  = 5,
  pages   = {1470--1480}
}

@book{David81,
  author    = {H. A. David},
  title     = {Order Statistics},
  edition   = {second},
  publisher = {John Wiley \& Sons},
  year      = {1981},
}

@article{Davisson73,
  author  = {L. D. Davisson},
  title   = {Universal noiseless coding},
  journal = {IEEE Transactions on Information Theory},
  year    = 1973,
  volume  = 19,
  pages   = {783--795}
}

@article{Dawid84,
  author  = {A. P. Dawid},
  title   = {Statistical theory: The prequential approach},
  journal = {Journal of the Royal Statistical Society, Series A},
  year    = 1984,
  volume  = 147,
  pages   = {278--292}
}

@article{Dawid92,
  author = {A. P. Dawid},
  title  = {Prequential analysis, stochastic complexity and
            {B}ayesian inference},
}

@article{Dayan92,
  author  = {Peter Dayan},
  title   = {The Convergence of {$TD(\lambda)$} for General~{$\lambda$}},
  journal = ml,
  year    = {1992},
  month   = may,
  volume  = {8},
  number  = {3/4},
  pages   = {341--362},
}

@unpublished{Dasgupta99,
  author = {Sanjoy Dasgupta},
  title  = {Learning Mixtures of {Gaussians}},
  note   = {(In preparation)}
}

@article{DayanHiNeZe95,
  author = {Peter Dayan and Geoffrey E. Hinton and Radford
            M. Neal and Richard S. Zemel},
  title  = {The {Helmholtz} Machine},
}

@techreport{DayanSe93,
  author      = {Peter Dayan and Terrence J. Sejnowski},
  title       = {{$TD(\lambda)$} Converges with Probability~1},
  institution = {CNL, The Salk Institute},
  year        = {1993},
}
		  
@article{DayanSe94,
  author  = {Peter Dayan and Terrence J. Sejnowski},
  title   = {{$TD(\lambda)$} Converges with Probability~1},
  journal = ml,
  year    = {1994},
  volume  = {14},
  number  = {3},
  pages   = {295--301},
}


@book{DeBruijn58,
  author    = {Bruijn, N. G. de },
  title     = {Asymptotic Methods in Analysis},
  year      = {1958,1981},
  publisher = {Dover},
}

@book{DeSaintExupery43,
  author    = {Saint-Exup{\'e}ry, Antoine de},
  title     = {The Little Prince},
  publisher = {Harcourt, Brace, \& World},
  year      = 1943
}

@inproceedings{DeanAnBaEnKaKoMa92,
  author    = {Thomas Dean and Dana Angluin and Kenneth Basye and
               Sean Engelson and Leslie Kaelbling and Evangelos
               Kokkevis and Oded Maron},
  title     = {Inferring Finite Automata with Stochastic Output
               Functions and an Application to Map Learning},
  booktitle = {Proceedings Tenth National Conference on
               Artificial Intelligence},
  year      = {1992},
  month     = jul,
  pages     = {208--214},
}

@article{DeerwesterDuFuLaHa90,
  author = {Scott Deerwester and Susan T. Dumais and George W.
            Furnas and Thomas K. Landauer and Richard Harshman},
  title  = {Indexing by latent semantic analysis},
}

@article{DellaDeLa97,
  author  = {Della Pietra, Stephen and Della Pietra, Vincent and John Lafferty},
  title   = {Inducing features of random fields},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = 19,
  number  = 4,
  pages   = {380--393},
  year    = 1997,
  month   = apr
}

@techreport{DellaDeLa01,
  author      = {Della Pietra, Stephen and Della Pietra, Vincent and
                 John Lafferty},
  title       = {Duality and Auxiliary Functions for {Bregman} Distances},
  institution = {School of Computer Science, Carnegie Mellon University},
  number      = {CMU-CS-01-109},
  year        = {2001},
}

@article{DemingSt40,
  author  = {Deming, W. Edwards and Frederick F. Stephan},
  title   = {On a Least Squares Adjustment of a Sampled Frequency Table
             when the Expected Marginal Totals are Known},
  journal = {The Annals of Mathematical Statistics},
  year    = 1940,
  volume  = 11,
  pages   = {427--444},
  comment = {Introduces iterative updating procedure; useful for
             computing maximum-entropy solution.}
}

@article{Dennis84,
  author  = {Dennis, Jr., J. E.},
  title   = {A User's Guide to Nonlinear Optimization Algorithms},
  journal = {Proceedings of the IEEE},
  year    = 1984,
  month   = dec,
  volume  = 72,
  number  = 12,
  pages   = {1765--1776},
  comment = {Brief survey with bibliography.}
}

@article{DemirizBeSh02,
  author  = {Ayhan Demiriz and Kristin P. Bennett and John
             Shawe-Taylor},
  title   = {Linear Programming Boosting via Column Generation},
  journal = ml,
  year    = 2002,
  volume  = 46,
  number  = {1/2/3},
  pages   = {225--254}
}

@inproceedings{DesantisMaWe88,
  author    = {Alfredo DeSantis and George Markowsky and Mark
               N. Wegman},
  title     = {Learning probabilistic prediction functions},
  booktitle = colt88,
  year      = {1988},
  pages     = {312--328},
}

@article{Devroye82,
  author  = {Luc Devroye},
  title   = {Bounds for the uniform deviation of empirical measures},
  journal = {Journal of Multivariate Analysis},
  year    = 1982,
  volume  = 12,
  pages   = {72--79}
}

@article{Devroye88,
  author  = {Luc Devroye},
  title   = {Automatic Pattern Recognition: A Study of the Probability
             of Error},
  journal = {IEEE Trans.\ Pattern Analysis and Machine Intelligence},
  year    = {1988},
  month   = jul,
  volume  = {10},
  number  = {4},
  pages   = {530--543},
}
		  
@book{DevroyeGyLu96,
  author    = {Luc Devroye and L{\'a}szl{\'o} Gy{\"o}rfi and G{\'a}bor Lugosi},
  title     = {A Probabilistic Theory of Pattern Recognition},
  publisher = {Springer},
  year      = 1996
}
		  
@inproceedings{DhagatGa92,
  author     = {Dhagat, Aditi and G{\'a}cs, P{\'e}ter and Winkler, Peter},
  title      = {On playing ``twenty questions'' with a liar},
  booktitle  = {Proceedings of the Third Annual ACM-SIAM Symposium on Discrete
                Algorithms (Orlando, FL, 1992)},
  publisher  = {ACM},
  address    = {New York},
  year       = 1992,
  pages      = {16--22},
  mrclass    = {68P99 (62C99 62L99 68Q99)},
  mrnumber   = {93f:68022},
  mrreviewer = {Allen Klinger},
}

@article{Diaconis87,
  author = {Persi Diaconis},
  title  = {A generalization of spectral analysis with
            application to ranked data},
}

@article{DiaconisSt91,
  author  = {Persi Diaconis and Daniel Stroock},
  title   = {Geometric bounds for eigenvalues of {Markov} chains},
  journal = annapprob,
  volume  = 1,
  number  = 1,
  year    = 1991,
  pages   = {36--61}
}

@phdthesis{Diep95,
  author = {Thanh Am Diep},
  title  = {Capacity of multi-level threshold devices},
}

@inproceedings{Dietterich84,
  author    = {Dietterich, Thomas G.},
  title     = {Learning About Systems That Contain State Variables},
  booktitle = {Proceedings of the National Conference on Artificial
               Intelligence},
  pages     = {96--100},
  month     = aug,
  year      = {1984},
}

@incollection{Dietterich90,
  author    = {Thomas G. Dietterich},
  title     = {Machine Learning},
  booktitle = {Annual Review of Computer Science},
  editor    = {Joseph F. Traub and Barbara J. Grosz and Butler W.
               Lampson and Nils J. Nilsson},
  publisher = {Annual Reviews},
  volume    = {4},
  pages     = {255--306},
  year      = 1990,
  comment   = {address= Palo Alto},
}

@article{Dietterich00,
  author  = {Thomas G. Dietterich},
  title   = {An experimental comparison of three methods for
             constructing ensembles of decision trees: Bagging,
             boosting, and randomization},
  journal = ml,
  volume  = {40},
  number  = {2},
  pages   = {139--158},
  year    = 2000,
}

@inproceedings{DietterichBa91,
  author = {Thomas G. Dietterich and Ghulum Bakiri},
  title  = {Error-correcting output codes: a general method for
            improving multiclass inductive learning programs},
}

@article{DietterichBa95,
  author  = {Thomas G. Dietterich and Ghulum Bakiri},
  title   = {Solving Multiclass Learning Problems via
             Error-Correcting Output Codes},
  journal = jair,
  volume  = {2},
  pages   = {263--286},
  month   = jan,
  year    = {1995},
}

@inproceedings{DietterichKeMa96,
  author    = {Tom Dietterich and Michael Kearns and Yishay Mansour},
  title     = {Applying the Weak Learning Framework to Understand
               and Improve {C4.5}},
  booktitle = ml96,
  year      = {1996},
}

@techreport{DietterichKo95,
  author      = {Tom Dietterich and Eun Bae Kong},
  title       = {Machine Learning Bias, Statistical Bias, and
                 Statistical Variance of Decision Tree Algorithms},
  institution = {Oregon State University},
  year        = 1995,
  note        = {Available via the WWW at
                 http://www.cs.orst.edu:80/\~{}tgd/cv/tr.html}
}

@inproceedings{DifabbrizioDuGuHoRaRiScSc02,
  author    = {Giuseppe Di Fabbrizio and Dawn Dutton and
               Narendra Gupta and Barbara Hollister and
               Mazin Rahim and Giuseppe Riccardi and
               Robert Schapire and Juergen Schroeter},
  title     = {{AT\&T} Help Desk},
  booktitle = {7th International Conference on Spoken Language Processing},
  year      = {2002},
}

@book{Dobson90,
  author    = {Annette J. Dobson},
  title     = {An Introduction to Generalized Linear Models},
  publisher = {Chapman and Hall},
  year      = {1990},
}

@phdthesis{Dolan89,
  author = {Charles Patrick Dolan},
  title  = {Tensor Manipulation Networks: Connectionist and Symbolic
            Approaches to Comprehension, Learning, and Planning},
  school = {UCLA Computer Science Department},
  year   = {1989},
  month  = jun,
}
		  
@article{DonahueGuDaSa97,
  author  = {M. J. Donahue and L. Gurvits and C. Darken and E. Sontag},
  title   = {Rates of convex approximation in non-{H}ilbert spaces},
  journal = {Constructive Approximation},
  volume  = {13},
  pages   = {187--220},
  year    = {1997},
}

@inproceedings{Domingos97,
  author    = {Pedro Domingos},
  title     = {Knowledge Acquisition from Examples Via Multiple Models},
  booktitle = ml97,
  pages     = {98--106},
  year      = 1997
}

@inproceedings{DomingoWa00,
  author    = {Carlos Domingo and Osamu Watanabe},
  title     = {Scaling up a boosting-based learner via adaptive sampling},
  booktitle = {Proceedings of the Fourth Pacific-Asia Conference
               on Knowledge Discovery and Data Mining},
  year      = {2000},
}

@article{DonohoJo95,
  author = {David L. Donoho and Iain M. Johnstone},
  title  = {Adapting to Unknown Smoothness via Wavelet Shrinkage},
}

@book{Doukhan94,
  author    = {Paul Doukhan},
  title     = {Mixing: Properties and Examples},
  publisher = {Springer},
  year      = 1994,
  number    = 85,
  series    = {Lecture Notes in Statistics},
  comment   = {This book was recommended by Avi Wyner, he says it has a good
               summary of ``exponential bounds'' which are a generalization
               of Hoeffding/Chernoff bounds to non-IID processes. Currently
               (3/99) the book is out of print.}
}

@techreport{Drescher80,
  author      = {Drescher, Gary L.},
  title       = {Suggestions for Genetic {A.I.}},
  institution = {MIT Artificial Intelligence Laboratory},
  year        = 1980,
  number      = 198,
  month       = feb
}

@mastersthesis{Drescher85,
  author  = {Drescher, Gary L.},
  title   = {The Schema Mechanism: A Conception of Constructivist
             Intelligence},
  school  = {MIT Department of Electrical Engineering and Computer
             Science},
  month   = feb,
  year    = {1985},
  comment = {Constructivist version of Piagetian theory.},
}

@techreport{Drescher86,
  author      = {Drescher, Gary L.},
  title       = {Genetic {AI} --- Translating {P}iaget into {L}isp},
  institution = {MIT Artificial Intelligence Laboratory},
  number      = {890},
  month       = feb,
  year        = {1986},
}

@inproceedings{Drescher87,
  author    = {Gary L. Drescher},
  title     = {A Mechanism for Early {Piagetian} Learning},
  booktitle = {Proceedings of AAAI-87: Sixth National Conference on
               Artificial Intelligence},
  address   = {Seattle, Washington},
  pages     = {290--294},
  month     = jul,
  year      = {1987},
}

@book{Drescher91,
  author    = {Gary L. Drescher},
  title     = {Made-up Minds: A Constructivist Approach to
               Artificial Intelligence},
  publisher = {MIT Press},
  year      = {1991},
}

@article{DressGr91,
  author  = {Andreas Dress and Johannes Grabmeier},
  title   = {The Interpolation Problem for $k$-sparse Polynomials
             and Character Sums},
  journal = {Advances in Applied Mathematics},
  year    = {1991},
  volume  = {12},
  pages   = {57--75},
}

@unpublished{Drucker95,
  author = {Harris Drucker},
  title  = {Fast decision tree ensembles for optical character
            recognition},
}

@inproceedings{Drucker97,
  author    = {Harris Drucker},
  title     = {Improving regressors using boosting techniques},
  booktitle = ml97,
  year      = 1997,
  pages     = {107--115}
}

@inproceedings{DruckerBuKaSmVa97,
  author = {Harris Drucker and Chris J. C. Burges and Linda
            Kaufman and Alex Smola and Vladimir Vapnik},
  title  = {Support Vector Regression Machines},
}

@unpublished{DruckerCo95b,
  author = {Harris Drucker and Corinna Cortes},
  title  = {Improving Pattern Classification Performance Using
            Boosting Techniques},
  year   = {1995},
}

@inproceedings{DruckerCo96,
  author    = {Harris Drucker and Corinna Cortes},
  title     = {Boosting Decision Trees},
  booktitle = nips8,
  year      = 1996,
  pages     = {479--485}
}

@article{DruckerCoJaLeVa94,
  author  = {Harris Drucker and Corinna Cortes and L. D. Jackel
             and Yann LeCun and Vladimir Vapnik},
  title   = {Boosting and other ensemble methods},
  journal = {Neural Computation},
  year    = {1994},
  volume  = {6},
  number  = {6},
  pages   = {1289--1301},
}

@inproceedings{DruckerScSi93,
  author    = {Harris Drucker and Robert Schapire and Patrice
               Simard},
  title     = {Improving performance in neural networks using a
               boosting algorithm},
  booktitle = nips5,
  pages     = {42--49},
  year      = {1993},
  comment   = {,publisher=	{Morgan Kaufmann},editor=	nips5eds},
}

@article{DruckerScSi93b,
  author  = {Harris Drucker and Robert Schapire and Patrice
             Simard},
  title   = {Boosting Performance in Neural Networks},
  journal = {International Journal of Pattern Recognition and
             Artificial Intelligence},
  volume  = {7},
  number  = {4},
  pages   = {705--719},
  year    = 1993,
}

@book{DudaHa73,
  author    = {Richard O. Duda and Peter E. Hart},
  title     = {Pattern Classification and Scene Analysis},
  publisher = {Wiley},
  year      = 1973,
}

@article{Dudley78,
  author  = {Dudley, R. M.},
  title   = {Central Limit Theorems for Empirical Measures},
  journal = {The Annals of Probability},
  volume  = {6},
  number  = {6},
  pages   = {899--929},
  year    = {1978},
  comment = {Generalization and proofs of Vapnik-Chervonenkis results.},
}

@article{Dudley79,
  author = {R. M. Dudley},
  title  = {Balls in {$R^k$} do not cut all subsets of {$k+2$}
            points},
}

@article{Dudley87,
  author  = {R. M. Dudley},
  title   = {Universal {D}onsker classes and metric entropy},
  journal = {Annals of Probability},
  year    = {1987},
  volume  = {15},
  number  = {4},
  pages   = {1306--1326},
}


@inproceedings{DuffyHe99,
  author    = {N. Duffy and D. Helmbold},
  title     = {A Geometric Approach to Leveraging Weak Learners},
  booktitle = eurocolt99,
  publisher = {Springer-Verlag},
  year      = {1999},
}

@inproceedings{DuffyHe99b,
  author    = {Nigel Duffy and David Helmbold},
  title     = {Potential Boosters?},
  booktitle = nips11,
  year      = {1999},
}

@article{DuffyHe02,
  author  = {Nigel Duffy and David Helmbold},
  title   = {Boosting Methods for Regression},
  journal = ml,
  volume  = {49},
  number  = {2/3},
  year    = {2002},
}

@misc{Dumouchel97,
  author = {William duMouchel},
  title  = {Statistical analysis of categorical data}
}

@article{DurGr93,
  author  = {A. D{\"u}r and J. Grabmeier},
  title   = {Applying Coding Theory to Sparse Interpolation},
  journal = sicomp,
  volume  = 22,
  number  = 4,
  pages   = {695--704},
  month   = aug,
  year    = 1993
}

@article{Duttweiler78,
  author = {Donald L. Duttweiler},
  title  = {A twelve-channel digital echo canceler},
  year   = {1978},
}

@article{DyerFrKa91,
  author  = {Martin Dyer and Alan Frieze and Ravi Kannan},
  title   = {A Random Polynomial-Time Algorithm for Approximating
             the Volume of Convex Bodies},
  journal = jacm,
  year    = {1991},
  month   = jan,
  volume  = {38},
  number  = {1},
  pages   = {1--17},
}

@book{EfronTi93,
  author    = {Bradley Efron and Robert J. Tibshirani},
  title     = {An Introduction to the Bootstrap},
  publisher = {Chapman \& Hall},
  year      = {1993},
}

@inproceedings{EhrenfeuchtHa88,
  author    = {Ehrenfeucht, Andrzej and David Haussler},
  title     = {Learning Decision Trees from Random Examples},
  booktitle = colt88,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  month     = aug,
  year      = 1988,
  pages     = {182--194}
}

@techreport{EhrenfeuchtHaKeVa87,
  author      = {A. Ehrenfeucht and D. Haussler and M. Kearns and L. Valiant},
  title       = {A General Lower Bound on the Number of Examples Needed
                 for Learning},
  institution = ucsccrl,
  number      = {UCSC-CRL-87-26},
  year        = {1987},
}

@inproceedings{EhrenfeuchtHaKeVa88,
  author    = {Andrzej Ehrenfeucht and David Haussler and Michael
               Kearns and Leslie Valiant},
  title     = {A General Lower Bound on the Number of Examples Needed
               for Learning},
  booktitle = colt88,
  pages     = {139--154},
  month     = aug,
  year      = 1988,
}

@article{EichbergerHaMi93,
  author = {J. Eichberger and H. Haller and F. Milne},
  title  = {Naive {B}ayesian learning in $2\times 2$ matrix games},
  year   = {1993},
}

@incollection{Elith02,
  author    = {Jane Elith},
  title     = {Quantitative Methods for Modeling Species Habitat:
               Comparative Performance and an Application to
               {Australian} Plants},
  booktitle = {Quantitative Methods for Conservation Biology},
  pages     = {39--58},
  publisher = {Springer-Verlag},
  year      = 2002,
  editor    = {Scott Ferson and Mark Burgman}
}

@inproceedings{EtzioniHaJiKaMaWa96,
  author    = {O. Etzioni and S. Hanks and T. Jiang and R. M. Karp and
               O. Madani and O. Waarts},
  title     = {Efficient information gathering on the internet},
  booktitle = focs96,
  year      = {1996},
}

@inproceedings{EscuderoMaRi00,
  author    = {Gerard Escudero and Llu{\'\i}s M{\`a}rquez and German Rigau},
  title     = {Boosting applied to word sense disambiguation},
  booktitle = {Proceedings of the 12th European Conference on
               Machine Learning},
  pages     = {129--141},
  year      = 2000
}

@article{FaberMy91,
  author = {V. Faber and J. Mycielski},
  title  = {Applications of learning theorems}
}

@book{FangPu93,
  author    = {Shu-Cherng Fang and Sarat Puthenpura},
  title     = {Linear Optimization and Extensions: Theory and
               Algorithms},
  publisher = {Prentice Hall},
  year      = {1993},
}

@techreport{FarmerSi88,
  author      = {Farmer, J. Doyne and John J. Sidorowich},
  title       = {Exploiting Chaos to Predict the Future and Reduce Noise},
  institution = {Los Alamos National Laboratory},
  number      = {LA-UR-88-901},
  month       = mar,
  year        = {1988},
}

@article{FederMeGu92,
  author  = {M. Feder and N. Merhav and M. Gutman},
  title   = {Universal Prediction of individual sequences},
  journal = {IEEE Transactions on Information Theory},
  year    = {1992},
  volume  = {38},
  pages   = {1258--1270},
}

@book{Feller68,
  author    = {Feller, William},
  title     = {An Introduction to Probability Theory and its Applications},
  publisher = {John Wiley and Sons},
  year      = 1968,
  edition   = {third},
  volume    = 1
}


@book{Ferguson67,
  author    = {Thomas S. Ferguson},
  title     = {Mathematical Statistics: A Decision Theoretic
               Approach},
  publisher = {Academic Press},
  year      = {1967},
}

@book{Feyerabend81,
  author    = {Feyerabend, P. K.},
  title     = {Philosophical Papers: Realism, Rationalism, \&
               Scientific Method},
  volume    = {1},
  publisher = {Cambridge University Press},
  year      = {1981},
}

@article{FiatRaRa94,
  author  = {A. Fiat and Y. Rabani and Y. Ravid},
  title   = {Competitive {$k$}-Server Algorithms},
  journal = jcss,
  volume  = 48,
  number  = 3,
  year    = 1994,
  pages   = {410--428}
}

@article{Fiedler72,
  author  = {Fiedler, Miroslav},
  title   = {Bounds for Eigenvalues of Doubly Stochastic Matrices},
  journal = {Linear Algebra and its Applications},
  volume  = {5},
  number  = {3},
  pages   = {299--310},
  month   = jul,
  year    = {1972},
}

@article{Field75,
  author  = {B. J. Field},
  title   = {Towards automatic indexing: automatic assignment of
             controlled-language indexing and classification from
             free indexing},
  journal = {Journal of Documentation},
  volume  = 31,
  number  = 4,
  month   = dec,
  year    = 1975,
  pages   = {246--265}
}

@article{Fienberg70,
  author  = {Fienberg, Stephen E.},
  title   = {An Iterative Procedure for Estimation in Contingency Tables},
  journal = {The Annals of Mathematical Statistics},
  year    = 1970,
  volume  = 41,
  number  = 3,
  pages   = {907--917},
  comment = {Proves convergence of Deming/Stephan iterative procedure for
             finding maximum entropy solution}
}

@article{Fill91,
  author  = {James Allen Fill},
  title   = {Eigenvalue bounds on convergence to stationarity for
             nonreversible {M}arkov chains, with an application to
             the exclusion process},
  journal = {The Annals of Applied Probability},
  year    = {1991},
  volume  = {1},
  number  = {1},
  pages   = {62--87},
}

@inproceedings{FischerSi90,
  author    = {Paul Fischer and Hans Ulrich Simon},
  title     = {On learning ring-sum-expansions},
  booktitle = colt90,
  pages     = {130--143},
  month     = aug,
  year      = {1990},
}


@article{FischerSi92,
  author  = {Paul Fischer and Hans Ulrich Simon},
  title   = {On learning ring-sum-expansions},
  journal = sicomp,
  year    = {1992},
  month   = feb,
  volume  = {21},
  number  = {1},
  pages   = {181--192},
}

@article{FischhoffBM83,
  author  = {Fischhoff, Baruch and Ruth Beyth-Marom},
  title   = {Hypothesis Evaluation from a {Bayesian} Perspective},
  journal = {Psychological Review},
  year    = 1983,
  volume  = 90,
  number  = 3,
  pages   = {239--260},
  comment = {Taxonomy of ways people might deviate from Bayesianism.}
}

@article{FlannaganFrHo86,
  author  = {Flannagan, Michael J. and Lisbeth S. Fried and Keith J.
             Holyoak},
  title   = {Distributional Expectations and the Induction of Category
             Structure},
  journal = {Journal of Experimental Psychology: Learning, Memory, and
             Cognition},
  volume  = {12},
  number  = {2},
  pages   = {241--256},
  year    = {1986},
  comment = {Experimental evidence in favor of idea that people expect
             exemplars of a category to follow the normal distribution},
}

@book{Fletcher87,
  author    = {R. Fletcher},
  title     = {Practical Methods of Optimization},
  publisher = {John Wiley},
  edition   = {Second},
  year      = {1987},
}


@inproceedings{Floyd89,
  author    = {Floyd, Sally},
  title     = {Space-bounded learning and the {V}apnik-{C}hervonenkis
               Dimension},
  booktitle = colt89,
  pages     = {349--364},
  month     = jul,
  year      = {1989},
}

@phdthesis{Floyd89b,
  author = {Floyd, Sally},
  title  = {On Space-bounded learning and the {V}apnik-{C}hervonenkis
            Dimension},
  school = {University of California at Santa Cruz},
  year   = {1989},
  month  = dec,
  note   = {Available as Technical Report TR-89-061,
            International Computer Science Institute},
}

@article{FloydWa95,
  author  = {Sally Floyd and Manfred Warmuth},
  title   = {Sample compression, learnability, and the
             {V}apnik-{C}hervonenkis dimension},
  journal = ml,
  year    = 1995,
  volume  = 21,
  number  = 3,
  pages   = {269--304}
}

@unpublished{Flynn88,
  author = {{Flynn (ed.)}, Anita},
  title  = {Olympic robot building manual},
  year   = 1988
}

@article{ForsterWa??,
  author  = {J\"urgen Forster and Manfred Warmuth},
  title   = {Relative Expected Instantaneous Loss Bounds},
  year    = {to appear},
  journal = jcss,
}

@article{Forsyth81,
  author  = {Richard Forsyth},
  title   = {{BEAGLE} --- {A} {Darwinian} approach to pattern
             recognition},
  journal = {Kybernetes},
  volume  = {10},
  pages   = {159--166},
  year    = {1981},
}

@inproceedings{FortnowWh94,
  author    = {Lance Fortnow and Duke Whang},
  title     = {Optimality and domination in repeated games with
               bounded players},
  booktitle = stoc94,
}

@article{Foster91,
  author  = {Dean P. Foster},
  title   = {Prediction in the worst case},
  journal = {The Annals of Statistics},
  year    = {1991},
  volume  = {19},
  number  = {2},
  pages   = {1084--1090},
}

@article{FosterVo93,
  author  = {Dean P. Foster and Rakesh V. Vohra},
  title   = {A randomization rule for selecting forecasts},
  journal = {Operations Research},
  volume  = 41,
  number  = 4,
  month   = jul # "--" # aug,
  year    = 1993,
  pages   = {704--709}
}

@unpublished{FosterVo97,
  author = {Dean P. Foster and Rakesh Vohra},
  title  = {Regret in the On-line Decision Problem},
  year   = {1997},
  note   = {unpublished manuscript},
}

@article{FosterVo99,
  author  = {Foster, Dean P. and Vohra, Rakesh},
  title   = {Regret in the on-line decision problem},
  journal = {Games and Economic Behavior},
  volume  = 29,
  number  = {1--2},
  pages   = {7--35},
  year    = 1999,
  note    = {Learning in games: a symposium in honor of David Blackwell}
}

@article{FosterVo98,
  author  = {Dean P. Foster and Rakesh V. Vohra},
  title   = {Asymptotic calibration},
  journal = {Biometrika},
  year    = {1998},
  volume  = {85},
  number  = {2},
  pages   = {379--390},
}

@article{FranklWi81,
  author  = {P. Frankl and R. M. Wilson},
  title   = {Intersection theorems with geometric consequences},
  journal = {Combinatorica},
  year    = {1981},
  volume  = {1},
  pages   = {357--368},
}

@book{Franklin68,
  author    = {Joel N. Franklin},
  title     = {Matrix Theory},
  year      = 1968,
  publisher = {Prentice-Hall},
}

@unpublished{FranklinGaYu93,
  author = {Matthew Franklin and Zvi Galil and Moti Yung},
  title  = {An overview of secure distributed computing},
}

@techreport{FreanDo98,
  author      = {Frean, Marcus and Downs, Tom},
  title       = {A simple cost function for boosting},
  institution = {Department of Computer Science and Electrical
                 Engineering, University of Queensland},
  year        = {1998},
}

@inproceedings{Freund90,
  author    = {Freund, Yoav},
  title     = {Boosting a weak learning algorithm by majority},
  booktitle = colt90,
  pages     = {202--216},
  month     = aug,
  year      = {1990},
  note      = {To appear, {\it Information and Computation}},
}

@inproceedings{Freund92,
  author    = {Freund, Yoav},
  title     = {An improved boosting algorithm and its implications
               on learning complexity},
  booktitle = colt92,
  pages     = {391--398},
  month     = jul,
  year      = {1992},
}

@phdthesis{Freund93,
  author = {Yoav Freund},
  title  = {Data Filtering and Distribution Modeling Algorithms
            for Machine Learning},
  school = {University of California at Santa Cruz},
  year   = {1993},
  note   = {Retrievable from: ftp.cse.ucsc.edu/pub/tr/ucsc-crl-93-37.ps.Z},
}

@article{Freund95,
  author  = {Yoav Freund},
  title   = {Boosting a weak learning algorithm by majority},
  journal = infcomp,
  volume  = {121},
  number  = {2},
  pages   = {256--285},
  year    = {1995}
}

@inproceedings{Freund96,
  author    = {Yoav Freund},
  title     = {Predicting a binary sequence almost as well as the
               optimal biased coin},
  booktitle = colt96,
  year      = {1996}
}

@article{Freund??
,author=	"Yoav Freund"
,title=		"Boosting a weak learning algorithm by majority"
,journal=	"Information and Computation"
,year=		1995
,volume=	121
,number=	2
,pages=		{256--285}
,note=		{An extended abstract appeared in \emph{Proceedings of
		 the Third Annual Workshop on Computational Learning
		 Theory}, 1990.}
}

@inproceedings{FreundIyScSi98,
  author    = {Yoav Freund and Raj Iyer and Robert E. Schapire and Yoram Singer},
  title     = {An efficient boosting algorithm for combining preferences},
  year      = {1998},
  booktitle = ml98
}

@inproceedings{FreundKeMaRoRuSc95
,author=	{Yoav Freund and Michael Kearns and Yishay Mansour and
		 Dana Ron and Ronitt Rubinfeld and Robert E. Schapire}
,title=		{Efficient Algorithms for Learning to Play Repeated
		 Games Against Computationally Bounded Adversaries}
,booktitle=	focs95
,pages=		{332--341}
,year=		1995
}

@inproceedings{FreundKeRoRuScSe93,
  author    = {Yoav Freund and Michael Kearns and Dana Ron and
               Ronitt Rubinfeld and Robert E. Schapire and Linda Sellie},
  title     = {Efficient learning of typical finite automata from
               random walks},
  booktitle = stoc93,
  pages     = {315--324},
  month     = may,
  year      = {1993}
}

@inproceedings{FreundRo95
,author=        {Yoav Freund and Dana Ron}
,title=         {Learning to model sequences generated by switching
                 distributions}
,booktitle=     colt95
,year=          1995
,pages=         {41--50}
}

@inproceedings{FreundSc95,
  author    = {Yoav Freund and Robert E. Schapire},
  title     = {A decision-theoretic generalization of on-line
               learning and an application to boosting},
  booktitle = eurocolt95,
  pages     = {23--37},
  publisher = {Springer-Verlag},
  year      = {1995},
  comment   = {,note=		{A draft of the journal version is available
		 electronically (on our web pages, or by email request).}}
}

@unpublished{FreundSc95b,
  author = {Yoav Freund and Robert E. Schapire},
  title  = {A decision-theoretic generalization of on-line
            learning and an application to boosting},
  note   = {Unpublished manuscript available electronically (on
            our web pages, or by email request).  An extended
            abstract appeared in {\it Computational Learning
            Theory: Second European Conference, EuroCOLT~'95},
            pages~23--37, Springer-Verlag, 1995}
}


@inproceedings{FreundSc96
,author=	{Yoav Freund and Robert E. Schapire}
,title=		{Experiments with a New Boosting Algorithm}
,booktitle=	ml96
,year=		1996
,pages=		{148--156}
}

@inproceedings{FreundSc96b
,author=	{Yoav Freund and Robert E. Schapire}
,title=		{Game Theory, On-line Prediction and Boosting}
,booktitle=	colt96
,year=		1996
,pages=		{325--332}
}

@article{FreundSc97
,author=	{Yoav Freund and Robert E. Schapire}
,title=		{A decision-theoretic generalization of on-line
		 learning and an application to boosting}
,journal=	jcss
,year=		1997
,volume=	55
,number=	1
,month=		aug
,pages=		{119--139}
}

@Article{FreundSc99,
  author = 	 {Yoav Freund and Robert E. Schapire},
  title = 	 {Adaptive game playing using multiplicative weights},
  journal = 	 {Games and Economic Behavior},
  year = 	 1999,
  volume=        29,
  pages=         {79--103}
}

@Article{FreundSc99b,
  author = 	 {Yoav Freund and Robert E. Schapire},
  title = 	 {Large margin classification using the perceptron algorithm},
  journal = 	 ml,
  year = 	 1999,
  volume =	 37,
  number =	 3,
  pages =	 {277--296},
  month =	 dec
}

@Article{FreundSc99c,
  author = 	 {Yoav Freund and Robert E. Schapire},
  title = 	 {A Short Introduction to Boosting},
  journal = 	 {Journal of Japanese Society for Artificial Intelligence},
  year = 	 1999,
  volume =	 14,
  number =	 5,
  pages =	 {771--780},
  month =	 sep,
  note=          {Appearing in Japanese, translation by Naoki Abe}
}

@InProceedings{FreundSc98,
  author = 	 {Yoav Freund and Robert E. Schapire},
  title = 	 {Large margin classification using the perceptron
                  algorithm},
  booktitle = 	 colt98,
  year =	 1998,
  note =	 {Journal version: \emph{Machine Learning},
                  37(3):277--296, 1999}
}

@inproceedings{FreundScSiWa97,
  author    = {Yoav Freund and Robert E. Schapire and
               Yoram Singer and Manfred K. Warmuth},
  title     = {Using and Combining Predictors that Specialize},
  booktitle = stoc97,
  year      = {1997},
  pages     = {334--343}
}


@Article{FreundSeShTi97,
  author = 	 {Yoav Freund and H. Sebastian Seung and Eli Shamir and Naftali Tishby},
  title = 	 {Selective Sampling Using the Query by Committee Algorithm},
  journal = 	 ml,
  year = 	 1997,
  volume =	 28,
  pages =	 {133--168}
}

@InProceedings{Freund99,
  author = 	 {Yoav Freund},
  title = 	 {An adaptive version of the boost by majority algorithm},
  booktitle = 	 colt99,
  year =	 1999,
  pages =        {102--113}
}

@inproceedings{FreundMa99,
  author    = {Yoav Freund and Llew Mason},
  title     = {The alternating decision tree learning algorithm},
  booktitle = ml99,
  year      = {1999},
  pages     = {124--133}
}

@inproceedings{FreundOp00,
  author    = {Yoav Freund and Manfred Opper},
  title     = {Continuous Drifting Games},
  booktitle = colt00,
  publisher = {Morgan Kaufmann},
  pages     = {126--132},
  year      = {2000}
}

@Article{FreundOp02,
  author = 	 {Yoav Freund and Manfred Opper},
  title = 	 {Continuous Drifting Games},
  journal = 	 jcss,
  year = 	 2002,
  pages =	 {113--132}
}

@Article{Freund01,
  author = 	 {Yoav Freund},
  title = 	 {An adaptive version of the boost by majority algorithm},
  journal = 	 ml,
  year = 	 2001,
  volume =	 43,
  number =	 3,
  pages =	 {293--318},
  month =	 jun
}

@unpublished{Friedman95
,author=	{Jerome H. Friedman}
,title=		{Introduction to computational learning and
		 statistical prediction}
,note=		{Unpublished slides}
,comment=	{Slides from ml-95}
}

@unpublished{Friedman96
,author=	{Jerome H. Friedman}
,title=		{Another approach to polychotomous classification}
,note=		{Unpublished manuscript}
}

@unpublished{Friedman96b,
  author = {Jerome H. Friedman},
  title  = {On Bias, Variance, 0/1-loss, and the Curse-of-Dimensionality},
  note   = {Available electronically from http://stat.stanford.edu/$\sim$jhf}
}

@article{Friedman97
,author=	{Jerome H. Friedman}
,title=		{On Bias, Variance, 0/1-loss, and the Curse-of-Dimensionality}
,journal=       {Data Mining and Knowledge Discovery}
,volume=   1
,number=   1
,pages=    {55--77}
,year=1997
}

@article{Friedman01,
  author  = {Jerome H. Friedman},
  title   = {Greedy function approximation: {A} gradient boosting machine},
  journal = annstat,
  volume  = {29},
  number  = {5},
  month   = oct,
  year    = {2001}
}

@article{FriedmanBeFi77,
author = 	{Jerome H. Friedman and Jon Louis Bentley and Raphael Ari Finkel},
title = 	{An Algorithm for Finding Best Matches in Logarithmic
		 Expected Time},
journal = 	{ACM Transactions on Mathematical Software},
year = 		{1977},
month = 	sep,
volume = 	3,
number = 	3,
pages = 	{209--226}
}

@unpublished{FriedmanHaTi98,
  author = {Jerome Friedman and Trevor Hastie and Robert Tibshirani},
  title  = {Additive logistic regression: a statistical view of boosting},
  year   = {1998},
  note   = {Technical Report}
}

@article{FriedmanHaTi00,
  author = 	 {Jerome Friedman and Trevor Hastie and Robert Tibshirani},
  title = 	 {Additive logistic regression: {A} statistical view of
                  boosting},
  journal=       annstat,
  volume=        28,
  number=        2,
  pages=         {337--374},
  month =          apr,
  year =         2000
}

@inproceedings{FriessCrCa98,
  author    = {Thilo Friess and Nello Cristianini and Colin Campbell},
  title     = {The Kernel-Adatron: A Fast and Simple Learning
               Procedure for Support Vector Machines},
  booktitle = ml98,
  year      = {1998}
}

@article{FuBo75a,
  author  = {Fu, King-Sun and Taylor L. Booth},
  title   = {Grammatical Inference: Introduction and Survey -- Part I},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = {SMC-5},
  number  = {1},
  pages   = {95--111},
  month   = jan,
  year    = {1975},
  comment = {Inference of finite-state and context-free grammars reviewed.}
}

@article{FuBo75b,
  author  = {Fu, King-Sun and Taylor L. Booth},
  title   = {Grammatical Inference: Introduction and Survey -- Part II},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = {SMC-5},
  number  = {4},
  pages   = {409--423},
  month   = jul,
  year    = {1975},
  comment = {Inference of stochastic finite-state and context-free
             grammars.}
}

@article{FudenbergLe93
,author=	{Drew Fudenberg and David K. Levine}
,title=		{Steady State Learning and Nash Equilibrium}
,journal=	{Econometrica}
,volume=	61
,number=	3
,pages=		{547--573}
,year=		1993
}

@article{FudenbergLe95,
  author  = {Drew Fudenberg and David K. Levine},
  title   = {Consistency and cautious fictitious play},
  journal = {Journal of Economic Dynamics and Control},
  volume  = {19},
  pages   = {1065--1089},
  year    = {1995}
}

@book{FudenbergTi91,
  author    = {Drew Fudenberg and Jean Tirole},
  title     = {Game Theory},
  publisher = {MIT Press},
  year      = {1991}
}

@article{FuhrPf94
,author=        {Norbert Fuhr and Ulrich Pfeifer}
,title=         {Probabilistic information retrieval as a combination
                  of abstraction, inductive learning, and
                  probabilistic assumptions}
,journal=       {ACM Transactions on Information Systems}
,volume=        12
,number=        1
,month=         jan
,year=          1994
,pages=         {92--115}
}

@book{Fukunaga90,
  author    = {Keinosuke Fukunaga},
  title     = {Introduction to Statistical Pattern Recognition},
  publisher = {Academic Press},
  edition   = {second},
  year      = {1990}
}

@inproceedings{FurnkranzWi94
,author=	{Johannes F{\"u}rnkranz and Gerhard Widmer}
,title=		{Incremental Reduced Error Pruning}
,booktitle=	{Machine Learning: Proceedings of the Eleventh
		International Conference}
,year=		1994
,pages=		{70--77}
}

@techreport{FurstJaSm90,
  author      = {Merrick Furst and Jeffrey Jackson and Sean Smith},
  title       = {Learning {$AC^0$} Functions Sampled Under Mutually
                 Independent Distributions},
  institution = {Carnegie Mellon University, School of Computer Science},
  number      = {CMU-CS-90-183},
  year        = {1990},
  month       = oct
}

@inproceedings{FurstJaSm91,
  author    = {Merrick L. Furst and Jeffrey C. Jackson and Sean W. Smith},
  title     = {Improved Learning of {$AC^0$} Functions},
  booktitle = colt91,
  pages     = {317--325},
  year      = {1991},
  month     = aug
}

@article{Gaines79,
author=   	{Gaines, B. R.},
title=    	{Maryanski's Grammatical Inferencer},
journal=  	{IEEE Transactions on Computers},
year=     	1979,
month=    	jan,
volume=   	{C-28},
number=   	1,
pages=    	{62--66},
comment=  	{Corrects some typographical errors in the Maryanski-Booth
		algorithm for inferring a probabilistic finite-state grammar
	  	from a given set of strings.}
}

@book{Gallager68,
  author    = {Robert G. Gallager},
  title     = {Information Theory and Reliable Communication},
  year      = {1968},
  publisher = {John Wiley \& Sons}
}

@InProceedings{Gallant86,
  author = 	 {S. I. Gallant},
  title = 	 {Optimal Linear Discriminants},
  booktitle = 	 {Eighth International Conference on Pattern Recognition},
  pages =	 {849--852},
  year =	 1986,
  organization = {IEEE}
}

@inproceedings{GamsLa87,
author=	   	{Gams, M. and N. Lavrac},
title=	   	{Review of Five Empirical Learning Systems Within a
		Proposed Schemata},
booktitle= 	{Progress in Machine Learning--Proceedings of EWSL 87:
	   	2nd European Working Session on Learning},
address=   	{Bled, Yugoslavia},
year=	   	1987,
editor=	   	{Bratko, I. and N. Lavrac},
month=	   	may,
pages=	   	{46--66}
}

@book{Gantmacher59,
  author    = {F. R. Gantmacher},
  title     = {Applications of the Theory of Matrices},
  year      = {1959},
  publisher = {Interscience}
}

@thesis{Garsia??,
  author = {A. M. Garsia},
  title  = {On the distribution function of a geometric series
            whose terms have random changes of sign}
}

@Book{Gardiner85,
  author =	 {C. W. Gardiner},
  title = 	 {Handbook of Stochastic Methods},
  publisher = 	 {Springer-Verlag},
  year = 	 1985,
  edition =	 {Second}
}

@book{GareyJo79,
  author    = {Garey, M. and D. Johnson},
  title     = {Computers and Intractability: A Guide to the Theory of
               NP-Com\-plete\-ness},
  publisher = {W. H. Freeman},
  address   = {San Francisco},
  year      = {1979}
}

@inproceedings{Geisser70,
  author    = {Geisser, Seymour},
  title     = {The Inferential Use of Predictive Distributions},
  booktitle = {Foundations of Statistical Inference},
  editor    = {Godambe, V. P. and D. A. Sprott},
  publisher = {Holt, Rinehart, and Winston},
  year      = {1970},
  comment   = {Argues for deriving probability density of future observations
               (a predictive distribution) from prior observations.}
}

@incollection{Geman86,
author=   	{Geman, Stuart},
title=    	{Stochastic Relaxation Methods for Image Restoration and
	   	Expert Systems},
booktitle=  	{Automated Image Analysis: Theory and Experiments},
editor=   	{D. B. Cooper and R. L. Launer and D. E. McClure},
publisher=  	{Academic Press},
year=     	1986,
comment=  	{Describes stochastic relaxation technique for maximum entropy
	   	computations and for image restoration.}
}

@article{GemmellSu92,
  author  = {Peter Gemmell and Madhu Sudan},
  title   = {Highly Resilient Correctors for Polynomials},
  journal = ipl,
  volume  = {43},
  number  = {4},
  pages   = {169--174},
  date    = {28},
  month   = sep,
  year    = {1992}
}

@article{George57,
  author  = {George, F. H.},
  title   = {Logical Networks and Probability},
  journal = {Bulletin of Mathematical Biophysics},
  volume  = {19},
  pages   = {187--199},
  year    = {1957},
  comment = {Extends McCulloch-Pitts networks with counters for
             probabilities.}
}

@article{George59,
  author  = {George, F. H.},
  title   = {Inductive machines and the problem of learning},
  journal = {Cybernetica},
  volume  = {II},
  pages   = {109--126},
  year    = {1959},
  comment = {Discussion of learning; machines which learn associations.}
}

@incollection{GeorgeffWa84,
  author    = {Georgeff, M. P. and C. S. Wallace},
  title     = {A General Selection Criterion for Inductive Inference},
  booktitle = {ECAI 84: Advances in Artificial Intelligence},
  publisher = {Elsevier Science Publishers},
  pages     = {473--482},
  year      = {1984}
}

@article{GilboaSa89
,author=	{Itzhak Gilboa and Dov Samet}
,title=		{Bounded versus unbounded rationality: the tyranny of
		 the weak}
,journal=	{Games and Economic Behavior}
,volume=	1
,number=	3
,pages=		{213--221}
,year=		1989
}

@article{GinzburgSo??
,author=	{Iris Ginzburg and Haim Sompolinsky}
,title=		{Theory of correlations in stochastic neural networks}
,journal=	{Physical Review E}
,volume=	50
,number=	4
,pages=		{3171--3191}
,year=		1994
}

@book{Gittins89,
  author    = {J. C. Gittins},
  title     = {Multi-armed Bandit Allocation Indices},
  year      = {1989},
  publisher = wiley
}
		  
@article{GoemansWi95
,author=	{Michel X. Goemans and David P. Williamson}
,title=		{Improved Approximation Algorithms for Maximum Cut and
		 Satisfiability Problems Using Semidefinite Programming}
,pages=		{1115--1145}
,volume=	42
,number=	6
,month=		nov
,year=		1995
,journal=	jacm
}
		  
@techreport{Goetsch86,
  author      = {Goetsch, Gordon J.},
  title       = {CONSENSUS: A Statistical Learning Procedure in a Connectionist
                 Network},
  institution = {Carnegie-Mellon Computer Science Department},
  number      = {CMU-CS-86-131},
  month       = may,
  year        = {1986},
  comment     = {Layered network built out of communities of units, each with a
                 supervisor.}
}


@article{Gold67,
  author  = {Gold, E. Mark},
  title   = {Language Identification in the Limit},
  journal = infctrl,
  volume  = {10},
  pages   = {447--474},
  year    = {1967},
  comment = {Classic paper, introducing computer science theory into
             learning.}
}

@article{Gold72,
  author  = {Gold, E. Mark},
  title   = {System Identification via State Characterization},
  journal = {Automatica},
  year    = {1972},
  volume  = {8},
  pages   = {621--636}
}


@article{Gold78,
  author  = {Gold, E. Mark},
  title   = {Complexity of Automaton Identification from Given Data},
  journal = infctrl,
  volume  = {37},
  pages   = {302--320},
  year    = {1978},
  comment = {Proves that finding an automaton of $n$ or fewer states
             which agrees with a given list of input/output pairs.}
}

@phdthesis{Goldman90,
  author = {Sally Ann Goldman},
  title  = {Learning Binary Relations, Total Orders, and
            Read-Once Formulas},
  school = mit,
  year   = {1990},
  month  = sep,
  note   = {Available as Technical Report MIT/LCS/TR-483,
            MIT Laboratory for Computer Science}
}

@inproceedings{GoldmanKe91,
  author    = {Sally A. Goldman and Michael J. Kearns},
  title     = {On the Complexity of Teaching},
  booktitle = colt91,
  year      = {1991},
  month     = aug,
  pages     = {303--314}
}

@inproceedings{GoldmanKeSc90,
author=		{Sally A. Goldman and
		 Michael J. Kearns and Robert E. Schapire},
title=		{Exact Identification of Circuits Using Fixed Points of
		 Amplification Functions},
booktitle=	focs90,
pages=		{193--202},
month=		oct,
year=		1990,
note=		{Journal version: \emph{SIAM Journal on Computing},
		 22(4):705--726, 1993}
}

@inproceedings{GoldmanKeSc90b,
  author    = {Sally A. Goldman and Michael J. Kearns and Robert E. Schapire},
  title     = {On the Sample Complexity of Weak Learning},
  booktitle = colt90,
  year      = {1990},
  month     = aug,
  pages     = {217--231}
}

@article{GoldmanKeSc93,
  author  = {Sally A. Goldman and Michael J. Kearns and Robert E. Schapire},
  title   = {Exact Identification of Read-once Formulas Using
             Fixed Points of Amplification Functions},
  journal = sicomp,
  volume  = {22},
  number  = {4},
  pages   = {705--726},
  month   = aug,
  year    = {1993}
}

@article{GoldmanKeSc95,
author=		{Sally A. Goldman and
		 Michael J. Kearns and Robert E. Schapire},
title=		{On the Sample Complexity of Weakly Learning},
journal=	infcomp,
pages=		{276--287},
month=		mar,
year=		1995,
volume=		117,
number=		2
}

@inproceedings{GoldmanRiSc89,
  author    = {Sally A. Goldman and Ronald L. Rivest and Robert E. Schapire},
  title     = {Learning Binary Relations and Total Orders},
  booktitle = focs89,
  pages     = {46--51},
  month     = oct,
  year      = {1989},
  note      = {Available as Technical Report MIT/LCS/TM-413, MIT
               Laboratory for Computer Science.}
}

@article{GoldmanRiSc93,
  author  = {Sally A. Goldman and Ronald L. Rivest and Robert E. Schapire},
  title   = {Learning Binary Relations and Total Orders},
  journal = sicomp,
  volume  = {22},
  number  = {5},
  pages   = {1006--1034},
  year    = {1993}
}

@article{GoldmannHaRa92,
author =       {Mikael Goldmann and Johan H{\aa}stad and Alexander Razborov},
title =        "Majority Gates vs. General Weighted Threshold Gates",
journal =      "Computational Complexity",
volume =       "2",
year =         "1992",
pages=		{277--300}
}

@techreport{Goldreich88,
  author      = {Oded Goldreich},
  title       = {Towards a Theory of Average Case Complexity (A Survey)},
  institution = {Technion Computer Science Department},
  number      = {531},
  month       = dec,
  year        = {1988}
}

@article{GoldreichGoMi86,
  author  = {Goldreich, Oded and Shafi Goldwasser and Silvio Micali},
  title   = {How to Construct Random Functions},
  journal = jacm,
  volume  = {33},
  number  = {4},
  pages   = {792--807},
  month   = oct,
  year    = {1986}
}

@inproceedings{GoldreichGoRo96
,author=	{Oded Goldreich and Shafi Goldwasser and Dana Ron}
,title=		{Property Testing and its connection to Learning and
		 Approximation}
,booktitle=	focs96
,pages=		{339--348}
,year=		1996
}

@inproceedings{GoleaBaLeMa98
,author=    {Mostefa Golea and Peter L. Bartlett and Wee Sun Lee and Llew Mason}
,title=     {Generalization in decision trees and {DNF}: Does size matter?}
,booktitle= nips10
,pages=		{259--265}
,year=		1998
}

@Article{Good53,
  author = 	 {Good, I. J.},
  title = 	 {The population frequencies of species and the estimation of population parameters},
  journal = 	 {Biometrika},
  year = 	 1953,
  volume =	 40,
  number =	 {3--4},
  pages =	 {237--264},
  month =	 dec
}

@article{Good59,
  author  = {Good, I. J.},
  title   = {Kinds of Probability},
  journal = {Science},
  volume  = {129},
  number  = {3347},
  pages   = {443--447},
  month   = feb,
  year    = {1959}
}

@article{Good63,
author=   	{Good, I. J.},
title=    	{Maximum entropy for hypothesis formulation, especially for
	   	multidimensional contingency tables},
journal=  	{Annals of Mathematical Statistics},
year=     	1963,
volume=   	34,
pages=    	{911--934},
comment=  	{Uses maximum entropy to judge order of interactions, and the
	   	order of a Markov chain.}
}

@article{Good66,
author=   	{Good, I. J.},
title=    	{A Derivation of the Probabilistic Explication of Information},
journal=  	{Journal of the Royal Statistical Society (Series B)},
year=     	1966,
volume=   	28,
pages=    	{578--581},
comment=  	{Argues from an axiomatic framework that I(H:E|G), the
		information concerning H provided by E given G, should be
		log( P(H.E|G) / (P(H|G)P(E|G)) ) or some monotonic variation
		of this.}
}

@inproceedings{Good70,
author=   	{Good, I. J.},
title=    	{The Probabilistic Explication of Information, Evidence,
		Surprise, Causality, Explanation, and Utility},
booktitle= 	{Foundations of Statistical Inference},
year=     	1971,
publisher= 	{Holt, Rinehart, and Winston},
editor=    	{V. P. Godambe and D. A. Sprott},
pages=    	{108--141}
}

@Article{Good00,
  author = 	 {Good, I. J.},
  title = 	 {Turing's anticipation of empirical {Bayes} in
                  connection with the cryptanalysis of the {Naval} {Enigma}},
  journal = 	 {Journal of Statistical Computation and Simulation},
  year = 	 2000,
  volume =	 66,
  number =	 2,
  pages =	 {101--112}
}

@article{Goodman01,
	author  = {Joshua Goodman},
	title   = {A Bit of Progress in Language Modeling},
	journal = {Computer Speech and Language},
	year    = {2001},
	month   = oct,
	volume  = {15},
	number  = {4},
	pages   = {403--434}
}


@article{GordonSh85,
  author  = {Gordon, Jean and Edward H. Shortliffe},
  title   = {A Method for Managing Evidential Reasoning in a Hierarchical
             Hypothesis Space},
  journal = {Artificial Intelligence},
  volume  = {26},
  number  = {3},
  pages   = {323--357},
  month   = jul,
  year    = {1985},
  comment = {Dempster-Shafer theory explained and generalized.}
}

@article{Gorin95
,author=        {Allen Gorin}
,title=         {On automated language acquisition}
,journal=       {Journal of the Acoustical Society of America}
,volume=        97
,number=        6
,pages=         {3441--3461}
,year=          1995
}

@inproceedings{GorinPaSaWi96,
  author    = {A. L. Gorin and B. A. Parker and R. M. Sachs and J. G. Wilpon},
  title     = {How may {I} help you?},
  booktitle = {Proceedings Interactive Voice Technology for Telecommunications
               Applications (IVTTA)},
  pages     = {57--60},
  year      = {1996}
}


@article{GorinRiWr97,
	author  = "A. L. Gorin and G. Riccardi and J. H. Wright",
	title   = "How may {I} help you?",
	journal = "Speech Communication",
	year    = "1997",
	volume  = 23,
	number  = {1-2},
	month   = oct,
	pages   = {113--127}
}

@article{GormanSe88,
  author  = {R. Paul Gorman and Terrence J. Sejnowski},
  title   = {Analysis of Hidden Units in a Layered Network Trained
             to Classify Sonar Targets},
  journal = {Neural Networks},
  year    = {1988},
  volume  = {1},
  pages   = {75--89}
}

@techreport{GravesLa94,
  author = {Todd Graves and Tze Leung Lai},
  title  = {Asymptotically Efficient Adaptive Choice of Control
            Laws in Controlled Markov Chains},
  year   = {1994}
}

@book{Grenander81,
author=   	{Grenander, Ulf},
title=    	{Abstract Inference},
publisher=	{John Wiley \& Sons, Inc.},
year=     	1981
}

@techreport{GrigoriadisKh91,
  author      = {Michael D. Grigoriadis and Leonid G. Khachiyan},
  title       = {Approximate solution of matrix games in parallel},
  institution = {DIMACS},
  number      = {91-73},
  month       = jul,
  year        = {1991}
}

@TechReport{GrigoriadisKh94,
  author = 	 {Michael D. Grigoriadis and Leonid G. Khachiyan},
  title = 	 {A sublinear-time randomized approximation algorithm
		  for matrix games},
  institution =  {Rutgers University Department of Computer Science},
  year = 	 1994,
  number =	 {LCSR-TR-222},
  month =	 apr
}

@Article{GrigoriadisKh95,
  author = 	 {Michael D. Grigoriadis and Leonid G. Khachiyan},
  title = 	 {A sublinear-time randomized approximation algorithm for
                         matrix games},
  journal = 	 {Operations Research Letters},
  year = 	 1995,
  volume =	 18,
  number =	 2,
  month =	 sep,
  pages =	 {53--58}
}

@article{GrigorievKaSi90,
author=		{Dima Yu. Grigoriev and Marek Karpinski and Michael F.
		 Singer},
title=		{Fast Parallel Algorithms for Sparse Multivariate
		 Polynomial Interpolation over Finite Fields},
journal=	sicomp,
volume=		19,
number=		6,
pages=		{1059--1063},
month=		dec,
year=		1990
}

@book{GrossmanMa64,
author = 	{Israel Grossman and Wilhelm Magnus},
title = 	{Groups and Their Graphs},
publisher = 	{Mathematical Association of America},
year = 		{1964},
volume = 	{14},
series = 	{New Mathematical Library},
address = 	{Washington},
comment = 	{A low level introduction to group theory via Cayley graphs}
}

@inproceedings{GroveSc98,
  author    = {Adam J. Grove and Dale Schuurmans},
  title     = {Boosting in the limit: Maximizing the margin of learned ensembles},
  year      = {1998},
  booktitle = {Proceedings of the Fifteenth National Conference on
               Artificial Intelligence}
}

@inproceedings{Gurevich87,
  author    = {Yuri Gurevich},
  title     = {Complete and Incomplete Randomized {NP} Problems},
  booktitle = focs87,
  pages     = {111--117},
  month     = oct,
  year      = {1987}
}

@article{Gurevich89,
  author  = {Yuri Gurevich},
  title   = {The Challenger-Solver game: Variations on the theme
             of {P=?NP}},
  journal = {Bulletin of the European Association for Theoretical
             Computer Science},
  month   = oct,
  year    = {1989}
}

@unpublished{Gurevich89b,
  author = {Yuri Gurevich},
  title  = {Matrix Correspondence Problem is Complete for the
            Average Case},
  note   = {Unpublished manuscript},
  month  = nov,
  year   = {1989}
}

@article{Gurevich91,
  author  = {Yuri Gurevich},
  title   = {Average Case Completeness},
  journal = jcss,
  volume  = {42},
  number  = {3},
  pages   = {346--398},
  year    = {1991}
}

@article{Gurevich??,
author=		{Yuri Gurevich},
title=		{Average Case Completeness},
journal=	jcss,
year=		1991,
volume=		42,
number=		3,
pages=		{346--398}
}

@unpublished{GurevichMc87,
  author = {Yuri Gurevich and David McCauley},
  title  = {Average Case Complete Problems},
  note   = {Unpublished manuscript},
  month  = apr,
  year   = {1987}
}

@InProceedings{GuruswamiSa99,
  author = 	 {Venkatesan Guruswami and Amit Sahai},
  title = 	 {Multiclass Learning, Boosting, and Error-Correcting Codes},
  booktitle = 	 colt99,
  year =	 1999,
  pages=         {145--155}
}

@article{Gurvits95
,author=	{Leonid Gurvits}
,title=		{Stability of discrete linear inclusion}
,journal=	{Linear Algebra and its Applications}
,volume=	231
,pages=		{47--85}
,year=		1995
}

@inproceedings{GyorgiTi89,
author = 	{G. Gy{\"o}rgi and N. Tishby},
title = 	{Statistical Theory of Learning a Rule},
booktitle = 	{Proceedings of the STATPHYS-17 Workshop on Neural Networks
		 and Spin Glasses},
year = 		1989
}

@Article{Hagelbarger56,
  author = 	 {D. W. Hagelbarger},
  title = 	 {{SEER}, {A} {SE}quence {E}xtrapolating {R}obot},
  journal = 	 {IRE Transactions on Electronic Computers},
  year = 	 1956,
  month =	 mar,
  pages =	 {1--7}
}

@Article{HagerupRu90,
  author = 	 {Torben Hagerup and Christine R{\"u}b},
  title = 	 {A guided tour of Chernoff bounds},
  journal = 	 {Information Processing Letters},
  year = 	 1990,
  volume =	 33,
  pages =	 {305--308}
}

@inbook{Halmos73,
  author    = {Paul R. Halmos},
  title     = {How to Write Mathematics},
  publisher = {American Mathematical Society},
  pages     = {19--48},
  year      = {1973},
  note      = {In book entitled {\sl How to Write Mathematics}, by
               N.E. Steenrod, Paul R. Halmos, M.M. Schiffer, and
               J.A. Dieudonn\'e}
}

@inproceedings{Hancock90,
  author    = {Hancock, Thomas R.},
  title     = {Identifying $\mu$-Formula Decision Trees with Queries},
  booktitle = colt90,
  year      = {1990},
  month     = aug,
  pages     = {23--37}
}

@techreport{Hancock90b,
  author      = {Hancock, Thomas R.},
  title       = {Identifying $\mu$-Formula Decision Trees with Queries},
  institution = {Harvard University, Center for Research in Computing
                 Technology},
  year        = {1990},
  number      = {TR-16-90}
}

@inproceedings{HancockHe91,
  author    = {Thomas Hancock and Lisa Hellerstein},
  title     = {Learning Read-Once Formulas over Fields and Extended
               Bases},
  booktitle = colt91,
  pages     = {326--336},
  month     = aug,
  year      = {1991}
}

@inproceedings{HancockMa91,
  author    = {Thomas Hancock and Yishay Mansour},
  title     = {Learning Monotone {$k\mu$ DNF} Formulas on Product
               Distributions},
  booktitle = colt91,
  pages     = {179--183},
  year      = {1991},
  month     = aug
}

@incollection{Hannan57,
  author    = {James Hannan},
  title     = {Approximation to {B}ayes risk in repeated play},
  booktitle = {Contributions to the Theory of Games},
  editor    = {M. Dresher and A. W. Tucker and P. Wolfe},
  volume    = {III},
  pages     = {97--139},
  publisher = {Princeton University Press},
  year      = {1957}
}

@book{HardyWr60,
author =  	{G. H. Hardy and E. M. Wright},
title=	  	{An Introduction to the Theory of Numbers},
edition=  	{Fourth},
year =    	1960,
publisher= 	{Oxford University Press}
}

@techreport{Hart85,
author=   	{Hart, George W.},
title=    	{Prototype Nonintrusive Appliance Load Monitor},
institution=  	{MIT Energy Laboratory},
year=     	1985,
month=    	sep,
type=     	{Progress Report},
number=   	{2},
comment=  	{Finite-state automaton inference to determine appliances
		present.}
}

@phdthesis{Hart87,
  author  = {Hart, George W.},
  title   = {Minimum Information Estimation of Structure},
  school  = {MIT Dept.\ of Electrical Engineering and Computer Science},
  month   = apr,
  year    = {1987},
  note    = {Appears as LIDS-TH-1664.},
  comment = {Studies and applies Rissanen's MDLP.}
}

@article{HartMa00,
  author   = {Hart, Sergiu and Mas-Colell, Andreu},
  title    = {A simple adaptive procedure leading to correlated equilibrium},
  journal  = {Econometrica},
  fjournal = {Econometrica. Journal of the Econometric Society},
  volume   = {68},
  number   = {5},
  pages    = {1127--1150},
  year     = {2000}
}

@Article{HartMa01,
  author = 	 {Sergiu Hart and Andreu Mas-Colell},
  title = 	 {A General Class of Adaptive Strategies},
  journal = 	 {Journal of Economic Theory},
  year = 	 2001,
  volume =	 98,
  pages =	 {26--54}
}

@book{HartmanisSt66,
author=   	{Hartmanis, J. and R. E. Stearns},
title=    	{Algebraic Structure Theory of Sequential Machines},
publisher=	{Prentice-Hall},
year=     	1966
}

@Article{HarunoShOo99,
  author = 	 {Masahiko Haruno and Satoshi Shirai and Yoshifumi Ooyama},
  title = 	 {Using Decision Trees to Construct a Practical Parser},
  journal = 	 {Machine Learning},
  year = 	 1999,
  volume =	 34,
  pages =	 {131--149}
}

@article{HashlamounVaSa94
,author=	{W. A. Hashlamoun and P. K. Varshney and V. N. S.
		 Samarasooriya}
,title=		{A tight upper bound on the {Bayesian} probability of
		 error}
,journal=	{IEEE Transactions on Pattern Analysis and Machine Intelligence}
,volume=	16
,number=	2
,pages=		{220--224}
,year=		1994
}

@misc{HastieTi95
,author=        {Trevor Hastie and Robert Tibshirani}
,title=         {Generalized additive models}
,year=          1995
}

@article{HastieTi98
,author=        {Trevor Hastie and Robert Tibshirani}
,title=         {Classification by pairwise coupling}
,journal=       annstat
,year=          1998
,volume=        26
,number=        2
,pages=         {451--471}
}

@Book{HastieTiFr01,
  author =	 {Trevor Hastie and Robert Tibshirani and Jerome Friedman},
  title = 	 {The Elements of Statistical Learning: Data Mining,
                  Inference, and Prediction},
  publisher = 	 {Springer-Verlag},
  year = 	 2001
}

@techreport{HastiePr90,
  author      = {Trevor Hastie and Daryl Pregibon},
  title       = {Shrinking Trees},
  institution = {AT\&T Bell Laboratories},
  year        = {1990}
}

@inproceedings{Haussler86,
  author       = {Haussler, David},
  title        = {Quantifying the inductive bias in concept learning},
  booktitle    = {Proceedings  AAAI-86},
  organization = {American Association for Artificial Intelligence},
  pages        = {485--489},
  month        = aug,
  year         = {1986},
  comment      = {Defines bias=  Vapnik-Chervonenkis dimension. Algorithms for
                  learning k-CNF, k-DNF. Many ideas for extensions,
                  generalizations.}
}

@inproceedings{Haussler87a,
  author    = {Haussler, David},
  title     = {Bias, Version Spaces and {Valiant's} Learning Framework},
  booktitle = {Proceedings of the Fourth International Workshop on
               Machine Learning},
  address   = {University of California, Irvine},
  pages     = {324--336},
  month     = jun,
  year      = {1987}
}

@techreport{Haussler87b,
  author      = {Haussler, David},
  title       = {Learning Conjunctive Concepts in Structural Domains},
  institution = ucsccrl,
  number      = {UCSC-CRL-87-1},
  month       = feb,
  year        = 1987
}

@article{Haussler88,
  author  = {Haussler, David},
  title   = {Quantifying Inductive Bias: {AI} Learning Algorithms and
             {V}aliant's Learning Framework},
  journal = {Artificial Intelligence},
  volume  = 36,
  pages   = {177--221},
  year    = 1988
}

@techreport{Haussler88b,
  author      = {Haussler, David},
  title       = {Space Efficient Learning Algorithms},
  institution = ucsccrl,
  number      = {UCSC-CRL-88-2},
  month       = mar,
  year        = 1988
}

@inproceedings{Haussler89,
  author    = {Haussler, David},
  title     = {Generalizing the {PAC} Model: Sample Size Bounds from
               Metric Dimension-based Uniform Convergence Results},
  booktitle = focs89,
  pages     = {40--45},
  month     = oct,
  year      = 1989
}

@techreport{Haussler89b,
  author      = {Haussler, David},
  title       = {Generalizing the {PAC} Model for Neural Net and
                 Other Learning Applications},
  institution = ucsccrl,
  number      = {UCSC-CRL-89-30},
  month       = sep,
  year        = 1989,
  note        = {To appear, {\it Information and Computation}}
}

@inproceedings{Haussler90,
  author    = {David Haussler},
  title     = {Decision Theoretic Generalizations of the {PAC}
               Learning Model},
  booktitle = {Proceedings of the First International Workshop on
               Algorithmic Learning Theory},
  pages     = {21--41},
  year      = 1990,
  comment   = {address=Tokyo, pub=Japanese Society for Artificial
               Intelligence}
}

@inproceedings{Haussler90b,
  author    = {David Haussler},
  title     = {Probably Approximately Correct Learning},
  booktitle = {Proceedings of the 8th National Conference on Artificial
               Intelligence},
  publisher = {Morgan Kaufmann},
  pages     = {1101--1108},
  year      = 1990
}


@article{Haussler92,
  author  = {David Haussler},
  title   = {Decision Theoretic Generalizations of the {PAC} Model
             for Neural Net and Other Learning Applications},
  journal = infcomp,
  volume  = 100,
  number  = 1,
  pages   = {78--150},
  year    = 1992
}

@article{Haussler97,
  author  = {Haussler, David},
  title   = {A general minimax result for relative entropy},
  journal = {IEEE Trans. Inform. Theory},
  volume  = 43,
  number  = 4,
  pages   = {1276--1280},
  year    = 1997,
  issn    = {0018-9448}
}

@inproceedings{HausslerKeLiWa88,
  author    = {Haussler, David and Michael Kearns and Nick Littlestone and
               Manfred K. Warmuth},
  title     = {Equivalence of Models for Polynomial Learnability},
  booktitle = colt88,
  pages     = {42--55},
  month     = aug,
  year      = 1988,
  note      = {Available as Technical
               Report UCSC-CRL-88-06, University of California Santa
               Cruz, Computer Research Laboratory.
               To appear, {\it Information and Computation}},
  comment   = {This reference is out of date -- use HKLW91}
}

@techreport{HausslerKeLiWa88b,
  author      = {Haussler, David and Michael Kearns and Nick Littlestone and
                 Manfred K. Warmuth},
  title       = {Equivalence of Models for Polynomial Learnability},
  institution = ucsccrl,
  number      = {UCSC-CRL-88-06},
  month       = sep,
  year        = 1988
}

@article{HausslerKeLiWa91,
  author  = {David Haussler and Michael Kearns and Nick Littlestone and
             Manfred K. Warmuth},
  title   = {Equivalence of Models for Polynomial Learnability},
  journal = infcomp,
  volume  = 95,
  number  = 2,
  pages   = {129--161},
  month   = dec,
  year    = 1991
}

@incollection{HausslerKeOpSc92,
  author    = {David Haussler and Michael Kearns and Manfred Opper
               and Robert Schapire},
  title     = {Estimating average-case learning curves using
               {B}ayesian, statistical physics and {VC} dimension
               methods},
  booktitle = {Advances in Neural Information Processing Systems 4},
  editor    = {John E. Moody and Steve J. Hanson and Richard P.
               Lippmann},
  publisher = {Morgan Kaufmann},
  pages     = {855--862},
  year      = 1992
}

@inproceedings{HausslerKeSc91,
  author    = {David Haussler and Michael Kearns and Robert E. Schapire},
  title     = {Bounds on the Sample Complexity of {B}ayesian Learning
               Using Information Theory and the {VC} Dimension},
  booktitle = colt91,
  month     = aug,
  year      = 1991
}

@article{HausslerKeSc94,
  author  = {David Haussler and Michael Kearns and Robert E. Schapire},
  title   = {Bounds on the Sample Complexity of {B}ayesian Learning
             Using Information Theory and the {VC} Dimension},
  journal = ml,
  volume  = 14,
  pages   = {83--113},
  year    = 1994
}

@unpublished{HausslerKeSeTi94,
  author = {David Haussler and Michael Kearns and H. Sebastian
            Seung and Naftali Tishby},
  title  = {Rigorous learning curve bounds from statistical
            mechanics},
  year   = 1994,
  note   = {Unpublished manuscript}
}

@inproceedings{HausslerKiWa95,
  author    = {David Haussler and Jyrki Kivinen and Manfred K. Warmuth},
  title     = {Tight worst-case loss bounds for predicting with
               expert advice},
  booktitle = eurocolt95,
  publisher = {Springer-Verlag},
  pages     = {69--83},
  year      = 1995
}

@unpublished{HausslerLiWa87,
  author = {Haussler, David and Nick Littlestone and
            Manfred K. Warmuth},
  title  = {Expected mistake bounds for on-line learning algorithms},
  note   = {Unpublished manuscript},
  month  = apr,
  year   = 1987
}

@inproceedings{HausslerLiWa88,
  author    = {Haussler, David and Nick Littlestone and
               Manfred K. Warmuth},
  title     = {Predicting $\{0,1\}$-Functions on Randomly Drawn Points},
  booktitle = focs88,
  pages     = {100--109},
  month     = oct,
  year      = 1988,
  comment   = {Tech.\ Report, U. C. Santa Cruz, to appear (longer version).}
}

@article{HausslerLiWa94,
  author  = {Haussler, David and Nick Littlestone and
             Manfred K. Warmuth},
  title   = {Predicting $\{0,1\}$-Functions on Randomly Drawn Points},
  journal = infcomp,
  volume  = 115,
  number  = 2,
  year    = 1994,
  pages   = {248--292}
}

@article{HausslerLo95,
  author  = {David Haussler and Philip M. Long},
  title   = {A generalization of {S}auer's lemma},
  journal = {Journal of Combinatorial Theory, Series A},
  year    = 1995,
  volume  = 71,
  number  = 2,
  pages   = {219--240}
}

@article{HausslerOp97,
  author  = {Haussler, David and Opper, Manfred},
  title   = {Mutual information, metric entropy and cumulative relative
             entropy risk},
  journal = {The Annals of Statistics},
  volume  = 25,
  number  = 6,
  pages   = {2451--2492},
  year    = 1997,
  issn    = {0090-5364},
  comment = {In this paper a stochastic model is chosen in a worst-case fashion}
}

@incollection{HausslerOp98,
  author    = {Haussler, David and Opper, Manfred},
  title     = {Worst case prediction over sequences under log loss},
  booktitle = {The Mathematics of Information Coding, Extraction and Distribution},
  publisher = {Springer-Verlag},
  year      = 1998,
  editor    = {G. Cybenko and D. O'Leary and J. Rissanen},
  comment   = {Similar to HausslerOp97 but applies to worst case sequences}
}

@article{HeckermanGeCh95,
  author  = {D. Heckerman and D. Geiger and D. M. Chickering},
  title   = {Learning {Bayesian} Networks: The combination of
             knowledge and statistical data},
  journal = ml,
  year    = 1995,
  volume  = 20,
  pages   = {197--243}
}

@phdthesis{Hellerstein89,
  author = {Lisa Hellerstein},
  title  = {On Characterizing and Learning Some Classes of
            Read-Once Formulas},
  school = {University of California at Berkeley},
  year   = 1989
}

@techreport{HellersteinKa90,
  author      = {Lisa Hellerstein and Marek Karpinski},
  title       = {Read-Once Formulas over Different Bases},
  institution = {University of Bonn},
  number      = {8556-CS},
  year        = 1990
}

@unpublished{HellersteinWa??,
  author = {Lisa Hellerstein and Manfred Warmuth},
  title  = {Interpolating {GF[2]} polynomials},
  note   = {Unpublished manuscript}
}

@article{Hellman77,
  author  = {Martin E. Hellman},
  title   = {An Extension of the {Shannon} Theory Approach to Cryptography},
  journal = {IEEE Transactions on Information Theory},
  volume  = {IT-23},
  number  = 3,
  month   = may,
  year    = 1977,
  pages   = {289--294}
}

@inproceedings{HelmboldKiWa95,
  author    = {David P. Helmbold and Jyrki Kivinen and Manfred
               K. Warmuth},
  title     = {Worst-case loss bounds for sigmoided neurons},
  booktitle = {Advances in Neural Information Processing Systems 7},
  pages     = {309--315},
  year      = 1995
}

@inproceedings{HelmboldLo91,
  author    = {David P. Helmbold and Philip M. Long},
  title     = {Tracking Drifting Concepts Using Random Examples},
  booktitle = colt91,
  pages     = {13--23},
  month     = aug,
  year      = 1991
}

@article{HelmboldLo94,
  author  = {David P. Helmbold and Philip M. Long},
  title   = {Tracking Drifting Concepts by Minimizing Disagreements},
  journal = ml,
  volume  = 14,
  number  = 1,
  pages   = {27--45},
  year    = 1994
}

@inproceedings{HelmboldSc95,
  author    = {David P. Helmbold and Robert E. Schapire},
  title     = {Predicting nearly as well as the best pruning of a
               decision tree},
  booktitle = colt95,
  pages     = {61--68},
  year      = 1995
}

@article{HelmboldSc97,
  author  = {David P. Helmbold and Robert E. Schapire},
  title   = {Predicting nearly as well as the best pruning of a
             decision tree},
  journal = ml,
  year    = 1997,
  volume  = 27,
  number  = 1,
  pages   = {51--68},
  month   = apr
}

@inproceedings{HelmboldScSiWa95,
  author    = {David P. Helmbold and Robert E. Schapire and Yoram
               Singer and Manfred K. Warmuth},
  title     = {A comparison of new and old algorithms for a mixture
               estimation problem},
  booktitle = colt95,
  pages     = {69--78},
  year      = 1995
}

@inproceedings{HelmboldScSiWa96,
  author    = {David P. Helmbold and Robert E. Schapire and Yoram
               Singer and Manfred K. Warmuth},
  title     = {On-Line Portfolio Selection Using Multiplicative Updates},
  booktitle = ml96,
  pages     = {243--251},
  year      = 1996,
  note      = {Long version available from my web page}
}

@article{HelmboldScSiWa98,
  author  = {David P. Helmbold and Robert E. Schapire and Yoram
             Singer and Manfred K. Warmuth},
  title   = {On-Line Portfolio Selection Using Multiplicative Updates},
  journal = {Mathematical Finance},
  pages   = {325--347},
  year    = 1998,
  volume  = 8,
  number  = 4
}

@inproceedings{HelmboldSlWa89,
  author    = {Helmbold, David and Robert Sloan and Manfred K. Warmuth},
  title     = {Learning Nested Differences of Intersection-Closed
               Concept Classes},
  booktitle = colt89,
  pages     = {41--56},
  month     = jul,
  year      = 1989
}

@article{HelmboldSlWa90,
  author  = {Helmbold, David and Robert Sloan and Manfred K. Warmuth},
  title   = {Learning Nested Differences of Intersection-Closed
             Concept Classes},
  journal = ml,
  volume  = 5,
  number  = 2,
  pages   = {165--196},
  month   = jun,
  year    = 1990
}

@article{HelmboldSlWa92,
  author  = {David Helmbold and Robert Sloan and Manfred K. Warmuth},
  title   = {Learning Integer Lattices},
  journal = sicomp,
  volume  = 21,
  number  = 2,
  pages   = {240--266},
  year    = 1992
}

@techreport{HelmboldWa92,
  author      = {David P. Helmbold and Manfred K. Warmuth},
  title       = {On Weak Learning},
  institution = ucsccrl,
  number      = {UCSC-CRL-92-54},
  month       = dec,
  year        = 1992,
  note        = {Revised May, 1993}
}

@article{HelmboldWa95,
  author  = {David P. Helmbold and Manfred K. Warmuth},
  title   = {On weak learning},
  journal = jcss,
  year    = 1995,
  volume  = 50,
  pages   = {551--573}
}

@unpublished{HenisLeGo94,
  author = {Ealan A. Henis and Stephen E. Levinson and Allen L.
            Gorin},
  title  = {Mapping natural language and sensory information into
            manipulatory actions},
  note   = {Unpublished manuscript}
}

@book{Hennie68,
  author    = {Hennie, Frederick C.},
  title     = {Finite-State Models for Logical Machines},
  publisher = {John Wiley and Sons},
  year      = 1968
}

@inproceedings{HerbsterWa95,
  author    = {Mark Herbster and Manfred Warmuth},
  title     = {Tracking the best expert},
  booktitle = ml95,
  pages     = {286--294},
  year      = 1995,
  note      = {Long version to appear in {\it Machine Learning} and
               available from {http://www.cse.ucsc.edu/$\sim$manfred}}
}

@book{Herstein75,
  author    = {I. N. Herstein},
  title     = {Topics in Algebra},
  edition   = {Second},
  publisher = {Wiley},
  year      = 1975
}

@book{HertzKrPa91,
  author    = {John Hertz and Anders Krogh and Richard G. Palmer},
  title     = {Introduction to the Theory of Neural Computation},
  publisher = {Addison-Wesley},
  year      = 1991
}

@inproceedings{HillStRoFu95,
  author    = {Will Hill and Larry Stead and Mark Rosenstein and
               George Furnas},
  title     = {Recommending and Evaluating Choices in a Virtual
               Community of Use},
  booktitle = {Human Factors in Computing Systems {CHI'95}
               Conference Proceedings},
  pages     = {194--201},
  year      = 1995
}

@inproceedings{Hinton86,
  author    = {Hinton, Geoffrey E.},
  title     = {Learning Distributed Representations of Concepts},
  booktitle = {Proceedings of the Eighth Annual Conference of the Cognitive
               Science Society},
  address   = {Amherst, Mass},
  year      = 1986,
  month     = aug,
  pages     = {1--12},
  comment   = {Uses a back-propagation learning procedure to learn relational
               data items.}
}

@article{HintonDaFrNe95,
  author  = {Geoffrey E. Hinton and Peter Dayan and Brendan
             J. Frey and Radford M. Neal},
  title   = {The ``Wake-Sleep'' algorithm for unsupervised neural
             networks},
  journal = {Science},
  volume  = 268,
  pages   = {1158--1161},
  year    = 1995
}

@techreport{HintonSeAc84,
  author      = {Hinton, Geoffrey E. and Terrence J. Sejnowski and David H.
                 Ackley},
  title       = {Boltzmann Machines: Constraint Satisfaction Networks that
                 Learn},
  institution = {CMU Computer Science Department},
  number      = {CMU-CS-84-119},
  year        = 1984,
  month       = may,
  comment     = {Uses simulated annealing to update propositional states in
                 a connectionist Hopfield-like neural network.}
}

@techreport{HirschbergWhPeSiRoPaMa97,
  author = {Julia Hirschberg and Steve Whittaker and Fernando
            Pereira and Amitabh Singhal and Aaron Rosenberg and
            S. Parthasarathy and Ivan Magrin-Chagnolleau},
  title  = {Browsing and retrieval of speech in audio databases}
}

@inproceedings{HoKe96,
  author    = {Tin Kam Ho and Eugene M. Kleinberg},
  title     = {Building projectable classifiers of arbitrary
               complexity},
  booktitle = {Proceedings of the 13th International Conference on
               Pattern Recognition},
  pages     = {880--885},
  year      = 1996
}

@article{Hoeffding56,
  author  = {W. Hoeffding},
  title   = {On the Distribution of the Number of Successes in Independent Trials},
  journal = {Annals of Mathematical Statistics},
  volume  = 27,
  pages   = {713--721},
  year    = 1956
}

@article{Hoeffding63,
  author  = {Wassily Hoeffding},
  title   = {Probability inequalities for sums of bounded random
             variables},
  journal = {Journal of the American Statistical Association},
  volume  = 58,
  number  = 301,
  pages   = {13--30},
  month   = mar,
  year    = 1963
}

@inproceedings{HoffgenSi92,
  author    = {Klaus-U. H{\"o}ffgen and Hans-U. Simon},
  title     = {Robust Trainability of Single Neurons},
  booktitle = colt92,
  year      = 1992,
  month     = jul,
  pages     = {428--439}
}

@inproceedings{Hofmann99,
  author    = {Thomas Hofmann},
  title     = {Probabilistic Latent Semantic Indexing},
  booktitle = sigir99,
  year      = 1999
}

@book{Holland75,
  author    = {John H. Holland},
  title     = {Adaptation in Natural and Artificial Systems},
  publisher = {University of Michigan Press},
  year      = 1975,
  comment   = {full title = ...: an introductory analysis with
               applications to biology, control, and
               artificial intelligence.  address= Ann Arbor}
}

@incollection{Holland84,
  author    = {John H. Holland},
  title     = {Genetic algorithms and adaptation},
  booktitle = {Adaptive Control of Ill-defined Systems},
  editor    = {Oliver G. Selfridge and Edwina L. Rissland and
               Michael A. Arbib},
  publisher = {Plenum Press},
  year      = 1984,
  comment   = {address = New York}
}

@article{Holte93,
  author  = {Robert C. Holte},
  title   = {Very simple classification rules perform well on most
             commonly used datasets},
  journal = ml,
  volume  = 11,
  number  = 1,
  year    = 1993,
  pages   = {63--91}
}

@book{HonigMe84,
  author    = {Michael L. Honig and David G. Messerschmitt},
  title     = {Adaptive Filters: Structures, Algorithms, and
               Applications},
  publisher = {Kluwer Academic Publishers},
  year      = 1984
}

@incollection{Hopcroft71,
  author    = {Hopcroft, John},
  title     = {An $n\log(n)$ Algorithm for Minimizing States in a Finite
               Automaton},
  booktitle = {Theory of Machines and Computations},
  editor    = {Zvi Kohavi and Azaria Paz},
  publisher = {Academic Press},
  pages     = {189--196},
  year      = 1971
}

@book{HopcroftUl79,
  author    = {John Hopcroft and Jeffrey Ullman},
  title     = {Introduction to Automata Theory, Languages, and Computation},
  publisher = {Addison-Wesley},
  address   = {Reading, MA},
  year      = 1979
}

@book{HornJo85,
  author    = {Roger A. Horn and Charles R. Johnson},
  title     = {Matrix Analysis},
  publisher = {Cambridge University Press},
  year      = 1985
}

@inproceedings{HullPeSc96,
  author    = {David Hull and Jan Pedersen and Hinrich Sch{\"u}tze},
  title     = {Method Combination for Document Filtering},
  booktitle = sigir96,
  pages     = {279--288},
  year      = 1996
}

@article{HyafilRi76,
  author  = {Hyafil, Laurent and Ronald L. Rivest},
  title   = {Constructing Optimal Binary Decision Trees is {NP}-Complete},
  journal = {Information Processing Letters},
  volume  = 5,
  number  = 1,
  pages   = {15--17},
  month   = may,
  year    = 1976,
  comment = {Proof based on exact-cover by 3-subsets.}
}

@article{IrelandKu68,
  author  = {Ireland, C. T. and S. Kullback},
  title   = {Contingency tables with given marginals},
  journal = {Biometrika},
  year    = 1968,
  volume  = 55,
  number  = 1,
  pages   = {179--188},
  comment = {Proves geometric convergence of Deming/Stephan procedure for
             computing maximum entropy solution.}
}

@article{IshikidaVa94,
  author  = {T. Ishikida and P. Varaiya},
  title   = {Multi-Armed Bandit Problem Revisited},
  journal = {Journal of Optimization Theory and Applications},
  number  = 1,
  volume  = 83,
  year    = 1994,
  month   = oct,
  pages   = {113--154}
}

@inproceedings{IttnerLeAh95,
  author       = {David J. Ittner and David D. Lewis and David D. Ahn},
  title        = {Text Categorization of Low Quality Images},
  booktitle    = {Symposium on Document Analysis and Information Retrieval},
  organization = {ISRI; Univ. of Nevada, Las Vegas},
  address      = {Las Vegas, NV},
  pages        = {301--315},
  year         = 1995
}


@inproceedings{IwayamaTo95,
  author    = {Makoto Iwayama and Takenobu Tokunaga},
  title     = {Cluster-based text categorization: A comparison of
               category search strategies},
  pages     = {273--281},
  booktitle = sigir95,
  year      = 1995
}

@article{Izenman91,
  author  = {Alan Julian Izenman},
  title   = {Recent Developments in Nonparametric Density
             Estimation},
  journal = {Journal of the American Statistical Association},
  volume  = 86,
  number  = 413,
  pages   = {205--224},
  month   = mar,
  year    = 1991
}

@inproceedings{IyerLeScSiSi00,
  author    = {Raj D. Iyer and David D. Lewis and Robert
               E. Schapire and Yoram Singer and Amit Singhal},
  title     = {Boosting for document routing},
  booktitle = {Proceedings of the Ninth International Conference on
               Information and Knowledge Management},
  year      = 2000
}

@techreport{JaakkolaJoSi93,
  author      = {Tommi Jaakkola and Michael I. Jordan and Satinder P.
                 Singh},
  title       = {On the convergence of stochastic iterative dynamic
                 programming algorithms},
  institution = {MIT Computational Cognitive Science},
  number      = {9307},
  month       = jul,
  year        = 1993
}

@inproceedings{JacksonCr96,
  author    = {Jeffrey C. Jackson and Mark W. Craven},
  title     = {Learning sparse perceptrons},
  booktitle = nips8,
  pages     = {654--660},
  year      = 1996
}

@unpublished{JacksonShSh9?,
  author = {Jeffrey Jackson and Eli Shamir and Clara Shwartzman},
  title  = {Learning with queries corrupted by classification
            noise},
  note   = {Unpublished manuscript}
}

@book{Jacobson74,
  author    = {Jacobson, Nathan},
  title     = {Basic Algebra},
  volume    = 1,
  publisher = {W. H. Freeman and Company},
  year      = 1974,
  comment   = {Classic abstract algebra text}
}

@article{Jamshidian92,
  author  = {Farshid Jamshidian},
  title   = {Asymptotically optimal portfolios},
  journal = {Mathematical Finance},
  volume  = 2,
  number  = 2,
  pages   = {131--150},
  year    = 1992
}

@inproceedings{Jantke84,
  author    = {Jantke, K. P.},
  title     = {Polynomial-time inference of general pattern languages},
  booktitle = {Proceedings of the Symposium of Theoretical Aspects of
               Computer Science},
  series    = {Lecture Notes in Computer Science},
  volume    = 166,
  year      = 1984,
  pages     = {314--325},
  publisher = {Springer}
}

@article{Jaynes68,
  author  = {Jaynes, Edwin T.},
  title   = {Prior Probabilities},
  journal = {IEEE Transactions on Systems Science and Cybernetics},
  volume  = {SSC-4},
  number  = 3,
  pages   = {227--241},
  month   = sep,
  year    = 1968,
  comment = {Presentation and justification for maximum-entropy procedure}
}

@article{Jaynes82,
  author  = {Jaynes, Edwin T.},
  title   = {On the Rationale of Maximum-Entropy Methods},
  journal = {Proceedings of the IEEE},
  volume  = 70,
  number  = 9,
  pages   = {939--952},
  month   = sep,
  year    = 1982,
  comment = {Justification for maximum-entropy methods, and comparison with
             full Bayesian and autoregressive models.}
}

@techreport{Jelinek83,
  author      = {Jelinek, Frederick},
  title       = {Markov Source Modeling of Text Generation},
  institution = {IBM T.J. Watson Research Center},
  year        = 1983,
  comment     = {Interpolates 1-, 2-, and 3-gram transition probabilities.}
}

@article{JerrumSi89,
  author  = {Mark Jerrum and Alistair Sinclair},
  title   = {Approximating the Permanent},
  journal = sicomp,
  volume  = 18,
  number  = 6,
  year    = 1989,
  pages   = {1149--1178}
}

@article{JerrumSi93,
  author  = {Mark Jerrum and Alistair Sinclair},
  title   = {Polynomial-time approximation algorithms for the
             {Ising} model},
  journal = sicomp,
  volume  = 22,
  number  = 5,
  pages   = {1087--1116},
  month   = oct,
  year    = 1993
}

@inproceedings{Joachims97,
  author    = {T. Joachims},
  title     = {A probabilistic analysis of the {Rocchio} algorithm with {TFIDF}
               for text categorization},
  booktitle = ml97,
  year      = 1997,
  pages     = {143--151}
}

@article{JogdeoSa68,
  author  = {Kumar Jogdeo and S. M. Samuels},
  title   = {Monotone convergence of binomial probabilities and a
             generalization of {R}amanujan's equation},
  journal = {Annals of Mathematical Statistics},
  volume  = 39,
  number  = 4,
  pages   = {1191--1195},
  year    = 1968
}

@article{Johnson79,
  author  = {Johnson, Rodney W.},
  title   = {Axiomatic Characterization of the Directed Divergences and
             their Linear Combinations},
  journal = {IEEE Transactions on Information Theory},
  volume  = {IT-25},
  number  = 6,
  pages   = {709--716},
  month   = nov,
  year    = 1979,
  comment = {Characterized by positivity, additivity, and finiteness.
             Directed divergence also called expected weight of evidence,
             cross-entropy, and discrimination information.}
}


@article{Johnson84,
  author  = {David S. Johnson},
  title   = {The {NP}-Completeness Column: An Ongoing Guide},
  journal = {Journal of Algorithms},
  volume  = 5,
  number  = 2,
  pages   = {284--299},
  month   = jun,
  year    = 1984
}

@article{JohnsonSh83,
  author  = {Johnson, Rodney W. and John E. Shore},
  title   = {Comments on and Corrections to `Axiomatic Derivation of the
             Principle of Maximum Entropy and the Principle of Minimum
             Cross-Entropy'},
  journal = {IEEE Transactions on Information Theory},
  year    = 1983,
  month   = nov,
  volume  = {IT-29},
  number  = 6,
  pages   = {942--943},
  comment = {Corrects error in previous paper regarding discrete case.}
}

@book{Jolliffe86,
  author    = {I. T. Jolliffe},
  title     = {Principal Component Analysis},
  publisher = {Springer-Verlag},
  year      = 1986
}

@article{Jones92,
  author  = {Lee K. Jones},
  title   = {A simple lemma on greedy approximation in {H}ilbert
             space and convergence rates for projection pursuit
             regression and neural network training},
  journal = {Annals of Statistics},
  volume  = 20,
  number  = 1,
  pages   = {608--613},
  year    = 1992
}

@article{Juang84,
  author  = {Juang, B.-H.},
  title   = {On the Hidden {Markov} Model and Dynamic Time Warping for Speech
             Recognition -- A Unified View},
  journal = {AT\&T Bell Laboratories Technical Journal},
  year    = 1984,
  month   = sep,
  volume  = 63,
  number  = 7,
  pages   = {1213--1242},
  comment = {Gaussian autoregressive models; Markov models.  Baum's
             forward-backward algorithm.  Computing all paths versus
             computing best path.}
}

@techreport{JuangChLe93,
  author = {B. H. Juang and Wu Chou and C. H. Lee},
  title  = {Statistical and discriminative methods for speech
            recognition}
}

@techreport{Judd87,
  author      = {Judd, J. Stephen},
  title       = {Complexity of Connectionist Learning with Various Node
                 Functions},
  institution = {Department of Computer and Information Science,
                 University of Massachusetts at Amherst},
  number      = {87-60},
  month       = jul,
  year        = 1987,
  note        = {Also presented at the First IEEE International Conference on
                 Neural Networks, June 21--24, 1987, San Diego, California}
}


@phdthesis{Judd88,
  author = {Judd, J. Stephen},
  title  = {Neural Network Design and the Complexity of Learning},
  school = {University of Massachusetts at Amherst, Department of
            Computer and Information Science},
  year   = 1988
}

@book{Jumarie90,
  author    = {Guy Jumarie},
  title     = {Relative Information: Theories and Applications},
  publisher = {Springer-Verlag},
  year      = 1990
}

@book{Kac59,
  author    = {Mark Kac},
  title     = {Statistical Independence in Probability, Analysis and
               Number Theory},
  publisher = {John Wiley and Sons},
  year      = 1959
}

@unpublished{KahnLiSa93,
  author = {Jeff Kahn and Nathan Linial and Alex Samorodnitsky},
  title  = {Inclusion-exclusion: exact and approximate},
  year   = 1993,
  note   = {Manuscript}
}

@article{KalaiLe93,
  author  = {Ehud Kalai and Ehud Lehrer},
  title   = {Rational learning leads to {Nash} equilibrium},
  journal = {Econometrica},
  volume  = 61,
  number  = 5,
  pages   = {1019--1045},
  year    = 1993
}

@article{KalaiLe95,
  author  = {Ehud Kalai and Ehud Lehrer},
  title   = {Subjective games and equilibria},
  journal = {Games and Economic Behavior},
  volume  = 8,
  number  = 1,
  pages   = {123--163},
  year    = 1995
}

@book{Kandel82,
  author    = {Abraham Kandel},
  title     = {Fuzzy Techniques in Pattern Recognition},
  publisher = {Wiley},
  year      = 1982
}

@book{KapurKe92,
  author    = {J. N. Kapur and H. K. Kesavan},
  title     = {Entropy Optimization Principles with Applications},
  publisher = {Academic Press},
  year      = 1992
}

@incollection{Karp72,
  author    = {R. M. Karp},
  title     = {Reducibility among combinatorial problems},
  booktitle = {Complexity of Computer Computations},
  publisher = {Plenum Press},
  editor    = {R. E. Miller and J. W. Thatcher},
  pages     = {85--103},
  year      = 1972
}


@article{Katz87,
  author  = {Slava M. Katz},
  title   = {Estimation of probabilities from sparse data for
             the language model component of a speech recognizer},
  journal = {IEEE Transactions on Acoustics, Speech and
             Signal Processing},
  year    = 1987,
  volume  = {ASSP-35},
  number  = 3,
  pages   = {400--401},
  month   = mar
}

@article{Kaufman87,
  author  = {Linda Kaufman},
  title   = {Implementing and accelerating the {EM} algorithm for
             positron emission tomography},
  journal = {IEEE Transactions on Medical Imaging},
  volume  = {MI-6},
  number  = 1,
  pages   = {37--51},
  year    = 1987
}

@article{Kaufman93,
  author  = {Linda Kaufman},
  title   = {Maximum likelihood, least squares, and the penalized
             least squares for {PET}},
  journal = {IEEE Transactions on Medical Imaging},
  volume  = 12,
  number  = 2,
  pages   = {200--214},
  year    = 1993
}


@unpublished{Kearns88,
  author = {Michael Kearns},
  title  = {Thoughts on Hypothesis Boosting},
  note   = {Unpublished manuscript},
  month  = dec,
  year   = 1988
}

@phdthesis{Kearns89,
  author  = {Michael Kearns},
  title   = {The Computational Complexity of Machine Learning},
  school  = {Harvard University},
  month   = may,
  year    = 1989,
  comment = {Technical Report TR-13-89,
             Center for Research in Computing Technology}
}

@book{Kearns90,
  author    = {Michael Kearns},
  title     = {The Computational Complexity of Machine Learning},
  publisher = {MIT Press},
  year      = 1990
}

@inproceedings{Kearns93,
  author    = {Kearns, Michael},
  title     = {Efficient Noise-Tolerant Learning From Statistical
               Queries},
  booktitle = stoc93,
  pages     = {392--401},
  year      = 1993
}

@inproceedings{KearnsLi88,
  author    = {Kearns, Michael and Ming Li},
  title     = {Learning in the Presence of Malicious Errors},
  booktitle = stoc88,
  pages     = {267--280},
  month     = may,
  year      = 1988,
  note      = {To appear, {\it SIAM Journal on Computing}},
  comment   = {Studies pac learning in the presence of worst sort of errors}
}

@article{KearnsLi93,
  author  = {Kearns, Michael and Ming Li},
  title   = {Learning in the Presence of Malicious Errors},
  journal = sicomp,
  volume  = 22,
  number  = 4,
  pages   = {807--837},
  month   = aug,
  year    = 1993
}

@inproceedings{KearnsLiPiVa87,
  author    = {Kearns, Michael and Ming Li and Leonard Pitt and Leslie
               Valiant},
  title     = {On the Learnability of {B}oolean Formulae},
  booktitle = stoc87,
  pages     = {285--295},
  month     = may,
  year      = 1987
}

@inproceedings{KearnsLiPiVa87b,
  author    = {Kearns, Michael and Ming Li and Leonard Pitt and Leslie
               Valiant},
  title     = {Recent Results on {Boolean} Concept Learning},
  booktitle = {Proceedings of the Fourth International Workshop on
               Machine Learning},
  address   = {University of California, Irvine},
  year      = 1987,
  month     = jun,
  pages     = {337--352}
}

@inproceedings{KearnsMa96,
  author    = {Michael Kearns and Yishay Mansour},
  title     = {On the Boosting Ability of Top-Down Decision Tree
               Learning Algorithms},
  booktitle = stoc96,
  year      = 1996
}

@inproceedings{KearnsMaNgRo95,
  author    = {Michael Kearns and Yishay Mansour and Andrew Y. Ng
               and Dana Ron},
  title     = {An experimental and theoretical comparison of model
               selection methods},
  booktitle = colt95,
  year      = 1995
}

@inproceedings{KearnsMaRoRuScSe94,
  author    = {Michael Kearns and Yishay Mansour and Dana Ron and
               Ronitt Rubinfeld and Robert E. Schapire and Linda
               Sellie},
  title     = {On the learnability of discrete distributions},
  booktitle = stoc94,
  pages     = {273--282},
  year      = 1994
}

@inproceedings{KearnsPi89,
  author    = {Kearns, Michael and Leonard Pitt},
  title     = {A Polynomial-time Algorithm for Learning $k$-variable
               Pattern Languages from Examples},
  booktitle = colt89,
  pages     = {57--71},
  month     = jul,
  year      = 1989
}

@inproceedings{KearnsSc90,
  author    = {Michael J. Kearns and Robert E. Schapire},
  title     = {Efficient Distribution-free Learning of Probabilistic
               Concepts},
  booktitle = focs90,
  pages     = {382--391},
  month     = oct,
  year      = 1990,
  note      = {To appear, {\it Journal of Computer and System Sciences}}
}

@article{KearnsSc94,
  author  = {Michael J. Kearns and Robert E. Schapire},
  title   = {Efficient Distribution-free Learning of Probabilistic
             Concepts},
  journal = jcss,
  volume  = 48,
  number  = 3,
  pages   = {464--497},
  year    = 1994
}

@inproceedings{KearnsScSe92,
  author    = {Michael J. Kearns and Robert E. Schapire and Linda M.
               Sellie},
  title     = {Toward Efficient Agnostic Learning},
  booktitle = colt92,
  pages     = {341--352},
  month     = jul,
  year      = 1992,
  note      = {To appear, {\it Machine Learning}}
}

@article{KearnsScSe94,
  author  = {Michael J. Kearns and Robert E. Schapire and Linda M.
             Sellie},
  title   = {Toward Efficient Agnostic Learning},
  journal = ml,
  volume  = 17,
  pages   = {115--141},
  year    = 1994
}

@techreport{KearnsVa88,
  author      = {Michael Kearns and Leslie G. Valiant},
  title       = {Learning {B}oolean Formulae or Finite Automata is as
                 Hard as Factoring},
  institution = {Harvard University Aiken Computation Laboratory},
  number      = {TR-14-88},
  month       = aug,
  year        = 1988
}

@inproceedings{KearnsVa89,
  author    = {Michael Kearns and Leslie G. Valiant},
  title     = {Cryptographic Limitations on Learning {B}oolean
               Formulae and Finite Automata},
  booktitle = stoc89,
  pages     = {433--444},
  month     = may,
  year      = 1989,
  note      = {To appear, {\it Journal of the Association for Computing
               Machinery}}
}

@article{KearnsVa94,
  author  = {Michael Kearns and Leslie G. Valiant},
  title   = {Cryptographic Limitations on Learning {B}oolean
             Formulae and Finite Automata},
  journal = jacm,
  volume  = 41,
  number  = 1,
  pages   = {67--95},
  month   = jan,
  year    = 1994
}

@book{KearnsVa94b,
  author    = {Michael J. Kearns and Umesh V. Vazirani},
  title     = {An Introduction to Computational Learning Theory},
  publisher = {MIT Press},
  year      = 1994
}

@techreport{KellyGl89,
  author      = {Kevin T. Kelly and Clark Glymour},
  title       = {Inductive Inference from Theory Laden Data},
  institution = {CMU Laboratory for Computational Linguistics},
  number      = {CMU-LCL-89-5},
  month       = oct,
  year        = 1989
}

@article{KesavanKa89,
  author  = {H. K. Kesavan and J. N. Kapur},
  title   = {The Generalized Maximum Entropy Principle},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = 19,
  number  = 5,
  year    = 1989,
  month   = sep # "/" # oct,
  pages   = {1042--1052},
  comment = {Generalizes to include a prior distribution, using
             Kullback/Leibler Minimum Discrimination Information metric}
}

@techreport{KeshavLuPhReSa94,
  author = {S. Keshav and Carsten Lund and Steven Phillips and
            Nick Reingold and Huzur Saran},
  title  = {An empirical evaluation of virtual circuit holding
            time policies in {IP}-over-{ATM} networks},
}

@article{Khacian79,
  author  = {Khacian, L. G.},
  title   = {A Polynomial Algorithm for Linear Programming},
  journal = {Soviet Math. Doklady},
  year    = 1979,
  volume  = 20,
  pages   = {191--194},
}

@phdthesis{Kim83,
  author  = {Kim, Jin Hyung},
  title   = {CONVINCE: A Conversational Inference Consolidation Engine},
  school  = {University of California, Los Angeles},
  year    = 1983,
  comment = {Interactive decision support system using Pearl's Bayesian
             networks},
}

@techreport{KivinenWa94,
  author      = {Jyrki Kivinen and Manfred K. Warmuth},
  title       = {Additive Versus Exponentiated Gradient Updates
                 For Learning Linear Functions},
  institution = ucsccrl,
  number      = {UCSC-CRL-94-16},
  year        = 1994,
}

@inproceedings{KivinenWa94b,
  author    = {Jyrki Kivinen and Manfred K. Warmuth},
  title     = {Using experts for predicting continuous outcomes},
  booktitle = eurocolt93,
  publisher = {Springer-Verlag},
  pages     = {109--120},
  year      = 1994,
}

@inproceedings{KivinenWa95,
  author    = {Jyrki Kivinen and Manfred K. Warmuth},
  title     = {Additive Versus Exponentiated Gradient Updates
               for Linear Prediction},
  booktitle = stoc95,
  year      = 1995,
  pages     = {209--218},
  note      = {See also technical report UCSC-CRL-94-16, University
               of California, Santa Cruz, Computer Research Laboratory},
}

@article{KivinenWa97,
  author  = {Jyrki Kivinen and Manfred K. Warmuth},
  title   = {Additive Versus Exponentiated Gradient Updates
             for Linear Prediction},
  journal = {Information and Computation},
  year    = 1997,
  volume  = 132,
  number  = 1,
  month   = jan,
  pages   = {1--64},
}

@inproceedings{KivinenWa99,
  author    = {Jyrki Kivinen and Manfred K. Warmuth},
  title     = {Boosting as entropy projection},
  booktitle = colt99,
  pages     = {134--144},
  year      = 1999,
}

@article{KivinenWa01,
  author  = {J. Kivinen and M. K. Warmuth},
  title   = {Relative loss bounds for multidimensional regression
             problems},
  journal = ml,
  year    = 2001,
  volume  = 45,
  number  = 3,
  pages   = {301--329},
}

@article{KivinenWaAu??,
  author = {J. Kivinen and M. K. Warmuth and P. Auer},
  title  = {The {Perceptron} algorithm vs. {Winnow}: linear
            vs. logarithmic mistake bounds when few input
            variables are relevant},
}

@inproceedings{KlasnerSi95,
  author    = {Norbert Klasner and Hans Ulrich Simon},
  title     = {From Noise-Free to Noise-Tolerant and from On-line
               to Batch Learning},
  booktitle = colt95,
  pages     = {250--264},
  year      = 1995,
}

@article{Kleinberg90,
  author = {E. M. Kleinberg},
  title  = {Stochastic discrimination},
}

@article{Kleinberg96,
  author = {E. M. Kleinberg},
  title  = {An overtraining-resistant stochastic modeling method
            for pattern recognition},
}

unpublished{Kleinberg9?
,author=	{E. M. Kleinberg}
,title=		{An overtraining-resistant stochastic modeling method
		 for pattern recognition}
}

@inproceedings{KleinYo99,
  author    = {Philip Klein and Neal Young},
  title     = {On the Number of Iterations for {Dantzig-Wolfe}
               Optimization and Packing-Covering Approximation
               Algorithms},
  booktitle = {Proceedings of the Seventh Conference on Integer
               Programming and Combinatorial Optimization},
  year      = 1999,
}

@book{Knuth68,
  author    = {Knuth, Donald E.},
  title     = {The Art of Computer Programming: Fundamental Algorithms},
  volume    = 1,
  publisher = {Addison-Wesley},
  year      = 1968,
}

@techreport{KnuthLaRo88,
  author      = {Donald E. Knuth and Tracy Larrabee and Paul M. Roberts},
  title       = {Mathematical Writing},
  institution = {Stanford University Computer Science Department},
  number      = {STAN-CS-88-1193},
  year        = 1988,
  month       = jan,
}

@article{KoHu87,
  author  = {Ko, K. and Hua, C.},
  title   = {A note on the two-variable pattern-finding problem},
  journal = jcss,
  volume  = 34,
  pages   = {75--86},
  year    = 1987,
}

@inproceedings{KoMaTz90,
  author    = {Ko, Ker-I and Assaf Marron and Wen-Guey Tzeng},
  title     = {Learning String Patterns and Tree Patterns from Examples},
  booktitle = {Proceedings of the Seventh International Conference on
               Machine Learning},
  month     = jun,
  year      = 1990,
}

@unpublished{KoTz90,
  author = {Ko, Ker-I and Wen-Guey Tzeng},
  title  = {Finding Common Patterns is Complete for the Second Level
            of the Polynomial Time Hierarchy},
  note   = {Unpublished},
  year   = 1990,
}

@book{KodratoffMi90,
  editor    = {Yves Kodratoff and Ryszard Michalski},
  title     = {Machine Learning: An Artificial Intelligence Approach},
  volume    = {III},
  publisher = {Morgan Kaufmann},
  year      = 1990,
}


@book{Kohavi78,
  author    = {Kohavi, Zvi},
  title     = {Switching and Finite Automata Theory},
  edition   = {second},
  publisher = {McGraw-Hill},
  year      = 1978,
  comment   = {Chapter on State-Identification and Fault-Detection
               Experiments},
}

@inproceedings{KohaviKu97,
  author    = {Ron Kohavi and Clayton Kunz},
  title     = {Option Decision Trees with Majority Votes},
  booktitle = ml97,
  pages     = {161--169},
  year      = 1997,
}

@inproceedings{KohaviWo96,
  author    = {Ron Kohavi and David H. Wolpert},
  title     = {Bias plus variance decomposition for zero-one loss
               functions},
  booktitle = ml96,
  pages     = {275--283},
  year      = 1996,
}

@article{Kolaczyk96,
  author = {Eric D. Kolaczyk},
  title  = {A Wavelet Shrinkage Approach to Tomographic Image
            Reconstruction},
}

@inproceedings{KollerSa97,
  author    = {D. Koller and M. Sahami},
  title     = {Hierarchically classifying documents using very few words},
  booktitle = ml97,
  pages     = {171--178},
  year      = 1997,
}

@article{Kolmogorov68,
  author  = {Kolmogorov, Andrei N.},
  title   = {Logical Basis for Information Theory and Probability Theory},
  journal = {IEEE Transactions on Information Theory},
  volume  = {IT-14},
  number  = 5,
  pages   = {662--664},
  year    = 1968,
  month   = sep,
  comment = {Definition of `Kolmogorov' complexity; analogy to information-
             theoretic entropy; notion of random sequences},
}

@inproceedings{KongDi95,
  author    = {Eun Bae Kong and Thomas G. Dietterich},
  title     = {Error-Correcting Output Coding Corrects Bias and
               Variance},
  booktitle = ml95,
  year      = 1995,
  pages     = {313--321},
}

@article{KoltchinskiiPa02,
  author  = {V. Koltchinskii and D. Panchenko},
  title   = {Empirical margin distributions and bounding the
             generalization error of combined classifiers},
  journal = annstat,
  volume  = 30,
  number  = 1,
  month   = feb,
  year    = 2002,
}

@inproceedings{KoltchinskiiPaLo01,
  author    = {Vladimir Koltchinskii and Dmitriy Panchenko and
               Fernando Lozano},
  title     = {Some new bounds on the generalization error of
               combined classifiers},
  booktitle = nips13,
  year      = 2001,
}

@inproceedings{KoltchinskiiPaLo01b,
  author    = {Vladimir Koltchinskii and Dmitriy Panchenko and
               Fernando Lozano},
  title     = {Further Explanation of the Effectiveness of Voting
               Methods: The Game Between Margins and Weights},
  booktitle = colt01,
  pages     = {241--255},
  year      = 2001,
}

@incollection{Kononenko92,
  author = {Igor Kononenko},
  title  = {Combining decisions of multiple rules},
}

@unpublished{Koza95,
  author = {John R. Koza},
  title  = {A response to the {ML-95} paper entitled ``Hill
            climbing beats genetic search on a {B}oolean circuit
            synthesis problem of {K}oza's''},
}

@article{KrichevsyTr81,
  author  = {R. E. Krichevsky and V. K. Trofimov},
  title   = {The performance of universal encoding},
  journal = ieeeit,
  year    = 1981,
  volume  = {IT-27},
  pages   = {199--207},
  month   = mar,
  note    = {The paper where the add 1/2 rule is first shown to be optimal},
}

@article{Kugel77,
  author  = {Kugel, Peter},
  title   = {Induction, Pure and Simple},
  journal = infctrl,
  year    = 1977,
  volume  = 35,
  pages   = {276--336},
}

@inproceedings{Kuhl??,
  author  = {Patricia K. Kuhl},
  title   = {Infants' perception and representation of speech:
             development of a new theory},
  comment = {from pereira},
}

@inproceedings{KuipersBy88,
  author    = {Benjamin J. Kuipers and Yung-Tai Byun},
  title     = {A Robust, Qualitative Approach to a Spatial Learning
               Mobile Robot},
  booktitle = {SPIE Advances in Intelligent Robotics Systems},
  year      = 1988,
  month     = nov,
}

@article{Kullback68,
  author  = {Kullback, S.},
  title   = {Probability Densities with Given Marginals},
  journal = {Annals of Mathematical Statistics},
  volume  = 39,
  number  = 4,
  pages   = {1236--1243},
  year    = 1968,
  comment = {Extension of Ireland/Kullback results to continuous
             densities.},
}

@article{Kullback71,
  author  = {Kullback, S.},
  title   = {Marginal Homogeneity of Multidimensional Contingency Tables},
  journal = {Annals of Mathematical Statistics},
  volume  = 42,
  number  = 2,
  pages   = {594--606},
  year    = 1971,
  comment = {Uses maximum entropy solution to derive minimum discrimination
             information statistic.},
}

@inproceedings{KushilevitzMa91,
  author    = {Eyal Kushilevitz and Yishay Mansour},
  title     = {Learning Decision Trees using the {F}ourier Spectrum},
  booktitle = stoc91,
  pages     = {455--464},
  year      = 1991,
  month     = may,
}

@article{KushilevitzMa93,
  author  = {Eyal Kushilevitz and Yishay Mansour},
  title   = {Learning Decision Trees using the {Fourier} Spectrum},
  journal = sicomp,
  year    = 1993,
  volume  = 22,
  number  = 6,
  pages   = {1331--1348},
}

@incollection{KwokCa90,
  author    = {Suk Wah Kwok and Chris Carter},
  title     = {Multiple decision trees},
  booktitle = {Uncertainty in Artificial Intelligence 4},
  editor    = {Ross D. Shachter and Tod S. Levitt and Laveen N.
               Kanal and John F. Lemmer},
  publisher = {North-Holland},
  pages     = {327--335},
  year      = 1990,
}

@inproceedings{Lafferty99,
  author    = {John Lafferty},
  title     = {Additive Models, Boosting and Inference for
               Generalized Divergences},
  booktitle = colt99,
  pages     = {125--133},
  year      = 1999,
}

@inproceedings{LaffertyDeDe97,
  author    = {John D. Lafferty and Stephen Della Pietra and Vincent
               Della Pietra},
  title     = {Statistical learning algorithms based on {Bregman}
               distances},
  booktitle = {Proceedings of the Canadian Workshop on Information Theory},
  year      = 1997,
}

@article{LaiRo85,
  author  = {T. L. Lai and Herbert Robbins},
  title   = {Asymptotically Efficient Adaptive Allocation Rules},
  journal = {Advances in Applied Mathematics},
  year    = 1985,
  volume  = 6,
  pages   = {4--22},
}

@article{LaiYa95,
  author  = {Tze-Leung Lai and Sidney Yakowitz},
  title   = {Machine Learning and Nonparametric Bandit Theory},
  journal = {IEEE Transactions on Automatic Control},
  year    = 1995,
  month   = jul,
  volume  = 40,
  number  = 7,
  pages   = {1199--1209},
}

@book{Laird88,
  author    = {Philip D. Laird},
  title     = {Learning from Good and Bad Data},
  series    = {Kluwer international series in engineering and computer
               science},
  publisher = {Kluwer Academic Publishers},
  address   = {Boston},
  year      = 1988,
  comment   = {His PhD thesis in book form},
}

@techreport{LairdGa88,
  author      = {Laird, Philip and Evan Gamble},
  title       = {Learning a Probability Distribution Efficiently and Reliably},
  institution = {NASA Ames Research Center},
  year        = 1988,
  month       = oct,
}

@article{LairdNeRo87,
  author  = {Laird, J. E. and A. Newell and P. S. Rosenbloom},
  title   = {SOAR: An architecture for General Intelligence},
  journal = {Artificial Intelligence},
  volume  = 33,
  number  = 1,
  pages   = {1--64},
  year    = 1987,
  month   = sep,
}

@inproceedings{LairdRoNe84,
  author       = {Laird, John and Paul Rosenbloom and Allen Newell},
  title        = {Towards Chunking as a General Learning Mechanism},
  booktitle    = {Proceedings AAAI-84},
  organization = {American Association for Artificial Intelligence},
  pages        = {188--192},
  year         = 1984,
  month        = aug,
}

@article{LairdRoNe86,
  author  = {Laird, John and Paul Rosenbloom and Allen Newell},
  title   = {Chunking in {S}oar: the anatomy of a general learning
             mechanism},
  journal = ml,
  year    = 1986,
  volume  = 1,
  number  = 1,
  pages   = {11--46},
}

@inproceedings{Lang92,
  author    = {Kevin J. Lang},
  title     = {Random {DFA}'s can be approximately learned from
               sparse uniform examples},
  booktitle = colt92,
  pages     = {45--52},
  month     = jul,
  year      = 1992,
}


@inproceedings{Lang95,
  author    = {K. Lang},
  title     = {Newsweeder: Learning to Filter Netnews},
  booktitle = ml95,
  pages     = {331--339},
  year      = 1995,
}

@article{LatteuxRo84,
  author  = {M. Latteux and G. Rozenberg},
  title   = {Commutative One-Counter Languages Are Regular},
  journal = jcss,
  year    = 1984,
  volume  = 29,
  number  = 1,
  pages   = {54--57},
  month   = aug,
  comment = {Gives a nice characterization of commutative regular languages},
}

@article{LeCunBoBeHa98,
  author = {Yann LeCun and L\'eon Bottou and Yoshua Bengio and
            Patrick Haffner},
  title  = {Gradient-Based Learning Applied to Document
            Recognition},
  year   = 1998,
}

@inproceedings{LebanonLa02,
  author    = {Guy Lebanon and John Lafferty},
  title     = {Boosting and maximum likelihood for exponential models},
  year      = 2002,
  booktitle = nips14,
}

@inproceedings{LeCunEtAl95,
  author    = {LeCun, Y. and Jackel, L. D. and Bottou, L. and
               Brunot, A. and Cortes, C. and Denker, J. S. and
               Drucker, H. and Guyon, I. and Muller, U. A. and
               Sackinger, E. and Simard, P. and Vapnik, V.},
  title     = {Comparison of learning algorithms for handwritten
               digit recognition},
  booktitle = {International Conference on Artificial Neural Networks},
  year      = 1995,
  pages     = {53--60},
}

@phdthesis{Lee88,
  author = {Lee, Kai-Fu},
  title  = {Large-Vocabulary Speaker-Independent Continuous
            Speech Recognition: The SPHINX System},
  school = {Carnegie Mellon University Computer Science Dept.},
  month  = apr,
  year   = 1988,
  note   = {Tech report number CMU-CS-88-148},
}

@article{LeeBaWi96,
  author  = {Wee Sun Lee and Peter L. Bartlett and Robert C. Williamson},
  title   = {Efficient agnostic learning of neural networks with
             bounded fan-in},
  journal = ieeeit,
  volume  = 42,
  number  = 6,
  pages   = {2118--2132},
  year    = 1996,
}


@article{LeeBaWi??,
  author  = {Wee Sun Lee and Peter L. Bartlett and Robert C. Williamson},
  title   = {The importance of convexity in learning with squared loss},
  year    = {to appear},
  journal = ieeeit,
}

@article{LeeMa88,
  author  = {Kai-Fu Lee and Sanjoy Mahajan},
  title   = {A Pattern Classification Approach to Evaluation Function
             Learning},
  journal = {Artificial Intelligence},
  volume  = 36,
  number  = 1,
  pages   = {1--26},
  year    = 1988,
  month   = aug,
  comment = {Learns Othello evaluation function based on four features and
             multivariate normal distribution assumption.},
}

@article{LeeSe99,
  author  = {D. D. Lee and H. S. Seung},
  title   = {Learning the parts of objects with nonnegative
             matrix factorization},
  journal = {Nature},
  volume  = 401,
  pages   = 788,
  year    = 1999,
}

@inproceedings{LeeSe01,
  author    = {Daniel D. Lee and H. Sebastian Seung},
  title     = {Algorithms for non-negative matrix factorization},
  year      = 2001,
  booktitle = nips13,
}

@article{Lemmer83,
  author  = {Lemmer, John F.},
  title   = {Generalized Bayesian updating of incompletely specified
             distributions},
  journal = {Large Scale Systems},
  volume  = 5,
  pages   = {51--68},
  year    = 1983,
  comment = {Derives sufficient conditions for when marginals are
             consistent with some underlying distribution.},
}

@article{LenstraLeLo82,
  author  = {A. K. Lenstra and H. W. Lenstra and L. Lov\'{a}sz},
  title   = {Factoring Polynomials with Rational Coefficients},
  journal = {Mathematische Annalen},
  volume  = 261,
  pages   = {515--534},
  year    = 1982,
}

@techreport{Levin80,
  author      = {Levin, Leonid A.},
  title       = {A Concept of Independence with Applications in Various Fields
                 of Mathematics},
  institution = mitlcs,
  number      = {MIT/LCS/TR-235},
  year        = 1980,
  month       = apr,
}

@inproceedings{Levin84,
  author    = {Leonid A. Levin},
  title     = {Problems, complete in ``average'' instance},
  booktitle = stoc84,
  pages     = {465},
  month     = apr,
  year      = 1984,
}

@article{Levin86,
  author  = {Leonid A. Levin},
  title   = {Average Case Complete Problems},
  journal = sicomp,
  volume  = 15,
  number  = 1,
  pages   = {285--286},
  year    = 1986,
  month   = feb,
}

@article{LevinsonRaSo83,
  author  = {S. E. Levinson and L. R. Rabiner and M. M. Sondhi},
  title   = {An Introduction to the Application of the Theory of
             Probabilistic Functions of a Markov Process to
             Automatic Speech Recognition},
  journal = {Bell System Technical Journal},
  volume  = 62,
  number  = 4,
  pages   = {1035--1074},
  year    = 1983,
  month   = apr,
  comment = {Analysis of implementation of Hidden Markov models and the
             Baum-Welch algorithm},
}

@techreport{Lewis92,
  author      = {David Lewis},
  title       = {Representation and Learning in Information Retrieval},
  institution = {Computer Science Dept., University of
                 Massachusetts at Amherst},
  number      = {91-93},
  year        = {1992},
  note        = {PhD Thesis},
}

@unpublished{Lewis95,
  author = {David D. Lewis},
  title  = {Evaluation and optimizing autonomous text
            classification systems},
}

@inproceedings{LewisCa94,
  author    = {David Lewis and Jason Catlett},
  title     = {Heterogeneous Uncertainty Sampling for Supervised Learning},
  booktitle = ml94,
  year      = 1994,
}

@inproceedings{LewisGa94,
  author    = {David Lewis and William Gale},
  title     = {Training text classifiers by uncertainty sampling},
  booktitle = {Seventeenth Annual International ACM SIGIR
               Conference on Research and Development in Information Retrieval},
  year      = 1994,
}

@inproceedings{LewisRi94,
  author    = {David D. Lewis and Marc Ringuette},
  title     = {A comparison of two learning algorithms for text
               categorization},
  booktitle = {Third Annual Symposium on Document Analysis and
               Information Retrieval},
  year      = 1994,
  pages     = {81--93},
}


@inproceedings{LewisScCaPa96,
  author    = {David D. Lewis and Robert E. Schapire and James
               P. Callan and Ron Papka},
  title     = {Training algorithms for linear text classifiers},
  year      = 1996,
  booktitle = {SIGIR '96: Proceedings of the 19th Annual
               International Conference on Research and Development
               in Information Retrieval},
}

@book{LiVit93,
  author    = {Ming Li and Paul Vit\'{a}nyi},
  title     = {An introduction to {Kolmogorov} complexity and its applications},
  publisher = {Springer-Verlag},
  year      = 1993,
  series    = {Texts and Monographs in Computer Science},
}

@article{Li94,
  author = {Shuhe Li},
  title  = {Dynamic stability and learning processes in
            {$2\times 2$} coordination games},
}

@phdthesis{Li99,
  author = {Qiang (Jonathan) Li},
  title  = {Estimation of Mixture Models},
  school = {Yale University},
  month  = {May},
  year   = 1999,
  annote = {Student of Andrew Barron},
}

@inproceedings{Liggett96,
  author = {Thomas M. Liggett},
  title  = {Stochastic models of interacting systems},
}

@inproceedings{LinialMaNi89,
  author    = {Nathan Linial and Yishay Mansour and Noam Nisan},
  title     = {Constant depth circuits, {F}ourier Transform, and
               Learnability},
  booktitle = focs89,
  pages     = {574--579},
  year      = 1989,
  month     = oct,
}

@article{LinialMaNi93,
  author  = {Nathan Linial and Yishay Mansour and Noam Nisan},
  title   = {Constant depth circuits, {F}ourier Transform, and
             Learnability},
  journal = jacm,
  volume  = 40,
  number  = 3,
  pages   = {607--620},
  year    = 1993,
  month   = jul,
}

@inproceedings{LinialMaRi88,
  author    = {Nathan Linial and Yishay Mansour and Ronald L. Rivest},
  title     = {Results on Learnability and the {V}apnik-{C}hervonenkis
               Dimension},
  booktitle = focs88,
  pages     = {120--129},
  month     = oct,
  year      = 1988,
}

@inproceedings{LinialMaRi88b,
  author    = {Nathan Linial and Yishay Mansour and Ronald L. Rivest},
  title     = {Results on Learnability and the {V}apnik-{C}hervonenkis
               Dimension},
  booktitle = colt88,
  pages     = {56--68},
  publisher = {Morgan-Kaufmann},
  year      = 1989,
}

@article{LinialMaRi91,
  author  = {Nathan Linial and Yishay Mansour and Ronald L. Rivest},
  title   = {Results on Learnability and the {V}apnik-{C}hervonenkis
             Dimension},
  journal = infcomp,
  year    = 1991,
  month   = jan,
  volume  = 90,
  number  = 1,
  pages   = {33--49},
}

@article{Lippmann87b,
  author  = {Lippmann, Richard P.},
  title   = {An Introduction to Computing with Neural Nets},
  journal = {IEEE ASSP Magazine},
  pages   = {4--22},
  year    = 1987,
  month   = apr,
  comment = {Good survey article},
}

@inproceedings{Littlestone87,
  author    = {Littlestone, Nick},
  title     = {Learning when Irrelevant Attributes Abound},
  booktitle = focs87,
  pages     = {68--77},
  year      = 1987,
  month     = oct,
}

@article{Littlestone88,
  author  = {Littlestone, Nick},
  title   = {Learning when Irrelevant Attributes Abound: A New
             Linear-threshold Algorithm},
  journal = ml,
  year    = 1988,
  volume  = 2,
  pages   = {285--318},
}

@phdthesis{Littlestone88b,
  author = {Nick Littlestone},
  title  = {Mistake bounds and logarithmic linear-threshold
            learning algorithms},
  school = {U. C. Santa Cruz},
  month  = mar,
  year   = 1989,
}

@phdthesis{Littlestone89,
  author = {Nick Littlestone},
  title  = {Mistake bounds and logarithmic linear-threshold
            learning algorithms},
  school = {U. C. Santa Cruz},
  month  = mar,
  year   = 1989,
}

@inproceedings{Littlestone89b,
  author    = {Nick Littlestone},
  title     = {From On-line to Batch Learning},
  booktitle = colt89,
  pages     = {269--284},
  month     = jul,
  year      = 1989,
}

@inproceedings{LittlestoneLoWa91,
  author    = {Nicholas Littlestone and Philip M. Long and Manfred
               K. Warmuth},
  title     = {On-Line Learning of Linear Functions},
  booktitle = stoc91,
  pages     = {465--475},
  month     = may,
  year      = 1991,
}

@article{LittlestoneLoWa95,
  author  = {Nicholas Littlestone and Philip M. Long and Manfred
             K. Warmuth},
  title   = {On-Line Learning of Linear Functions},
  journal = {Computational Complexity},
  volume  = 5,
  number  = 1,
  year    = 1995,
  pages   = {1--23},
}

@unpublished{LittlestoneWa87,
  author = {Littlestone, Nick and Manfred Warmuth},
  title  = {Relating Data Compression and Learnability},
  year   = 1987,
  month  = nov,
  note   = {Unpublished manuscript},
}

@unpublished{LittlestoneWa89,
  author = {Manfred Warmuth and Nick Littlestone},
  title  = {Learning from an adversary},
  year   = 1989,
  note   = {In preparation},
}

@inproceedings{LittlestoneWa89b,
  author    = {Nick Littlestone and Manfred Warmuth},
  title     = {The Weighted Majority Algorithm},
  booktitle = focs89,
  year      = 1989,
  month     = oct,
  pages     = {256--261},
}

@article{LittlestoneWa94,
  author  = {Nick Littlestone and Manfred K. Warmuth},
  title   = {The Weighted Majority Algorithm},
  journal = infcomp,
  year    = 1994,
  volume  = 108,
  pages   = {212--261},
}

@book{Lovasz79,
  author    = {L. Lov{\'a}sz},
  title     = {Combinatorial Problems and Exercises},
  publisher = {North-Holland},
  year      = 1979,
}

@book{LuceRa57,
  author    = {R. Duncan Luce and Howard Raiffa},
  title     = {Games and Decisions},
  year      = 1957,
  publisher = {John Wiley \& Sons},
}

@unpublished{LundPhRe94,
  author = {Carsten Lund and Steven Phillips and Nick Reingold},
  title  = {Adaptive holding policies for {IP} over {ATM}
            networks},
}

@phdthesis{Mackay91,
  author = {David J. C. MacKay},
  title  = {Bayesian Methods for Adaptive Models},
  year   = 1991,
  school = {California Institute of Technology},
}

@inproceedings{MaclinOp97,
  author    = {Richard Maclin and David Opitz},
  title     = {An Empirical Evaluation of Bagging and Boosting},
  booktitle = aaai97,
  year      = 1997,
  pages     = {546--551},
}

@unpublished{Macskassy98,
  author = {Sofus Macskassy},
  title  = {A comparison of two on-line algorithms that adapt to
            concept drift},
}

@book{MadalaIv94,
  author    = {Hema R. Madala and Alexy G. Ivakhnenko},
  title     = {Inductive Learning Algorithms for Complex Systems
               Modeling},
  publisher = {CRC Press},
  year      = 1994,
}

@book{MagnusKaSo66,
  author    = {Wilhelm Magnus and Abraham Karrass and Donald Solitar},
  title     = {Combinatorial Group Theory: Presentation of Groups in
               Terms of Generators and Relations},
  publisher = {John Wiley \& Sons},
  address   = {New York},
  year      = 1966,
  comment   = {Discusses word problem and Cayley graph of a group},
}

@article{MakhoulRoGi85,
  author  = {Makhoul, John and Salim Roucos and Herbert Gish},
  title   = {Vector Quantization in Speech Coding},
  journal = {Proceedings of the IEEE},
  year    = 1985,
  month   = nov,
  volume  = 73,
  number  = 11,
  pages   = {1551--1588},
  comment = {Excellent overview and introduction to vector quantization.},
}

@inproceedings{MaltzahnRiGrMa99,
  author    = {Carlos Maltzahn and Kathy Richardson and Dirk
               Grunwald and James Martin},
  title     = {On Bandwidth Smoothing},
  year      = 1999,
  booktitle = {Fourth International Web Caching Workshop},
}

@unpublished{Mansour90,
  author = {Yishay Mansour},
  title  = {Learning via {F}ourier transform},
  note   = {Unpublished manuscript},
  year   = 1990,
  month  = apr,
}

@inproceedings{Mansour92,
  author    = {Yishay Mansour},
  title     = {Randomized interpolation and approximation of sparse
               polynomials},
  booktitle = icalp92,
  pages     = {261--272},
  year      = 1992,
  month     = jul,
}

@inproceedings{Mansour97,
  author    = {Yishay Mansour},
  title     = {Pessimistic decision tree pruning based on tree size},
  booktitle = ml97,
  year      = 1997,
  pages     = {195--201},
}

@inproceedings{MargineantuDi97,
  author    = {Dragos D. Margineantu and Thomas G. Dietterich},
  title     = {Pruning Adaptive Boosting},
  booktitle = ml97,
  year      = 1997,
  pages     = {211--218},
}

@article{Marimon93,
  author  = {Ramon Marimon},
  title   = {Adaptive learning, evolutionary dynamics and
             equilibrium selection in games},
  journal = {European Economic Review},
  year    = 1993,
  volume  = 37,
  pages   = {603--611},
}

@article{Marron87,
  author  = {Marron, Assaf and Ker-I Ko},
  title   = {Identification of Pattern Languages from Examples and
             Queries},
  journal = infcomp,
  year    = 1987,
  month   = aug,
  volume  = 74,
  number  = 2,
  pages   = {91--112},
}

@techreport{Marroquin85,
  author      = {Marroquin, Jose Luis},
  title       = {Probabilistic Solution of Inverse Problems},
  institution = {MIT AI Laboratory},
  number      = {AI-TR-860},
  month       = sep,
  year        = 1985,
  comment     = {Vision problems, Markov Random Fields, simulation techniques
                 similar to simulated annealing used for Bayesian estimation.},
}

@techreport{MasonBaBa98,
  author      = {Llew Mason and Peter Bartlett and Jonathan Baxter},
  title       = {Direct optimization of margins improves
                 generalization in combined classifiers},
  institution = {Department of Systems Engineering, Australian
                 National University},
  year        = 1998,
}

@inproceedings{MasonBaBa00,
  author    = {Llew Mason and Peter Bartlett and Jonathan Baxter},
  title     = {Direct optimization of margins improves
               generalization in combined classifiers},
  year      = 2000,
  booktitle = nips12,
}

@techreport{MasonBaBaFr99,
  author      = {Llew Mason and Jonathan Baxter and Peter Bartlett and Marcus Frean},
  title       = {Doom {II}},
  institution = {Department of Systems Engineering, Australian
                 National University},
  year        = 1999,
  note        = {(Available from http://syseng.anu.edu.au/lsg)},
}

@incollection{MasonBaBaFr99b,
  author    = {Llew Mason and Jonathan Baxter and Peter Bartlett and Marcus Frean},
  title     = {Functional Gradient Techniques for Combining Hypotheses},
  booktitle = {Advances in Large Margin Classifiers},
  publisher = {MIT Press},
  year      = 1999,
  comment   = {editor= {Alexander J. Smola and Peter J. Bartlett and
               Bernhard Sch\"olkopf and Dale Schuurmans}},
}

@inproceedings{MasonBaBaFr00,
  author    = {Llew Mason and Jonathan Baxter and Peter Bartlett and Marcus Frean},
  title     = {Boosting Algorithms as Gradient Descent},
  year      = 2000,
  booktitle = nips12,
}

@article{MasonBaGo02,
  author  = {Llew Mason and Peter L. Bartlett and Mostefa Golea},
  title   = {Generalization Error of Combined Classifiers},
  journal = jcss,
  volume  = {65},
  number  = {2},
  year    = {2002},
  pages   = {415--438},
}

@unpublished{Matan??,
  author = {Ofer Matan},
  title  = {On-Site Learning},
}

@unpublished{MatanSo??,
  author = {Ofer Matan and Sara Solla},
  title  = {Some notes on {S}chapire's boosting algorithm},
}

@mastersthesis{Mataric90,
  author = {Maja J. Mataric},
  title  = {A Distributed Model for Mobile Robot
            Environment-Learning and Navigation},
  school = mit,
  month  = may,
  year   = 1990,
  note   = {Technical Report AI-TR 1228, MIT Artificial
            Intelligence Laboratory},
}

@article{Matthews91,
  author  = {P. Matthews},
  title   = {Generating a Random Linear Extension of a Partial
             Order},
  journal = anprob,
  year    = 1991,
  volume  = 19,
  number  = 3,
  pages   = {1367--1392},
}

@article{MazurHa78,
  author = {James E. Mazur and Reid Hastie},
  title  = {Learning as accumulation: a reexamination of the
            learning curve},
  year   = 1978,
}

@inproceedings{McAllester98,
  author    = {David A. McAllester},
  title     = {Some {PAC-Bayesian} Theorems},
  booktitle = colt98,
  year      = 1998,
}

@inproceedings{McAllester99,
  author    = {David McAllester},
  title     = {{PAC-Bayesian} Model Averaging},
  year      = 1999,
  booktitle = colt99,
}

@inproceedings{McallesterSc00,
  author    = {David McAllester and Robert Schapire},
  title     = {On the Convergence Rate of {Good-Turing} Estimators},
  booktitle = colt00,
  year      = 2000,
}

@article{McCarthy56,
  author  = {McCarthy, John},
  title   = {Measures of the Value of Information},
  journal = {Proceedings of the National Academy of Sciences},
  volume  = 42,
  pages   = {654--655},
  year    = 1956,
  comment = {How to pay a weatherman to make honest predictions.
             Generalizes rule that pays him log(Pi) if event predicted with
             prob Pi happens.},
}

@inproceedings{McCarthy58,
  author       = {McCarthy, John},
  title        = {Programs with common sense},
  booktitle    = {Proceedings of the Symposium on the Mechanization of Thought
                  Processes},
  organization = {National Physical Laboratory},
  year         = 1958,
  volume       = 1,
  pages        = {77--84},
  note         = {Reprinted in Minsky's (ed.) {\em Semantic Information
                  Processing}, MIT Press(1968), 403--409},
}

@book{McClellandRu88,
  editor    = {McClelland, James L. and Rumelhart, David E.},
  title     = {Explorations in Parallel Distributed Processing: A Handbook
               of Models, Programs, and Exercises},
  year      = 1988,
  publisher = {MIT Press},
  comment   = {Contains example of back-prop never converging on 1:1:1
               network},
}

@book{McCullaghNe90,
  author    = {P. McCullagh and J. A. Nelder},
  title     = {Generalized Linear Models},
  edition   = {2nd},
  publisher = {CRC Press},
  year      = 1990,
}

@incollection{McDiarmid89,
  author    = {Colin McDiarmid},
  title     = {On the method of bounded differences},
  booktitle = {Surveys in Combinatorics 1989},
  pages     = {148--188},
  publisher = {Cambridge University Press},
  year      = 1989,
}

@misc{McJones97,
  author       = {Paul McJones},
  title        = {EachMovie collaborative filtering data set},
  howpublished = {DEC Systems Research Center},
  year         = {1997},
  note         = {http:/$\!$/www.research.digital.com/SRC/eachmovie/},
}

@article{McKean85,
  author = {Kevin McKean},
  title  = {Decisions, decisions},
  year   = 1985,
}

@article{Megiddo80,
  author  = {N. Megiddo},
  title   = {On repeated games with incomplete information played
             by non-{Bayesian} players},
  journal = {International Journal of Game Theory},
  year    = 1980,
  volume  = 9,
  number  = 3,
  pages   = {157--167},
}

@techreport{Megiddo86,
  author      = {Nimrod Megiddo},
  title       = {On The Complexity of Polyhedral Separability},
  institution = {IBM Almaden Research Center},
  number      = {RJ 5252},
  month       = aug,
  year        = {1986}
}

@article{MegiddoVi88,
  author  = {Nimrod Megiddo and Uzi Vishkin},
  title   = {On Finding a Minimum Dominating Set in a Tournament},
  journal = {Theoretical Computer Science},
  volume  = {61},
  number  = {2-3},
  pages   = {307--316},
  month   = nov,
  year    = {1988},
  comment = {A problem that is equiv. to CNF with log**2 n variables}
}

@incollection{MeirRa03,
  author    = {Meir, Ron and R{\"a}tsch, Gunnar},
  title     = {An Introduction to Boosting and Leveraging},
  booktitle = {Advanced Lectures on Machine Learning (LNAI2600)},
  editor    = {Mendelson, S. and Smola, A.},
  publisher = {Springer},
  pages     = {119--184},
  year      = 2003
}

@article{MerhavFe93,
  author  = {Merhav, N. and Feder, M.},
  title   = {Universal Schemes for Sequential Decision from Individual
             Data Sequences},
  journal = {IEEE Transactions on Information Theory},
  volume  = 39,
  number  = 4,
  pages   = {1280--1292},
  year    = 1993
}

@article{MerhavFeGu93,
  author  = {Merhav, Neri and Feder, Meir and Gutman, Michael},
  title   = {Some Properties of Sequential Predictors for Binary
             {M}arkov Sources},
  journal = ieeeit,
  volume  = 39,
  number  = 3,
  pages   = {887--892},
  month   = may,
  year    = 1993
}

@inproceedings{Merialdo91,
  author    = {Merialdo, Bernard},
  title     = {Tagging text with a probabilistic model},
  booktitle = {Proceedings of the 1991 IEEE International Conference on
               Acoustics, Speech, and Signal Processing},
  pages     = {809--812},
  year      = 1991,
  comment   = {NOTE(review): booktitle and pages were added to satisfy
               the required fields of an inproceedings entry; verify
               against the proceedings.}
}

@misc{MerzMu98,
  author      = {Merz, C. J. and Murphy, P. M.},
  title       = {{UCI} Repository of machine learning databases},
  institution = {University of California, Irvine, Department of
                 Information and Computer Sciences},
  year        = 1998,
  note        = {www.ics.uci.edu/$\sim$mlearn/MLRepository.html},
  url         = {http://www.ics.uci.edu/$\sim$mlearn/MLRepository.html}
}

@inproceedings{MerlerFuLaSp01,
  author    = {Merler, Stefano and Furlanello, Cesare and
               Larcher, Barbara and Sboner, Andrea},
  title     = {Tuning cost-sensitive boosting and its application to
               melanoma diagnosis},
  booktitle = {Multiple Classifier Systems: Proceedings of the 2nd
               International Workshop},
  pages     = {32--42},
  year      = 2001
}

@misc{MerzMu99,
  author      = {Merz, C. J. and Murphy, P. M.},
  title       = {{UCI} Repository of machine learning databases},
  institution = {University of California, Irvine, Department of
                 Information and Computer Sciences},
  year        = 1999,
  note        = {www.ics.uci.edu/$\sim$mlearn/MLRepository.html},
  url         = {http://www.ics.uci.edu/$\sim$mlearn/MLRepository.html}
}

@incollection{Michalski86,
  author    = {Michalski, Ryszard},
  title     = {Understanding the Nature of Learning: Issues and
               Research Directions},
  booktitle = {Machine Learning, An Artificial Intelligence Approach
               (Volume II)},
  publisher = {Morgan Kaufmann},
  pages     = {3--25},
  year      = 1986
}

@book{MichalskiCaMi83,
  editor    = {Michalski, Ryszard S. and Carbonell, Jaime G. and
               Mitchell, Tom M.},
  title     = {Machine Learning: An Artificial Intelligence Approach},
  publisher = {Morgan Kaufmann},
  year      = {1983},
  comment   = {This is volume I of the series.
               address = Los Altos, California}
}

@book{MichalskiCaMi86,
  editor    = {Michalski, Ryszard S. and Carbonell, Jaime G. and
               Mitchell, Tom M.},
  title     = {Machine Learning: An Artificial Intelligence Approach},
  volume    = {II},
  publisher = {Morgan Kaufmann},
  year      = {1986},
  comment   = {address = {Los Altos, California}}
}

@article{Miller56,
  author  = {Miller, G.},
  title   = {The magical number seven, plus or minus two: Some limits
             on our capacity for processing information},
  journal = {Psychological Review},
  volume  = 63,
  pages   = {81--97},
  year    = 1956,
  comment = {The classic paper that says humans can hold 7 +/- 2
             'chunks' in short term memory.}
}

@inproceedings{Minka01,
  author    = {Minka, Thomas P.},
  title     = {Automatic choice of dimensionality for {PCA}},
  booktitle = nips13,
  pages     = {598--604},
  year      = 2001
}

@book{MinskyPa69,
  author    = {Marvin Minsky and Seymour Papert},
  title     = {Perceptrons: An Introduction to Computational Geometry},
  publisher = {The MIT Press},
  year      = {1969},
  comment   = {Classic analysis of the capabilities of the perceptron.}
}

@inproceedings{Mitchell77,
  author    = {Tom M. Mitchell},
  title     = {Version Spaces: A Candidate Elimination Approach to Rule
               Learning},
  booktitle = {Proceedings of the 5th International Joint Conference on
               Artificial Intelligence},
  pages     = {305--310},
  month     = aug,
  year      = {1977}
}

@article{Mitchell93,
  author = {Mitchell, Douglas W.},
  title  = {Computationally convenient optimal intertemporal portfolios
            under linear constraints}
}

@book{Mitchell97,
  author    = {Mitchell, Tom M.},
  title     = {Machine Learning},
  publisher = {McGraw-Hill},
  year      = 1997
}

@incollection{MitchellBuDeDiRoWa90,
  author    = {Mitchell, Tom and Buchanan, Bruce and DeJong, Gerald and
               Dietterich, Thomas and Rosenbloom, Paul and
               Waibel, Alex},
  title     = {Machine Learning},
  booktitle = {Annual Review of Computer Science},
  editor    = {Traub, Joseph F. and Grosz, Barbara J. and
               Lampson, Butler W. and Nilsson, Nils J.},
  volume    = {4},
  publisher = {Annual Reviews},
  pages     = {417--433},
  year      = 1990,
  comment   = {address= Palo Alto}
}

@book{MitchellCaMi86,
  editor    = {Mitchell, Tom M. and Carbonell, Jaime G. and
               Michalski, Ryszard S.},
  title     = {Machine Learning: A Guide to Current Research},
  publisher = {Kluwer Academic Publishers},
  year      = {1986},
  comment   = {address= Boston/Dordrecht/Lancaster}
}

@incollection{Moore56,
  author    = {Moore, Edward F.},
  title     = {Gedanken-Experiments on Sequential Machines},
  booktitle = {Automata Studies},
  editor    = {Shannon, C. E. and McCarthy, J.},
  publisher = {Princeton University Press},
  pages     = {129--153},
  year      = {1956}
}

@unpublished{MooreAt94,
  author = {Moore, Andrew W. and Atkeson, Christopher G.},
  title  = {The parti-game algorithm for variable resolution
            reinforcement learning in multidimensional state-spaces}
}

@unpublished{MooreAt??,
  author = {Moore, Andrew W. and Atkeson, Christopher G.},
  title  = {Prioritized sweeping: reinforcement learning with less data
            and less real time}
}

@inproceedings{MoulinierRaGa96,
  author    = {Isabelle Moulinier and Gailius {Ra\v{s}kinis} and
               Jean-Gabriel Ganascia},
  title     = {Text categorization: a symbolic approach},
  booktitle = {Fifth Annual Symposium on Document Analysis and
               Information Retrieval},
  pages     = {87--99},
  year      = 1996
}

@inproceedings{MorenoLoRa01,
  author    = {Moreno, Pedro J. and Logan, Beth and Raj, Bhiksha},
  title     = {A boosting approach for confidence scoring},
  booktitle = {Proceedings of the 7th European Conference on Speech
               Communication and Technology},
  year      = {2001}
}

@article{MurphyPa94,
  author  = {Murphy, Patrick M. and Pazzani, Michael J.},
  title   = {Exploring the decision forest: an empirical investigation
             of {Occam's} razor in decision tree induction},
  journal = {Journal of Artificial Intelligence Research},
  volume  = 1,
  pages   = {257--275},
  year    = 1994,
  comment = {NOTE(review): journal, volume, pages and year were added
             to satisfy the required fields of an article entry;
             verify against the journal.}
}

@article{MozerWoGrJoKa00,
  author  = {Mozer, Michael C. and Wolniewicz, Richard and
             Grimes, David B. and Johnson, Eric and
             Kaushansky, Howard},
  title   = {Predicting subscriber dissatisfaction and improving
             retention in the wireless telecommunications industry},
  journal = {IEEE Transactions on Neural Networks},
  volume  = 11,
  pages   = {690--696},
  year    = 2000
}

@book{Murray73,
  author    = {Murray, J. D.},
  title     = {Asymptotic Analysis},
  publisher = {Springer-Verlag},
  year      = 1973
}

@article{NakamuraAb95,
  author = {Nakamura, Atsuyoshi and Abe, Naoki},
  title  = {Exact learning of linear combinations of monotone terms
            from function value queries}
}

@article{NarendraTh74,
  author  = {Kumpati S. Narendra and M. A. L. Thathachar},
  title   = {Learning Automata -- A Survey},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = {SMC-4},
  number  = {4},
  pages   = {323--334},
  year    = {1974},
  comment = {Stochastic automata that adapt behavior under
             reinforcement schemes}
}

@book{NarendraTh89,
  author    = {Narendra, Kumpati S. and Thathachar, Mandayam A. L.},
  title     = {Learning Automata: An Introduction},
  publisher = {Prentice Hall},
  year      = {1989}
}

@misc{Neapolitan97,
  author  = {Neapolitan, Rich},
  title   = {Learning causes from statistical data},
  comment = {Originally typed as slides, which is not a BibTeX entry
             type; standard styles fall back to misc with a warning.}
}

@inproceedings{Natarajan87,
  author    = {Natarajan, B. K.},
  title     = {On Learning Boolean Functions},
  booktitle = stoc87,
  address   = {New York, New York},
  pages     = {296--304},
  month     = may,
  year      = {1987}
}

@inproceedings{Natarajan89,
  author    = {Natarajan, B. K.},
  title     = {On Learning From Exercises},
  booktitle = colt89,
  address   = {Santa Cruz, California},
  pages     = {72--87},
  month     = aug,
  year      = 1989
}

@book{Neveu75,
  author    = {Neveu, J.},
  title     = {Discrete-Parameter Martingales},
  year      = {1975},
  publisher = {North Holland}
}

@inproceedings{NgGoLo97,
  author    = {Hwee Tou Ng and Wei Boon Goh and Kok Leong Low},
  title     = {Feature Selection, Perceptron Learning, and a Usability
               Case Study for Text Categorization},
  booktitle = sigir97,
  pages     = {67--73},
  year      = 1997
}

@inproceedings{Niblett87,
  author    = {T. Niblett},
  title     = {Constructing Decision Trees in Noisy Domains},
  booktitle = {Progress in Machine Learning--Proceedings of EWSL 87:
               2nd European Working Session on Learning},
  editor    = {I. Bratko and N. Lavrac},
  address   = {Bled, Yugoslavia},
  pages     = {67--78},
  month     = may,
  year      = {1987}
}

@unpublished{NiyogiBe9?,
  author = {Niyogi, Partha and Berwick, Robert C.},
  title  = {A dynamical systems model for language change}
}

@techreport{NiyogiGi94,
  author = {Niyogi, Partha and Girosi, Federico},
  title  = {On the relationship between generalization error,
            hypothesis complexity, and sample complexity for radial
            basis functions}
}

@article{NolanPo87,
  author  = {Nolan, Deborah and Pollard, David},
  title   = {U-processes: Rates of Convergence},
  journal = {Annals of Statistics},
  year    = {1987},
  volume  = {15},
  number  = {2},
  pages   = {780--799}
}

@inproceedings{Novikoff62,
  author    = {Novikoff, A. B. J.},
  title     = {On convergence proofs on perceptrons},
  booktitle = {Proceedings of the Symposium on the Mathematical Theory
               of Automata},
  volume    = {XII},
  pages     = {615--622},
  year      = {1962}
}

@techreport{Oja95,
  author = {Oja, Erkki},
  title  = {The nonlinear {PCA} learning rule and signal separation ---
            mathematical analysis}
}

@inproceedings{OliverHa94,
  author    = {Oliver, Jonathan J. and Hand, David},
  title     = {Averaging over decision stumps},
  booktitle = ecml94,
  publisher = {Springer-Verlag},
  pages     = {231--241},
  year      = {1994}
}

@inproceedings{OliverHa95,
  author    = {Oliver, Jonathan J. and Hand, David J.},
  title     = {On pruning and averaging decision trees},
  booktitle = ml95,
  pages     = {430--437},
  year      = {1995}
}

@article{Omohundro87,
  author  = {Omohundro, S.},
  title   = {Efficient algorithms with neural network behavior},
  journal = {Complex Systems},
  volume  = 1,
  pages   = {273--347},
  year    = 1987
}

@article{OshersonStWe84,
  author  = {Daniel N. Osherson and Michael Stob and Scott Weinstein},
  title   = {Learning Theory and Natural Language},
  journal = {Cognition},
  volume  = {17},
  pages   = {1--28},
  year    = {1984},
  comment = {Presents argument that number of natural languages is
             finite.}
}

@inproceedings{OnodaRaMu00,
  author    = {Onoda, Takashi and R{\"a}tsch, Gunnar and
               M{\"u}ller, Klaus-Robert},
  title     = {Applying support vector machines and boosting to a
               non-intrusive monitoring system for household electric
               appliances with inverters},
  booktitle = {Proceedings of the Second ICSC Symposium on Neural
               Computation},
  year      = 2000
}

@book{OshersonStWe86,
  author    = {Daniel N. Osherson and Michael Stob and Scott Weinstein},
  title     = {Systems that Learn: An Introduction to Learning Theory
               for Cognitive and Computer Scientists},
  publisher = {MIT Press},
  year      = {1986},
  comment   = {Comprehensive recursion-theoretic treatment.}
}

@unpublished{OshersonStWe86b,
  author = {Daniel N. Osherson and Michael Stob and Scott Weinstein},
  title  = {Mathematical Learners pay a price for {B}ayesianism},
  note   = {(MIT Dept.\ of Brain and Cognitive Science)},
  year   = {1986}
}

@article{OshersonWe82,
  author  = {Daniel N. Osherson and Scott Weinstein},
  title   = {Criteria of Language Learning},
  journal = InfCtrl,
  volume  = {52},
  pages   = {123--138},
  year    = {1982},
  comment = {Studies relationship between intensional/extensional
             learning.}
}

@book{OsteyeeGo74,
  author    = {Osteyee, David Bridston and Good, Irving John},
  title     = {Information, Weight of Evidence, the Singularity between
               Probability Measures and Signal Detection},
  publisher = {Springer-Verlag},
  series    = {Lecture Notes in Mathematics},
  number    = 376,
  year      = 1974,
  comment   = {Text.  NOTE(review): second author added to match the
               citation key (Go); verify against the title page.}
}

@book{Owen82,
  author    = {Owen, Guillermo},
  title     = {Game Theory},
  edition   = {second},
  publisher = {Academic Press},
  year      = {1982}
}

@techreport{PagalloHa89,
  author      = {Pagallo, Giulia and Haussler, David},
  title       = {A Greedy Method for Learning {$\mu$DNF} Functions under
                 the Uniform Distribution},
  institution = ucsccrl,
  number      = {UCSC-CRL-89-12},
  month       = jun,
  year        = {1989}
}

@book{Palay85,
  author    = {Andrew J. Palay},
  title     = {Searching with Probabilities},
  publisher = {Pitman},
  year      = {1985},
  comment   = {Integration of probabilities into B* algorithm (Ph.D.
               thesis with Hans Berliner).}
}

@article{PanconesiSr97,
  author  = {Panconesi, Alessandro and Srinivasan, Aravind},
  title   = {Randomized Distributed Edge Coloring via an Extension of
             the {Chernoff-Hoeffding} Bounds},
  journal = {SIAM Journal on Computing},
  volume  = 26,
  number  = 2,
  pages   = {350--368},
  month   = apr,
  year    = 1997
}

@unpublished{Panchenko01,
  author = {Panchenko, Dmitriy},
  title  = {New zero-error bounds for voting algorithms},
  year   = {2001},
  note   = {Unpublished manuscript}
}

@article{PaoCa78,
  author  = {Pao, T. W. and J. W. {Carr III}},
  title   = {A solution of the syntactical induction-inference problem
             for regular languages},
  journal = {Computer Languages},
  volume  = 3,
  pages   = {53--64},
  year    = 1978
}

@techreport{PapadimitriouTs85,
  author      = {Papadimitriou, Christos H. and Tsitsiklis, John N.},
  title       = {The Complexity of Markov Decision Processes},
  institution = {MIT Laboratory for Information and Decision Systems},
  number      = {LIDS-P-1479},
  year        = 1985,
  comment     = {Ordinary versions are complete for P; partially
                 observed are PSPACE-complete.}
}

@misc{PapkaCaBa9?,
  author  = {Papka, Ron and Callan, James P. and Barto, Andrew G.},
  title   = {Text-based information retrieval using exponentiated
             gradient descent},
  comment = {Originally typed as unknown, which is not a BibTeX entry
             type; standard styles fall back to misc with a warning.}
}

@book{Parsons86,
  author    = {Parsons, Thomas W.},
  title     = {Voice and Speech Processing},
  year      = {1986},
  publisher = {McGraw-Hill}
}

@article{PaskVF60,
  author  = {Pask, Gordon and von Foerster, Heinz},
  title   = {A predictive model for self organizing systems, Parts I
             and II},
  journal = {Cybernetica},
  volume  = {III and IV},
  pages   = {258--300 and 20--55},
  year    = {1960 and 1961},
  comment = {n-person game theory. Learning automata.}
}

@book{PatelRe82,
  author    = {Patel, Jagdish K. and Read, Campbell B.},
  title     = {Handbook of the Normal Distribution},
  publisher = {Marcel Dekker},
  year      = {1982}
}

@techreport{Paturi88,
  author      = {Ramamohan Paturi},
  title       = {The Light Bulb Problem},
  institution = {Computer Science and Engineering},
  address     = {University of California, San Diego},
  year        = {1988},
  month       = aug
}

@article{Pearl78,
  author  = {Pearl, Judea},
  title   = {On the Connection Between the Complexity and Credibility
             of Inferred Models},
  journal = {International Journal of General Systems},
  volume  = 4,
  pages   = {255--264},
  year    = 1978,
  comment = {Studies tradeoff between uniqueness and ambiguity in the
             selection of hypotheses.
             Introduces the use of the Vapnik-Chervonenkis dimension.}
}

@article{PazzaniBi97,
  author  = {Pazzani, Michael and Billsus, Daniel},
  title   = {Learning and Revising User Profiles: The Identification of
             Interesting Web Sites},
  journal = ml,
  volume  = 27,
  pages   = {313--331},
  year    = 1997
}

@article{Pearl79,
  author  = {Judea Pearl},
  title   = {Capacity and Error Estimates for Boolean Classifiers with
             Limited Complexity},
  journal = {IEEE Transactions on Pattern Analysis and Machine
             Intelligence},
  volume  = {PAMI-1},
  number  = {4},
  pages   = {350--355},
  month   = oct,
  year    = {1979},
  comment = {An extension of [Pe78].}
}

@unpublished{Pearl84,
  author  = {Judea Pearl},
  title   = {Jeffrey's Rule and the Problem of Autonomous Inference
             Agents},
  note    = {Class notes for 274A, Fall 1984},
  year    = {1984},
  comment = {Stresses dangers of using posterior probabilities as new
             prior probabilities.}
}

@techreport{Pearl85a,
  author      = {Pearl, Judea},
  title       = {Bayesian Networks: A Model of Self-Activated Memory for
                 Evidential Reasoning},
  institution = {UCLA Computer Science Department},
  number      = {CSD-850021, R-43},
  month       = jun,
  year        = 1985,
  comment     = {Describes Bayesian networks and propagation in
                 singly-connected networks.}
}

@techreport{Pearl85b,
  author      = {Judea Pearl},
  title       = {How to do with Probabilities What People Say You Can't},
  institution = {UCLA Computer Science Department},
  number      = {CSD-850031, R-49},
  month       = sep,
  year        = {1985},
  comment     = {Describes Bayesian networks and propagation of beliefs.}
}

@techreport{PearlPa85,
  author      = {Judea Pearl and Azaria Paz},
  title       = {GRAPHOIDS: A Graph-Based Logic for Reasoning about
                 Relevance Relations, or When would x tell you more
                 about y if you already know z},
  institution = {UCLA Computer Science Department},
  month       = dec,
  year        = {1985},
  comment     = {Axiomatic definition of graphoids.}
}

@inproceedings{PearlPa86,
  author    = {Judea Pearl and Azaria Paz},
  title     = {On the Logic of Representing Dependencies by Graphs},
  booktitle = {Proceedings 1986 Canadian AI Conference},
  month     = may,
  year      = {1986},
  comment   = {Defines and studies graphoids for representing
               dependencies}
}

@book{PecaricPrTo92,
  author    = {Pe{\v{c}}ari{\'c}, Josip E. and Proschan, Frank and
               Tong, Y. L.},
  title     = {Convex Functions, Partial Orderings, and Statistical
               Applications},
  publisher = {Academic Press},
  year      = 1992
}

@inproceedings{PereiraRiSp??,
  author = {Pereira, Fernando and Riley, Michael and Sproat, Richard},
  title  = {Weighted rational transductions and their application to
            human language processing}
}

@article{PereiraSi97,
  author  = {Pereira, Fernando C. and Singer, Yoram},
  title   = {An efficient extension to mixture techniques for
             prediction and decision trees},
  journal = {Machine Learning},
  volume  = 36,
  year    = 1999
}

@mastersthesis{Perugini89,
  author = {Perugini, Nancy},
  title  = {Neural Network Learning: Effects of Network and Training
            Set Size},
  school = {MIT Department of Electrical Engineering and Computer
            Science},
  year   = {1989},
  month  = jun
}

@article{Peterson01,
  author  = {Peterson, A. Townsend},
  title   = {Predicting species' geographic distributions based on
             ecological niche modeling},
  journal = {The Condor},
  volume  = 103,
  pages   = {599--605},
  year    = 2001
}

@article{PhandinhLeLe86,
  author  = {Phan Dinh Dieu and Le Cong Thanh and Le Tuan Hoa},
  title   = {Average Polynomial Time Complexity of Some {NP}-Complete
             Problems},
  journal = {Theoretical Computer Science},
  volume  = 46,
  number  = {2, 3},
  pages   = {219--237},
  year    = 1986,
  comment = {NOTE(review): the original page range 219--327 looked like
               a transposition and was changed to 219--237; verify
               against the journal.}
}

@techreport{PinkerPi87,
  author      = {Pinker, Steven and Prince, Alan},
  title       = {On Language and Connectionism: Analysis of a Parallel
                 Distributed Processing Model of Language Acquisition},
  institution = {MIT Center for Cognitive Science},
  number      = {Occasional Paper \#33},
  year        = {1987},
  comment     = {Critique of Rumelhart and McClelland's connectionist
                 approach to learning English past tense construction.}
}

@inproceedings{Pisoni??,
  author  = {Pisoni, David B.},
  title   = {Some comments on invariance, variability and perceptual
             normalization in speech recognition},
  comment = {from pereira}
}

@techreport{Pitt85,
  author      = {Leonard Brian Pitt},
  title       = {Probabilistic Inductive Inference},
  institution = {Yale University Computer Science Department},
  number      = {YALEU/DCS/TR-400},
  month       = jun,
  year        = {1985},
  comment     = {Ph.D. thesis. Defines probabilistic inference and shows
                 probabilistic inference equivalence to teams of
                 inductive inference machines; and shows strict
                 hierarchy on probability with cut-points 1/2, 1/3,
                 1/4, ...}
}

@techreport{Pitt89,
  author      = {Leonard Pitt},
  title       = {Inductive Inference, {DFA}s, and Computational
                 Complexity},
  institution = {University of Illinois at Urbana-Champaign, Department
                 of Computer Science},
  number      = {UIUCDCS-R-89-1530},
  year        = {1989},
  month       = jul,
  note        = {Also appears in {\it Proceedings of the 1989
                 International Workshop on Analogical and Inductive
                 Inference}, Springer-Verlag Lecture Notes in Computer
                 Science}
}

@techreport{PittSm86,
  author      = {Leonard Pitt and Carl H. Smith},
  title       = {Probability and Plurality for Aggregations of Learning
                 Machines},
  institution = {University of Maryland Computer Science Department},
  number      = {CS-TR-1686},
  month       = jul,
  year        = {1986},
  comment     = {Show that one can not always trade off probabilism for
                 plurality and vice-versa.}
}

@techreport{PittVa86,
  author      = {Leonard Pitt and Leslie G. Valiant},
  title       = {Computational Limitations on Learning from Examples},
  institution = {Harvard University Aiken Computation Laboratory},
  month       = jul,
  year        = {1986},
  comment     = {It is NP-Complete to learn disjunction of two
                 monomials, Boolean threshold functions, Boolean
                 formulae where each variable occurs at most once.}
}

@article{PittVa88,
  author  = {Leonard Pitt and Leslie G. Valiant},
  title   = {Computational Limitations on Learning from Examples},
  journal = jacm,
  year    = {1988},
  month   = oct,
  volume  = {35},
  number  = {4},
  pages   = {965--984}
}

@unpublished{PittWa88,
  author = {Leonard Pitt and Manfred K. Warmuth},
  title  = {The Minimum {DFA} Consistency Problem Cannot be
            Approximated within any Polynomial},
  note   = {(unpublished manuscript)},
  year   = {1988}
}

@inproceedings{PittWa88b,
  author    = {Leonard Pitt and Manfred K. Warmuth},
  title     = {Reductions Among Prediction Problems: On the Difficulty
               of Predicting Automata},
  booktitle = {3rd IEEE Conference on Structure in Complexity Theory},
  pages     = {60--69},
  month     = jun,
  year      = 1988
}

@techreport{PittWa88c,
  author      = {Leonard Pitt and Manfred K. Warmuth},
  title       = {Prediction Preserving Reducibility},
  institution = ucsccrl,
  number      = {UCSC-CRL-88-26},
  month       = nov,
  year        = {1988}
}

@inproceedings{PittWa89,
  author    = {Leonard Pitt and Manfred K. Warmuth},
  title     = {The Minimum Consistent {DFA} Problem Cannot be
               Approximated within any Polynomial},
  booktitle = stoc89,
  year      = {1989},
  month     = may,
  note      = {Available as Technical Report UIUCDCS-R-89-1499,
               University of Illinois at Urbana-Champaign, Department
               of Computer Science.
               To appear, {\it Journal of the Association for Computing
               Machinery}}
}

@article{PittWa90,
  author  = {Leonard Pitt and Manfred K. Warmuth},
  title   = {Prediction-Preserving Reducibility},
  journal = jcss,
  year    = {1990},
  month   = dec,
  volume  = {41},
  number  = {3},
  pages   = {430--467}
}

@article{PittWa93,
  author  = {Leonard Pitt and Manfred K. Warmuth},
  title   = {The Minimum Consistent {DFA} Problem Cannot be
             Approximated within any Polynomial},
  journal = jacm,
  year    = {1993},
  month   = jan,
  volume  = {40},
  number  = {1},
  pages   = {95--142}
}

@article{PlotkinShTa95,
  author  = {Plotkin, Serge A. and Shmoys, David B. and
             Tardos, {\'E}va},
  title   = {Fast approximation algorithms for fractional packing and
             covering problems},
  journal = {Mathematics of Operations Research},
  volume  = 20,
  number  = 2,
  pages   = {257--301},
  month   = may,
  year    = 1995
}

@book{Pollard84,
  author    = {Pollard, David},
  title     = {Convergence of Stochastic Processes},
  year      = {1984},
  publisher = {Springer-Verlag}
}


@techreport{Porat87,
  author      = {Porat, Sara},
  title       = {Stability and Looping in Connectionist Models with
                 Asymmetric Weights},
  institution = {University of Rochester Computer Science Department},
  number      = {TR 210},
  month       = mar,
  year        = 1987,
  comment     = {Show that determining whether a network stabilizes is
                 NP-hard, under both synchronous and fair asynchronous
                 updating rules.}
}

@article{Powell73,
  author  = {Powell, M. J. D.},
  title   = {On search directions for minimization algorithms},
  journal = {Mathematical Programming},
  volume  = 4,
  pages   = {193--201},
  year    = 1973,
  comment = {NOTE(review): journal, volume, pages and year were added
             to satisfy the required fields of an article entry;
             verify against the journal.}
}

@article{Quinlan83,
  author  = {J. R. Quinlan},
  title   = {Inferno: A Cautious Approach to Uncertain Inference},
  journal = {The Computer Journal},
  volume  = {26},
  number  = {3},
  pages   = {255--269},
  year    = {1983},
  comment = {Survey of previous inference schemes. Proposes `cautious'
             scheme.}
}

@article{Quinlan86,
  author  = {J. R. Quinlan},
  title   = {Induction of Decision Trees},
  journal = ml,
  volume  = {1},
  pages   = {81--106},
  year    = {1986},
  comment = {Overview of the induction of decision trees.  Proposes
             information-theoretic measure for choosing decision
             attributes.  Discusses issue of noise and unknown
             attribute values.}
}

@article{Quinlan86b,
  author  = {Quinlan, J. R.},
  title   = {Simplifying Decision Trees},
  journal = {International Journal of Man-Machine Studies},
  volume  = 27,
  number  = 3,
  pages   = {221--234},
  year    = 1987,
  comment = {NOTE(review): the stale "(To appear.)" note was replaced
             by the published volume, number and pages; verify against
             the journal.}
}

@incollection{Quinlan86c,
  author    = {Quinlan, J. Ross},
  title     = {The Effect of Noise on Concept Learning},
  booktitle = {Machine Learning, An Artificial Intelligence Approach
               (Volume II)},
  chapter   = 6,
  pages     = {149--166},
  publisher = {Morgan Kaufmann},
  year      = 1986,
  comment   = {An empirical study of the effects of noise on a
               particular learning algorithm.  Concludes that
               classification noise is more harmful than attribute
               noise.}
}

@book{Quinlan93,
  author    = {Quinlan, J. Ross},
  title     = {C4.5: Programs for Machine Learning},
  publisher = {Morgan Kaufmann},
  year      = {1993}
}

@inproceedings{Quinlan96,
  author    = {Quinlan, J. R.},
  title     = {Bagging, Boosting, and {C4.5}},
  booktitle = {Proceedings of the Thirteenth National Conference on
               Artificial Intelligence},
  pages     = {725--730},
  year      = 1996
}

@inproceedings{Quinlan96b,
  title  = {Boosting First-Order Learning},
  author = {Quinlan, J. R.}
}

@article{QuinlanRi89,
  author  = {J. Ross Quinlan and Ronald L. Rivest},
  title   = {Inferring Decision Trees Using the Minimum Description
             Length Principle},
  journal = infcomp,
  year    = {1989},
  month   = mar,
  volume  = {80},
  number  = {3},
  pages   = {227--248},
  note    = {(An early version appeared as MIT LCS Technical report
             MIT/LCS/TM-339 (September 1987).)}
}

@article{RabinerJu86,
  author  = {L. R. Rabiner and B. H. Juang},
  title   = {An Introduction to Hidden Markov Models},
  journal = {IEEE ASSP Magazine},
  volume  = {3},
  number  = {1},
  pages   = {4--16},
  month   = jan,
  year    = {1986},
  comment = {Good introductory overview.}
}

@book{RabinerJu93,
  author    = {Rabiner, Lawrence and Juang, Bing-Hwang},
  title     = {Fundamentals of Speech Recognition},
  publisher = {Prentice Hall},
  year      = {1993}
}

@article{RabinerLeSo83,
  author  = {L. R. Rabiner and S. E. Levinson and M. M. Sondhi},
  title   = {On the Application of Vector Quantization and Hidden
             Markov Models to Speaker-Independent, Isolated Word
             Recognition},
  journal = {Bell System Technical Journal},
  volume  = {62},
  number  = {4},
  pages   = {1075--1105},
  month   = apr,
  year    = {1983},
  comment = {Gets 96.5 percent accuracy on 100-speaker set for digits.}
}

@book{RabinerSc78,
  author    = {Rabiner, Lawrence R. and Schafer, Ronald W.},
  title     = {Digital Processing of Speech Signals},
  publisher = {Prentice-Hall},
  year      = {1978}
}

@article{RatschOnMu01,
  author  = {R{\"a}tsch, G. and Onoda, T. and M{\"u}ller, K.-R.},
  title   = {Soft Margins for {AdaBoost}},
  journal = ml,
  volume  = 42,
  number  = 3,
  pages   = {287--320},
  year    = 2001
}

@inproceedings{RatschWa02,
  author    = {R{\"a}tsch, Gunnar and Warmuth, Manfred},
  title     = {Maximizing the Margin with Boosting},
  booktitle = colt02,
  pages     = {334--350},
  year      = 2002
}

@inproceedings{RatschWaMiOnLeMu00,
  author    = {R{\"a}tsch, Gunnar and Warmuth, Manfred and
               Mika, Sebastian and Onoda, Takashi and Lemm, Steven and
               M{\"u}ller, Klaus-Robert},
  title     = {Barrier boosting},
  booktitle = colt00,
  pages     = {170--179},
  year      = 2000
}

@misc{RaoTi97,
  author  = {Rao, J. Sunil and Tibshirani, Robert},
  title   = {The out-of-bootstrap method for model averaging and
             selection},
  comment = {Originally typed as unknown, which is not a BibTeX entry
             type; standard styles fall back to misc with a warning.}
}

@inproceedings{Raghavan88,
  author    = {Raghavan, Prabhakar},
  title     = {Learning in Threshold Networks},
  booktitle = colt88,
  publisher = {Morgan Kaufmann},
  pages     = {19--27},
  month     = aug,
  year      = 1988
}

@article{RednerWa84,
  author  = {Redner, Richard A. and Walker, Homer F.},
  title   = {Mixture densities, maximum likelihood and the {EM}
             algorithm},
  journal = {SIAM Review},
  volume  = 26,
  number  = 2,
  pages   = {195--239},
  year    = 1984,
  comment = {NOTE(review): journal, volume, number and pages were added
             to satisfy the required fields of an article entry;
             verify against the journal.}
}

@inproceedings{ResnickIaSuBeRi94,
  author    = {Resnick, Paul and Iacovou, Neophytos and
               Suchak, Mitesh and Bergstrom, Peter and Riedl, John},
  title     = {{GroupLens}: An Open Architecture for Collaborative
               Filtering of Netnews},
  booktitle = {Proceedings of Computer Supported Cooperative Work},
  year      = 1994,
  comment   = {NOTE(review): year changed from 1995 to 1994 to match
               the citation key and the CSCW '94 proceedings; author
               spelling corrected from Sushak to Suchak.  Verify.}
}


@inproceedings{RiccardiGoLjRi97,
  author    = {Riccardi, G. and Gorin, A. L. and Ljolje, A. and
               Riley, M.},
  title     = {Spoken language understanding for automated call routing},
  booktitle = {Proceedings of the 1997 IEEE International Conference on
               Acoustics, Speech, and Signal Processing},
  pages     = {1143--1146},
  year      = {1997}
}

@techreport{Rietman94,
  author = {Rietman, E. A.},
  title  = {Classical control theory, {K}olmogorov's theorem, and
            automata networks}
}

@inproceedings{RidgewayMaRi99,
  author    = {Ridgeway, Greg and Madigan, David and
               Richardson, Thomas},
  title     = {Boosting Methodology for Regression Problems},
  booktitle = {Proceedings of the International Workshop on AI and
               Statistics},
  pages     = {152--161},
  year      = 1999
}

@book{Rijsbergen79,
  author    = {van Rijsbergen, C. J.},
  title     = {Information Retrieval},
  publisher = {Butterworths},
  address   = {London},
  year      = 1979
}

@article{Rissanen78,
  author  = {Jorma Rissanen},
  title   = {Modeling By Shortest Data Description},
  journal = {Automatica},
  volume  = {14},
  pages   = {465--471},
  year    = {1978},
  comment = {Proposes that the best model is the one that minimizes the
             overall description length of the data, including the
             parameters of the model.}
}

@article{Rissanen83,
  author  = {Jorma Rissanen},
  title   = {A Universal Prior for Integers and Estimation by Minimum
             Description Length},
  journal = {The Annals of Statistics},
  volume  = {11},
  number  = {2},
  pages   = {416--431},
  year    = {1983}
}

@article{Rissanen83b,
  author  = {Rissanen, Jorma},
  title   = {A Universal Data Compression System},
  journal = ieeeit,
  year    = 1983,
  month   = sep,
  volume  = {IT-29},
  number  = 5,
  pages   = {656--664}
}

@article{Rissanen86a,
  author  = {Jorma Rissanen},
  title   = {Stochastic Complexity and Modeling},
  journal = {The Annals of Statistics},
  volume  = {14},
  number  = {3},
  pages   = {1080--1100},
  year    = {1986},
  comment = {Minimum Description Length Principle, and applications.}
}

@techreport{Rissanen86b,
  author      = {Rissanen, Jorma},
  title       = {Stochastic Complexity and Sufficient Statistics},
  institution = {IBM Research Laboratory (San Jose)},
  year        = 1986,
  comment     = {Defines notion of stochastic complexity of a string
                 relative to a class of models.  Describes two ways to
                 approximate stochastic complexity.}
}

@book{Rissanen89,
  author    = {Jorma Rissanen},
  title     = {Stochastic Complexity in Statistical Inquiry},
  series    = {Series in Computer Science},
  volume    = {15},
  publisher = {World Scientific},
  year      = {1989}
}

@article{Rissanen96,
  author  = {Rissanen, Jorma J.},
  title   = {Fisher information and stochastic complexity},
  journal = {IEEE Transactions on Information Theory},
  volume  = 42,
  number  = 1,
  pages   = {40--47},
  year    = 1996,
  issn    = {0018-9448}
}

@article{RissanenLa81,
  author  = {Rissanen, Jorma and Langdon, Jr., Glen G.},
  title   = {Universal Modeling and Coding},
  journal = {IEEE Transactions on Information Theory},
  volume  = {IT-27},
  number  = {1},
  pages   = {12--23},
  month   = jan,
  year    = {1981},
  comment = {Overview of first-in first-out arithmetic codes. Proves that
             alphabet extensions don't help coding efficiency.}
}


@article{RissanenSpYu92,
  author  = {Jorma Rissanen and Terry P. Speed and Bin Yu},
  title   = {Density Estimation by Stochastic Complexity},
  journal = {IEEE Transactions on Information Theory},
  volume  = {38},
  number  = {2},
  pages   = {315--323},
  month   = mar,
  year    = {1992}
}

@article{Rivest87,
  author  = {Rivest, Ronald L.},
  title   = {Learning Decision Lists},
  journal = ml,
  volume  = {2},
  number  = {3},
  pages   = {229--246},
  year    = {1987}
}

@misc{Rivest88p,
  author = {Rivest, Ronald L.},
  note   = {Personal communication}
}

@inproceedings{RivestSc87a,
  author    = {Rivest, Ronald L. and Robert E. Schapire},
  title     = {A New Approach to Unsupervised Learning in Deterministic
               Environments},
  booktitle = {Proceedings of the Fourth International Workshop on
               Machine Learning},
  comment   = {editor=   	{Pat Langley},
               address=  	{Irvine, California}},
  month     = jun,
  pages     = {364--375},
  year      = 1987
}

@inproceedings{RivestSc87b,
  author    = {Rivest, Ronald L. and Robert E. Schapire},
  title     = {Diversity-Based Inference of Finite Automata},
  booktitle = focs87,
  pages     = {78--87},
  month     = oct,
  year      = {1987},
  note      = {To appear, {\it Journal of the Association for
               Computing Machinery}},
  comment   = {address=  	{Los Angeles, California}}
}

@inproceedings{RivestSc89,
  author    = {Ronald L. Rivest and Robert E. Schapire},
  title     = {Inference of Finite Automata Using Homing Sequences},
  booktitle = stoc89,
  year      = 1989,
  month     = may,
  pages     = {411--420},
  note      = {To appear, {\it Information and Computation}}
}

@incollection{RivestSc90,
  author    = {Rivest, Ronald L. and Robert E. Schapire},
  title     = {A new approach to unsupervised learning in deterministic
               environments},
  editor    = {Yves Kodratoff and Ryszard Michalski},
  booktitle = {Machine Learning: An Artificial Intelligence Approach},
  volume    = {III},
  pages     = {670--684},
  publisher = {Morgan Kaufmann},
  year      = 1990
}

@article{RivestSc93,
  author  = {Ronald L. Rivest and Robert E. Schapire},
  title   = {Inference of Finite Automata Using Homing Sequences},
  journal = infcomp,
  volume  = {103},
  number  = {2},
  pages   = {299--347},
  month   = apr,
  year    = {1993}
}

@article{RivestSc94,
  author  = {Rivest, Ronald L. and Robert E. Schapire},
  title   = {Diversity-Based Inference of Finite Automata},
  journal = jacm,
  volume  = {41},
  number  = {3},
  pages   = {555--589},
  month   = may,
  year    = {1994}
}

@inproceedings{RivestSl88a,
  author    = {Rivest, Ronald L. and Robert Sloan},
  title     = {A New Model for Inductive Inference},
  editor    = {Moshe Vardi},
  booktitle = {Proceedings of the Second Conference on Theoretical
               Aspects of Reasoning about Knowledge},
  pages     = {13--27},
  publisher = {Morgan Kaufmann},
  month     = mar,
  year      = {1988}
}

@inproceedings{RivestSl88b,
  author       = {Ronald L. Rivest and Robert Sloan},
  title        = {Learning Complicated Concepts Reliably and Usefully},
  booktitle    = {Proceedings AAAI-88},
  organization = {American Association for Artificial Intelligence},
  pages        = {635--639},
  month        = aug,
  year         = 1988
}

@article{Robbins52,
  author  = {H. Robbins},
  title   = {Some aspects of the sequential design of experiments},
  journal = {Bulletin of the American Mathematical Society},
  volume  = 58,
  year    = 1952,
  pages   = {527--535}
}

@article{Robbins56,
  author  = {Herbert Robbins},
  title   = {A sequential decision problem with a finite memory},
  journal = {Proceedings of the National Academy of Sciences},
  volume  = {42},
  pages   = {920--923},
  year    = {1956}
}

@bookchapter{Rocchio6?,
  author = {J. J. Rocchio, Jr.},
  title  = {Relevance feedback in information retrieval}
}

@incollection{Rocchio71,
  author    = {J. Rocchio},
  title     = {Relevance feedback in information retrieval},
  booktitle = {The {Smart} retrieval system---experiments in automatic
               document processing},
  pages     = {313--323},
  publisher = {Prentice Hall},
  year      = 1971
}

@book{Rockafellar70,
  author    = {R. Tyrrell Rockafellar},
  title     = {Convex Analysis},
  publisher = {Princeton University Press},
  year      = {1970}
}

@unpublished{RocheryScRaGu01,
  author = {Marie Rochery and Robert Schapire and Mazin Rahim
            and Narendra Gupta},
  title  = {{BoosTexter} for text categorization in spoken
            language dialogue},
  year   = {2001},
  note   = {Unpublished manuscript}
}

@inproceedings{RocheryScRaGuRiBaAlDo02,
  author    = {M. Rochery and R. Schapire and M. Rahim and N. Gupta
               and G. Riccardi and S. Bangalore and H. Alshawi and
               S. Douglas},
  title     = {Combining prior knowledge and boosting for call
               classification in spoken language dialogue},
  booktitle = {International Conference on Acoustics, Speech and Signal
               Processing},
  year      = 2002
}

@article{Rodriguez89,
  author = {Carlos C. Rodriguez},
  title  = {The metrics induced by the {K}ullback number}
}

@article{RonRu??,
  author = {Dana Ron and Ronitt Rubinfeld},
  title  = {Learning fallible deterministic finite automata}
}

@inproceedings{RonSiTi94,
  author    = {Dana Ron and Yoram Singer and Naftali Tishby},
  title     = {Learning probabilistic automata with variable memory
               length},
  booktitle = colt94,
  pages     = {35--46},
  year      = {1994}
}

@article{RoseGuFo90,
  author = {Kenneth Rose and Eitan Gurewitz and Geoffrey C. Fox},
  title  = {Vector quantization by deterministic annealing}
}

@article{Rosenblatt58,
  author  = {Rosenblatt, F.},
  title   = {The Perceptron: A Probabilistic Model for Information Storage
             and Organization in the Brain},
  journal = {Psychological Review},
  volume  = {65},
  pages   = {386--407},
  year    = {1958},
  note    = {(Reprinted in {\sl Neurocomputing} (MIT Press, 1988).)},
  comment = {Classic article introducing the perceptron model}
}

@book{Rosenblatt62,
  author    = {Rosenblatt, F.},
  title     = {Principles of Neurodynamics},
  publisher = {Spartan},
  address   = {New York},
  year      = {1962}
}

@techreport{RossetZhHa03,
  author      = {Saharon Rosset and Ji Zhu and Trevor Hastie},
  title       = {Boosting as a Regularized Path to a Maximum Margin Classifier},
  institution = {Department of Statistics, Stanford University},
  year        = {2003}
}

@article{RothBe91,
  author  = {Ron M. Roth and Gyora M. Benedek},
  title   = {Interpolation and approximation of sparse
             multivariate polynomials over {GF(2)}},
  journal = sicomp,
  volume  = {20},
  number  = {2},
  pages   = {291--314},
  month   = apr,
  year    = {1991}
}

@inproceedings{Rudich85,
  author    = {Rudich, S.},
  title     = {Inferring the structure of a {M}arkov chain from its output},
  booktitle = focs85,
  pages     = {321--326},
  month     = oct,
  year      = {1985}
}

@inproceedings{RudinScDa04,
  author    = {Cynthia Rudin and
               Robert E. Schapire and
               Ingrid Daubechies},
  title     = {Boosting Based on a Smooth Margin},
  booktitle = colt04,
  year      = {2004},
  pages     = {502--517}
}

@incollection{RumelhartHiMc86,
  author    = {Rumelhart, David E. and Geoffrey E. Hinton and J. L.
               McClelland},
  title     = {A General Framework for Parallel Distributed Processing},
  editor    = {David E. Rumelhart and James L. McClelland},
  booktitle = {Parallel Distributed Processing (Volume I: Foundations)},
  chapter   = {2},
  pages     = {45--76},
  publisher = {MIT Press},
  year      = {1986},
  comment   = {Overview of various models}
}

@techreport{RumelhartHiWi85,
  author      = {Rumelhart, David E. and Geoffrey E. Hinton and Ronald J.
                 Williams},
  title       = {Learning Internal Representations by Error Propagation},
  institution = {Institute for Cognitive Science, U.C. San Diego},
  number      = {ICS Report 8506},
  month       = sep,
  year        = {1985},
  note        = {To appear in {\sl Parallel Distributed Processing:
                 Explorations in the Microstructure of Cognition}, Vol. 1,
                 edited by Rumelhart and McClelland (MIT Press)},
  comment     = {Introduces `generalized delta rule' for back-propagating
                 information in a network of deterministic sigmoid (logistic)
                 rules.}
}

@incollection{RumelhartHiWi86,
  author    = {Rumelhart, David E. and Geoffrey E. Hinton and Ronald J.
               Williams},
  title     = {Learning Internal Representations by Error Propagation},
  editor    = {David E. Rumelhart and James L. McClelland},
  booktitle = {Parallel Distributed Processing -- Explorations in the
               Microstructure of Cognition},
  chapter   = {8},
  pages     = {318--362},
  publisher = {MIT Press},
  year      = {1986},
  comment   = {Classic paper introducing the generalized delta rule
               (back-propagation).}
}

@book{RumelhartMc86,
  editor    = {Rumelhart, David E. and McClelland, James L.},
  title     = {Parallel Distributed Processing},
  publisher = {MIT Press},
  year      = {1986},
  comment   = {Overview of various models}
}

@article{RumelhartZi85,
  author  = {Rumelhart, David E. and David Zipser},
  title   = {Feature Discovery by Competitive Learning},
  journal = {Cognitive Science},
  volume  = {9},
  pages   = {75--112},
  year    = {1985},
  comment = {Historical survey of perceptrons etc.; competitive learning
             in a layered network with inhibitory clusters}
}

@article{Rustichini99,
  author  = {Aldo Rustichini},
  title   = {Minimizing Regret: The General Case},
  journal = {Games and Economic Behavior},
  year    = 1999,
  volume  = 29,
  pages   = {224--243}
}

@phdthesis{Sakakibara91,
  author = {Yasubumi Sakakibara},
  title  = {Algorithmic Learning of Formal Languages and Decision
            Trees},
  school = {Tokyo Institute of Technology},
  month  = oct,
  year   = {1991},
  note   = {Research Report IIAS-RR-91-22E,
            International Institute for Advanced Study of Social
            Information Science, Fujitsu Laboratories, Ltd.}
}

@book{Salton89,
  author    = {Gerard Salton},
  title     = {Automatic text processing: the transformation,
               analysis and retrieval of information by computer},
  publisher = {Addison-Wesley},
  year      = 1989
}

@article{Salton91,
  author  = {Gerard Salton},
  title   = {Developments in Automatic Text Retrieval},
  journal = {Science},
  volume  = 253,
  pages   = {974--980},
  year    = 1991
}

@book{SaltonMc83,
  author    = {Gerard Salton and Michael J. McGill},
  title     = {Introduction to Modern Information Retrieval},
  publisher = {McGraw-Hill},
  year      = 1983
}

@techreport{Salzberg88,
  author      = {Steven Salzberg},
  title       = {Exemplar-based learning:  theory and implementation},
  institution = {Harvard University},
  type        = {Center for Research in Computing Technology},
  number      = {TR-10-88},
  address     = {Cambridge, Mass.},
  month       = oct,
  year        = 1988,
  comment     = {Actually uses learning of differences of orthogonal
                 rectangles stuff in a practical application (analyzing breast
                 cancer statistics)}
}

@article{Samaranayake92,
  author = {K. Samaranayake},
  title  = {Stay-with-a-winner rule for dependent {B}ernoulli
            bandits}
}

@article{Samuel59,
  author  = {A. L. Samuel},
  title   = {Some studies in machine learning using the game of checkers},
  journal = {IBM Journal of Research and Development},
  volume  = {3},
  pages   = {211--229},
  month   = jul,
  year    = {1959},
  note    = {(Reprinted in {\em Computers and Thought}, (eds.
             E. A. Feigenbaum and J. Feldman), McGraw-Hill, 1963,
             pages 39--70).}
}

@article{Samuelson69,
  author = {Paul A. Samuelson},
  title  = {Lifetime portfolio selection by dynamic stochastic
            programming},
  year   = {1969}
}

@article{Samuelson71,
  author = {Paul A. Samuelson},
  title  = {The ``fallacy'' of maximizing the geometric mean in
            long sequences of investing or gambling},
  year   = 1971
}

@article{Samuelson79,
  author = {Paul A. Samuelson},
  title  = {Why we should not make mean log of wealth big though
            years to act are long},
  year   = {1979}
}

@article{SanthaVa86,
  author  = {M. Santha and U. V. Vazirani},
  title   = {Generating Quasi-random Sequences from Semi-random
             Sources},
  journal = jcss,
  volume  = {33},
  number  = {1},
  pages   = {75--87},
  month   = aug,
  year    = {1986}
}

@techreport{Sarkar91,
  author = {D. Sarkar},
  title  = {Nonsubscriber card behavior score model development
            --- overlimit accounts}
}

@misc{SauerHiFa01,
  author       = {Sauer, J. R. and J. E. Hines and J. Fallon},
  title        = {The {N}orth {A}merican Breeding Bird Survey, Results and
                  Analysis 1966--2000, {V}ersion 2001.2},
  howpublished = {http://www.mbr-pwrc.usgs.gov/bbs/bbs.html},
  year         = {2001},
  note         = {USGS Patuxent Wildlife Research Center, Laurel, MD}
}

@unknown{Saul98,
  author = {Lawrence K. Saul},
  title  = {Automatic segmentation of continuous trajectories
            with invariance to nonlinear warpings of time},
  year   = {1998}
}

@article{SatoAbTa88,
  author = {Mitsuo Sato and Kenichi Abe and Hiroshi Takeda},
  title  = {Learning control of finite {Markov} chains with an
            explicit trade-off between estimation and control}
}

@unknown{SaulPe??,
  author = {Lawrence Saul and Fernando Pereira},
  title  = {Aggregate and mixed-order {Markov} models for
            statistical language processing}
}

@unknown{SaulRa98,
  author = {Lawrence K. Saul and Mazin Rahim},
  title  = {Maximum likelihood and minimum classification error
            factor analysis for automatic speech recognition},
  year   = {1998}
}

@article{Sauer72,
  author  = {N. Sauer},
  title   = {On the density of families of sets},
  journal = {Journal of Combinatorial Theory Series A},
  year    = 1972,
  volume  = 13,
  pages   = {145--147}
}

@unpublished{SaulJo93,
  author = {Lawrence Saul and Michael Jordan},
  title  = {Learning in {Boltzmann} Trees},
  note   = {Unpublished manuscript}
}

@article{Schaffer93,
  author = {Cullen Schaffer},
  title  = {Overfitting avoidance as bias}
}

@mastersthesis{Schapire88,
  author = {Schapire, Robert Elias},
  title  = {Diversity-based Inference of Finite Automata},
  school = mit,
  month  = may,
  year   = {1988},
  note   = {Supervised by Ronald L. Rivest.
            Technical Report MIT/LCS/TR-413,
            MIT Laboratory for Computer Science}
}

@inproceedings{Schapire89,
  author    = {Schapire, Robert E.},
  title     = {The Strength of Weak Learnability},
  booktitle = focs89,
  pages     = {28--33},
  month     = oct,
  year      = {1989}
}

@inproceedings{Schapire90,
  author    = {Schapire, Robert E.},
  title     = {Pattern Languages Are Not Learnable},
  booktitle = colt90,
  pages     = {122--129},
  month     = aug,
  year      = {1990}
}

@article{Schapire90b,
  author  = {Robert E. Schapire},
  title   = {The Strength of Weak Learnability},
  journal = ml,
  volume  = {5},
  number  = {2},
  pages   = {197--227},
  year    = {1990}
}

@techreport{Schapire90c,
  author      = {Robert E. Schapire},
  title       = {The Emerging Theory of Average-case Complexity},
  institution = mitlcs,
  number      = {Technical memo MIT/LCS/TM-431},
  month       = jun,
  year        = {1990}
}

@phdthesis{Schapire91,
  author = {Robert Elias Schapire},
  title  = {The Design and Analysis of Efficient Learning Algorithms},
  school = mit,
  month  = feb,
  year   = {1991},
  note   = {Supervised by Ronald~L. Rivest.
            Technical Report MIT/LCS/TR-493,
            MIT Laboratory for Computer Science}
}

@inproceedings{Schapire91b,
  author    = {Robert E. Schapire},
  title     = {Learning Probabilistic Read-once Formulas on Product
               Distributions},
  booktitle = colt91,
  month     = aug,
  year      = {1991},
  note      = {To appear, {\it Machine Learning}}
}

@book{Schapire92,
  author    = {Robert E. Schapire},
  title     = {The Design and Analysis of Efficient Learning
               Algorithms},
  publisher = {MIT Press},
  year      = {1992}
}

@article{Schapire94,
  author  = {Robert E. Schapire},
  title   = {Learning Probabilistic Read-once Formulas on Product
             Distributions},
  journal = ml,
  volume  = {14},
  number  = {1},
  pages   = {47--81},
  year    = {1994}
}

@inproceedings{Schapire97,
  author    = {Robert E. Schapire},
  title     = {Using output codes to boost multiclass learning problems},
  booktitle = ml97,
  year      = 1997,
  pages     = {313--321}
}

@inproceedings{SchapireFrBaLe97,
  author    = {Robert E. Schapire and Yoav Freund and Peter Bartlett
               and Wee Sun Lee},
  title     = {Boosting the margin: A new explanation for the
               effectiveness of voting methods},
  booktitle = ml97,
  year      = 1997,
  pages     = {322--330},
  note      = {To appear, {\em The Annals of Statistics}}
}

@inproceedings{Schapire99b,
  author    = {Robert E. Schapire},
  title     = {Drifting Games},
  booktitle = colt99,
  year      = {1999}
}

@inproceedings{Schapire99c,
  author    = {Robert E. Schapire},
  title     = {A brief introduction to boosting},
  booktitle = {Proceedings of the Sixteenth International Joint
               Conference on Artificial Intelligence},
  year      = {1999}
}

@inproceedings{Schapire99d,
  author    = {Robert E. Schapire},
  title     = {Theoretical views of boosting and applications},
  booktitle = {Tenth International Conference on Algorithmic
               Learning Theory},
  year      = 1999,
  url       = {http://www.research.att.com/~schapire/papers/Schapire99d.ps.gz}
}

@article{Schapire01,
  author  = {Robert E. Schapire},
  title   = {Drifting Games},
  journal = ml,
  year    = {2001},
  volume  = {43},
  number  = {3},
  pages   = {265--291},
  month   = jun
}

@article{SchapireFrBaLe98,
  author  = {Robert E. Schapire and Yoav Freund and Peter Bartlett
             and Wee Sun Lee},
  title   = {Boosting the margin: A new explanation for the
             effectiveness of voting methods},
  journal = annstat,
  year    = 1998,
  month   = oct,
  volume  = 26,
  number  = 5,
  pages   = {1651--1686}
}

@inproceedings{Schapire02,
  author    = {Robert E. Schapire},
  title     = {The boosting approach to machine learning: An overview},
  booktitle = {MSRI Workshop on Nonlinear Estimation and Classification},
  year      = {2002}
}

@inproceedings{Schapire03,
  author    = {Robert E. Schapire},
  editor    = {D. D. Denison and M. H. Hansen and C. Holmes and
               B. Mallick and B. Yu},
  title     = {The boosting approach to machine learning: An overview},
  booktitle = {Nonlinear Estimation and Classification},
  publisher = {Springer},
  year      = 2003
}

@inproceedings{SchapireSe93,
  author    = {Robert E. Schapire and Linda M. Sellie},
  title     = {Learning sparse multivariate polynomials over a field
               with queries and counterexamples},
  booktitle = colt93,
  pages     = {17--26},
  month     = jul,
  year      = {1993}
}

@inproceedings{SchapireRoRaGu02,
  author    = {Robert E. Schapire and Marie Rochery and Mazin Rahim
               and Narendra Gupta},
  title     = {Incorporating prior knowledge into boosting},
  booktitle = ml02,
  year      = {2002}
}

@article{SchapireSe96,
  author  = {Robert E. Schapire and Linda M. Sellie},
  title   = {Learning sparse multivariate polynomials over a field
             with queries and counterexamples},
  journal = jcss,
  volume  = 52,
  number  = 2,
  pages   = {201--213},
  month   = apr,
  year    = 1996
}

@inproceedings{SchapireSi98,
  author    = {Robert E. Schapire and Yoram Singer},
  title     = {Improved boosting algorithms using confidence-rated predictions},
  booktitle = colt98,
  year      = 1998,
  pages     = {80--91},
  note      = {To appear, {\it Machine Learning}}
}

@article{SchapireSi98b,
  author  = {Robert E. Schapire and Yoram Singer},
  title   = {{BoosTexter}: A boosting-based system for
             text categorization},
  journal = ml,
  year    = {{to appear}}
}

@article{SchapireSi99,
  author  = {Robert E. Schapire and Yoram Singer},
  title   = {Improved boosting algorithms using confidence-rated
             predictions},
  journal = ml,
  year    = 1999,
  volume  = 37,
  number  = 3,
  month   = dec,
  pages   = {297--336}
}

@article{SchapireSi00,
  author  = {Robert E. Schapire and Yoram Singer},
  title   = {{BoosTexter}: A boosting-based system for
             text categorization},
  journal = ml,
  year    = 2000,
  volume  = 39,
  number  = {2/3},
  pages   = {135--168},
  month   = may # "/" # jun
}

@inproceedings{SchapireSiSi98,
  author    = {Robert E. Schapire and Yoram Singer and Amit Singhal},
  title     = {Boosting and {R}occhio applied to text filtering},
  booktitle = {Proceedings of the 21st Annual
               International Conference on Research and Development
               in Information Retrieval},
  year      = {1998},
  comment   = {booktitle was: SIGIR '98: Proceedings of the 21st Annual
               International Conference on Research and Development
               in Information Retrieval}
}

@inproceedings{SchapireWa94,
  author    = {Robert E. Schapire and Manfred K. Warmuth},
  title     = {On the worst-case analysis of temporal-difference
               learning algorithms},
  booktitle = ml94,
  pages     = {266--274},
  publisher = {Morgan Kaufmann},
  month     = jul,
  year      = {1994},
  note      = {To appear, {\it Machine Learning}}
}

@inproceedings{SchapireStMcLiCs02,
  author    = {Robert E. Schapire and
               Peter Stone and
               David McAllester and
               Michael L. Littman and
               J{\'a}nos A. Csirik},
  title     = {Modeling Auction Price Uncertainty
               Using Boosting-based Conditional Density Estimation},
  booktitle = ml02,
  year      = 2002
}

@article{SchapireWa96,
  author  = {Robert E. Schapire and Manfred K. Warmuth},
  title   = {On the worst-case analysis of temporal-difference
             learning algorithms},
  journal = ml,
  year    = 1996,
  volume  = 22,
  number  = {1/2/3},
  pages   = {95--121}
}

@techreport{ScholkopfSmWiBa98,
  author      = {B. Sch{\"o}lkopf and A. Smola and R. Williamson and P. Bartlett},
  title       = {New Support Vector Algorithms},
  institution = {NeuroCOLT2},
  year        = 1998,
  number      = {NC2-TR-1998-053}
}

@inproceedings{Schwenk99,
  author    = {Holger Schwenk},
  title     = {Using boosting to improve a hybrid {HMM}/neural
               network speech recognizer},
  booktitle = {IEEE International Conference On Acoustics, Speech,
               and Signal Processing},
  pages     = {II:1009--1012},
  year      = 1999
}

@inproceedings{SchwenkBe98,
  author    = {Holger Schwenk and Yoshua Bengio},
  title     = {Training methods for adaptive boosting of neural
               networks},
  booktitle = nips10,
  pages     = {647--653},
  year      = 1998
}

@article{SejnowskiRo87,
  author  = {Sejnowski, Terrence J. and Charles R. Rosenberg},
  title   = {Parallel Networks that Learn to Pronounce English Text},
  journal = {Journal of Complex Systems},
  volume  = {1},
  number  = {1},
  pages   = {145--168},
  month   = feb,
  year    = {1987},
  comment = {Classic paper covering the NETtalk system, which learns to
             convert English text to speech.}
}

@unpublished{SelfCh87,
  author = {Self, Matthew and Cheeseman, Peter C.},
  title  = {Bayesian Prediction for Artificial Intelligence},
  note   = {(unpublished manuscript)}
}

@book{Seneta81,
  author    = {E. Seneta},
  title     = {Non-negative Matrices and Markov Chains},
  edition   = {second},
  publisher = {Springer-Verlag},
  year      = {1981}
}

@article{SeungSoTi92,
  author = {H. S. Seung and H. Sompolinsky and N. Tishby},
  title  = {Statistical mechanics of learning from examples},
  year   = 1992
}

@inproceedings{ShackelfordVo88,
  author    = {George Shackelford and Dennis Volper},
  title     = {Learning {k-DNF} with Noise in the Attributes},
  booktitle = colt88,
  year      = {1988},
  address   = {Cambridge, Mass.},
  month     = aug,
  pages     = {97--103},
  publisher = {Morgan Kaufmann},
  comment   = {Shows how to pac learn kDNF with random attribute noise of
               up to one half.}
}

@book{ShaferVo01,
  author    = {Glenn Shafer and Vladimir Vovk},
  title     = {Probability and Finance, it's only a game!},
  publisher = {Wiley},
  year      = {2001}
}

@article{Shannon48,
  author  = {C. E. Shannon},
  title   = {A Mathematical Theory of Communication},
  journal = {The Bell System Technical Journal},
  year    = {1948}
}

@article{Shannon49,
  author  = {Shannon, Claude},
  title   = {Communication theory of secrecy systems},
  journal = {Bell System Technical Journal},
  volume  = {28},
  pages   = {656--715},
  month   = oct,
  year    = {1949}
}

@techreport{Shapiro81,
  author      = {Shapiro, Ehud Y.},
  title       = {Inductive Inference of Theories From Facts},
  institution = {Yale University Department of Computer Science},
  number      = {Research Report 192},
  month       = feb,
  year        = {1981},
  comment     = {Uses Horn clauses as representation and backtracing as a
                 method to discover axiom system for a given set of examples}
}

@inproceedings{ShardanandMa95,
  author    = {Upendra Shardanand and Pattie Maes},
  title     = {Social Information Filtering: Algorithms for Automating
               ``Word of Mouth''},
  booktitle = {Human Factors in Computing Systems {CHI'95}
               Conference Proceedings},
  year      = {1995}
}

@book{ShavlikDi90,
  editor    = {Jude W. Shavlik and Thomas G. Dietterich},
  title     = {Readings in Machine Learning},
  publisher = {Morgan Kaufmann},
  year      = {1990},
  comment   = {address= San Mateo}
}

@inproceedings{Shawe-TaylorBaWiAn96,
  author    = {John Shawe-Taylor and Peter~L. Bartlett
               and Robert C.~Williamson and Martin Anthony},
  title     = {A Framework for Structural Risk Minimisation},
  booktitle = colt96,
  pages     = {68--76},
  year      = {1996},
  comment   = {See also NeuroCOLT tech report NC-TR-96-053}
}

@techreport{Shawe-TaylorBaWiAn96b,
  author      = {John Shawe-Taylor and Peter~L. Bartlett
                 and Robert C.~Williamson and Martin Anthony},
  title       = {Structural risk minimization over data-dependent
                 hierarchies},
  institution = {NeuroCOLT},
  year        = 1996,
  number      = {NC-TR-96-053}
}

@techreport{Shawe-TaylorCr98,
  author      = {John Shawe-Taylor and Nello Cristianini},
  title       = {Robust Bounds on Generalization from the Margin
                 Distribution},
  institution = {NeuroCOLT2},
  number      = {NC2-TR-1998-029},
  month       = oct,
  year        = {1998}
}

@inproceedings{Shawe-TaylorWi97,
  author    = {John Shawe-Taylor and Robert C.~Williamson},
  title     = {A {PAC} Analysis of a {Bayesian} Estimator},
  booktitle = colt97,
  pages     = {2--9},
  year      = {1997}
}

@article{ShimojoIc89,
  author  = {Shinsuke Shimojo and Shin'ichi Ichikawa},
  title   = {Intuitive reasoning about probability:
             Theoretical and experimental analyses of the
             ``problem of the three prisoners''},
  journal = {Cognition},
  volume  = {32},
  pages   = {1--24},
  year    = {1989}
}

@article{ShinoharaMi91,
  author  = {Ayumi Shinohara and Satoru Miyano},
  title   = {Teachability in Computational Learning},
  journal = {New Generation Computing},
  volume  = {8},
  pages   = {337--347},
  year    = {1991}
}

@inproceedings{SchohnCo00,
  author    = {Greg Schohn and David Cohn},
  title     = {Less is More: Active Learning with Support Vector Machines},
  booktitle = ml00,
  year      = {2000}
}

@article{ShoreGr82,
  author  = {Shore, John E. and Robert M. Gray},
  title   = {Minimum Cross-Entropy Pattern Classification and Cluster
             Analysis},
  journal = {IEEE Transactions on Pattern Analysis and Machine
             Intelligence},
  volume  = {PAMI-4},
  number  = {1},
  pages   = {11--17},
  month   = jan,
  year    = {1982},
  comment = {Uses minimum cross-entropy distribution to derive variation on
             nearest-neighbor classification rules.}
}

@article{ShoreJo80,
  author  = {Shore, John E. and Rodney W. Johnson},
  title   = {Axiomatic Derivation of the Principle of Maximum Entropy and
             the Principle of Minimum Cross-Entropy},
  journal = {IEEE Transactions on Information Theory},
  volume  = {IT-26},
  number  = {1},
  pages   = {26--37},
  month   = jan,
  year    = {1980},
  comment = {Derives maximum entropy principle from principles of
             uniqueness, invariance under coordinate systems, system
             independence, and subset independence}
}

@article{ShoreJo81,
  author  = {Shore, John E. and Rodney W. Johnson},
  title   = {Properties of Cross-Entropy Minimization},
  journal = {IEEE Transactions on Information Theory},
  volume  = {IT-27},
  number  = {4},
  pages   = {472--482},
  month   = jul,
  year    = {1981},
  comment = {General overview of properties.}
}

@inproceedings{Shtarkov75,
  author    = {Yu. M. Shtarkov},
  title     = {Coding of Discrete Sources with Unknown Statistics},
  booktitle = {Topics in Information Theory},
  editor    = {I. Csiszar and P. Elias},
  year      = 1975,
  publisher = {North Holland},
  address   = {Amsterdam},
  pages     = {559--574}
}

@article{Shtarkov87,
  author  = {Y. M. Shtar'kov},
  title   = {Universal Sequential Coding of Single Messages},
  journal = {Problems of Information Transmission (translated
             from Russian)},
  year    = 1987,
  volume  = 23,
  month   = jul # "--" # sep,
  pages   = {175--186}
}

@article{ShtarkovChWi95,
  author   = {Shtarkov, Yu. M. and Tjalkens, Tj. J. and Willems,
              Frans M. J.},
  title    = {Multi-alphabetical universal coding of memoryless sources},
  journal  = {Problemy Peredachi Informatsii},
  fjournal = {Rossi\u\i skaya Akademiya Nauk. Problemy Peredachi
              Informatsii},
  volume   = 31,
  year     = 1995,
  number   = 2,
  pages    = {20--35}
}

@unpublished{Shvaytser88,
  author = {Haim Shvaytser},
  title  = {Linear Manifolds are Learnable From Positive Examples},
  month  = apr,
  year   = 1988,
  note   = {Unpublished manuscript}
}

@inbook{Siegel94,
  author = {Siegel, Eric V.},
  title  = {Competitively evolving decision trees against fixed
            training cases for natural language processing},
  year   = {1994}
}

@inproceedings{SiegelmannSo92,
  author    = {Hava T. Siegelmann and Eduardo D. Sontag},
  title     = {On the computational power of neural nets},
  booktitle = colt92,
  pages     = {440--449},
  month     = jul,
  year      = {1992}
}

@article{Simon54,
  author  = {Simon, Herbert A.},
  title   = {Spurious Correlation: A Causal Interpretation},
  journal = {Journal of the American Statistical Association},
  pages   = {407--479},
  year    = {1954},
  comment = {Concludes we need a priori assumptions of independence or
             causality.}
}

@article{Simon56,
  author  = {Simon, Herbert A.},
  title   = {Rational Choice and the Structure of the Environment},
  journal = {Psychological Review},
  volume  = {63},
  number  = {2},
  pages   = {129--138},
  year    = {1956},
  comment = {Model of a creature with multiple goals (e.g. food, water),
             in a tree-structured environment with limited look-ahead.}
}

@incollection{Simon83,
  author    = {Simon, Herbert A.},
  title     = {Why Should Machines Learn?},
  booktitle = {Machine Learning, An Artificial Intelligence Approach},
  editor    = {R. S. Michalski and J. G. Carbonell and T. M. Mitchell},
  publisher = {Tioga},
  address   = {Palo Alto, California},
  year      = 1983
}

@article{Simon96,
  author = {Hans Ulrich Simon},
  title  = {General bounds on the number of examples needed for
            learning probabilistic concepts}
}

@article{SinclairJe89,
  author  = {Sinclair, Alistair and Mark Jerrum},
  title   = {Approximate Counting, Uniform Generation and
             Rapidly Mixing Markov Chains},
  journal = infcomp,
  volume  = {82},
  number  = {1},
  pages   = {93--133},
  month   = jul,
  year    = {1989}
}


@article{Singer97,
  author  = {Yoram Singer},
  title   = {Adaptive mixtures of probabilistic transducers},
  journal = {Neural Computation},
  volume  = {9},
  number  = {8},
  pages   = {1711--1734},
  year    = 1997
}

@article{Singer97b,
  author  = {Yoram Singer},
  title   = {Switching Portfolios},
  journal = {International Journal of Neural Systems},
  year    = 1997,
  volume  = 8,
  number  = 4,
  pages   = {445--455},
  month   = aug
}

@inproceedings{Singer00,
  author    = {Yoram Singer},
  title     = {Leveraged Vector Machines},
  booktitle = nips12,
  year      = {2000}
}

@article{SinghLiKeWa02,
  author  = {Satinder Singh and Diane Litman and Michael Kearns and
             Marilyn Walker},
  title   = {Optimizing Dialogue Management with Reinforcement
             Learning: Experiments with the {NJFun} System},
  journal = jair,
  volume  = 16,
  pages   = {105--133},
  year    = 2002
}

@unpublished{Skalak97,
  author = {David B. Skalak},
  title  = {The Sources of Increased Accuracy for Two Proposed
            Boosting Algorithms},
  note   = {Unpublished manuscript}
}

@unpublished{Sloan87,
  author = {Sloan, Robert H.},
  title  = {Some Notes on {C}hernoff Bounds},
  year   = 1987,
  note   = {(Unpublished)}
}

@inproceedings{Sloan88,
  author    = {Robert H. Sloan},
  title     = {Types of Noise in Data for Concept Learning},
  booktitle = colt88,
  pages     = {91--96},
  month     = aug,
  year      = {1988}
}

@article{Solomonoff64a,
  author  = {Solomonoff, R. J.},
  title   = {A Formal Theory of Inductive Inference. Part I.},
  journal = infctrl,
  volume  = 7,
  pages   = {1--22},
  year    = 1964,
  comment = {Concerned with extrapolation of sequences. Defines
             probability of extension via likelihood random TM program
             will generate it.}
}

@article{Solomonoff64b,
  author  = {Solomonoff, R. J.},
  title   = {A Formal Theory of Inductive Inference. Part II.},
  journal = infctrl,
  volume  = 7,
  pages   = {224--254},
  year    = 1964,
  comment = {Continues Part I. Inference of probabilities and grammars.}
}

@article{SompolinskyBa93,
  author = {H. Sompolinsky and N. Barkai},
  title  = {Theory of learning from examples}
}

@article{Specht67,
  author  = {D. F. Specht},
  title   = {Generation of polynomial discriminant functions for pattern
             recognition},
  journal = {IEEE Transactions on Electronic Computers},
  volume  = {EC-16},
  number  = 3,
  pages   = {308--319},
  year    = 1967
}

@book{Spencer87,
  author    = {Spencer, Joel},
  title     = {Ten Lectures on the Probabilistic Method},
  publisher = {Society for Industrial and Applied Mathematics},
  address   = {Philadelphia},
  year      = 1987
}

@article{Spencer92,
  author   = {Spencer, Joel},
  title    = {Ulam's searching game with a fixed number of lies},
  journal  = {Theoret. Comput. Sci.},
  fjournal = {Theoretical Computer Science},
  volume   = {95},
  number   = {2},
  pages    = {307--321},
  year     = {1992},
  issn     = {0304-3975},
  coden    = {TCSDI},
  mrclass  = {90D40},
  mrnumber = {92k:90146},
  optnote  = {The analysis in this paper is closely related to the
              Binomial Weights algorithm. CesabianchiFrHeWa96}
}


@article{Staiger98,
  author  = {Ludwig Staiger},
  title   = {A tight upper bound on {Kolmogorov} complexity and
             uniformly optimal prediction},
  journal = {Theory of Computing Systems},
  volume  = 31,
  number  = 3,
  pages   = {215--229},
  year    = 1998
}

@article{Stockmeyer85,
  author  = {Larry Stockmeyer},
  title   = {On Approximation Algorithms for {\#P}},
  journal = sicomp,
  volume  = 14,
  number  = 4,
  pages   = {849--861},
  year    = 1985
}

@article{StockwellPe99,
  author  = {D. Stockwell and D. Peters},
  title   = {The {GARP} modelling system: problems and solutions to
             automated spatial prediction},
  journal = {International Journal of Geographical Information Science},
  volume  = 13,
  pages   = {143--158},
  year    = 1999
}

@book{StolerBu92,
  author    = {J. Stoer and R. Bulirsch},
  title     = {Introduction to Numerical Analysis},
  publisher = {Springer-Verlag},
  year      = 1992
}

@article{Stone94,
  author  = {Charles J. Stone},
  title   = {The Use of Polynomial Splines and their Tensor
             Products in Multivariate Function Estimation},
  journal = annstat,
  volume  = 22,
  number  = 1,
  pages   = {118--184},
  year    = 1994
}

@inproceedings{StoneScCsLiMc02,
  author    = {Peter Stone and Robert E. Schapire and J{\'a}nos A. Csirik
               and Michael L. Littman and David McAllester},
  title     = {{ATTac}-2001:  A Learning, Autonomous Bidding Agent},
  booktitle = {Workshop on Agent Mediated Electronic Commerce IV},
  year      = 2002
}

@book{Strang86,
  author    = {Strang, Gilbert},
  title     = {Introduction to Applied Mathematics},
  publisher = {Wellesley-Cambridge Press},
  year      = {1986}
}

@article{Suchanskiy87,
  author = {M. Ye. Suchanskiy},
  title  = {Adaptive algorithm for determination of weakly
            efficient variant under randomness}
}

@article{SussmanWi92,
  author  = {Gerald Jay Sussman and Jack Wisdom},
  title   = {Chaotic evolution of the solar system},
  journal = {Science},
  volume  = 257,
  number  = 5066,
  pages   = {56--62},
  year    = 1992
}

@article{Sutton88,
  author  = {Richard S. Sutton},
  title   = {Learning to predict by the methods of temporal
             differences},
  journal = ml,
  volume  = 3,
  pages   = {9--44},
  year    = 1988
}

@article{SuttonBa81,
  author  = {Sutton, Richard S. and Andrew G. Barto},
  title   = {Toward a Modern Theory of Adaptive Networks: Expectation and
             Prediction},
  journal = {Psychological Review},
  volume  = 88,
  number  = 2,
  pages   = {135--170},
  year    = 1981,
  comment = {Model of neuron where neuron increases output in expectation
             of being stimulated}
}

@unpublished{SuttonBa85,
  author  = {Sutton, Richard S. and Andrew G. Barto},
  title   = {An adaptive network that constructs and uses an internal model
             of its world},
  note    = {Unpublished manuscript},
  year    = 1985,
  comment = {Maze-learning with adaptive predictor neuron models.}
}

@book{SuttonBa98,
  author    = {Richard S. Sutton and Andrew G. Barto},
  title     = {Reinforcement Learning: An Introduction},
  year      = 1998,
  publisher = {MIT Press}
}

@article{Suzuki95,
  author  = {J. Suzuki},
  title   = {Some notes on universal noiseless coding},
  journal = {IEICE Trans. Fundamentals},
  volume  = {E78-A},
  number  = 12,
  month   = dec,
  year    = 1995
}

@article{Takeuchi97,
  author   = {Takeuchi, Jun-ichi},
  title    = {Characterization of the {Bayes} estimator and the {MDL}
              estimator for exponential families},
  journal  = {IEEE Trans. Inform. Theory},
  fjournal = {Institute of Electrical and Electronics Engineers.
              Transactions on Information Theory},
  volume   = 43,
  number   = 4,
  pages    = {1165--1174},
  year     = 1997
}
@unpublished{TakimotoHiMaVo97,
  author = {Eiji Takimoto and Ken'ichi Hirai and Akira Maruoka
            and Volodya Vovk},
  title  = {Simple algorithms for predicting nearly as well as
            the best pruning of a decision tree},
  note   = {Unpublished manuscript}
}

@article{TakimotoMa??,
  author = {Eiji Takimoto and Akira Maruoka},
  title  = {Conservativeness and monotonicity for learning
            algorithms}
}

@article{Talagrand94,
  author  = {M. Talagrand},
  title   = {Sharper bounds for {Gaussian} and empirical processes},
  journal = {The Annals of Probability},
  volume  = 22,
  number  = 1,
  pages   = {28--76},
  year    = 1994
}

@article{Tarjan75,
  author  = {Tarjan, Robert E.},
  title   = {Efficiency of a good but not linear set union algorithm},
  journal = jacm,
  volume  = 22,
  number  = 2,
  pages   = {215--225},
  month   = apr,
  year    = 1975
}

@techreport{TarsiPe84,
  author      = {Tarsi, Michael and Judea Pearl},
  title       = {Algorithmic Reconstruction of Trees},
  institution = {UCLA Computer Science Department},
  number      = {UCLA-ENG-8498},
  month       = dec,
  year        = 1984,
  comment     = {Describes how to build a tree with n leaves in time n log(n)
                 using only test as to whether deepest common ancestor of u
                 and v is on the path from root to w, if the max degree is
                 bounded.}
}

@article{Tesauro87,
  author  = {Tesauro, Gerald},
  title   = {Scaling relationships in back-propagation learning:
             dependence on training set size},
  journal = {Complex Systems},
  volume  = 1,
  pages   = {367--372},
  year    = 1987,
  comment = {Studies learning of 32-bit parity with 8 or 16 or 32 hidden
             units. Training time seems to go as 4/3 power of number of
             samples, up to point where capacity is exceeded.}
}

@inproceedings{TesauroSe87,
  author    = {Gerald Tesauro and Terrence J. Sejnowski},
  title     = {A `Neural' Network that Learns to Play Backgammon},
  booktitle = {Neural Information Processing Systems},
  editor    = {Dana Z. Anderson},
  publisher = {American Institute of Physics},
  year      = 1988
}
		  
@article{TesauroSe89,
  author  = {Gerald Tesauro and Terrence J. Sejnowski},
  title   = {A Parallel Network that Learns to Play Backgammon},
  journal = {Artificial Intelligence},
  volume  = 39,
  number  = 3,
  pages   = {357--390},
  month   = jul,
  year    = 1989
}

@inproceedings{ThomasKrScSh??,
  author = {Timothy R. Thomas and Charlotte Kruger and Clint
            Scovel and Joseph Shumate},
  title  = {Text to information: sampling uncertainty in an
            example from physician/patient encounters}
}

@techreport{Tibshirani96,
  author      = {Robert Tibshirani},
  title       = {Bias, variance and prediction error for classification rules},
  institution = {University of Toronto},
  month       = nov,
  year        = 1996,
  comment     = {available electronically from http://utstat.toronto.edu/tibs/research.html}
}

@techreport{TieuVi99,
  author      = {Kinh H. Tieu and Paul Viola},
  title       = {Boosting Image Database Retrieval},
  institution = {MIT Artificial Intelligence Laboratory},
  number      = 1669,
  year        = 1999
}

@article{TikochinskyTiLe84,
  author  = {Tikochinsky, Y. and N. Z. Tishby and R. D. Levine},
  title   = {Consistent Inference of Probabilities for Reproducible
             Experiments},
  journal = {Physical Review Letters},
  volume  = 52,
  number  = 16,
  pages   = {1357--1360},
  year    = 1984,
  comment = {Justification for maximum-entropy approach.}
}

@inproceedings{TieuVi00,
  author    = {Kinh Tieu and Paul Viola},
  title     = {Boosting image retrieval},
  year      = 2000,
  booktitle = {Proceedings of the IEEE Conference on Computer
               Vision and Pattern Recognition}
}

@inproceedings{Tipping99,
  author    = {Michael E. Tipping},
  title     = {Probabilistic Visualisation of High-dimensional
               Binary Data},
  booktitle = nips11,
  pages     = {592--598},
  year      = 1999
}

@article{TippingBi99,
  author  = {M. E. Tipping and C. M. Bishop},
  title   = {Probabilistic principal component analysis},
  journal = {Journal of the Royal Statistical Society, Series B},
  volume  = 61,
  number  = 3,
  pages   = {611--622},
  year    = 1999
}

@article{TishbyGo94,
  author  = {Naftali Tishby and Allen Gorin},
  title   = {Algebraic learning of statistical associations for
             language acquisition},
  journal = {Computer Speech and Language},
  volume  = 8,
  number  = 1,
  pages   = {51--78},
  year    = 1994
}

@book{TitteringtonSmMa85,
  author    = {D. M. Titterington and A. F. M. Smith and U. E. Makov},
  title     = {Statistical Analysis of Finite Mixture Distributions},
  publisher = {John Wiley \& Sons},
  year      = 1985
}
		  
@article{TongKo01,
  author  = {Simon Tong and Daphne Koller},
  title   = {Support Vector Machine Active Learning with Applications to
             Text Classification},
  journal = jmlr,
  volume  = 2,
  pages   = {45--66},
  month   = nov,
  year    = 2001
}

@article{Topsoe79,
  author  = {F. Tops{\o}e},
  title   = {Information theoretical optimization techniques},
  journal = {Kybernetika},
  volume  = 15,
  pages   = {7--17},
  year    = 1979
}

@book{TrakhtenbrotBa73,
  author    = {B. A. Trakhtenbrot and Ya. M. Barzdin'},
  title     = {Finite Automata: Behavior and Synthesis},
  year      = 1973,
  publisher = {North-Holland}
}

@article{Tsitsiklis??,
  author  = {John N. Tsitsiklis},
  title   = {Asynchronous stochastic approximation and {Q}-learning},
  journal = ml,
  volume  = 16,
  number  = 3,
  pages   = {185--202},
  year    = 1994
}

@article{TsitsiklisRo??,
  author  = {John N. Tsitsiklis and Van Roy, Benjamin},
  title   = {Feature-based methods for large scale dynamic programming},
  journal = ml,
  volume  = 22,
  number  = {1/2/3},
  pages   = {59--94},
  year    = 1996
}

@article{Turing50,
  author  = {A. M. Turing},
  title   = {Computing Machinery and Intelligence},
  journal = {Mind},
  volume  = 59,
  number  = 236,
  pages   = {433--460},
  month   = oct,
  year    = 1950,
  note    = {(Reprinted in {\em Computers and Thought}, (eds.
             E. A. Feigenbaum and J. Feldman), McGraw-Hill, 1963,
             pages 11--38).},
  comment = {Classic article on whether computers can think; introduces
             the `Turing test'.}
}

@inproceedings{TurScHa03,
  author    = {Gokhan Tur and Robert E. Schapire and Dilek Hakkani-T{\"u}r},
  title     = {Active Learning for Spoken Language Understanding},
  booktitle = {IEEE International Conference on Acoustics, Speech, and
               Signal Processing},
  year      = 2003
}

@article{Valiant79,
  author  = {L. G. Valiant},
  title   = {The complexity of enumeration and reliability
             problems},
  journal = sicomp,
  volume  = 8,
  number  = 3,
  pages   = {410--421},
  year    = 1979
}

@article{Valiant84,
  author  = {Valiant, L.~G.},
  title   = {A Theory of the Learnable},
  journal = cacm,
  volume  = 27,
  number  = 11,
  pages   = {1134--1142},
  month   = nov,
  year    = 1984,
  comment = {Defines `learnability' wrt EXAMPLES and ORACLE using arbitrary
             probability measure on event space. Shows k-CNF learnable from
             examples only.}
}

@inproceedings{Valiant85,
  author    = {Valiant, L. G.},
  title     = {Learning disjunctions of conjunctions},
  booktitle = ijcai85,
  pages     = {560--566},
  month     = aug,
  year      = 1985
}

@book{VanDerVaartWe96,
  author    = {van der Vaart, Aad W. and Wellner, Jon A.},
  title     = {Weak Convergence and Empirical Processes: With Applications
               to Statistics},
  publisher = {Springer},
  series    = {Springer Series in Statistics},
  month     = mar,
  year      = 1996,
  isbn      = {0387946403},
  note      = {This book is recommended by Dudley as a reference for the
               analysis of BootStrap}
}

@article{VanLehn87,
  author  = {VanLehn, Kurt},
  title   = {Learning One Subprocedure per Lesson},
  journal = {Artificial Intelligence},
  volume  = 31,
  number  = 1,
  pages   = {1--40},
  month   = jan,
  year    = 1987
}

@book{Vapnik82,
  author    = {Vapnik, V. N.},
  title     = {Estimation of Dependences Based on Empirical Data},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {1982}
}

@incollection{Vapnik92,
  author    = {V. Vapnik},
  title     = {Principles of Risk Minimization for Learning Theory},
  booktitle = {Advances in Neural Information Processing Systems 4},
  editor    = {John E. Moody and Steve J. Hanson and Richard P. Lippmann},
  publisher = {Morgan Kaufmann},
  pages     = {831--838},
  year      = 1992
}

@book{Vapnik95,
  author    = {Vladimir N. Vapnik},
  title     = {The Nature of Statistical Learning Theory},
  year      = 1995,
  publisher = {Springer}
}

@book{Vapnik98,
  author    = {Vladimir N. Vapnik},
  title     = {Statistical Learning Theory},
  year      = {1998},
  publisher = {Wiley}
}

@article{VapnikCh71,
  author  = {Vapnik, V. N. and A. Ya. Chervonenkis},
  title   = {On the Uniform Convergence of Relative Frequencies of Events
             to Their Probabilities},
  journal = {Theory of Probability and Its Applications},
  volume  = {XVI},
  number  = 2,
  pages   = {264--280},
  year    = 1971,
  comment = {Shows that a sufficient condition for the probability of every
             set converging to its correct probability is finite VC
             dimension.}
}


@book{VapnikCh74,
  author    = {V. N. Vapnik and A. Ya. Chervonenkis},
  title     = {Theory of pattern recognition},
  publisher = {Nauka},
  address   = {Moscow},
  year      = 1974,
  note      = {(In Russian)}
}

@article{VardiShKa85,
  author  = {Y. Vardi and L. A. Shepp and L. Kaufman},
  title   = {A statistical model for positron emission tomography},
  journal = {Journal of the American Statistical Association},
  volume  = 80,
  number  = 389,
  pages   = {8--20},
  year    = 1985
}

@article{Vazirani87,
  author  = {U. V. Vazirani},
  title   = {Strong communication complexity or generating
             quasi-random sequences from two communicating
             semi-random sources},
  journal = {Combinatorica},
  volume  = 7,
  pages   = {375--392},
  year    = 1987
}

@inproceedings{VaziraniVa85,
  author    = {U. V. Vazirani and V. V. Vazirani},
  title     = {Random polynomial time is equal to slightly-random
               polynomial time},
  booktitle = focs85,
  pages     = {417--428},
  month     = oct,
  year      = 1985
}

@inproceedings{VenkatesanLe88,
  author    = {Ramarathnam Venkatesan and Leonid A. Levin},
  title     = {Random Instances of a Graph Coloring Problem Are Hard},
  booktitle = stoc88,
  pages     = {217--222},
  month     = may,
  year      = 1988
}

@inproceedings{Verbeurgt90,
  author    = {Karsten Verbeurgt},
  title     = {Learning {DNF} under the Uniform Distribution in
               Quasi-polynomial Time},
  booktitle = colt90,
  pages     = {314--326},
  month     = aug,
  year      = 1990
}

@inproceedings{Vovk90,
  author    = {Volodimir G. Vovk},
  title     = {Aggregating Strategies},
  booktitle = colt90,
  pages     = {371--383},
  year      = 1990
}

@article{Vovk92,
  author  = {V. G. Vovk},
  title   = {Universal forecasting algorithms},
  journal = infcomp,
  volume  = 96,
  number  = 2,
  pages   = {245--277},
  year    = 1992
}

@techreport{Vovk93,
  author = {V. G. Vovk},
  title  = {On-line learning in a finite-state environment:
            decision theoretic approach},
  year   = 1993
}

@article{Vovk93b,
  author  = {V. G. Vovk},
  title   = {A logic of probability, with application to the
             foundations of statistics},
  journal = {Journal of the Royal Statistical Society Series
             B-Methodological},
  volume  = 55,
  number  = 2,
  pages   = {317--351},
  year    = 1993
}

@inproceedings{Vovk94,
  author    = {V. G. Vovk},
  title     = {An optimal-control application of two paradigms of
               on-line learning},
  booktitle = colt94,
  pages     = {98--109},
  year      = 1994
}

@article{Vovk98,
  author  = {V. G. Vovk},
  title   = {A game of prediction with expert advice},
  journal = jcss,
  volume  = 56,
  number  = 2,
  pages   = {153--173},
  month   = apr,
  year    = 1998
}

@unpublished{Vovk97,
  author = {V. Vovk},
  title  = {Probability theory for the {Brier} game},
  note   = {Unpublished manuscript}
}

@unpublished{Vovk??,
  author = {V. G. Vovk},
  title  = {An optimality property of the weighted majority
            algorithm},
  note   = {Unpublished manuscript}
}

@techreport{WaibelHaHiSh87,
  author      = {A. Waibel and T. Hanazawa and G. Hinton and K. Shikano},
  title       = {Phoneme Recognition Using Time-Delay Neural Networks},
  institution = {ATR Interpreting Telephony Laboratories},
  year        = 1987
}

@phdthesis{Wallace89,
  author = {Richard Scott Wallace},
  title  = {Finding Natural Clusters Through Entropy Minimization},
  school = {Carnegie Mellon Computer Science Department},
  month  = jun,
  year   = 1989
}
           
@inproceedings{WalkerRaRo01,
  author    = {Marilyn A. Walker and Owen Rambow and Monica Rogati},
  title     = {{SPoT}: A Trainable Sentence Planner},
  booktitle = {Proceedings of the 2nd Annual Meeting of the North
               American Chapter of the Association for
               Computational Linguistics},
  year      = 2001
}

@article{WallaceBo68,
  author  = {Wallace, C. S. and D. M. Boulton},
  title   = {An Information Measure for Classification},
  journal = {The Computer Journal},
  volume  = 11,
  number  = 2,
  pages   = {185--194},
  year    = 1968,
  comment = {Introduces idea that `best classification is that which
             results in the briefest recording of all the attribute
             information'}
}

@book{Walrand88,
  author    = {Jean Walrand},
  title     = {An Introduction to Queueing Networks},
  publisher = {Prentice Hall},
  year      = 1988
}

@inproceedings{Watanabe99,
  author    = {Osamu Watanabe},
  title     = {From computational learning theory to discovery science},
  booktitle = {Proceedings of the 26th International Colloquium on
               Automata, Languages and Programming},
  pages     = {134--148},
  year      = 1999
}

@inproceedings{Watkins87,
  author    = {Watkins, C. J. C. H.},
  title     = {Combining Cross-Validation and Search},
  booktitle = {Progress in Machine Learning--Proceedings of EWSL 87:
               2nd European Working Session on Learning},
  editor    = {Bratko, I. and N. Lavrac},
  address   = {Bled, Yugoslavia},
  pages     = {79--90},
  month     = may,
  year      = 1987
}

@phdthesis{Watkins89,
  author = {C. J. C. H. Watkins},
  title  = {Learning from delayed rewards},
  year   = 1989,
  school = {University of Cambridge, England}
}

@article{WatkinsDa92,
  author  = {Christopher J. C. H. Watkins and Peter Dayan},
  title   = {Q-learning},
  journal = ml,
  volume  = 8,
  pages   = {279--292},
  year    = 1992
}

@book{Watson52,
  author    = {G. N. Watson},
  title     = {Theory of {Bessel} functions},
  publisher = {Cambridge University Press},
  year      = 1952
}

@article{WeinbergerLeZi92,
  author  = {Marcelo J. Weinberger and Abraham Lempel and Jacob Ziv},
  title   = {A Sequential Algorithm for the Universal Coding of
             Finite-Memory Sources},
  journal = ieeeit,
  volume  = {38},
  number  = {3},
  pages   = {1002--1014},
  month   = may,
  year    = {1992}
}

@article{WeinbergerMeFe94,
  author  = {Marcelo J. Weinberger and Neri Merhav and Meir Feder},
  title   = {Optimal Sequential Probability Assignment for Individual
             Sequences},
  journal = ieeeit,
  volume  = {40},
  number  = {2},
  pages   = {384--396},
  month   = mar,
  year    = {1994}
}

@article{WeinbergerRiFe95,
  author  = {Marcelo J. Weinberger and Jorma J. Rissanen and Meir Feder},
  title   = {A Universal Finite Memory Source},
  journal = ieeeit,
  volume  = {41},
  number  = 3,
  pages   = {643--652},
  year    = 1995
}

@article{WeinbergerSe97,
  author  = {Marcelo J. Weinberger and Gadiel Seroussi},
  title   = {Sequential prediction and ranking in universal
             context modeling and data compression},
  journal = ieeeit,
  volume  = 43,
  number  = 5,
  pages   = {1697--1706},
  month   = sep,
  year    = 1997
}

@article{WeissApDaJoOlGoHa99,
  author  = {S. M. Weiss and C. Apte and F. J. Damerau and
             D. E. Johnson and F. J. Oles and T. Goetz and T. Hampp},
  title   = {Maximizing Text-Mining Performance},
  journal = {IEEE Intelligent Systems},
  year    = 1999
}

@article{WenocurDu81,
  author  = {Wenocur, R. S. and R. M. Dudley},
  title   = {Some Special {V}apnik-{C}hervonenkis Classes},
  journal = {Discrete Mathematics},
  volume  = 33,
  pages   = {313--318},
  year    = 1981,
  comment = {Shows VC dimension of half-spaces in n dimensions is n.}
}

@article{White89,
  author  = {Halbert White},
  title   = {Learning in Artificial Neural Networks: A Statistical
             Perspective},
  journal = {Neural Computation},
  volume  = 1,
  number  = 4,
  year    = 1989,
  pages   = {425--464}
}

@article{WidrowGuMa73,
  author  = {Bernard Widrow and Narendra K. Gupta and Sidhartha Maitra},
  title   = {Punish/reward: Learning with a critic in adaptive
             threshold systems},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = {SMC-3},
  number  = 5,
  pages   = {455--465},
  month   = sep,
  year    = 1973
}

@article{WidrowHo60,
  author    = {Bernard Widrow and Marcian E. Hoff},
  title     = {Adaptive Switching Circuits},
  journal   = {1960 IRE WESCON Convention Record},
  publisher = {IRE},
  pages     = {96--104},
  year      = 1960,
  note      = {Reprinted in {\sl Neurocomputing} (MIT Press, 1988).}
}

@techreport{WileyTe85,
  author      = {Wiley, R. Paul and Robert R. Tenney},
  title       = {Performance Evaluation of Stochastic Timed Decision-Free
                 {Petri} Nets},
  institution = {MIT Laboratory for Information and Decision Sciences},
  number      = {LIDS-P-1443},
  month       = mar,
  year        = 1985,
  comment     = {Interesting model for a stochastic system.}
}

@article{Wilf68,
  author  = {Herbert S. Wilf},
  title   = {Hadamard determinants, {M{\"o}bius} functions, and the
             chromatic number of a graph},
  journal = {Bulletin of the American Mathematical Society},
  volume  = 74,
  number  = 5,
  pages   = {960--964},
  month   = sep,
  year    = 1968
}

@article{Wilf84,
  author  = {Herbert S. Wilf},
  title   = {Backtrack:  An {$O(1)$} expected time graph coloring
             algorithm},
  journal = {Information Processing Letters},
  volume  = 18,
  number  = 3,
  pages   = {119--121},
  month   = mar,
  year    = 1984
}

@inproceedings{WillemsShTj93,
  author    = {F. M. J. Willems and Y. M. Shtarkov and Tj. J. Tjalkens},
  title     = {Context tree weighting: a sequential universal source
               coding procedure for {FSMX} sources},
  booktitle = {Proceedings 1993 IEEE International Symposium on
               Information Theory},
  pages     = {59},
  year      = 1993
}

@article{WillemsShTj95,
  author  = {Frans M. J. Willems and Yuri M. Shtarkov and Tjalling
             J. Tjalkens},
  title   = {The context tree weighting method: basic properties},
  journal = ieeeit,
  volume  = {41},
  number  = 3,
  pages   = {653--664},
  year    = 1995
}

@article{WillemsShTj96,
  author  = {Frans M. J. Willems and Yuri M. Shtarkov and Tjalling
             J. Tjalkens},
  title   = {Context weighting for general finite-context sources},
  journal = ieeeit,
  year    = 1996
}

@inproceedings{Wilson85,
  author    = {Wilson, Stewart W.},
  title     = {Knowledge Growth in an Artificial Animal},
  booktitle = {Proceedings of an International Conference on Genetic
               Algorithms and their Applications},
  pages     = {16--23},
  month     = jul,
  year      = 1985,
  comment   = {Empirical results for a `critter' wandering around in the
               woods trying to find food, using a genetic algorithm a la
               Holland to learn general situation/action rules.}
}

@unpublished{WinklerZu??,
  author = {Peter Winkler and David Zuckerman},
  title  = {Multiple cover time},
  note   = {Unpublished manuscript}
}

@incollection{Winston75,
  author    = {Winston, Patrick H.},
  title     = {Learning Structural Descriptions from Examples},
  booktitle = {The Psychology of Computer Vision},
  editor    = {Winston, Patrick H.},
  publisher = {McGraw-Hill},
  address   = {New York},
  year      = 1975
}

@article{Witten77,
  author  = {Witten, Ian H.},
  title   = {An Adaptive Optimal Controller for Discrete-Time
             {Markov} Environments},
  journal = infctrl,
  volume  = 34,
  pages   = {286--295},
  year    = 1977,
  comment = {Controller sees state of environment at each instant, and
             gets reward from that state.  Uses discounted expectation
             of future reward to guide action selection.}
}

@article{Witten77b,
  author = {Ian H. Witten},
  title  = {Exploring, modelling and controlling discrete
            sequential environments},
  year   = 1977
}

@inproceedings{WrightGoRi97,
  author    = {J. H. Wright and A. L. Gorin and G. Riccardi},
  title     = {Automatic acquisition of salient grammar fragments
               for call-type classification},
  booktitle = {Proceedings of the 5th European Conference
               on Speech Communication and Technology},
  pages     = {1419--1422},
  year      = 1997
}

@article{Wyner72,
  author  = {Wyner, A. D.},
  title   = {An upper bound on the entropy series},
  journal = infctrl,
  volume  = 20,
  pages   = {176--181},
  year    = 1972
}

@article{XieBa97,
  author  = {Qun Xie and Andrew Barron},
  title   = {Minimax Redundancy for the class of Memoryless
             Sources},
  journal = ieeeit,
  volume  = 43,
  number  = 2,
  pages   = {646--657},
  month   = mar,
  year    = 1997
}

@article{XieBa00,
  author  = {Qun Xie and Andrew R. Barron},
  title   = {Asymptotic Minimax Regret for Data Compression, Gambling, and
             Prediction},
  journal = ieeeit,
  volume  = 46,
  number  = 2,
  pages   = {431--445},
  year    = 2000
}


@article{YakowitzSP68,
  author  = {S. J. Yakowitz and J. D. Spragins},
  title   = {On the identifiability of finite mixtures},
  journal = {The Annals of Mathematical Statistics},
  volume  = 39,
  number  = 1,
  pages   = {209--214},
  year    = 1968
}

@inproceedings{Yamanishi90,
  author    = {Kenji Yamanishi},
  title     = {A Learning Criterion for Stochastic Rules},
  booktitle = colt90,
  pages     = {67--81},
  month     = aug,
  year      = 1990,
  note      = {To appear, {\it Machine Learning}}
}

@article{Yamanishi92,
  author  = {Kenji Yamanishi},
  title   = {Learning Nonparametric Densities in Terms of Finite
             Dimensional Parametric Hypotheses},
  journal = {IEICE Transactions: D Information and Systems},
  volume  = {E75D},
  number  = 4,
  pages   = {459--469},
  year    = 1992
}

@article{Yamanishi92b,
  author  = {Kenji Yamanishi},
  title   = {A Learning Criterion for Stochastic Rules},
  journal = ml,
  volume  = 9,
  number  = {2/3},
  pages   = {165--203},
  month   = jul,
  year    = 1992
}

@article{Yamanishi95,
  author  = {Kenji Yamanishi},
  title   = {Probably almost discriminative learning},
  journal = ml,
  year    = 1995
}

@article{Yamanishi95b,
  author  = {Kenji Yamanishi},
  title   = {A loss bound model for on-line stochastic prediction
             algorithms},
  journal = infcomp,
  volume  = 119,
  number  = 1,
  pages   = {39--54},
  year    = 1995
}

@article{Yamanishi98,
  author  = {Kenji Yamanishi},
  title   = {A Decision-Theoretic Extension of Stochastic Complexity and
             Its Applications to Learning},
  journal = ieeeit,
  volume  = 44,
  number  = 4,
  pages   = {1424--1439},
  year    = 1998
}

@inproceedings{Yang94,
  author    = {Yiming Yang},
  title     = {Expert network: effective and efficient learning from
               human decisions in text categorization and retrieval},
  booktitle = sigir94,
  pages     = {13--22},
  year      = 1994
}

@article{Yang99,
  author  = {Yiming Yang},
  title   = {An evaluation of statistical approaches to text categorization},
  journal = {Information Retrieval},
  year    = 1999,
  note    = {to appear}
}

@article{YangCh94,
  author  = {Y. Yang and C. G. Chute},
  title   = {An example-based mapping method for text classification
             and retrieval},
  journal = {ACM Transactions on Information Systems},
  volume  = 12,
  number  = 3,
  pages   = {253--277},
  year    = 1994
}

@inproceedings{Yao82,
  author    = {Andrew C. Yao},
  title     = {Theory and Applications of Trapdoor Functions},
  booktitle = focs82,
  pages     = {80--91},
  year      = {1982}
}

@unpublished{Young87p,
  author = {Neal Young},
  title  = {Private communication},
  year   = 1987,
  note   = {}
}

@inproceedings{Young95,
  author    = {Neal Young},
  title     = {Randomized rounding without solving the linear program},
  booktitle = {Proceedings of the Sixth Annual ACM-SIAM Symposium
               on Discrete Algorithms},
  pages     = {170--178},
  year      = 1995
}


@unpublished{Young9?,
  author = {Neal Young},
  title  = {Randomized rounding without solving the linear
            program, part two},
  note   = {Unpublished manuscript}
}

@article{Zadeh65,
  author  = {L. A. Zadeh},
  title   = {Fuzzy Sets},
  journal = infctrl,
  volume  = 8,
  number  = 3,
  pages   = {338--353},
  month   = jun,
  year    = 1965
}

@phdthesis{Zhang94,
  author = {Zhongxin Zhang},
  title  = {Discrete Noninformative Priors},
  school = {Yale University},
  year   = 1994
}

@inproceedings{Zippel79,
  author    = {Richard Zippel},
  title     = {Probabilistic Algorithms for Sparse Polynomials},
  booktitle = eurosam79,
  publisher = {Springer-Verlag},
  pages     = {216--226},
  month     = jun,
  year      = 1979
}

@article{Zippel90,
  author  = {Richard Zippel},
  title   = {Interpolating Polynomials from their Values},
  journal = symcomp,
  volume  = 9,
  pages   = {375--403},
  year    = 1990
}

@article{Ziv78,
  author  = {Jacob Ziv},
  title   = {Coding Theorems for Individual Sequences},
  journal = {IEEE Transactions on Information Theory},
  volume  = 24,
  number  = 4,
  pages   = {405--412},
  month   = jul,
  year    = 1978
}