/*
 * This file is part of Jstacs.
 *
 * Jstacs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Jstacs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jstacs.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * For more information on Jstacs, visit http://www.jstacs.de
 */

package de.jstacs.classifier.assessment;


import de.jstacs.WrongAlphabetException;
import de.jstacs.classifier.AbstractClassifier;
import de.jstacs.classifier.ClassDimensionException;
import de.jstacs.classifier.MeasureParameters;
import de.jstacs.classifier.assessment.RepeatedHoldOutAssessParameterSet;
import de.jstacs.data.Sample;
import de.jstacs.data.Sample.PartitionMethod;
import de.jstacs.models.Model;
import de.jstacs.utils.ProgressUpdater;

/**
 * This class implements a repeated holdout experiment for assessing classifiers.
 * The methodology used by a repeated holdout experiment is as follows.
 * The user supplies a data-set for each class the classifiers are capable of
 * predicting. In each step the given data-sets are randomly partitioned into
 * mutually exclusive test- and train-data-sets of user-specified size.
 * Afterwards the train-data-sets are used to train the classifiers and
 * the test-data-sets are used to assess the performance of the classifiers
 * in predicting the elements therein using user-specified assessment-measures.
 * Additionally, the user defines how often this procedure is repeated.
 * 
 * @author andr|e gohr (a0coder (nospam:@) gmail (nospam:.) com)
 *
 */
public class RepeatedHoldOutExperiment extends ClassifierAssessment
{
//	**********************
//	class variables
//	**********************
	
//	**********************
//	class methods
//	**********************

	// **********************
	// member variables
	// **********************

	// **********************
	// constructors
	// **********************

	/**
	 * Creates a new {@link RepeatedHoldOutExperiment} from an array of {@link AbstractClassifier}s and a
	 * two-dimensional array of {@link Model}s, which are combined to additional classifiers. If
	 * <code>buildClassifiersByCrossProduct</code> is <code>true</code>, the cross product of all
	 * {@link Model}s in <code>aMs</code> is built to obtain these classifiers.
	 * <br>
	 * All arguments are forwarded unchanged to the corresponding constructor of the super class.
	 * 
	 * @param aCs
	 *            the pre-defined classifiers
	 * @param aMs
	 *            the {@link Model}s that are used to build additional classifiers
	 * @param buildClassifiersByCrossProduct
	 *            Determines how classifiers are constructed using the given models. Suppose a k-class problem.
	 *            In this case, each classifier is supposed to consist of k models, one responsible for each
	 *            class. <br>
	 *            Let S_i be the set of all models in aMs[i]. Let S be the set S_1 x S_2 x ... x S_k
	 *            (cross-product).<br>
	 *            <br>
	 *            true: all possible classifiers consisting of a subset (set of k models) of S are constructed <br>
	 *            false: one classifier consisting of the models aMs[0][i],aMs[1][i],...,aMs[k-1][i] for a fixed
	 *            i is constructed. In this case, all second dimensions of aMs have to be equal, say m. In total
	 *            m classifiers are constructed.
	 * @param checkAlphabetConsistencyAndLength
	 *            indicates if alphabets and lengths shall be checked for consistency
	 * @throws IllegalArgumentException
	 *             if thrown by the constructor of the super class
	 * @throws WrongAlphabetException
	 *             if thrown by the constructor of the super class
	 * @throws CloneNotSupportedException
	 *             if thrown by the constructor of the super class
	 * @throws ClassDimensionException
	 *             if thrown by the constructor of the super class
	 */
	protected RepeatedHoldOutExperiment( AbstractClassifier[] aCs, Model[][] aMs,
			boolean buildClassifiersByCrossProduct, boolean checkAlphabetConsistencyAndLength )
			throws IllegalArgumentException, WrongAlphabetException, CloneNotSupportedException,
			ClassDimensionException
	{
		super( aCs, aMs, buildClassifiersByCrossProduct, checkAlphabetConsistencyAndLength );
	}

	/**
	 * Creates a new {@link RepeatedHoldOutExperiment} from a set of {@link AbstractClassifier}s.
	 * 
	 * @param aCs
	 *            contains the classifiers to be assessed<br>
	 *            If model-based classifiers are trained, the order of models in classifiers determines, which model
	 *            will be trained using which sample in method assess().<br>
	 *            For a two class-problem, it is recommended to
	 *            <ul>
	 *            <li> initiate the classifiers with models in order (foreground-model (positive class), background-model
	 *            (negative-class))
	 *            <li> to initiate an assessment-object using models in order (foreground-model (positive class),
	 *            background-model (negative-class))
	 *            <li> to give data s in order (s[0] contains foreground-data, s[1] contains background data)
	 *            </ul>
	 * @throws IllegalArgumentException
	 *             if thrown by the constructor of the super class
	 * @throws WrongAlphabetException
	 *             if not all given classifiers are defined on the same <code>AlphabetContainer</code>
	 * @throws CloneNotSupportedException
	 *             if thrown by the constructor of the super class
	 * @throws ClassDimensionException
	 *             if thrown by the constructor of the super class
	 */
	public RepeatedHoldOutExperiment( AbstractClassifier... aCs )
			throws IllegalArgumentException, WrongAlphabetException, CloneNotSupportedException,
			ClassDimensionException
	{
		super( aCs );
	}

	/**
	 * Creates a new {@link RepeatedHoldOutExperiment} from a set of {@link Model}s. The argument
	 * <code>buildClassifiersByCrossProduct</code> determines how these {@link Model}s are combined to classifiers.
	 * 
	 * @param buildClassifiersByCrossProduct
	 *            <br>
	 *            Determines how classifiers are constructed using the given models. Suppose a k-class problem. In this
	 *            case, each classifier is supposed to consist of k models, one responsible for each class. <br>
	 *            Let S_i be the set of all models in aMs[i]. Let S be the set S_1 x S_2 x ... x S_k (cross-product).<br>
	 *            <br>
	 *            true: all possible classifiers consisting of a subset (set of k models) of S are constructed <br>
	 *            false: one classifier consisting of the models aMs[0][i],aMs[1][i],...,aMs[k-1][i] for a fixed i is
	 *            constructed. In this case, all second dimensions of aMs have to be equal, say m. In total m
	 *            classifiers are constructed.
	 * @param aMs
	 *            <br>
	 *            Contains the models in the following way (suppose a k-class problem): the first dimension encodes the
	 *            class (here it is k), the second dimension (aMs[i]) contains the models according to class i.<br>
	 *            If models are trained directly (during assessment), the order of given models during initiation of
	 *            this assessment-object determines, which sample will be used for training which model. In general the
	 *            first model will be trained using the first sample in s... . <br>
	 *            For a two class-problem, it is recommended to
	 *            <ul>
	 *            <li> initiate the classifiers with models in order (foreground-model (positive class), background-model
	 *            (negative-class))
	 *            <li> to initiate an assessment-object using models in order (foreground-model (positive class),
	 *            background-model (negative-class))
	 *            <li> to give data s in order (s[0] contains foreground-data, s[1] contains background data)
	 *            </ul>
	 * @throws IllegalArgumentException
	 *             if thrown by the constructor of the super class
	 * @throws WrongAlphabetException
	 *             if not all given models are defined on the same <code>AlphabetContainer</code>
	 * @throws CloneNotSupportedException
	 *             if thrown by the constructor of the super class
	 * @throws ClassDimensionException
	 *             if thrown by the constructor of the super class
	 */
	public RepeatedHoldOutExperiment( boolean buildClassifiersByCrossProduct, Model[]... aMs )
			throws IllegalArgumentException, WrongAlphabetException, CloneNotSupportedException,
			ClassDimensionException
	{
		super( buildClassifiersByCrossProduct, aMs );
	}

	/**
	 * This constructor allows to assess a collection of given {@link AbstractClassifier}s and those constructed
	 * using the given {@link Model}s by a {@link RepeatedHoldOutExperiment}. <br>
	 * 
	 * @param aCs
	 *            contains some {@link AbstractClassifier}s that should be assessed in addition to the
	 *            classifiers constructed using the given {@link Model}s
	 * @param buildClassifiersByCrossProduct
	 *            <br>
	 *            Determines how classifiers are constructed using the given models. Suppose a k-class problem. In this
	 *            case, each classifier is supposed to consist of k models, one responsible for each class. <br>
	 *            Let S_i be the set of all models in aMs[i]. Let S be the set S_1 x S_2 x ... x S_k (cross-product).<br>
	 *            <br>
	 *            true: all possible classifiers consisting of a subset (set of k models) of S are constructed <br>
	 *            false: one classifier consisting of the models aMs[0][i],aMs[1][i],...,aMs[k-1][i] for a fixed i is
	 *            constructed. In this case, all second dimensions of aMs have to be equal, say m. In total m
	 *            classifiers are constructed.
	 * @param aMs
	 *            <br>
	 *            Contains the models in the following way (suppose a k-class problem): the first dimension encodes the
	 *            class (here it is k), the second dimension (aMs[i]) contains the models according to class i.<br>
	 *            If models are trained directly (during assessment), the order of given models during initiation of
	 *            this assessment-object determines, which sample will be used for training which model. In general the
	 *            first model will be trained using the first sample in s... . <br>
	 *            For a two class-problem, it is recommended to
	 *            <ul>
	 *            <li> initiate the classifiers with models in order (foreground-model (positive class), background-model
	 *            (negative-class))
	 *            <li> to initiate an assessment-object using models in order (foreground-model (positive class),
	 *            background-model (negative-class))
	 *            <li> to give data s in order (s[0] contains foreground-data, s[1] contains background data)
	 *            </ul>
	 * @throws IllegalArgumentException
	 *             if thrown by the constructor of the super class
	 * @throws WrongAlphabetException
	 *             if not all given models are defined on the same <code>AlphabetContainer</code>
	 * @throws CloneNotSupportedException
	 *             if thrown by the constructor of the super class
	 * @throws ClassDimensionException
	 *             if thrown by the constructor of the super class
	 */
	public RepeatedHoldOutExperiment( AbstractClassifier[] aCs, boolean buildClassifiersByCrossProduct,
			Model[]... aMs )
			throws IllegalArgumentException, WrongAlphabetException, CloneNotSupportedException,
			ClassDimensionException
	{
		super( aCs, buildClassifiersByCrossProduct, aMs );
	}

//	**********************
//	member methods
//	**********************

	/**
	 * Runs the repeated holdout experiment: for each repeat, every class-specific sample in <code>s</code> is
	 * partitioned into a train and a test part, the classifiers are trained on the train parts and tested on
	 * the test parts.
	 * 
	 * @param mp
	 *            defines which performance-measures are used to assess classifiers
	 * @param assessPS
	 *            contains parameters for a run of this <code>RepeatedHoldOutExperiment</code>. Must be of type
	 *            <code>RepeatedHoldOutAssessParameterSet</code>. It supplies the partition method, the element
	 *            length, the number of repeats, the class-wise percentages used for partitioning and the flag
	 *            that is passed to the test step for measures that are not computable.
	 * @param s
	 *            contains the data to be used for assessment. The order of samples is important. <br>
	 *            If model-based classifiers are trained, the order of models in classifiers determines, which
	 *            model will be trained using which sample. The first model in classifier will be trained using the
	 *            first sample in s. If models are trained directly, the order of given models during initiation
	 *            of this assessment-object determines, which sample will be used for training which model. In
	 *            general the first model will be trained using the first sample in s... . <br>
	 *            For a two class-problem, it is recommended to
	 *            <ul>
	 *            <li> initiate the classifiers with models in order (foreground-model (positive class),
	 *            background-model (negative-class))
	 *            <li> to initiate an assessment-object using models in order (foreground-model (positive class),
	 *            background-model (negative-class))
	 *            <li> to give data s in order (s[0] contains foreground-data, s[1] contains background data)
	 *            </ul>
	 * @param pU
	 *            the <code>ProgressUpdater</code> that is informed about the progress: its maximum is set to the
	 *            number of repeats, its value is set to the number of finished repeats after each repeat, and
	 *            the experiment stops early as soon as it reports to be cancelled.
	 * 
	 * @return always <code>true</code> in this implementation, also if the experiment was cancelled via
	 *         <code>pU</code>
	 * 
	 * @throws IllegalArgumentException
	 *             if the given <code>assessPS</code> is <code>null</code> or not of type
	 *             <code>RepeatedHoldOutAssessParameterSet</code>, or if the number of percents does not equal
	 *             the number of classes of the first classifier
	 * @throws Exception
	 *             if partitioning, training or testing fails
	 */
	@Override
	protected boolean evaluateClassifier( MeasureParameters mp, ClassifierAssessmentAssessParameterSet assessPS,
			Sample[] s, ProgressUpdater pU )
			throws IllegalArgumentException, Exception
	{
		// Explicit type check instead of catching a ClassCastException: this also rejects null
		// with the documented IllegalArgumentException instead of a later NullPointerException.
		if( !( assessPS instanceof RepeatedHoldOutAssessParameterSet ) )
		{
			throw new IllegalArgumentException( "Given AssessParameterSet assessPS is not of type "
					+ "RepeatedHoldOutAssessParameterSet." );
		}
		RepeatedHoldOutAssessParameterSet tempAssessPS = (RepeatedHoldOutAssessParameterSet) assessPS;

		PartitionMethod splitMethod = tempAssessPS.getDataSplitMethod();
		int subSeqL = tempAssessPS.getElementLength();
		boolean exceptionIfMPNotComputable = tempAssessPS.getExceptionIfMPNotComputable();
		int repeats = tempAssessPS.getRepeats();
		double[] percents = tempAssessPS.getPercents();

		// one percentage per class is required; the first classifier defines the number of classes
		if( percents.length != this.myAbstractClassifier[0].getNumberOfClasses() )
		{
			throw new IllegalArgumentException( "Given RepeatedHoldOutAssessParameterSet contains "
					+ "a invalid parameter percents. Percents (double[], percentage of test-data of all "
					+ "given class specific data) must contain as much entries "
					+ "as classes the local classifers are able to distinguish." );
		}

		// class-wise train and test parts, refilled in every repeat
		Sample[] trainClassWise = new Sample[s.length];
		Sample[] testClassWise = new Sample[s.length];

		pU.setMax( repeats );

		for( int iteration = 0; iteration < repeats; iteration++ )
		{
			for( int classes = 0; classes < s.length; classes++ )
			{
				// parts[0] -> train
				// parts[1] -> test
				Sample[] parts = s[classes].partition( percents[classes], splitMethod, subSeqL );

				trainClassWise[classes] = parts[0];
				testClassWise[classes] = parts[1];
			}

			train( trainClassWise );
			test( mp, exceptionIfMPNotComputable, testClassWise );

			pU.setValue( iteration + 1 );
			if( pU.isCancelled() )
			{
				break;
			}
		}

		// NOTE(review): true is returned even after cancellation; confirm against the
		// contract of ClassifierAssessment.evaluateClassifier whether this is intended.
		return true;
	}
}
