Performing a 10-fold cross validation: Difference between revisions

From Jstacs
Jump to navigation | Jump to search
No edit summary
No edit summary
 
(8 intermediate revisions by 2 users not shown)
Line 1: Line 1:
<source lang="java">
<source lang="java5">
//create a DNA-alphabet
//create a Sample for each class from the input data, using the DNA alphabet
AlphabetContainer container = new AlphabetContainer( new DNAAlphabet() );
Sample[] data = new Sample[2];
data[0] = new DNASample( args[0] );
 
//the length of our input sequences
//the length of our input sequences
int length = 16;
int length = data[0].getElementLength();


//create a Sample for each class from the input data, using the alphabet from above
data[1] = new Sample( new DNASample( args[1] ), length );
Sample[] data = new Sample[]{
                            new Sample( container, new StringExtractor( new File(args[0]), 100 ) ),
AlphabetContainer container = data[0].getAlphabetContainer();
                            new Sample( container, new StringExtractor( new File(args[1]), 100 ), length )
};


//create a new PWM
//create a new PWM
BayesianNetworkModel pwm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet(
BayesianNetworkModel pwm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet(
//the alphabet:
//the alphabet and the length of the model:
container,
container, length,  
//the length of the model
length,  
//the equivalent sample size to compute hyper-parameters
//the equivalent sample size to compute hyper-parameters
4,  
4,  
Line 25: Line 23:
//we want to estimate the MAP-parameters
//we want to estimate the MAP-parameters
LearningType.ML_OR_MAP ) );
LearningType.ML_OR_MAP ) );
 
//create a new mixture model using 2 PWMs
//create a new mixture model using 2 PWMs
MixtureModel mixPwms = new MixtureModel(
MixtureModel mixPwms = new MixtureModel(
Line 36: Line 34:
//the equivalent sample sizes
//the equivalent sample sizes
new double[]{pwm.getESS(),pwm.getESS()},
new double[]{pwm.getESS(),pwm.getESS()},
//the hyper-parameters to draw the initial component weights (hidden variables)
//the hyper-parameters to draw the initial sequence-specific component weights (hidden variables)
1,
1,
//stopping criterion
//stopping criterion
1E-6,
new SmallDifferenceOfFunctionEvaluationsCondition(1E-6),
//parameterization of the model, THETA complies with the
//parameterization of the model, LAMBDA complies with the
//parameterization by probabilities
//parameterization by log-probabilities
Parameterization.THETA);
Parameterization.LAMBDA);
 
//create a new inhomogeneous Markov model of order 3
//create a new inhomogeneous Markov model of order 3
BayesianNetworkModel mm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet( container, length, 256, "my PWM", ModelType.IMM, (byte) 3, LearningType.ML_OR_MAP ) );
BayesianNetworkModel mm = new BayesianNetworkModel(  
 
new BayesianNetworkModelParameterSet( container, length, 256, "my iMM(3)", ModelType.IMM, (byte) 3, LearningType.ML_OR_MAP ) );
//create a new PWM scoring function
//create a new PWM scoring function
BayesianNetworkScoringFunction dPwm = new BayesianNetworkScoringFunction(
BayesianNetworkScoringFunction dPwm = new BayesianNetworkScoringFunction(
//the alphabet
//the alphabet and the length of the scoring function
container,
container, length,  
//the length of the scoring function
length,  
//the equivalent sample size for the plug-in parameters
//the equivalent sample size for the plug-in parameters
4,  
4,  
Line 59: Line 56:
//a PWM is an inhomogeneous Markov model of order 0
//a PWM is an inhomogeneous Markov model of order 0
new InhomogeneousMarkov(0));
new InhomogeneousMarkov(0));
 
//create a new mixture scoring function
//create a new mixture scoring function
MixtureScoringFunction dMixPwms = new MixtureScoringFunction(
MixtureScoringFunction dMixPwms = new MixtureScoringFunction(
//the number of starts
//the number of starts
10,
2,
//we use plug-in parameters
//we use plug-in parameters
true,
true,
//the two components, which are PWMs
//the two components, which are PWMs
dPwm,dPwm);
dPwm,dPwm);
 
//create a new scoring function that is an inhomogeneous Markov model of order 3
//create a new scoring function that is an inhomogeneous Markov model of order 3
BayesianNetworkScoringFunction dMm = new BayesianNetworkScoringFunction(container, length, 4, true, new InhomogeneousMarkov(3));
BayesianNetworkScoringFunction dMm = new BayesianNetworkScoringFunction(container, length, 4, true, new InhomogeneousMarkov(3));
 
//create the classifiers
//create the classifiers
int threads = AbstractMultiThreadedOptimizableFunction.getNumberOfAvailableProcessors();
AbstractScoreBasedClassifier[] classifiers = new AbstractScoreBasedClassifier[]{
AbstractScoreBasedClassifier[] classifiers = new AbstractScoreBasedClassifier[]{
                              //model based with mixture model and Markov model
  //model based with mixture model and Markov model
                              new ModelBasedClassifier( mixPwms, mm ),
  new ModelBasedClassifier( mixPwms, mm ),
                              //conditional likelihood based classifier
  //conditional likelihood based classifier
                              new CLLClassifier( new CLLClassifierParameterSet(container, length,  
  new MSPClassifier( new GenDisMixClassifierParameterSet(container, length,  
                              //method for optimizing the conditional likelihood
  //method for optimizing the conditional likelihood and
                              Optimizer.QUASI_NEWTON_BFGS,
  //other parameters of the numerical optimization
                              //parameters of the numerical optimization
  Optimizer.QUASI_NEWTON_BFGS, 1E-2, 1E-2, 1, true, KindOfParameter.PLUGIN, false, threads),
                              1E-6, 1E-6, 1E-2, true, true, false),
  //mixture scoring function and Markov model scoring function
                              //mixture scoring function and Markov model scoring function
  dMixPwms,dMm )
                              dMixPwms,dMm )
};
};
 
//create an new k-fold cross validation using above classifiers
//create an new k-fold cross validation using above classifiers
KFoldCrossValidation cv = new KFoldCrossValidation( classifiers );
KFoldCrossValidation cv = new KFoldCrossValidation( classifiers );
 
//we use a specificity of 0.999 to compute the sensitivity and a sensitivity of 0.95 to compute FPR and PPV
//we use a specificity of 0.999 to compute the sensitivity and a sensitivity of 0.95 to compute FPR and PPV
MeasureParameters mp = new MeasureParameters(false, 0.999, 0.95, 0.95);
MeasureParameters mp = new MeasureParameters(false, 0.999, 0.95, 0.95);
//we do a 10-fold cross validation and partition the data by means of the number of symbols
//we do a 10-fold cross validation and partition the data by means of the number of symbols
KFoldCVAssessParameterSet cvpars = new KFoldCVAssessParameterSet(PartitionMethod.PARTITION_BY_NUMBER_OF_SYMBOLS, length, true, 10);
KFoldCVAssessParameterSet cvpars = new KFoldCVAssessParameterSet(PartitionMethod.PARTITION_BY_NUMBER_OF_SYMBOLS, length, true, 10);
 
//compute the result of the cross validation and print them to System.out
//compute the result of the cross validation and print them to System.out
System.out.println( cv.assess( mp, cvpars, data ) );
System.out.println( cv.assess( mp, cvpars, data ) );
</source>
</source>

Latest revision as of 08:51, 6 June 2011

//read the data of the first class and take the sequence length from it
Sample foreground = new DNASample( args[0] );
int length = foreground.getElementLength();

//the second class is built from the second input file together with that length
Sample background = new Sample( new DNASample( args[1] ), length );
Sample[] data = new Sample[]{ foreground, background };

//all models and scoring functions below use the alphabet of the first data set
AlphabetContainer container = foreground.getAlphabetContainer();

//parameters of a PWM, i.e., an inhomogeneous Markov model (IMM) of order 0
BayesianNetworkModelParameterSet pwmParams = new BayesianNetworkModelParameterSet(
		//alphabet and model length
		container, length,
		//equivalent sample size used to compute the hyper-parameters
		4,
		//identifier of the model
		"my PWM",
		//IMM of order 0
		ModelType.IMM, (byte) 0,
		//estimate the MAP-parameters
		LearningType.ML_OR_MAP );
BayesianNetworkModel pwm = new BayesianNetworkModel( pwmParams );

//ingredients of a mixture model with two PWM components
Model[] pwmComponents = new Model[]{ pwm, pwm };
double[] componentEss = new double[]{ pwm.getESS(), pwm.getESS() };
SmallDifferenceOfFunctionEvaluationsCondition emStop = new SmallDifferenceOfFunctionEvaluationsCondition( 1E-6 );
MixtureModel mixPwms = new MixtureModel(
		//length of the mixture model and its components
		length, pwmComponents,
		//number of starts of the EM
		10,
		//equivalent sample sizes of the components
		componentEss,
		//hyper-parameter for drawing the initial sequence-specific component weights (hidden variables)
		1,
		//stopping criterion
		emStop,
		//LAMBDA is the parameterization by log-probabilities
		Parameterization.LAMBDA );

//inhomogeneous Markov model of order 3
BayesianNetworkModelParameterSet mmParams = new BayesianNetworkModelParameterSet(
		container, length, 256, "my iMM(3)", ModelType.IMM, (byte) 3, LearningType.ML_OR_MAP );
BayesianNetworkModel mm = new BayesianNetworkModel( mmParams );

//PWM scoring function: alphabet, length, equivalent sample size for the plug-in parameters,
//plug-in parameters enabled, and an inhomogeneous Markov structure of order 0
BayesianNetworkScoringFunction dPwm = new BayesianNetworkScoringFunction(
		container, length, 4, true, new InhomogeneousMarkov( 0 ) );

//mixture scoring function: 2 starts, plug-in parameters, two PWM components
MixtureScoringFunction dMixPwms = new MixtureScoringFunction( 2, true, dPwm, dPwm );

//scoring function with an inhomogeneous Markov structure of order 3
BayesianNetworkScoringFunction dMm = new BayesianNetworkScoringFunction(
		container, length, 4, true, new InhomogeneousMarkov( 3 ) );

//number of threads handed to the numerical optimization
int threads = AbstractMultiThreadedOptimizableFunction.getNumberOfAvailableProcessors();

//first classifier: model based, using the mixture model and the Markov model
ModelBasedClassifier modelBased = new ModelBasedClassifier( mixPwms, mm );

//second classifier: conditional likelihood based, using the mixture scoring function
//and the Markov model scoring function
GenDisMixClassifierParameterSet discriminativeParams = new GenDisMixClassifierParameterSet(
		container, length,
		//optimization method followed by the other parameters of the numerical optimization
		Optimizer.QUASI_NEWTON_BFGS, 1E-2, 1E-2, 1, true, KindOfParameter.PLUGIN, false, threads );
MSPClassifier discriminative = new MSPClassifier( discriminativeParams, dMixPwms, dMm );

AbstractScoreBasedClassifier[] classifiers = new AbstractScoreBasedClassifier[]{ modelBased, discriminative };

//k-fold cross validation over the classifiers defined above
KFoldCrossValidation cv = new KFoldCrossValidation( classifiers );

//a specificity of 0.999 is used to compute the sensitivity,
//a sensitivity of 0.95 is used to compute FPR and PPV
MeasureParameters mp = new MeasureParameters( false, 0.999, 0.95, 0.95 );

//10-fold cross validation, partitioning the data by the number of symbols
KFoldCVAssessParameterSet cvpars = new KFoldCVAssessParameterSet(
		PartitionMethod.PARTITION_BY_NUMBER_OF_SYMBOLS, length, true, 10 );

//run the cross validation and print the results to System.out
System.out.println( cv.assess( mp, cvpars, data ) );