Performing a 10-fold cross validation: Difference between revisions
From Jstacs
Jump to navigation | Jump to search
No edit summary |
No edit summary |
||
Line 3: | Line 3: | ||
AlphabetContainer container = new AlphabetContainer( new DNAAlphabet() ); | AlphabetContainer container = new AlphabetContainer( new DNAAlphabet() ); | ||
//the length of our input sequences | //the length of our input sequences | ||
int length = | int length = 7; | ||
//create a Sample for each class from the input data, using the alphabet from above | //create a Sample for each class from the input data, using the alphabet from above | ||
Sample[] data = new Sample[]{ | Sample[] data = new Sample[]{ new Sample( container, new StringExtractor( new File(args[0]), 100 ) ), | ||
new Sample( container, new StringExtractor( new File(args[1]), 100 ), length ) }; | |||
}; | |||
//create a new PWM | //create a new PWM | ||
BayesianNetworkModel pwm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet( | BayesianNetworkModel pwm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet( | ||
//the alphabet | //the alphabet and the length of the model: | ||
container, length, | |||
length, | |||
//the equivalent sample size to compute hyper-parameters | //the equivalent sample size to compute hyper-parameters | ||
4, | 4, | ||
Line 36: | Line 32: | ||
//the equivalent sample sizes | //the equivalent sample sizes | ||
new double[]{pwm.getESS(),pwm.getESS()}, | new double[]{pwm.getESS(),pwm.getESS()}, | ||
//the hyper-parameters to draw the initial component weights (hidden variables) | //the hyper-parameters to draw the initial sequence-specific component weights (hidden variables) | ||
1, | 1, | ||
//stopping criterion | //stopping criterion | ||
1E-6, | 1E-6, | ||
//parameterization of the model, | //parameterization of the model, LAMBDA complies with the | ||
//parameterization by probabilities | //parameterization by log-probabilities | ||
Parameterization. | Parameterization.LAMBDA); | ||
//create a new inhomogeneous Markov model of order 3 | //create a new inhomogeneous Markov model of order 3 | ||
BayesianNetworkModel mm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet( container, length, 256, "my PWM", ModelType.IMM, (byte) 3, LearningType.ML_OR_MAP ) ); | BayesianNetworkModel mm = new BayesianNetworkModel( | ||
new BayesianNetworkModelParameterSet( container, length, 256, "my PWM", ModelType.IMM, (byte) 3, LearningType.ML_OR_MAP ) ); | |||
//create a new PWM scoring function | //create a new PWM scoring function | ||
BayesianNetworkScoringFunction dPwm = new BayesianNetworkScoringFunction( | BayesianNetworkScoringFunction dPwm = new BayesianNetworkScoringFunction( | ||
//the alphabet | //the alphabet and the length of the scoring function | ||
container, length, | |||
length, | |||
//the equivalent sample size for the plug-in parameters | //the equivalent sample size for the plug-in parameters | ||
4, | 4, | ||
Line 74: | Line 69: | ||
//create the classifiers | //create the classifiers | ||
AbstractScoreBasedClassifier[] classifiers = new AbstractScoreBasedClassifier[]{ | AbstractScoreBasedClassifier[] classifiers = new AbstractScoreBasedClassifier[]{ | ||
//model based with mixture model and Markov model | |||
new ModelBasedClassifier( mixPwms, mm ), | |||
//conditional likelihood based classifier | |||
new CLLClassifier( new CLLClassifierParameterSet(container, length, | |||
//method for optimizing the conditional likelihood and | |||
//other parameters of the numerical optimization | |||
Optimizer.QUASI_NEWTON_BFGS, 1E-6, 1E-6, 1E-2, true, true, false), | |||
//mixture scoring function and Markov model scoring function | |||
dMixPwms,dMm ) | |||
}; | }; | ||
Revision as of 11:35, 5 September 2008
// ---------------------------------------------------------------------------
// Example: 10-fold cross validation of two classifiers on two-class DNA data.
// Expects two input file names in args[0] and args[1] (one file per class).
// ---------------------------------------------------------------------------

// DNA alphabet shared by all samples, models and scoring functions below
AlphabetContainer container = new AlphabetContainer( new DNAAlphabet() );
// length of the input sequences; also used as the length of every model
int length = 7;

// One Sample per class, both over the alphabet from above.
// Only the second Sample is additionally given the length argument.
Sample firstClassSample = new Sample( container, new StringExtractor( new File( args[0] ), 100 ) );
Sample secondClassSample = new Sample( container, new StringExtractor( new File( args[1] ), 100 ), length );
Sample[] data = new Sample[]{ firstClassSample, secondClassSample };

// --- generative models -----------------------------------------------------

// PWM, i.e. an inhomogeneous Markov model (IMM) of order 0:
//   alphabet and length, equivalent sample size 4 (for the hyper-parameters),
//   identifier "my PWM", MAP parameter estimation
BayesianNetworkModelParameterSet pwmParameters = new BayesianNetworkModelParameterSet(
		container, length, 4, "my PWM", ModelType.IMM, (byte) 0, LearningType.ML_OR_MAP );
BayesianNetworkModel pwm = new BayesianNetworkModel( pwmParameters );

// Mixture model built from two PWM components
Model[] pwmComponents = new Model[]{ pwm, pwm };
// one equivalent sample size per component, taken from the PWM
double[] componentEss = new double[]{ pwm.getESS(), pwm.getESS() };
MixtureModel mixPwms = new MixtureModel(
		length,                     // length of the mixture model
		pwmComponents,              // the two components
		10,                         // number of starts of the EM
		componentEss,               // equivalent sample sizes
		1,                          // hyper-parameter for drawing the initial sequence-specific component weights (hidden variables)
		1E-6,                       // stopping criterion
		Parameterization.LAMBDA );  // LAMBDA complies with the parameterization by log-probabilities

// Inhomogeneous Markov model of order 3 with equivalent sample size 256.
// NOTE(review): the identifier string is the same "my PWM" used for the PWM
// above although this model has order 3 - confirm whether a distinct label is intended.
BayesianNetworkModelParameterSet mmParameters = new BayesianNetworkModelParameterSet(
		container, length, 256, "my PWM", ModelType.IMM, (byte) 3, LearningType.ML_OR_MAP );
BayesianNetworkModel mm = new BayesianNetworkModel( mmParameters );

// --- scoring functions -----------------------------------------------------

// PWM scoring function: alphabet and length, equivalent sample size 4 for the
// plug-in parameters, plug-in parameters enabled, structure = IMM of order 0
InhomogeneousMarkov orderZeroStructure = new InhomogeneousMarkov( 0 );
BayesianNetworkScoringFunction dPwm = new BayesianNetworkScoringFunction( container, length, 4, true, orderZeroStructure );

// Mixture scoring function: 10 starts, plug-in parameters, two PWM components
MixtureScoringFunction dMixPwms = new MixtureScoringFunction( 10, true, dPwm, dPwm );

// Scoring function that is an inhomogeneous Markov model of order 3
InhomogeneousMarkov orderThreeStructure = new InhomogeneousMarkov( 3 );
BayesianNetworkScoringFunction dMm = new BayesianNetworkScoringFunction( container, length, 4, true, orderThreeStructure );

// --- classifiers -----------------------------------------------------------

// model based classifier using the mixture model and the Markov model
ModelBasedClassifier modelBasedClassifier = new ModelBasedClassifier( mixPwms, mm );

// conditional likelihood based classifier: the parameter set names the method
// for optimizing the conditional likelihood (quasi-Newton BFGS) together with
// the other parameters of the numerical optimization
CLLClassifierParameterSet cllParameters = new CLLClassifierParameterSet(
		container, length, Optimizer.QUASI_NEWTON_BFGS, 1E-6, 1E-6, 1E-2, true, true, false );
// it scores with the mixture scoring function and the Markov model scoring function
CLLClassifier cllClassifier = new CLLClassifier( cllParameters, dMixPwms, dMm );

AbstractScoreBasedClassifier[] classifiers = new AbstractScoreBasedClassifier[]{
		modelBasedClassifier,
		cllClassifier
};

// --- assessment ------------------------------------------------------------

// k-fold cross validation over the classifiers created above
KFoldCrossValidation cv = new KFoldCrossValidation( classifiers );
// a specificity of 0.999 is used to compute the sensitivity,
// a sensitivity of 0.95 is used to compute FPR and PPV
MeasureParameters mp = new MeasureParameters( false, 0.999, 0.95, 0.95 );
// 10 folds; the data is partitioned by means of the number of symbols
KFoldCVAssessParameterSet cvpars = new KFoldCVAssessParameterSet(
		PartitionMethod.PARTITION_BY_NUMBER_OF_SYMBOLS, length, true, 10 );
// run the cross validation and print its result to System.out
System.out.println( cv.assess( mp, cvpars, data ) );