Train classifiers using MCL and MSP

Example from the Jstacs documentation.
//load the two FastA files given on the command line:
//args[0] = foreground data, args[1] = background data
Sample[] data = new Sample[]{ new DNASample( args[0] ), new DNASample( args[1] ) };
//both samples use the same DNA alphabet, so take it from the first one
AlphabetContainer container = data[0].getAlphabetContainer();

//equivalent sample size (ESS) used for the priors of both models
double essFg = 4;
double essBg = 4;
//one position weight matrix (order-0 inhomogeneous Markov model) per class
NormalizableScoringFunction pwmFg = new BayesianNetworkScoringFunction( container, data[0].getElementLength(), essFg, true, new InhomogeneousMarkov(0) );
NormalizableScoringFunction pwmBg = new BayesianNetworkScoringFunction( container, data[1].getElementLength(), essBg, true, new InhomogeneousMarkov(0) );

//assemble the classifier parameters; use all available processors for the optimization
int threads = AbstractMultiThreadedOptimizableFunction.getNumberOfAvailableProcessors();
GenDisMixClassifierParameterSet cps = new GenDisMixClassifierParameterSet(
		container, data[0].getElementLength(),
		//numerical optimization settings: BFGS with tight termination thresholds
		Optimizer.QUASI_NEWTON_BFGS, 1E-9, 1E-11, 1,
		false, KindOfParameter.ZEROS, true, threads );
//build two classifiers over the same pair of models
MSPClassifier[] cl = {
         //MCL: no explicit prior
         new MSPClassifier( cps, pwmFg, pwmBg ),
         //MSP: composite prior (here equivalent to a transformed product-Dirichlet)
         new MSPClassifier( cps, new CompositeLogPrior(), pwmFg, pwmBg )
};

//do whatever you like

//e.g., train every classifier on the two samples
for( MSPClassifier c : cl ){
	c.train( data );
}

//e.g., evaluate each classifier (normally done on a separate test data set)
MeasureParameters mp = new MeasureParameters( false, 0.95, 0.999, 0.999 );
for( MSPClassifier c : cl ){
	System.out.println( c.evaluate( mp, true, data ) );
}