Training a classifier and classifying new sequences

From Jstacs
Jump to navigation · Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
// Recipe: train a two-class classifier built from two PWMs and use it to
// label new sequences. Expects three command-line arguments:
//   args[0] - training data of the first class
//   args[1] - training data of the second class
//   args[2] - sequences to be classified

// training data of the first class, read using the DNA alphabet
Sample firstClassData = new DNASample( args[0] );

// the length of the input sequences, taken from the first class
int length = firstClassData.getElementLength();

// training data of the second class, wrapped in a Sample that is constructed
// with the same length as the first class
Sample secondClassData = new Sample( new DNASample( args[1] ), length );

// one Sample per class; this array is what the classifier is trained on
Sample[] data = { firstClassData, secondClassData };

// the sequences that will be classified
Sample toClassify = new DNASample( args[2] );

// parameters describing the PWM
BayesianNetworkModelParameterSet pwmParameters = new BayesianNetworkModelParameterSet(
        // alphabet of the model
        data[0].getAlphabetContainer(),
        // length of the model
        length,
        // equivalent sample size used to compute the hyper-parameters
        4,
        // identifier of the model
        "my PWM",
        // a PWM is an inhomogeneous Markov model (IMM) ...
        ModelType.IMM,
        // ... of order 0
        (byte) 0,
        // estimate the MAP-parameters
        LearningType.ML_OR_MAP );

// the PWM itself
BayesianNetworkModel pwm = new BayesianNetworkModel( pwmParameters );

// classifier with a PWM in the foreground and a PWM in the background
ModelBasedClassifier classifier = new ModelBasedClassifier( pwm, pwm );

// learn the classifier from the training data
classifier.train( data );

// classify the new sequences with the trained classifier
byte[] result = classifier.classify( toClassify );

// print the classification results as returned by the classifier
String printableResult = Arrays.toString( result );
System.out.println( printableResult );