Training a classifier and classifying new sequences: Difference between revisions
From Jstacs
Jump to navigation | Jump to search
(New page: <source lang="java5"> //create a DNA-alphabet AlphabetContainer container = new AlphabetContainer( new DNAAlphabet() ); //the length of our input sequences int length = 7; //create a Sam...) |
No edit summary |
||
Line 1: | Line 1: | ||
<source lang="java5"> | <source lang="java5"> | ||
//create a Sample for each class from the input data, using the DNA alphabet | |||
Sample[] data = new Sample[2]; | |||
data[0] = new DNASample( args[0] ); | |||
//the length of our input sequences | //the length of our input sequences | ||
int length = | int length = data[0].getElementLength(); | ||
data[1] = new Sample( new DNASample( args[1] ), length ); | |||
//sequences that will be classified | //sequences that will be classified | ||
Sample toClassify = new | Sample toClassify = new DNASample( args[2] ); | ||
//create a new PWM | //create a new PWM | ||
BayesianNetworkModel pwm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet( | BayesianNetworkModel pwm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet( | ||
//the alphabet and the length of the model: | //the alphabet and the length of the model: | ||
data[0].getAlphabetContainer(), length, | |||
//the equivalent sample size to compute hyper-parameters | //the equivalent sample size to compute hyper-parameters | ||
4, | 4, | ||
Line 25: | Line 25: | ||
//we want to estimate the MAP-parameters | //we want to estimate the MAP-parameters | ||
LearningType.ML_OR_MAP ) ); | LearningType.ML_OR_MAP ) ); | ||
//create a classifier with a PWM in the foreground and a PWM in the background | //create a classifier with a PWM in the foreground and a PWM in the background | ||
ModelBasedClassifier classifier = new ModelBasedClassifier( pwm, pwm ); | ModelBasedClassifier classifier = new ModelBasedClassifier( pwm, pwm ); | ||
//train the classifier | //train the classifier | ||
classifier.train( data ); | classifier.train( data ); | ||
//use the trained classifier to classify new sequences | //use the trained classifier to classify new sequences | ||
byte[] result = classifier.classify( toClassify ); | byte[] result = classifier.classify( toClassify ); | ||
System.out.println( Arrays.toString( result ) ); | System.out.println( Arrays.toString( result ) ); | ||
</source> | </source> |
Revision as of 14:25, 2 December 2009
// Example: train a two-class classifier on DNA sequences and classify new ones.
// args[0] = input for the first class, args[1] = input for the second class,
// args[2] = sequences to be classified.

// first class: read as a DNA sample; it also supplies the sequence length
Sample firstClass = new DNASample( args[0] );
int seqLength = firstClass.getElementLength();

// second class: DNA sample wrapped in a Sample together with the same length
Sample secondClass = new Sample( new DNASample( args[1] ), seqLength );

// one Sample per class, in class order
Sample[] trainingData = { firstClass, secondClass };

// the sequences we want class labels for
Sample unlabeled = new DNASample( args[2] );

// parameters describing a PWM
BayesianNetworkModelParameterSet pwmParameters = new BayesianNetworkModelParameterSet(
		// alphabet and model length are taken from the first class
		firstClass.getAlphabetContainer(), seqLength,
		// equivalent sample size used to compute the hyper-parameters
		4,
		// an identifier for the model
		"my PWM",
		// a PWM is an inhomogeneous Markov model (IMM) of order 0
		ModelType.IMM, (byte) 0,
		// estimate the MAP-parameters
		LearningType.ML_OR_MAP );
BayesianNetworkModel pwmModel = new BayesianNetworkModel( pwmParameters );

// the same PWM is handed in for the foreground and for the background class
ModelBasedClassifier pwmClassifier = new ModelBasedClassifier( pwmModel, pwmModel );

// fit the classifier to the two training samples
pwmClassifier.train( trainingData );

// classify the new sequences and print the resulting byte array
System.out.println( Arrays.toString( pwmClassifier.classify( unlabeled ) ) );