Training a classifier and classifying new sequences: Difference between revisions

From Jstacs
Jump to navigation · Jump to search
(New page: <source lang="java5"> //create a DNA-alphabet AlphabetContainer container = new AlphabetContainer( new DNAAlphabet() ); //the length of our input sequences int length = 7; //create a Sam...)
 
No edit summary
Line 1: Line 1:
<source lang="java5">
<source lang="java5">
//create a Sample for each class from the input data, using the DNA alphabet
Sample[] data = new Sample[2];
data[0] = new DNASample( args[0] );


//create a DNA-alphabet
AlphabetContainer container = new AlphabetContainer( new DNAAlphabet() );
//the length of our input sequences
//the length of our input sequences
int length = 7;
int length = data[0].getElementLength();
 
data[1] = new Sample( new DNASample( args[1] ), length );


//create a Sample for each class from the input data, using the alphabet from above
Sample[] data = new Sample[]{ new Sample( container, new StringExtractor( new File(args[0]), 100) ),
                            new Sample( container, new StringExtractor( new File(args[1]), 100 ), length ) };


//sequences that will be classified
//sequences that will be classified
Sample toClassify = new Sample(container, new StringExtractor( new File(args[2]), 100 ) );
Sample toClassify = new DNASample( args[2] );
 
//create a new PWM
//create a new PWM
BayesianNetworkModel pwm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet(
BayesianNetworkModel pwm = new BayesianNetworkModel( new BayesianNetworkModelParameterSet(
//the alphabet and the length of the model:
//the alphabet and the length of the model:
container, length,  
data[0].getAlphabetContainer(), length,  
//the equivalent sample size to compute hyper-parameters
//the equivalent sample size to compute hyper-parameters
4,  
4,  
Line 25: Line 25:
//we want to estimate the MAP-parameters
//we want to estimate the MAP-parameters
LearningType.ML_OR_MAP ) );
LearningType.ML_OR_MAP ) );
 
//create a classifier with a PWM in the foreground and a PWM in the background
//create a classifier with a PWM in the foreground and a PWM in the background
ModelBasedClassifier classifier = new ModelBasedClassifier( pwm, pwm );
ModelBasedClassifier classifier = new ModelBasedClassifier( pwm, pwm );
 
//train the classifier
//train the classifier
classifier.train( data );
classifier.train( data );
 
//use the trained classifier to classify new sequences
//use the trained classifier to classify new sequences
byte[] result = classifier.classify( toClassify );
byte[] result = classifier.classify( toClassify );
 
System.out.println( Arrays.toString( result ) );
System.out.println( Arrays.toString( result ) );
</source>
</source>

Revision as of 14:25, 2 December 2009

// Build one training Sample per class from the input files given on the command line.
Sample[] trainingData = new Sample[2];
trainingData[0] = new DNASample( args[0] );

// The model length is taken from the foreground sequences.
int seqLength = trainingData[0].getElementLength();

// The background sequences are restricted to the same element length.
trainingData[1] = new Sample( new DNASample( args[1] ), seqLength );


// Sequences whose class labels we want to predict.
Sample unlabeled = new DNASample( args[2] );

// A position weight matrix (PWM) is an inhomogeneous Markov model (IMM) of order 0.
BayesianNetworkModel positionWeightMatrix = new BayesianNetworkModel( new BayesianNetworkModelParameterSet(
		// alphabet and length of the model
		trainingData[0].getAlphabetContainer(), seqLength,
		// equivalent sample size used to compute the hyper-parameters
		4,
		// identifier of the model
		"my PWM",
		// an IMM of order 0, i.e. a PWM
		ModelType.IMM, (byte) 0,
		// estimate the MAP parameters
		LearningType.ML_OR_MAP ) );

// Use the same model family for foreground and background.
ModelBasedClassifier cls = new ModelBasedClassifier( positionWeightMatrix, positionWeightMatrix );

// Fit both class models on the training data.
cls.train( trainingData );

// Predict a class index for every unlabeled sequence.
byte[] predictions = cls.classify( unlabeled );

System.out.println( Arrays.toString( predictions ) );